Beispiel #1
0
 def parse_data(self, response):
     """Parse a JSON page of notices and yield one ``DishonestItem`` per notice.

     The name is taken from the notice title; when the title yields no
     usable name, the name found in the notice body is used instead.

     Args:
         response: Response whose ``text`` is a JSON document with a
             ``data`` list and whose ``meta`` carries the ``area`` value.

     Yields:
         DishonestItem: one populated item per entry in ``data``.
     """
     area = response.meta['area']
     # Decode the JSON body into its own name instead of rebinding ``response``.
     payload = json.loads(response.text)
     for data in payload['data']:
         item = DishonestItem()
         notice_title = data['noticeTitle']
         notice_content = data['noticeContent']

         # Name of the listed party, taken from the title when it matches.
         names = re.findall(r'关?于?(.*?)的?列入.*', notice_title)
         item['name'] = names[0] if names else ""

         name_card_num_s = re.findall(r'经?查?,?(.*?)\s*(统一社会信用代码/注册号:(\w+)):.*', notice_content)
         if name_card_num_s:
             # The title fallback above is "" (never None), so test for
             # emptiness; otherwise the body-derived name is never used.
             if not item['name']:
                 item['name'] = name_card_num_s[0][0]
             # NOTE(review): index 1 is the second capture group of the
             # pattern above; verify it captures the bare code rather than
             # the labelled span.
             item['card_num'] = name_card_num_s[0][1]

         item['sexy'] = "企业"  # gender field; fixed marker value
         item['age'] = 0  # age is always stored as 0 here
         item['area'] = area
         item['business_entity'] = "空"  # legal representative placeholder
         item['content'] = notice_content

         # noticeDate is divided by 1000 before conversion — presumably an
         # epoch value in milliseconds; confirm against the API.
         publish_ms = data['noticeDate']
         publish_date = datetime.datetime.fromtimestamp(publish_ms / 1000)
         item['publish_date'] = publish_date.strftime('%Y-%m-%d')
         item['publish_unit'] = data['judAuth_CN']  # publishing authority

         # NOTE(review): create_date is not populated by this callback —
         # confirm it is set elsewhere.
         item['update_date'] = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
         yield item
Beispiel #2
0
    def parse_data(self, response):
        """Parse a JSON response and pretty-print one item per record.

        The first ``$..result`` match in the decoded body is treated as the
        list of records. Each record is copied into a ``DishonestItem`` and
        dumped with ``pprint``; nothing is yielded.

        Args:
            response: Response whose ``text`` attribute holds a JSON document.
        """
        import logging

        payload = json.loads(response.text)
        matches = jsonpath(payload, '$..result')
        # A falsy result means nothing matched (presumably the ``jsonpath``
        # package, which returns False in that case); indexing it would
        # raise TypeError, so stop early and say why.
        if not matches:
            logging.getLogger(__name__).warning(
                "no '$..result' node in response; nothing to parse")
            return

        for result in matches[0]:
            item = DishonestItem()
            item['name'] = result['iname']
            item['card_num'] = result['cardNum']
            item['age'] = int(result['age'])
            item['area'] = result['areaName']
            item['business_entity'] = result['businessEntity']
            item['content'] = result['duty']
            item['publish_date'] = result['publishDate']
            item['publish_unit'] = result['courtName']
            # Take the timestamp once so create_date and update_date agree.
            now = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
            item['create_date'] = now
            item['update_date'] = now

            # NOTE(review): the item is only printed; the original left
            # ``yield item`` commented out — confirm this is intended.
            __import__("pprint").pprint(item)
Beispiel #3
0
 def parse_data(self, response):
     """Decode the JSON body and yield one ``DishonestItem`` per ``data`` entry.

     Args:
         response: Response whose ``text`` is a JSON document containing a
             ``data`` list of records.

     Yields:
         DishonestItem: item filled from the record plus two timestamps.
     """
     # (item field, record key) pairs, copied verbatim in this order.
     copied_fields = (
         ('name', 'name'),
         ('card_num', 'cardNum'),
         ('age', 'age'),
         ('area', 'areaName'),
         ('business_entity', 'buesinessEntity'),
         ('content', 'duty'),
         ('publish_date', 'publishDate'),
         ('publish_unit', 'courtName'),
     )
     stamp_format = '%Y-%m-%d %H:%M:%S'

     payload = json.loads(response.text)
     for record in payload['data']:
         item = DishonestItem()
         for field, record_key in copied_fields:
             item[field] = record[record_key]
         # Creation and update times are each read from the clock.
         item['create_date'] = datetime.now().strftime(stamp_format)
         item['update_date'] = datetime.now().strftime(stamp_format)
         yield item
Beispiel #4
0
 def parse_data(self, response):
     """Parse a JSON response and yield one ``DishonestItem`` per record.

     The first ``$..result`` match in the decoded body is treated as the
     list of records. The publish date is rewritten from the
     ``YYYY年MM月DD日`` form into a dash-separated form.

     Args:
         response: Response whose ``text`` attribute holds a JSON document.

     Yields:
         DishonestItem: one populated item per record.
     """
     import logging

     payload = json.loads(response.text)
     # The records live under 'result'; per the original author's note the
     # 'disp_data' name suggested by the web view is not what is returned.
     matches = jsonpath(payload, '$..result')
     # A falsy result means nothing matched (presumably the ``jsonpath``
     # package, which returns False in that case); indexing it would raise
     # TypeError, so stop early and say why.
     if not matches:
         logging.getLogger(__name__).warning(
             "no '$..result' node in response; nothing to parse")
         return

     for result in matches[0]:
         item = DishonestItem()
         item['name'] = result['iname']
         item['card_num'] = result['cardNum']
         item['age'] = int(result['age'])
         item['area'] = result['areaName']
         item['business_entity'] = result['businessEntity']
         item['content'] = result['duty']
         # Replace the year/month markers with dashes and drop the day marker.
         item['publish_date'] = result['publishDate'].replace('年', '-').replace('月', '-').replace('日', '')
         item['publish_unit'] = result['courtName']
         # Take the timestamp once so create_date and update_date agree.
         now = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
         item['create_date'] = now
         item['update_date'] = now
         yield item
Beispiel #5
0
 def parse_page(self, response):
     """Decode the JSON body and yield one ``DishonestItem`` per ``data`` entry.

     Prints a progress marker before decoding and the number of records
     before iterating.

     Args:
         response: Response whose ``text`` is a JSON document containing a
             ``data`` list of records.

     Yields:
         DishonestItem: item filled from the record plus two timestamps.
     """
     print('解析数据')
     records = json.loads(response.text)['data']
     print(len(records))

     # Item field -> record key, applied in this order for every record.
     field_sources = [
         ('name', 'name'),
         ('card_num', 'cardNum'),
         ('age', 'age'),
         ('area', 'areaName'),
         ('business_entity', 'buesinessEntity'),
         ('content', 'duty'),
         ('publish_date', 'publishDate'),
         ('publish_unit', 'courtName'),
     ]
     for record in records:
         item = DishonestItem()
         for target, source in field_sources:
             item[target] = record[source]
         # Both timestamps are read separately from the clock.
         for stamp_field in ('create_date', 'update_date'):
             item[stamp_field] = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
         yield item
Beispiel #6
0
 def parse_data(self, response):
     """Parse a JSON response and yield one ``DishonestItem`` per record.

     The first ``$..result`` match in the decoded body is treated as the
     list of records.

     Args:
         response: Response whose ``text`` attribute holds a JSON document.

     Yields:
         DishonestItem: one populated item per record.
     """
     import logging

     payload = json.loads(response.text)
     matches = jsonpath(payload, '$..result')
     # A falsy result means nothing matched (presumably the ``jsonpath``
     # package, which returns False in that case); indexing it would raise
     # TypeError, so stop early and say why.
     if not matches:
         logging.getLogger(__name__).warning(
             "no '$..result' node in response; nothing to parse")
         return

     for result in matches[0]:
         item = DishonestItem()
         item['name'] = result['iname']
         item['card_num'] = result['cardNum']
         item['age'] = int(result['age'])
         item['area'] = result['areaName']
         item['business_entity'] = result['businessEntity']
         item['content'] = result['duty']
         item['publish_date'] = result['publishDate']
         item['publish_unit'] = result['courtName']
         # Take the timestamp once so create_date and update_date agree.
         now = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
         item['create_date'] = now
         item['update_date'] = now
         yield item
Beispiel #7
0
    def parse_data(self, response):
        """Parse a JSON page of notices and yield one ``DishonestItem`` each.

        The name is first taken from the notice title and then replaced by
        the name found in the notice body whenever the body pattern matches.

        Args:
            response: Response whose ``text`` is a JSON document with a
                ``data`` list and whose ``meta`` carries the ``area`` value.

        Yields:
            DishonestItem: one populated item per entry in ``data``.
        """
        area = response.meta['area']
        results = json.loads(response.text)
        for data in results['data']:
            item = DishonestItem()
            notice_title = data['noticeTitle']
            notice_content = data['noticeContent']

            # Raw strings keep ``\s`` / ``\w`` as regex escapes instead of
            # relying on Python passing unknown string escapes through.
            names = re.findall(r'关?于?(.+?)的?列入.*', notice_title)
            item['name'] = names[0] if names else ''

            name_card_num_s = re.findall(
                r'经?查?,?(.+?)\s*(统一社会信用代码/注册号:(\w+)):.*', notice_content)
            if name_card_num_s:
                # A body match overrides the title-derived name.
                item['name'] = name_card_num_s[0][0]
                # NOTE(review): index 1 is the second capture group of the
                # pattern above; verify it captures the bare code rather
                # than the labelled span.
                item['card_num'] = name_card_num_s[0][1]

            item['age'] = 0  # age is always stored as 0 here
            item['area'] = area
            item['business_entity'] = ''
            item['content'] = notice_content

            # noticeDate is divided by 1000 before conversion — presumably
            # an epoch value in milliseconds; confirm against the API.
            publish_ms = data['noticeDate']
            publish_date = datetime.fromtimestamp(publish_ms / 1000)
            item['publish_date'] = publish_date.strftime('%Y-%m-%d')
            item['publish_unit'] = data['judAuth_CN']

            # Take the timestamp once so create_date and update_date agree.
            now = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
            item['create_date'] = now
            item['update_date'] = now
            yield item
Beispiel #8
0
 def parse_data(self, response):
     """Begin parsing a JSON page of notices into ``DishonestItem`` objects.

     NOTE(review): this definition looks unfinished — it never yields or
     returns an item, and its last statement calls ``re.findall`` with a
     pattern but no input string.
     """
     # Read the region value.
     # NOTE(review): this indexes the response object itself; confirm it is
     # intended rather than a lookup on ``response.meta``.
     area = response['area']
     # Decode the JSON body into a dict.
     result = json.loads(response.text)
     # List of notice entries.
     datas = result['data']
     # Walk the notices one by one.
     for data in datas:
         item = DishonestItem()
         # Notice title.
         notice_title = data['noticeTitle']
         # Notice body.
         notice_content = data['noticeContent']
         # Name of the listed party, taken from the title when it matches.
         names = re.findall("关?于?将?(.+?)的?列入.*", notice_title)
         item['name'] = names[0] if len(names) != 0 else ''
         # NOTE(review): re.findall requires a string argument as well as a
         # pattern, so this call raises TypeError as written; the pattern
         # and input still need to be supplied.
         name_card_num = re.findall("")
Beispiel #9
0
 def parse_data(self, response):
     """Parse a JSON response and yield one ``DishonestItem`` per record.

     The first ``$..result`` match in the decoded body is treated as the
     list of records.

     Args:
         response: Response whose ``text`` attribute holds a JSON document.

     Yields:
         DishonestItem: one populated item per record.
     """
     import logging

     payload = json.loads(response.text)
     matches = jsonpath(payload, '$..result')
     # A falsy result means nothing matched (presumably the ``jsonpath``
     # package, which returns False in that case); indexing it would raise
     # TypeError, so stop early and say why.
     if not matches:
         logging.getLogger(__name__).warning(
             "no '$..result' node in response; nothing to parse")
         return

     for res in matches[0]:
         item = DishonestItem()
         item['name'] = res['iname']
         item['sexy'] = res['sexy']  # gender field
         item['card_num'] = res['cardNum']
         item['age'] = int(res['age'])
         item['area'] = res['areaName']
         item['business_entity'] = res['businessEntity']
         item['content'] = res['duty']
         item['publish_date'] = res['publishDate']
         item['publish_unit'] = res['courtName']
         # NOTE(review): create_date is not populated by this callback —
         # confirm it is set elsewhere.
         item['update_date'] = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
         yield item