Esempio n. 1
0
    def get_company_info(self, response):
        """Build a single CompassItem from a company detail page.

        The company name is read from the ``value`` attribute of the
        ``corpName`` input. The remaining fields come from the text nodes
        of the table cells under ``div.t_end``: only the odd-indexed text
        nodes are kept, and exactly five kept values are required,
        otherwise the unpacking below raises ``ValueError``.

        Returns a one-element list holding the item.
        """
        name = response.xpath(
            '//input[@id="corpName"]/@value').extract_first()
        cell_texts = response.xpath('//div[@class="t_end"]/ul//tr/td/text()')

        # Even-indexed text nodes are skipped; presumably they are the
        # field labels -- confirm against the page markup.
        values = [
            cell.extract()
            for idx, cell in enumerate(cell_texts)
            if idx % 2
        ]
        (honor_code, representative, compass_type, provice,
         operating_addr) = values

        # Original note: the item checks automatically that keys are legal.
        item = CompassItem({
            'compass_name': name,
            'compass_link': response.url,
            'honor_code': honor_code,
            'representative': representative,
            'compass_type': compass_type,
            'provice': provice,
            'operating_addr': operating_addr,
            'establish_time': None,
            'register_capital': None,
            'net_asset': None,
        })
        return [item]
 def get_company_info(self, response):
     """Build a single CompassItem from the ``basic_infor`` table.

     Each field is read from a fixed row of the table (by index) with a
     cell-specific XPath; the first match is taken and stripped. A missing
     row raises ``IndexError`` and a missing cell raises ``AttributeError``
     (``extract_first()`` yields ``None``).

     Returns a one-element list holding the item.
     """
     rows = response.xpath('//div[@class="basic_infor"]//tbody/tr')

     # (item key, row index, XPath relative to that row), in extraction
     # order.
     # NOTE(review): the Description query has no trailing /text(), so the
     # serialized <td> element is stored rather than its text -- confirm
     # whether that is intended.
     field_queries = (
         ('compass_name', 0, './td[@class="name_level3"]/text()'),
         ('honor_code', 1, './td[@id="LicenseNum"]/text()'),
         ('representative', 2, './td[@id="LegalMan"]/text()'),
         ('compass_type', 2, './td[@id="EconType"]/text()'),
         ('provice', 3, './td[@id="Td1"]/text()'),
         ('operating_addr', 3, './td[@id="Description"]'),
     )
     found = {}
     for key, row_idx, query in field_queries:
         found[key] = rows[row_idx].xpath(query).extract_first().strip()

     # Original note: the item checks automatically that keys are legal.
     item = CompassItem({
         'compass_name': found['compass_name'],
         'compass_link': response.url,
         'honor_code': found['honor_code'],  # credit code
         'representative': found['representative'],  # legal representative
         'compass_type': found['compass_type'],  # company type
         'provice': found['provice'],
         'operating_addr': found['operating_addr'],  # operating address
         'establish_time': None,
         'register_capital': None,
         'net_asset': None,
     })
     return [item]
 def parse_compass_info(self, unit, url):
     """Map one company record onto a CompassItem.

     ``unit`` is indexed by the keys ``CorpName``, ``CorpCode``,
     ``LegalMan``, ``EconomicNum``, ``AreaName``, ``Address`` and
     ``RegPrin``; ``url`` is stored as the item's link. Only the part of
     ``AreaName`` before the first '·' is kept as the province.

     Returns a one-element list holding the item.
     """
     fields = {
         'compass_name': unit['CorpName'],
         'compass_link': url,
         'honor_code': unit['CorpCode'],  # credit code
         'representative': unit['LegalMan'],  # legal representative
         'compass_type': unit['EconomicNum'],  # company type
         'provice': unit['AreaName'].split('·')[0],
         'operating_addr': unit['Address'],  # operating address
         # Stored as the literal string 'None', not the None object.
         'establish_time': 'None',
         'register_capital': unit['RegPrin'],
         'net_asset': None,
     }
     # Original note: the item checks automatically that keys are legal.
     return [CompassItem(fields)]
Esempio n. 4
0
 def get_company_info(self, response):
     """Build a single CompassItem from the ``basic_infor`` table.

     Every query selects a ``<td>`` element (there is no ``/text()``
     step), so the stored values are the serialized cells. Name, credit
     code, representative and company type take the first match and raise
     ``IndexError`` when the cell is absent; province and operating
     address concatenate all matches and fall back to ``''``.

     Returns a one-element list holding the item.
     """
     nodes = response.xpath('//div[@class="basic_infor"]//tbody/tr')
     # Original note: the item checks automatically that keys are legal.
     company_item = CompassItem({
         'compass_name': nodes.xpath(
             './td[@class="name_level3"]').extract()[0],
         'compass_link': response.url,
         # credit code
         'honor_code': nodes.xpath('./td[@id="LicenseNum"]').extract()[0],
         # legal representative
         'representative': nodes.xpath('./td[@id="LegalMan"]').extract()[0],
         # company type
         'compass_type': nodes.xpath('./td[@id="EconType"]').extract()[0],
         'provice': ''.join(nodes.xpath('./td[@id="Td1"]').extract()),
         # operating address; .extract() is required here because
         # str.join() needs strings and raises TypeError on Selector
         # objects.
         'operating_addr': ''.join(
             nodes.xpath('./td[@id="Description"]').extract()),
         'establish_time': None,
         'register_capital': None,
         'net_asset': None,
     })
     return [company_item]
Esempio n. 5
0
 def extract_compass_info(self, resp_detail, com_rules):
     """Fill a CompassItem from a detail response using a rule table.

     ``com_rules['cnodes'][0]`` is an XPath whose first match becomes the
     root node for all other queries. Every other entry whose key does not
     contain ``'node'`` is read as ``(xpath, item_key, ...)``: a ``None``
     xpath stores ``''``; otherwise the first match is stored with
     newlines, tabs, carriage returns and double spaces removed. A query
     with no match raises ``AttributeError`` (``extract_first()`` yields
     ``None``).

     Returns a one-element list holding the item.
     """
     root = resp_detail.xpath(com_rules.get('cnodes')[0])[0]
     item = CompassItem()
     item['compass_link'] = resp_detail.url

     for rule_name, spec in com_rules.items():
         # Keys containing 'node' (such as 'cnodes') configure the root
         # node; they are not field rules.
         if 'node' in rule_name:
             continue
         query, target_key = spec[0], spec[1]
         if query is None:
             item[target_key] = ''
             continue
         text = root.xpath(query).extract_first()
         # Same removals, in the same order, as the original chained
         # replace() calls.
         for junk in ('\n', '\t', '\r', '  '):
             text = text.replace(junk, '')
         item[target_key] = text
     return [item]
 def get_company_info(self, response):
     """Build a single CompassItem from the ``tLayer-1`` detail block.

     The name comes from the block's ``<h3>``; the other fields come from
     fixed rows of its table, reading only ``<td>`` cells that carry no
     ``class`` attribute. Row 1 must yield exactly two text nodes (credit
     code and registered capital) or the unpacking raises ``ValueError``;
     a credit code shorter than 7 characters is replaced by the string
     ``'None'``. Row 3 must yield at least one text node or ``IndexError``
     is raised.

     Returns a one-element list holding the item.
     """
     def joined(query):
         # Concatenate every text node matched by ``query``.
         return ''.join(response.xpath(query).extract())

     name = joined('//div[@class="tLayer-1"]/h3/text()').strip()

     first_row = response.xpath(
         '//div[@class="tLayer-1"]/table/tr[1]/td[not(@class)]/text()'
     ).extract()
     honor_code, register_capital = first_row
     if len(honor_code) < 7:
         honor_code = 'None'

     representative = joined(
         '//div[@class="tLayer-1"]/table/tr[2]/td[not(@class)][1]/text()')
     compass_type = response.xpath(
         '//div[@class="tLayer-1"]/table/tr[3]/td[not(@class)]/text()'
     ).extract()[0]
     establish_time = joined(
         '//div[@class="tLayer-1"]/table/tr[4]/td[not(@class)][2]/text()'
     ).strip()
     provice = joined(
         '//div[@class="tLayer-1"]/table/tr[5]/td[not(@class)][2]/text()')
     operating_addr = joined(
         '//div[@class="tLayer-1"]/table/tr[6]/td[not(@class)][1]/text()')

     # Original note: the item checks automatically that keys are legal.
     item = CompassItem({
         'compass_name': name,
         'compass_link': response.url,
         'honor_code': honor_code,  # credit code
         'representative': representative,  # legal representative
         'compass_type': compass_type,  # company type
         'provice': provice,
         'operating_addr': operating_addr,  # operating address
         'establish_time': establish_time,
         'register_capital': register_capital,
         'net_asset': None,
     })
     return [item]