def get_company_info(self, response):
    """Build a CompassItem from a company detail page.

    The company name is read from the ``value`` attribute of the
    ``corpName`` input. The other fields are taken from the text nodes of
    the table cells under ``div[@class="t_end"]``: only the cells at odd
    positions (1, 3, 5, ...) are kept, and exactly five are expected.

    Args:
        response: page response exposing ``xpath()`` and ``url``.

    Returns:
        A one-element list holding the populated CompassItem.

    Raises:
        ValueError: if the number of kept cells is not exactly five.
    """
    company_name = response.xpath(
        '//input[@id="corpName"]/@value').extract_first()
    cells = response.xpath('//div[@class="t_end"]/ul//tr/td/text()')

    # Cells at even positions are skipped; presumably they are the field
    # labels -- TODO confirm against the page markup.
    values = [cell.extract() for pos, cell in enumerate(cells) if pos % 2]
    honor_code, representative, compass_type, provice, operating_addr = values

    # Per the original note, the item class checks that the keys are valid.
    # 'crawl_time' is not populated by this method.
    item = CompassItem({
        'compass_name': company_name,
        'compass_link': response.url,
        'honor_code': honor_code,
        'representative': representative,
        'compass_type': compass_type,
        'provice': provice,
        'operating_addr': operating_addr,
        'establish_time': None,
        'register_capital': None,
        'net_asset': None,
    })
    return [item]
def get_company_info(self, response):
    """Build a CompassItem from the ``basic_infor`` table of a detail page.

    Each field is read from a specific table row (by position) and a
    specific cell (by class or id), then stripped of surrounding
    whitespace. A cell that is missing or has no text yields ``''``.

    Args:
        response: page response exposing ``xpath()`` and ``url``.

    Returns:
        A one-element list holding the populated CompassItem.

    Raises:
        IndexError: if the table has fewer than four rows.
    """
    def cell_text(row, rule):
        # extract_first() would return None when nothing matches, which
        # cannot be stripped; fall back to an empty string instead.
        return row.xpath(rule).extract_first(default='').strip()

    rows = response.xpath('//div[@class="basic_infor"]//tbody/tr')

    compass_name = cell_text(rows[0], './td[@class="name_level3"]/text()')
    honor_code = cell_text(rows[1], './td[@id="LicenseNum"]/text()')
    representative = cell_text(rows[2], './td[@id="LegalMan"]/text()')
    compass_type = cell_text(rows[2], './td[@id="EconType"]/text()')
    provice = cell_text(rows[3], './td[@id="Td1"]/text()')
    # Select the text node, like every other field; without /text() the
    # whole <td> element markup would be stored as the address.
    operating_addr = cell_text(rows[3], './td[@id="Description"]/text()')

    # Per the original note, the item class checks that the keys are valid.
    company_item = CompassItem({
        'compass_name': compass_name,
        'compass_link': response.url,
        'honor_code': honor_code,  # credit code
        'representative': representative,  # legal representative
        'compass_type': compass_type,  # company type
        'provice': provice,
        'operating_addr': operating_addr,  # operating address
        'establish_time': None,
        'register_capital': None,
        'net_asset': None,
    })
    return [company_item]
def parse_compass_info(self, unit, url):
    """Map one company record onto a CompassItem.

    Args:
        unit: mapping with the keys ``CorpName``, ``CorpCode``,
            ``LegalMan``, ``EconomicNum``, ``AreaName``, ``Address`` and
            ``RegPrin``.
        url: link stored on the item as ``compass_link``.

    Returns:
        A one-element list holding the populated CompassItem.

    Raises:
        KeyError: if ``unit`` lacks one of the keys listed above.
    """
    # Per the original note, the item class checks that the keys are valid.
    fields = {
        'compass_name': unit['CorpName'],
        'compass_link': url,
        'honor_code': unit['CorpCode'],  # credit code
        'representative': unit['LegalMan'],  # legal representative
        'compass_type': unit['EconomicNum'],  # company type
        # Keep only the part of the area name before the first '·'.
        'provice': unit['AreaName'].split('·')[0],
        'operating_addr': unit['Address'],  # operating address
        # NOTE(review): this is the string 'None', not the None object --
        # confirm that is intended.
        'establish_time': 'None',
        'register_capital': unit['RegPrin'],
        'net_asset': None,
    }
    return [CompassItem(fields)]
def get_company_info(self, response):
    """Build a CompassItem from the ``basic_infor`` table of a detail page.

    Cells are looked up by class or id across all table rows at once.

    NOTE(review): the XPath rules select the ``<td>`` elements themselves
    (there is no ``/text()`` step), so the stored values are the
    serialized cell markup rather than bare text -- confirm that is
    intended.

    Args:
        response: page response exposing ``xpath()`` and ``url``.

    Returns:
        A one-element list holding the populated CompassItem.

    Raises:
        IndexError: if the name, licence-number, legal-representative or
            company-type cell is not found.
    """
    rows = response.xpath('//div[@class="basic_infor"]//tbody/tr')

    # Per the original note, the item class checks that the keys are valid.
    company_item = CompassItem({
        'compass_name': rows.xpath('./td[@class="name_level3"]').extract()[0],
        'compass_link': response.url,
        # credit code
        'honor_code': rows.xpath('./td[@id="LicenseNum"]').extract()[0],
        # legal representative
        'representative': rows.xpath('./td[@id="LegalMan"]').extract()[0],
        # company type
        'compass_type': rows.xpath('./td[@id="EconType"]').extract()[0],
        'provice': ''.join(rows.xpath('./td[@id="Td1"]').extract()),
        # operating address; extract() is required here because joining the
        # selector objects themselves raises TypeError.
        'operating_addr': ''.join(
            rows.xpath('./td[@id="Description"]').extract()),
        'establish_time': None,
        'register_capital': None,
        'net_asset': None,
    })
    return [company_item]
def extract_compass_info(self, resp_detail, com_rules):
    """Fill a CompassItem from a detail page using a table of XPath rules.

    Args:
        resp_detail: page response exposing ``xpath()`` and ``url``.
        com_rules: mapping of rule entries. ``com_rules['cnodes'][0]`` is
            the XPath of the container node; its first match is the
            context for every field rule. Entries whose key contains
            ``'node'`` are skipped. Every other value is a sequence of
            ``(xpath_rule_or_None, item_field_name)``.

    Returns:
        A one-element list holding the populated CompassItem. A field
        whose rule is ``None`` or matches nothing is set to ``''``;
        otherwise the first match is stored with all newlines, tabs,
        carriage returns and spaces removed.

    Raises:
        IndexError: if the container XPath matches nothing.
    """
    response = resp_detail
    node = response.xpath(com_rules.get('cnodes')[0])[0]

    company_item = CompassItem()
    company_item['compass_link'] = response.url

    for key, spec in com_rules.items():
        if 'node' in key:
            continue
        rule, map_key = spec[0], spec[1]
        if rule is None:
            company_item[map_key] = ''
            continue
        # Default to '' so a rule with no match gives an empty field
        # instead of failing on None.
        value = node.xpath(rule).extract_first(default='')
        for unwanted in ('\n', '\t', '\r', ' '):
            value = value.replace(unwanted, '')
        company_item[map_key] = value
    return [company_item]
def get_company_info(self, response):
    """Build a CompassItem from the ``tLayer-1`` block of a detail page.

    The name comes from the block's ``h3`` heading; the other fields come
    from class-less cells of specific rows of the block's table. A credit
    code shorter than seven characters is replaced by the string
    ``'None'``.

    Args:
        response: page response exposing ``xpath()`` and ``url``.

    Returns:
        A one-element list holding the populated CompassItem.

    Raises:
        ValueError: if the first table row does not yield exactly two
            text nodes.
        IndexError: if the third table row yields no text node.
    """
    def _texts(rule):
        # All text nodes matched by ``rule``, as a list of strings.
        return response.xpath(rule).extract()

    compass_name = ''.join(
        _texts('//div[@class="tLayer-1"]/h3/text()')).strip()

    # The first row carries both the credit code and the registered capital.
    honor_code, register_capital = _texts(
        '//div[@class="tLayer-1"]/table/tr[1]/td[not(@class)]/text()')
    if len(honor_code) < 7:
        honor_code = 'None'

    representative = ''.join(_texts(
        '//div[@class="tLayer-1"]/table/tr[2]/td[not(@class)][1]/text()'))
    compass_type = _texts(
        '//div[@class="tLayer-1"]/table/tr[3]/td[not(@class)]/text()')[0]
    establish_time = ''.join(_texts(
        '//div[@class="tLayer-1"]/table/tr[4]/td[not(@class)][2]/text()'
    )).strip()
    provice = ''.join(_texts(
        '//div[@class="tLayer-1"]/table/tr[5]/td[not(@class)][2]/text()'))
    operating_addr = ''.join(_texts(
        '//div[@class="tLayer-1"]/table/tr[6]/td[not(@class)][1]/text()'))

    # Per the original note, the item class checks that the keys are valid.
    fields = {
        'compass_name': compass_name,
        'compass_link': response.url,
        'honor_code': honor_code,  # credit code
        'representative': representative,  # legal representative
        'compass_type': compass_type,  # company type
        'provice': provice,
        'operating_addr': operating_addr,  # operating address
        'establish_time': establish_time,
        'register_capital': register_capital,
        'net_asset': None,
    }
    return [CompassItem(fields)]