Esempio n. 1
0
def process(count):
    """Crawl every enterprise name listed for shard number ``count``.

    The names are read from ``./enterprise_list/nametoid<count>.txt`` and the
    crawler is constructed with
    ``./enterprise_crawler/nametoid/nametoid<count>.json``.

    Args:
        count: Shard number. It is embedded in both file names and formatted
            with ``%d`` in the progress banner, so it must be numeric.
    """
    # Imported lazily, as in the original; presumably so that each worker
    # configures logging for itself -- TODO confirm against the caller.
    import run
    run.config_logging()

    # Both file names share the same "nametoid<count>" stem.
    stem = 'nametoid' + str(count)
    crawler = NameToIDCrawler('./enterprise_crawler/nametoid/' + stem + '.json')
    names = get_enterprise_list('./enterprise_list/' + stem + '.txt')
    print(len(names))

    banner = (
        '############   Start to crawl nametoid %d with name %s   ################\n'
    )
    for raw_name in names:
        # Entries may still carry their trailing newline.
        ent_name = str(raw_name).rstrip('\n')
        print(banner % (count, ent_name))
        crawler.run(ent_name=ent_name)
Esempio n. 2
0
        'open_detail_info_entry': ''
    }

    def __init__(self, json_restore_path):
        """Initialize the crawler and attach its parser.

        Args:
            json_restore_path: Path forwarded to ``ZongjuCrawler.__init__``
                and also stored on the instance. Presumably the JSON file
                crawl results are written to -- TODO confirm against
                ZongjuCrawler.
        """
        # Explicit (non-super) call to ZongjuCrawler's initializer; it runs
        # first, so the assignments below take effect after whatever it sets.
        ZongjuCrawler.__init__(self, json_restore_path)
        self.json_restore_path = json_restore_path
        # The parser receives a back-reference to this crawler.
        self.parser = HunanParser(self)


class HunanParser(ZongjuParser):
    """Parser that adds nothing but its own constructor to ``ZongjuParser``."""
    def __init__(self, crawler):
        """Store a reference to the crawler this parser works for.

        Args:
            crawler: The owning crawler instance.
        """
        # NOTE(review): ZongjuParser.__init__ is not invoked here -- confirm
        # the base class needs no initialization beyond ``crawler``.
        self.crawler = crawler


if __name__ == '__main__':
    # Script entry point: crawl every enterprise number listed in
    # ./enterprise_list/hunan.txt with a single HunanCrawler instance.
    from CaptchaRecognition import CaptchaRecognition
    import run

    run.config_logging()
    # The captcha recognizer is installed on the class, not on an instance.
    HunanCrawler.code_cracker = CaptchaRecognition('hunan')

    crawler = HunanCrawler('./enterprise_crawler/hunan.json')
    list_path = './enterprise_list/hunan.txt'
    banner = (
        '###################   Start to crawl enterprise with id %s   ###################\n'
    )
    # For a quick single-enterprise run, iterate over a literal list such as
    # ['430000000011972'] instead of the file contents.
    for raw_number in CrawlerUtils.get_enterprise_list(list_path):
        # Entries may still carry their trailing newline.
        ent_number = raw_number.rstrip('\n')
        settings.logger.info(banner % ent_number)
        crawler.run(ent_number=ent_number)