# Wire up and run the PositionTask job with the Italy parser.
# `config`, `options`, `force` and `country` come from the enclosing scope.

# Google Maps settings; note this updates the object returned by
# config.get('googlemaps') in place, setting its language to 'it'.
gmaps_config = config.get('googlemaps')
gmaps_config.update(language='it')

# Loader built from the Google Maps settings and the 'mongodb' settings.
loader = LoaderFactory.loader_gmaps_with_cache(
    gmaps_config=gmaps_config,
    storage_config=config.get('mongodb'),
)
options.update(loader=loader)

doc_factory = DocFactory(config.get('mongodb'))
options.update(doc_factory=doc_factory)

# Counter named 'gmap' running from 1 up to the configured geocoding limit.
mongo_config = config.get('mongodb')
connection = MongoClient(mongo_config['host'], mongo_config['port'])
counter = CounterMongoDB(
    counter_name='gmap',
    start=1,
    end=gmaps_config.get('geocoding').get('limit'),
    step=1,
    ttl=86400,  # presumably seconds (one day) - TODO confirm in CounterMongoDB
    connection=connection,
)

options.update(force_update=force, parser=Italy)

# Storage, log and task list are all keyed by the task name for `country`.
storage = Storage(
    job_name=PositionTask.get_name(country),
    storage_config=config.get('mongodb'),
)
log = Log(
    log_name=PositionTask.get_name(country),
    config=config.get('mongodb'),
)
task_list = TaskListMongoDB(
    task_type=PositionTask.get_name(country),
    options=options,
    storage=storage,
    log=log,
)

# Single pass: the executor is given the task list together with the counter.
executor = ExecutorWithLimit(task_list, counter)
executor.run()
# Wire up the RequestTask job with the France parser and run it in a loop.
# `config`, `options`, `force` and `country` come from the enclosing scope.

loader = LoaderFactory.loader_with_mongodb(
    storage_config=config.get('mongodb'),
)
options.update(loader=loader)

doc_factory = DocFactory(config.get('mongodb'))
options.update(doc_factory=doc_factory)

# Remaining task options: parser, request headers and the French Wikipedia
# search URL template ({0} is the placeholder left for the search term).
options.update(
    force_update=force,
    parser=France,
    headers={'User-Agent': 'Mozilla/5.0'},
    url_format="https://fr.wikipedia.org/w/index.php?search={0}&title=Sp%C3%A9cial:Recherche&profile=default&fulltext=1&searchengineselect=mediawiki&searchToken=ac9zaxa1lggzxpdhc5ukg06t6",
)

# Storage, log and task list are all keyed by the task name for `country`.
storage = Storage(
    job_name=RequestTask.get_name(country),
    storage_config=config.get('mongodb'),
)
log = Log(
    log_name=RequestTask.get_name(country),
    config=config.get('mongodb'),
)
task_list = TaskListMongoDB(
    task_type=RequestTask.get_name(country),
    options=options,
    storage=storage,
    log=log,
)

executor = Executor(task_list)

# Run the executor over and over, calling sleep(10) after each pass.
# There is no exit condition: the loop only ends if something raises.
while True:
    executor.run()
    sleep(10)
# Wire up and run the AddressTask job with the Italy parser.
# NOTE: `gmaps_config`, `doc_factory` and `options` are not assigned in this
# fragment; they must already exist in the enclosing scope.

# Counter named 'gmap' running from 1 up to the configured geocoding limit.
mongo_config = config.get('mongodb')
connection = MongoClient(mongo_config['host'], mongo_config['port'])
counter = CounterMongoDB(
    counter_name='gmap',
    start=1,
    end=gmaps_config.get('geocoding').get('limit'),
    step=1,
    ttl=86400,  # presumably seconds (one day) - TODO confirm in CounterMongoDB
    connection=connection,
)

options.update(doc_factory=doc_factory, force_update=force, parser=Italy)

# Storage, log and task list are all keyed by the task name for `country`.
storage = Storage(
    job_name=AddressTask.get_name(country),
    storage_config=config.get('mongodb'),
)
log = Log(
    log_name=AddressTask.get_name(country),
    config=config.get('mongodb'),
)
task_list = TaskListMongoDB(
    task_type=AddressTask.get_name(country),
    options=options,
    storage=storage,
    log=log,
)

# Single pass: the executor is given the task list together with the counter.
executor = ExecutorWithLimit(task_list, counter)
executor.run()
storage_config=config.get('mongodb')) options.update(loader=loader) doc_factory = DocFactory(config.get('mongodb')) options.update(doc_factory=doc_factory) options.update(force_update=force) options.update(parser=Italy) options.update(host='it.wikipedia.org') options.update(headers={'User-Agent': 'Mozilla/5.0'}) storage = Storage(job_name=PageRecursiveTask.TYPE, storage_config=config.get('mongodb')) options.update() options.update(log_history=LogHistory('log/{}.log'.format(title))) options.update(recursive_storage=RecursiveParser(title, config.get('mongodb'))) log = Log(log_name=PageRecursiveTask.TYPE, config=config.get('mongodb')) task_list = TaskListMongoDB(task_type=PageRecursiveTask.TYPE, options=options, storage=storage, log=log) executor = Executor(task_list) while True: executor.run() sleep(10)