def schedule_crawl(spider, workflow, **kwargs):
    """Schedule a crawl using configuration from the workflow objects.

    :param spider: name of the spider to schedule on scrapyd.
    :param workflow: workflow name to attach to the resulting crawler job.
    :param kwargs: extra spider arguments; the special key
        ``crawler_settings`` carries per-call scrapyd setting overrides and
        is NOT forwarded to the spider.
    :returns: the created ``CrawlerJob`` record.
    :raises CrawlerScheduleError: if scrapyd did not return a job id.
    """
    from inspire_crawler.utils import get_crawler_instance
    crawler = get_crawler_instance()

    # Copy the config mapping: ``config.get`` returns the live dict and
    # updating it in place would leak per-call overrides into the global
    # app config (and crash with AttributeError when the key is unset).
    crawler_settings = dict(current_app.config.get('CRAWLER_SETTINGS') or {})
    # ``pop`` so the settings override is not also passed to the spider
    # as a regular crawl argument below.
    crawler_settings.update(kwargs.pop("crawler_settings", {}))

    # Copy kwargs instead of aliasing it, so the caller's dict is untouched.
    crawler_arguments = dict(kwargs)
    crawler_arguments.update(
        current_app.config.get('CRAWLER_SPIDER_ARGUMENTS', {}).get(spider, {}))

    job_id = crawler.schedule(
        project=current_app.config.get('CRAWLER_PROJECT'),
        spider=spider,
        settings=crawler_settings,
        **crawler_arguments)
    if not job_id:
        raise CrawlerScheduleError(
            "Could not schedule '{0}' spider for project '{1}'".format(
                spider, current_app.config.get('CRAWLER_PROJECT')))

    crawler_job = CrawlerJob.create(
        job_id=job_id,
        spider=spider,
        workflow=workflow,
    )
    db.session.commit()
    current_app.logger.info(
        "Scheduled scrapyd job with id: {0}".format(job_id))
    current_app.logger.info("Created crawler job with id:{0}".format(
        crawler_job.id))
    return crawler_job
def schedule_crawl(spider, workflow, **kwargs):
    """Schedule a crawl using configuration from the workflow objects.

    :param spider: name of the spider to schedule on scrapyd.
    :param workflow: workflow name to attach to the resulting crawler job.
    :param kwargs: extra spider arguments; the special key
        ``crawler_settings`` carries per-call scrapyd setting overrides and
        is NOT forwarded to the spider.
    :returns: the created ``CrawlerJob`` record.
    :raises CrawlerScheduleError: if scrapyd did not return a job id.
    """
    from inspire_crawler.utils import get_crawler_instance
    crawler = get_crawler_instance()

    # Copy the config mapping: ``config.get`` returns the live dict and
    # updating it in place would leak per-call overrides into the global
    # app config (and crash with AttributeError when the key is unset).
    crawler_settings = dict(current_app.config.get('CRAWLER_SETTINGS') or {})
    # ``pop`` so the settings override is not also passed to the spider
    # as a regular crawl argument below.
    crawler_settings.update(kwargs.pop("crawler_settings", {}))

    # Copy kwargs instead of aliasing it, so the caller's dict is untouched.
    crawler_arguments = dict(kwargs)
    crawler_arguments.update(
        current_app.config.get('CRAWLER_SPIDER_ARGUMENTS', {}).get(spider, {})
    )

    job_id = crawler.schedule(
        project=current_app.config.get('CRAWLER_PROJECT'),
        spider=spider,
        settings=crawler_settings,
        **crawler_arguments
    )
    if not job_id:
        raise CrawlerScheduleError(
            "Could not schedule '{0}' spider for project '{1}'".format(
                spider, current_app.config.get('CRAWLER_PROJECT')
            )
        )

    crawler_job = CrawlerJob.create(
        job_id=job_id,
        spider=spider,
        workflow=workflow,
    )
    db.session.commit()
    current_app.logger.info("Scheduled job {0}".format(job_id))
    # Return the created record (backward compatible: callers that ignored
    # the previous ``None`` return are unaffected).
    return crawler_job
def test_utils(app):
    """Ensure a crawler instance can be obtained inside the app context."""
    with app.app_context():
        instance = get_crawler_instance()
        assert instance