def make_data(settings):
    """Build the data dict used to drive a spider monitor run in tests.

    A crawler is created from ``settings`` and a spider named
    ``"example.com"`` is bound to it. The returned dict carries the
    crawler's current stats, the crawler and spider themselves, a
    ``SpiderMonitorRunner`` built for that spider, and a ``"job"`` entry
    that is always ``None``.
    """
    test_crawler = get_crawler(settings_dict=settings)
    test_spider = Spider.from_crawler(test_crawler, "example.com")

    # Stats are read from the crawler before the runner is constructed.
    current_stats = test_crawler.stats.get_stats()
    monitor_runner = SpiderMonitorRunner(spider=test_spider)

    return {
        "stats": current_stats,
        "crawler": test_crawler,
        "spider": test_spider,
        "runner": monitor_runner,
        "job": None,
    }
def make_data_for_monitor(settings=None, stats=None):
    """Build monitor test data with caller-supplied stats.

    Unlike reading stats from the crawler, the ``"stats"`` entry here is
    whatever the caller passes in. A falsy ``settings`` or ``stats``
    (including ``None``) is replaced by a fresh empty dict.

    Returns a dict with the keys ``"stats"``, ``"crawler"``, ``"spider"``,
    ``"runner"`` and ``"job"`` (the last is always ``None``).
    """
    settings_dict = settings if settings else {}
    test_crawler = get_crawler(settings_dict=settings_dict)
    test_spider = Spider.from_crawler(test_crawler, "example.com")

    monitor_stats = stats if stats else {}
    monitor_runner = SpiderMonitorRunner(spider=test_spider)

    return {
        "stats": monitor_stats,
        "crawler": test_crawler,
        "spider": test_spider,
        "runner": monitor_runner,
        "job": None,
    }
def make_queue(redis_server, cls: type, slots=None, skip_cache=True, settings=None, hints=None) -> BaseRequestQueue:
    """Instantiate a request queue of type ``cls`` wired to a test spider.

    Logging is configured with ``settings`` only while the module-level
    ``logging_configured`` flag is false; the flag is then set so later
    calls skip that step.

    Args:
        redis_server: Passed to the queue as ``server``.
        cls: Queue class to instantiate.
        slots: Passed as ``slots_mock``; ``None`` becomes an empty dict.
        skip_cache: Forwarded to the queue unchanged.
        settings: Used for logging configuration and for the crawler.
        hints: When truthy, assigned to the spider's ``hint_urls``.

    Returns:
        The object produced by calling ``cls``.
    """
    global logging_configured
    if not logging_configured:
        configure_logging(settings=settings)
        logging_configured = True

    test_spider = Spider.from_crawler(
        Crawler(Spider, settings=settings),
        'test_dd_spider',
    )
    # Hint URLs are only attached when the caller actually supplied some.
    if hints:
        test_spider.hint_urls = hints

    return cls(
        server=redis_server,
        spider=test_spider,
        key=SCHEDULER_QUEUE_KEY,
        slots_mock={} if slots is None else slots,
        skip_cache=skip_cache,
    )