def make_data(settings):
    """Assemble the data bundle used to run spider monitors in tests.

    A crawler is created from ``settings`` via ``get_crawler`` and a
    ``Spider`` is built from it with ``"example.com"`` as its argument.

    Args:
        settings: Passed unchanged to ``get_crawler`` as ``settings_dict``.

    Returns:
        A dict with the keys ``"stats"`` (the result of
        ``crawler.stats.get_stats()``), ``"crawler"``, ``"spider"``,
        ``"runner"`` (a ``SpiderMonitorRunner`` bound to the spider) and
        ``"job"`` (always ``None``).
    """
    test_crawler = get_crawler(settings_dict=settings)
    test_spider = Spider.from_crawler(test_crawler, "example.com")
    bundle = dict(
        stats=test_crawler.stats.get_stats(),
        crawler=test_crawler,
        spider=test_spider,
        runner=SpiderMonitorRunner(spider=test_spider),
        job=None,
    )
    return bundle
def make_data_for_monitor(settings=None, stats=None):
    """Assemble the data bundle for a monitor, using caller-supplied stats.

    Unlike reading the stats from the crawler, this helper places the
    ``stats`` argument itself into the returned bundle.

    Args:
        settings: Optional settings mapping; a falsy value is replaced by
            an empty dict before being passed to ``get_crawler`` as
            ``settings_dict``.
        stats: Optional stats object to expose under the ``"stats"`` key.
            Only ``None`` is replaced by a new empty dict; any supplied
            object (including an empty one) is returned as-is.

    Returns:
        A dict with the keys ``"stats"``, ``"crawler"``, ``"spider"``,
        ``"runner"`` (a ``SpiderMonitorRunner`` bound to the spider) and
        ``"job"`` (always ``None``).
    """
    crawler = get_crawler(settings_dict=settings or {})
    spider = Spider.from_crawler(crawler, "example.com")
    return {
        # Test identity against None rather than truthiness: ``stats or {}``
        # would silently swap an empty-but-supplied stats object for an
        # unrelated dict, so the caller would no longer share it.
        "stats": {} if stats is None else stats,
        "crawler": crawler,
        "spider": spider,
        "runner": SpiderMonitorRunner(spider=spider),
        "job": None,
    }
# Example no. 3
# 0
def make_queue(redis_server, cls: type, slots=None, skip_cache=True, settings=None,
               hints=None) -> BaseRequestQueue:
    """Instantiate a request queue of type ``cls`` wired to a test spider.

    On the first call (tracked by the module-level ``logging_configured``
    flag) logging is configured with ``settings``; later calls skip that
    step.

    Args:
        redis_server: Passed to ``cls`` as ``server``.
        cls: Queue class to instantiate.
        slots: Passed to ``cls`` as ``slots_mock``; ``None`` becomes a new
            empty dict.
        skip_cache: Forwarded to ``cls`` unchanged.
        settings: Used for both ``configure_logging`` and the ``Crawler``.
        hints: When truthy, assigned to the spider's ``hint_urls``
            attribute.

    Returns:
        The queue object produced by calling ``cls``.
    """
    global logging_configured
    if not logging_configured:
        # Configure logging once per process, before any crawler is built.
        configure_logging(settings=settings)
        logging_configured = True

    slots_mock = {} if slots is None else slots

    test_spider = Spider.from_crawler(
        Crawler(Spider, settings=settings),
        'test_dd_spider',
    )
    if hints:
        test_spider.hint_urls = hints

    return cls(
        server=redis_server,
        spider=test_spider,
        key=SCHEDULER_QUEUE_KEY,
        slots_mock=slots_mock,
        skip_cache=skip_cache,
    )