def get_crawler(spidercls=None, settings_dict=None):
    """Return an unconfigured Crawler object. If settings_dict is given, it
    will be used to populate the crawler settings with a project level
    priority.
    """
    from scrapy.crawler import CrawlerRunner
    from scrapy.settings import Settings
    from scrapy.spider import Spider

    runner = CrawlerRunner(Settings(settings_dict))
    return runner._create_crawler(spidercls or Spider)
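A minimal usage sketch of this helper (the DOWNLOAD_DELAY value below is only an illustrative setting, not something the original code relies on):

# Build an unconfigured Crawler for the default Spider class, seeded
# with project-priority settings taken from settings_dict.
crawler = get_crawler(settings_dict={'DOWNLOAD_DELAY': 1.0})
print(crawler.settings.getfloat('DOWNLOAD_DELAY'))  # -> 1.0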
from twisted.internet import reactor

from scrapy.crawler import CrawlerProcess, CrawlerRunner
from scrapy.utils.project import get_project_settings


class my_CrawlerProcess(CrawlerProcess):
    # Override CrawlerProcess._get_dns_resolver() so the process installs
    # the custom my_CachingThreadedResolver instead of Scrapy's stock
    # CachingThreadedResolver.
    def _get_dns_resolver(self):
        if self.settings.getbool('DNSCACHE_ENABLED'):
            cache_size = self.settings.getint('DNSCACHE_SIZE')
        else:
            cache_size = 0
        print("wwj debug: return my_CachingThreadedResolver")
        return my_CachingThreadedResolver(
            reactor=reactor,
            cache_size=cache_size,
            timeout=self.settings.getfloat('DNS_TIMEOUT'))


settings = get_project_settings()
my_process = my_CrawlerProcess(settings)

# One runner, several spiders.
runner = CrawlerRunner(settings)
spidercls = runner.spider_loader.load('scrapy_spider')
my_crawler = runner._create_crawler(spidercls)
my_crawler.spider = my_crawler._create_spider('scrapy_spider')
my_crawler.engine = my_crawler._create_engine()

# Feed the spider's start requests to the engine by hand instead of
# going through runner.crawl().
start_requests = iter(my_crawler.spider.start_requests())
close_if_idle = False
my_crawler.engine.open_spider(my_crawler.spider, start_requests, close_if_idle)
my_crawler.engine.start()

# process.crawl('scrapy_spider')
stop_after_crawl = False
my_process.start(stop_after_crawl)
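The script references my_CachingThreadedResolver without showing it. A minimal sketch of what such a class might look like, assuming it simply wraps Scrapy's stock CachingThreadedResolver from scrapy.resolver and traces each lookup (the tracing behaviour is an illustration, not the author's actual implementation):

from scrapy.resolver import CachingThreadedResolver


class my_CachingThreadedResolver(CachingThreadedResolver):
    # Same constructor signature as the parent: (reactor, cache_size, timeout).
    def getHostByName(self, name, timeout=None):
        # Trace each DNS lookup, then delegate to the caching resolver.
        print("resolving %s" % name)
        return super(my_CachingThreadedResolver, self).getHostByName(name, timeout)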