from twisted.internet import reactor
from scrapy.crawler import CrawlerRunner

from myproject.spiders import Spider1, Spider2, Spider3

runner = CrawlerRunner()
runner.crawl(Spider1)
runner.crawl(Spider2)
runner.crawl(Spider3)
d = runner.join()
d.addBoth(lambda _: reactor.stop())

reactor.run()  # the script will block here until all crawling jobs are finished
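If the spiders should run one after another rather than in parallel, the same CrawlerRunner can be driven by chained deferreds, since each crawl() call returns a Deferred that fires when that spider finishes. A minimal sketch using Twisted's inlineCallbacks, with the same assumed myproject.spiders imports as above:

from twisted.internet import defer, reactor
from scrapy.crawler import CrawlerRunner

from myproject.spiders import Spider1, Spider2, Spider3

runner = CrawlerRunner()

@defer.inlineCallbacks
def crawl():
    # yielding each Deferred waits for that spider to finish
    # before the next one starts
    yield runner.crawl(Spider1)
    yield runner.crawl(Spider2)
    yield runner.crawl(Spider3)
    reactor.stop()

crawl()
reactor.run()  # blocks until crawl() stops the reactor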
from twisted.internet import reactor
from scrapy.crawler import CrawlerRunner

from myproject.spiders import Spider1

urls = [
    'http://www.example.com/page1',
    'http://www.example.com/page2',
    'http://www.example.com/page3',
]

runner = CrawlerRunner()
for url in urls:
    # crawl() forwards extra arguments to the spider's constructor,
    # so pass the URL as a keyword argument rather than a Request
    # object; Spider1 must accept a `url` argument (see sketch below)
    runner.crawl(Spider1, url=url)
d = runner.join()
d.addBoth(lambda _: reactor.stop())

reactor.run()  # the script will block here until all crawling jobs are finished
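For the per-URL variant above to work, Spider1 has to turn the url keyword argument into an initial request. A minimal sketch of such a spider, assuming Spider1 is free to define its own constructor (the parse logic here is purely illustrative):

import scrapy

class Spider1(scrapy.Spider):
    name = "spider1"

    def __init__(self, url=None, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # the url passed via runner.crawl(Spider1, url=url) becomes
        # the single start URL for this crawl
        self.start_urls = [url] if url else []

    def parse(self, response):
        # hypothetical example: extract the page title
        yield {"title": response.css("title::text").get()}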