import scrapy
from scrapy.crawler import CrawlerRunner
from scrapy.utils.project import get_project_settings
from twisted.internet import reactor


class MySpider(scrapy.Spider):
    """Example spider; fill in start_urls / parse as needed."""
    name = "my_spider"
    # ... set spider configuration ...


def main():
    """Run MySpider via CrawlerRunner under an explicitly managed reactor.

    Unlike CrawlerProcess, CrawlerRunner does NOT start or stop the Twisted
    reactor itself.  The original version called runner.join() and returned:
    the returned Deferred never fires without a running reactor, so the
    script exited before any crawling happened.
    """
    settings = get_project_settings()
    runner = CrawlerRunner(settings)
    crawler = runner.create_crawler(MySpider)
    runner.crawl(crawler)
    d = runner.join()  # Deferred that fires when all crawls are done
    # addBoth receives the Deferred's result/failure; discard it and stop.
    d.addBoth(lambda _: reactor.stop())
    reactor.run()  # blocks here until the crawl finishes


if __name__ == '__main__':
    main()
import scrapy
from scrapy.spiders import Spider
from scrapy.crawler import CrawlerRunner
from twisted.internet import reactor, defer


class MySpider1(Spider):
    """First example spider; fill in start_urls / parse as needed."""
    name = "my_spider_1"
    # ... set spider configuration ...


class MySpider2(Spider):
    """Second example spider; fill in start_urls / parse as needed."""
    name = "my_spider_2"
    # ... set spider configuration ...


def stop_reactor(result):
    """Stop the Twisted reactor once the crawls have finished.

    Deferred.addBoth invokes its callback with the Deferred's result (or
    Failure); the original zero-argument signature raised TypeError when
    the join Deferred fired, so the reactor was never stopped.  The result
    is intentionally ignored.
    """
    reactor.stop()


def main():
    """Run MySpider1 and MySpider2 concurrently with a shared CrawlerRunner."""
    runner = CrawlerRunner()
    runner.crawl(MySpider1)
    runner.crawl(MySpider2)
    d = runner.join()  # fires when BOTH crawls have completed
    d.addBoth(stop_reactor)
    reactor.run()  # starts both spiders; they run concurrently


if __name__ == '__main__':
    main()

# In this example, we define two spiders, MySpider1 and MySpider2.  We create
# a CrawlerRunner instance and add both spiders to it using the crawl method.
# We then use the join method to wait for both spiders to complete before
# stopping the reactor.  The reactor.run call starts the execution of the
# spiders, which run concurrently.  The Scrapy package provides the necessary
# libraries to implement web scraping and crawling.