def main():
    """Entry point: build the framework Engine and start the crawl.

    Spiders, pipelines, spider middlewares and downloader middlewares are
    no longer constructed here and passed in — presumably the Engine now
    resolves them itself from configuration via getattr(module, class),
    as the removed prototype code suggested; confirm against Engine.
    """
    engine = Engine()
    engine.start()
def main():
    """Entry point: create the framework-provided Engine and run it.

    Earlier iterations built spider/pipeline/middleware objects here and
    handed them to Engine(spiders=..., pipelines=..., ...); the Engine is
    now expected to assemble its own components — presumably from project
    settings. TODO confirm against the Engine implementation.
    """
    engine = Engine()
    # start() launches the whole framework run.
    engine.start()
from scrapy_plus.core.engine import Engine
from spiders.baidu import BaiduSpider
from spiders.douban import DoubanSpider

if __name__ == '__main__':
    # Spiders, pipelines and middlewares are no longer instantiated here
    # and passed to Engine(...) — the Engine builds its own components.
    # NOTE(review): BaiduSpider/DoubanSpider imports are now unused at
    # this level; presumably the Engine discovers them via settings.
    engine = Engine()
    engine.start()
from spider_middlewares import TestSpidermiddleware1, TestSpiderMiddleware2
from downloader_middlewares import TestDownloaderMiddleware1, TestDownloaderMiddleware2

if __name__ == '__main__':
    # NOTE(review): `Engine` is used but not imported in this chunk —
    # presumably `from scrapy_plus.core.engine import Engine` exists
    # above this view; confirm before relying on this script.
    # The middleware imports above are currently unused here; the Engine
    # is expected to assemble its own spiders/pipelines/middlewares.
    engine = Engine()   # create the framework engine
    engine.start()      # start the engine / launch the crawl
# -*- coding: utf-8 -*-
from scrapy_plus.core.engine import Engine


if __name__ == '__main__':
    # Build the framework-provided engine and kick off the run.
    crawler_engine = Engine()
    crawler_engine.start()
def main():
    # 1. Send requests on a schedule: build the engine once, then
    #    re-run the crawl every 3 seconds, forever (no exit condition).
    # NOTE(review): assumes `time` (and `Engine`) are imported at the
    # top of this file, outside the visible chunk — confirm.
    engine = Engine()
    while True:
        engine.start()
        time.sleep(3)
from scrapy_plus.core.engine import Engine
from spiders.baidu import BaiduSpider
from spiders.douban import DoubanSpider
from pipelines import BaiduPipeline, DoubanPipeline
from spider_middlewares import TestSpiderMiddleware1, TestSpiderMiddleware2
from downloader_middlewares import TestDownloaderMiddleware1, TestDownloaderMiddleware2
import settings

if __name__ == '__main__':
    # Spiders, pipelines and middlewares are no longer instantiated and
    # passed to Engine(...) — the Engine assembles its own components,
    # presumably from `settings`; confirm against the Engine class.
    # NOTE(review): the spider/pipeline/middleware imports above are now
    # unused at this level but are kept for that discovery mechanism.
    engine = Engine()   # instantiate the engine
    engine.start()      # start the engine