def main():
    # build the custom Spider objects
    # baidu_spider = BaiduSpider()
    # douban_spider = DoubanSpider()

    # pass the objects to the Engine as arguments
    # getattr(object, attribute)
    # getattr(module file path, class inside the file)

    # build a dict of all the spiders to run
    # spiders = {
    #     BaiduSpider.name: BaiduSpider(),
    #     DoubanSpider.name: DoubanSpider()
    # }

    # custom pipelines
    # pipelines = [
    #     BaiduPipeline1(),
    #     BaiduPipeline2(),
    #     DoubanPipeline1(),
    #     DoubanPipeline2(),
    # ]

    # custom spider middlewares
    # spider_middlewares = [
    #     SpiderMiddleware1(),
    #     SpiderMiddleware2()
    # ]

    # custom downloader middlewares
    # downloader_middlewares = [
    #     DownloaderMiddleware1(),
    #     DownloaderMiddleware2()
    # ]

    # engine = Engine(
    #     spiders=spiders,
    #     pipelines=pipelines,
    #     spider_mids=spider_middlewares,
    #     downloader_mids=downloader_middlewares
    # )

    engine = Engine()
    engine.start()
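# The getattr notes above hint at how Engine() can build its components
# without being handed instances: read a dotted path from configuration,
# import the module, and pull the class out of it. A minimal sketch of that
# loading pattern (importlib is standard library; the SPIDERS config format
# in the trailing comment is an assumption for illustration):
import importlib

def load_class(dotted_path):
    """Load a class from a dotted path such as 'spiders.baidu.BaiduSpider'."""
    module_path, class_name = dotted_path.rsplit('.', 1)
    module = importlib.import_module(module_path)  # import the module
    return getattr(module, class_name)             # fetch the class from it

# hypothetical usage with a config list like
#   SPIDERS = ['spiders.baidu.BaiduSpider', 'spiders.douban.DoubanSpider']
# spiders = {cls.name: cls() for cls in (load_class(p) for p in SPIDERS)}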
def main():
    # build spider objects from the custom spider classes
    # baidu_spider = BaiduSpider()
    # douban_spider = DoubanSpider()
    # spiders = {"baidu": baidu_spider, "douban": douban_spider}
    # spiders = {baidu_spider.name: baidu_spider, douban_spider.name: douban_spider}
    # spiders = {BaiduSpider.name: BaiduSpider(), DoubanSpider.name: DoubanSpider()}

    # build multiple pipeline objects from the custom Pipeline classes
    # pipelines = [
    #     BaiduPipeline1(),
    #     BaiduPipeline2(),
    #     DoubanPipeline1(),
    #     DoubanPipeline2()
    # ]

    # build multiple spider middlewares from the custom middleware classes
    # spider_middlewares = [
    #     SpiderMiddleware1(),
    #     SpiderMiddleware2()
    # ]

    # build multiple downloader middlewares from the custom middleware classes
    # downloader_middlewares = [
    #     DownloaderMiddleware1(),
    #     DownloaderMiddleware2()
    # ]

    # pass the objects in when creating the framework's engine object
    # engine = Engine(
    #     spiders=spiders,
    #     pipelines=pipelines,
    #     spider_mids=spider_middlewares,
    #     downloader_mids=downloader_middlewares
    # )
    engine = Engine()

    # call start() to launch the framework
    engine.start()
from scrapy_plus.core.engine import Engine
from spiders.baidu import BaiduSpider
from spiders.douban import DoubanSpider
# from spiders.pipelines import BaiduPipline
# from spiders.pipelines import DoubanPipline
# from middlewares.spider_middlewares import TestSpiderMiddleware1, TestSpiderMiddleware2
# from middlewares.downloader_middlewares import TestDownloaderMiddleware1, TestDownloaderMiddleware2

if __name__ == '__main__':
    # baidu_spider = BaiduSpider()
    # douban_spider = DoubanSpider()
    #
    # spiders = {
    #     BaiduSpider.name: baidu_spider,
    #     DoubanSpider.name: douban_spider
    # }
    # pipelines = [
    #     BaiduPipline(),
    #     DoubanPipline()
    # ]
    # spider_mids = [TestSpiderMiddleware1(), TestSpiderMiddleware2()]
    # downloader_mids = [TestDownloaderMiddleware1(), TestDownloaderMiddleware2()]

    engine = Engine()
    engine.start()
# create the Baidu spider
baidu_spider = BaiduSpider()
# 2.1.1-3 create the Douban spider
douban_spider = DoubanSpider()

# 2.2-1 prepare a dict of the spiders
# 2.3-5: replace the hard-coded names with each spider's own name
spiders = {
    BaiduSpider.name: baidu_spider,
    DoubanSpider.name: douban_spider
}

# 2.3-6: build the list of pipelines to pass into the engine
pipelines = [BaiduPipeline(), DoubanPipeline()]

# 2.4-2: define the spider middleware list and the downloader middleware list
spider_middlewares = [
    BaiduSpiderMiddleware(),
    DoubanSpiderMiddleware(),
]
downloader_middlewares = [
    BaiduDownloaderMiddleware(),
    DoubanDownloaderMiddleware()
]

# create the engine object
engine = Engine(spiders, pipelines, spider_middlewares, downloader_middlewares)
# engine = Engine()
engine.start()
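# For Engine(...) to accept the four collections above in one variant and no
# arguments at all in the others, its __init__ presumably defaults every
# parameter. A minimal sketch of such a signature -- the parameter names match
# the keyword calls in these files, the defaults themselves are assumptions:
class Engine(object):
    def __init__(self, spiders=None, pipelines=None,
                 spider_mids=None, downloader_mids=None):
        # fall back to empty collections so a bare Engine() is also valid
        self.spiders = spiders or {}                  # {spider.name: spider}
        self.pipelines = pipelines or []              # [pipeline, ...]
        self.spider_mids = spider_mids or []          # [spider middleware, ...]
        self.downloader_mids = downloader_mids or []  # [downloader middleware, ...]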
# THE WINTER IS COMING! the old driver will be driving who was a man of the world!
# -*- coding: utf-8 -*- python 3.6.7, create time is 18-11-30 5:04 PM GMT+8
from scrapy_plus.core.engine import Engine
from spiders.baidu import BaiduSpider
from spiders.douban import DoubanSpider

if __name__ == '__main__':
    douban = DoubanSpider()
    baidu = BaiduSpider()
    spiders = {baidu.name: baidu, douban.name: douban}
    engine = Engine(spiders)
    engine.start()
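# Every variant keys the spiders dict on a `name` attribute, so each spider
# class evidently defines one at class level. A minimal sketch of such a
# spider -- `name` is confirmed by the code above, `start_urls` is an assumed
# attribute added only for illustration:
class BaiduSpider(object):
    name = 'baidu'                          # unique key used in the spiders dict
    start_urls = ['http://www.baidu.com']   # assumed seed URLs for the engine to fetch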
from scrapy_plus.core.engine import Engine
from spider_middlewares import TestSpidermiddleware1, TestSpiderMiddleware2
from downloader_middlewares import TestDownloaderMiddleware1, TestDownloaderMiddleware2

if __name__ == '__main__':
    # spider = BaiduSpider()

    # baidu_spider = BaiduSpider()    # instantiate a spider object
    # douban_spider = DoubanSpider()  # instantiate a spider object
    # spiders = {DoubanSpider.name: douban_spider}
    # pipelines = [DoubanPipline()]

    # spider_mids = [TestSpidermiddleware1(), TestSpiderMiddleware2()]
    # downloader_mids = [TestDownloaderMiddleware1(), TestDownloaderMiddleware2()]

    # engine = Engine(spiders, pipelines=pipelines, spider_mids=spider_mids,
    #                 downloader_mids=downloader_mids)  # pass the components in

    engine = Engine()  # create the engine
    engine.start()     # start the engine
from scrapy_plus.core.engine import Engine
from spiders.baidu_spider import BaiduSpider
from spiders.douban_spider import DoubanSpider
# multiple pipelines
from scrapy_plus.project_dir.pipelines import BaiduPipeline, DoubanPipeline
# multiple middlewares
from scrapy_plus.project_dir.spiders_middlewares import BaiduMiddleware, DoubanMiddleware

if __name__ == '__main__':
    # # Baidu spider
    # bds = BaiduSpider()
    # # Douban spider
    # dbs = DoubanSpider()
    # # collect all the spiders into a dict
    # spiders = {
    #     BaiduSpider.name: bds,
    #     DoubanSpider.name: dbs
    # }

    # the course material uses lists instead:
    # pipelines = [BaiduPipeline(), DoubanPipeline()]
    pipelines = {'baidu': BaiduPipeline(), 'douban': DoubanPipeline()}
    middlewares = {'baidu': BaiduMiddleware(), 'douban': DoubanMiddleware()}
    engine = Engine(pipelines=pipelines, middlewares=middlewares)
    engine.start()
# THE WINTER IS COMING! the old driver will be driving who was a man of the world!
# -*- coding: utf-8 -*- python 3.6.7, create time is 18-11-30 5:04 PM GMT+8
from scrapy_plus.core.engine import Engine
from spiders.baidu import BaiduSpider
from spiders.douban import DoubanSpider
from pipelines import BaiduPipeline, DoubanPipeline
from middlewares.spider_middlewares import TestSpiderMiddleware1, TestSpiderMiddleware2
from middlewares.downloader_middlewares import TestDownloaderMiddleware1, TestDownloaderMiddleware2

if __name__ == '__main__':
    douban = DoubanSpider()
    baidu = BaiduSpider()
    spiders = {baidu.name: baidu, douban.name: douban}
    pipelines = [DoubanPipeline(), BaiduPipeline()]
    spider_mids = [TestSpiderMiddleware1(), TestSpiderMiddleware2()]
    downloader_mids = [
        TestDownloaderMiddleware1(),
        TestDownloaderMiddleware2()
    ]
    engine = Engine(spiders=spiders,
                    pipelines=pipelines,
                    spider_mids=spider_mids,
                    downloader_mids=downloader_mids)
    engine.start()
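# The Test*Middleware classes above act as hooks around requests and
# responses. A minimal sketch of the shape such a middleware might take --
# the process_request/process_response method names follow the scrapy-style
# convention and are assumptions, not confirmed by this file:
class TestDownloaderMiddleware1(object):
    def process_request(self, request):
        # inspect or modify the request before the downloader sends it
        print('TestDownloaderMiddleware1: process_request')
        return request

    def process_response(self, response):
        # inspect or modify the response before it reaches the engine
        print('TestDownloaderMiddleware1: process_response')
        return response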
# -*- coding: utf-8 -*-
from scrapy_plus.core.engine import Engine  # import the engine

if __name__ == '__main__':
    engine = Engine()  # create the engine object
    engine.start()     # start the engine
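# Once Engine() takes no arguments, the components must come from project
# configuration instead (the last variant below does `import settings`). A
# hypothetical settings.py for that setup -- every name and dotted path here
# is an assumption used purely for illustration:
SPIDERS = [
    'spiders.baidu.BaiduSpider',
    'spiders.douban.DoubanSpider',
]
PIPELINES = [
    'pipelines.BaiduPipeline',
    'pipelines.DoubanPipeline',
]
SPIDER_MIDDLEWARES = []      # dotted paths of spider middlewares
DOWNLOADER_MIDDLEWARES = []  # dotted paths of downloader middlewares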
# THE WINTER IS COMING! the old driver will be driving who was a man of the world!
# -*- coding: utf-8 -*- python 3.6.7, create time is 18-11-30 5:04 PM GMT+8
from scrapy_plus.core.engine import Engine
from spiders.baidu import BaiduSpider
from spiders.douban import DoubanSpider
from pipelines import BaiduPipeline, DoubanPipeline

if __name__ == '__main__':
    douban = DoubanSpider()
    baidu = BaiduSpider()
    spiders = {baidu.name: baidu, douban.name: douban}
    pipelines = [DoubanPipeline(), BaiduPipeline()]
    engine = Engine(spiders=spiders, pipelines=pipelines)
    engine.start()
from project_dir.spiders.douban import DouBanSpider
from scrapy_plus.core.engine import Engine
from spiders.baidu import BaiduSpider

if __name__ == '__main__':
    spider = BaiduSpider()
    # engine = Engine(spider)
    douban = DouBanSpider()
    engine = Engine(douban)
    engine.start()
import time

from scrapy_plus.core.engine import Engine


def main():
    # 1. send requests on a timer
    engine = Engine()
    while True:
        engine.start()
        time.sleep(3)
from scrapy_plus.core.engine import Engine
from spiders.baidu import BaiduSpider
from spiders.douban import DoubanSpider
from pipelines import BaiduPipeline, DoubanPipeline
from spider_middlewares import TestSpiderMiddleware1, TestSpiderMiddleware2
from downloader_middlewares import TestDownloaderMiddleware1, TestDownloaderMiddleware2
import settings

if __name__ == '__main__':
    # instantiate the spiders
    # baidu = BaiduSpider()
    # douban = DoubanSpider()
    # spiders = {baidu.name: baidu, douban.name: douban}
    # pipelines = [BaiduPipeline(), DoubanPipeline()]
    # spider_mids = [TestSpiderMiddleware1(), TestSpiderMiddleware2()]
    # downloader_mids = [TestDownloaderMiddleware1(), TestDownloaderMiddleware2()]

    engine = Engine()  # instantiate the engine
    engine.start()     # start the engine
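# Putting the pieces together: with `import settings` available, the
# no-argument Engine() can assemble its own components from dotted paths.
# A hedged sketch of that wiring (load_class is the helper sketched earlier;
# the settings names match the hypothetical settings.py above -- none of this
# is the framework's confirmed internal code):
import importlib

import settings

def load_class(dotted_path):
    module_path, class_name = dotted_path.rsplit('.', 1)
    return getattr(importlib.import_module(module_path), class_name)

class Engine(object):
    def __init__(self):
        # build every component from the dotted paths declared in settings
        self.spiders = {cls.name: cls() for cls in map(load_class, settings.SPIDERS)}
        self.pipelines = [load_class(p)() for p in settings.PIPELINES]
        self.spider_mids = [load_class(p)() for p in settings.SPIDER_MIDDLEWARES]
        self.downloader_mids = [load_class(p)() for p in settings.DOWNLOADER_MIDDLEWARES]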