Beispiel #1
0
def main():
    """Build the framework engine and start the crawl.

    Earlier revisions constructed the spiders, pipelines and middleware
    lists here and passed them to ``Engine(...)``.  The framework now
    resolves those components itself (via ``getattr`` on configured
    module paths), so the engine is created without arguments.
    """
    engine = Engine()
    engine.start()
Beispiel #2
0
def main():
    """Create the framework ``Engine`` and launch it.

    Spiders, pipelines and middlewares are no longer wired up manually
    and handed to the engine; the framework loads them itself from the
    project configuration.
    """
    engine = Engine()
    # start() kicks off the whole crawl run.
    engine.start()
Beispiel #3
0
from scrapy_plus.core.engine import Engine
from spiders.baidu import BaiduSpider
from spiders.douban import DoubanSpider
# from spiders.pipelines import BaiduPipline
# from spiders.pipelines import DoubanPipline
# from middlewares.spider_middlewares import TestSpiderMiddleware1,TestSpiderMiddleware2
# from middlewares.downloader_middlewares import TestDownloaderMiddleware1,TestDownloaderMiddleware2

if __name__ == '__main__':
    # The engine resolves spiders, pipelines and middlewares from the
    # project configuration itself, so no explicit wiring is needed here.
    engine = Engine()
    engine.start()
Beispiel #4
0
    # Create the Baidu spider.
    baidu_spider = BaiduSpider()

    # 2.1.1-3: create the Douban spider.
    douban_spider = DoubanSpider()
    # 2.2-1: prepare a dict of spiders.
    # 2.3-5: key the dict by each spider class's own ``name`` attribute
    # instead of hard-coded strings.
    spiders = {
        BaiduSpider.name: baidu_spider,
        DoubanSpider.name: douban_spider
    }
    # 2.3-6: build the list of pipelines to pass into the engine.
    pipelines = [BaiduPipeline(), DoubanPipeline()]

    # 2.4-2: define the spider-middleware and downloader-middleware lists.
    spider_middlewares = [
        BaiduSpiderMiddleware(),
        DoubanSpiderMiddleware(),
    ]

    downloader_middlewares = [
        BaiduDownloaderMiddleware(),
        DoubanDownloaderMiddleware()
    ]
    # Create the engine object, wiring in all components positionally.
    engine = Engine(spiders, pipelines, spider_middlewares,
                    downloader_middlewares)

    # engine = Engine()
    engine.start()
Beispiel #5
0
# THE WINTER IS COMING! the old driver will be driving who was a man of the world!
# -*- coding: utf-8 -*- python 3.6.7, create time is 18-11-30 下午5:04 GMT+8

from scrapy_plus.core.engine import Engine

from spiders.baidu import BaiduSpider
from spiders.douban import DoubanSpider


if __name__ == '__main__':
    # Register each spider under its declared name and hand the mapping
    # to the engine.
    douban_spider = DoubanSpider()
    baidu_spider = BaiduSpider()
    spider_map = {
        baidu_spider.name: baidu_spider,
        douban_spider.name: douban_spider,
    }

    engine = Engine(spider_map)
    engine.start()
Beispiel #6
0
from spider_middlewares import TestSpidermiddleware1, TestSpiderMiddleware2
from downloader_middlewares import TestDownloaderMiddleware1, TestDownloaderMiddleware2

# if __name__ == '__main__':
#     # spider = BaiduSpider()
#     # baidu_spider = BaiduSpider()    # 实例化爬虫对象
#     douban_spider = DoubanSpider()    # 实例化爬虫对象
#     spiders = {DoubanSpider.name: douban_spider}
#     pipelines = [DoubanPipline()]
#
#     spider_mids = [TestSpidermiddleware1(), TestSpiderMiddleware2()]
#     downloader_mids = [TestDownloaderMiddleware1(), TestDownloaderMiddleware2()]
#
#
#     engine = Engine(spiders,piplines=pipelines,spider_mids=spider_mids, downloader_mids=downloader_mids)    # 传入爬虫对象
#     engine.start()    # 启动引擎

if __name__ == '__main__':
    # All spiders, pipelines and middlewares are resolved by the
    # framework itself now, so the engine takes no explicit arguments.
    engine = Engine()
    engine.start()    # launch the crawl
Beispiel #7
0
# THE WINTER IS COMING! the old driver will be driving who was a man of the world!
# -*- coding: utf-8 -*- python 3.6.7, create time is 18-11-30 下午5:04 GMT+8

from scrapy_plus.core.engine import Engine

from spiders.baidu import BaiduSpider
from spiders.douban import DoubanSpider
from pipelines import BaiduPipeline, DoubanPipeline
from middlewares.spider_middlewares import TestSpiderMiddleware1, TestSpiderMiddleware2
from middlewares.downloader_middlewares import TestDownloaderMiddleware1, TestDownloaderMiddleware2

if __name__ == '__main__':
    # Instantiate every component the engine needs, then wire them all
    # together through keyword arguments.
    douban = DoubanSpider()
    baidu = BaiduSpider()

    engine = Engine(
        spiders={baidu.name: baidu, douban.name: douban},
        pipelines=[DoubanPipeline(), BaiduPipeline()],
        spider_mids=[TestSpiderMiddleware1(), TestSpiderMiddleware2()],
        downloader_mids=[
            TestDownloaderMiddleware1(),
            TestDownloaderMiddleware2(),
        ],
    )
    engine.start()
Beispiel #8
0
"""
项目目录,对框架进行测试
"""
from scrapy_plus.core.engine import Engine
# from .spider import BaiduSpider

if __name__ == '__main__':
    # Build the engine and kick off the run; this framework revision
    # exposes start_engine() as its entry point.
    crawl_engine = Engine()
    crawl_engine.start_engine()
Beispiel #9
0
# -*- coding: utf-8 -*-

from scrapy_plus.core.engine import Engine  # 导入引擎

if __name__ == '__main__':
    # Create the engine object and launch the crawl.
    crawl_engine = Engine()
    crawl_engine.start()
Beispiel #10
0
from scrapy_plus.core.engine import Engine
from spiders.baidu_spider import BaiduSpider
from spiders.douban_spider import DoubanSpider
# 多管道
from scrapy_plus.project_dir.pipelines import BaiduPipeline, DoubanPipeline
# 多中间键
from scrapy_plus.project_dir.spiders_middlewares import BaiduMiddleware, DoubanMiddleware

if __name__ == '__main__':
    # The course material passed pipelines as a list; this project keys
    # pipelines and middlewares by spider name instead, and lets the
    # engine resolve the spiders itself.
    pipelines = {'baidu': BaiduPipeline(), 'douban': DoubanPipeline()}
    middlewares = {'baidu': BaiduMiddleware(), 'douban': DoubanMiddleware()}

    engine = Engine(pipelines=pipelines, middlewares=middlewares)
    engine.start()
Beispiel #11
0
# THE WINTER IS COMING! the old driver will be driving who was a man of the world!
# -*- coding: utf-8 -*- python 3.6.7, create time is 18-11-30 下午5:04 GMT+8

from scrapy_plus.core.engine import Engine

from spiders.baidu import BaiduSpider
from spiders.douban import DoubanSpider
from pipelines import BaiduPipeline, DoubanPipeline

if __name__ == '__main__':
    # Map each spider to its declared name and pair the engine with
    # both pipelines.
    douban_spider = DoubanSpider()
    baidu_spider = BaiduSpider()

    engine = Engine(
        spiders={baidu_spider.name: baidu_spider,
                 douban_spider.name: douban_spider},
        pipelines=[DoubanPipeline(), BaiduPipeline()],
    )
    engine.start()
Beispiel #12
0
# -*- coding: utf-8 -*-

from scrapy_plus.core.engine import Engine

if __name__ == '__main__':
    # Instantiate the framework engine and start the crawl.
    crawl_engine = Engine()
    crawl_engine.start()
Beispiel #13
0
from project_dir.spiders.douban import DouBanSpider
from scrapy_plus.core.engine import Engine
from spiders.baidu import BaiduSpider

if __name__ == '__main__':
    # NOTE(review): the Baidu spider is instantiated but never handed to
    # the engine — only the Douban spider is actually crawled. Kept as-is
    # in case the constructor has side effects; confirm and remove.
    baidu_spider = BaiduSpider()
    douban_spider = DouBanSpider()
    engine = Engine(douban_spider)
    engine.start()
Beispiel #14
0
def main():
    """Run the engine on a fixed schedule.

    Starts a crawl, sleeps three seconds, and repeats forever.
    """
    crawl_engine = Engine()
    while True:
        crawl_engine.start()
        time.sleep(3)
Beispiel #15
0
from scrapy_plus.core.engine import Engine
from spiders.baidu import BaiduSpider
from spiders.douban import DoubanSpider
from pipelines import BaiduPipeline, DoubanPipeline
from spider_middlewares import TestSpiderMiddleware1, TestSpiderMiddleware2
from downloader_middlewares import TestDownloaderMiddleware1, TestDownloaderMiddleware2
import settings
if __name__ == '__main__':
    # The engine resolves spiders, pipelines and middlewares from the
    # project settings module, so none are passed explicitly here.
    engine = Engine()   # instantiate the engine
    engine.start()      # launch the crawl