def __init__(self):
    self.spider = Spider()
    self.scheduler = Scheduler()
    self.pipeline = Pipeline()
    self.downloader = Downloader()
    self.spider_mid = SpiderMiddleware()          # initialize the spider middleware object
    self.downloader_mid = DownloaderMiddleware()  # initialize the downloader middleware object

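# A hedged sketch of where the two middleware objects above could hook into one
# request/response cycle; the method names (_execute_request_response_item,
# process_request, process_response, get_response) follow common middleware
# conventions and are assumptions, not taken from the snippet.
def _execute_request_response_item(self):
    request = self.spider.start_requests()
    request = self.spider_mid.process_request(request)         # spider middleware sees the outgoing request first
    request = self.downloader_mid.process_request(request)     # downloader middleware runs just before the download
    response = self.downloader.get_response(request)
    response = self.downloader_mid.process_response(response)  # downloader middleware runs just after the download
    response = self.spider_mid.process_response(response)      # spider middleware sees the response last
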
def __init__(self, spiders):
    self.spiders = spiders
    self.scheduler = Scheduler()
    self.downloader = Downloader()
    self.pipeline = Pipeline()
    self.spider_mid = SpiderMiddleware()
    self.downloader_mid = DownloaderMiddleware()
    self.total_response_nums = 0
    self.total_request_nums = 0

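# A hedged sketch of how the two counters above are typically used as the
# engine's exit condition; _start_requests and _execute_request_response_item
# are hypothetical helpers, not shown in the snippet.
def _start_engine(self):
    self._start_requests()                     # schedule every start request, bumping total_request_nums
    while True:
        self._execute_request_response_item()  # handle one request end to end, bumping total_response_nums
        # once every scheduled request has come back as a response, the crawl is finished
        if self.total_request_nums != 0 and self.total_response_nums >= self.total_request_nums:
            break
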
def __init__(self):
    self.spiders = self._auto_import_instances(path=SPIDERS, isspider=True)
    self.scheduler = Scheduler()
    self.downloader = Downloader()
    self.pipelines = self._auto_import_instances(path=PIPELINES)
    self.spider_mids = self._auto_import_instances(path=SPIDER_MIDDLEWARES)
    self.downloader_mids = self._auto_import_instances(path=DOWNLOADER_MIDDLEWARES)
    self.total_response_nums = 0
    self.total_request_nums = 0

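# _auto_import_instances itself is not shown here. A plausible sketch, assuming
# the settings values (SPIDERS, PIPELINES, ...) are lists of dotted paths such
# as "spiders.baidu.BaiduSpider" and that spider classes expose a `name`
# attribute; the real project may differ.
import importlib

def _auto_import_instances(self, path, isspider=False):
    instances = {} if isspider else []
    for dotted_path in path:
        module_name, _, class_name = dotted_path.rpartition('.')  # "pkg.module.Class" -> module / class
        module = importlib.import_module(module_name)
        cls = getattr(module, class_name)
        instance = cls()
        if isspider:
            instances[instance.name] = instance   # spiders keyed by their name
        else:
            instances.append(instance)            # pipelines / middlewares kept as an ordered list
    return instances
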
def __init__(self):
    self.spiders = self._auto_import_ret(path=SPIDERS, isspider=True)
    self.scheduler = Scheduler()
    self.downloader = Downloader()
    self.pipelines = self._auto_import_ret(path=PIPELINES)
    self.spider_mids = self._auto_import_ret(path=SPIDER_MIDDLEWARES)
    self.downloader_mids = self._auto_import_ret(path=DOWNLOADER_MIDDLEWARES)
    self.total_request_nums = 0
    self.total_response_nums = 0
    self.pool = Pool(MAX_REQUEST_NUMS)
    self.is_running = True

def __init__(self):
    self.spiders = self._auto_import_instances(path=SPIDERS, isspider=True)  # dict of spider objects
    self.scheduler = Scheduler()
    self.downloader = Downloader()
    self.pipelines = self._auto_import_instances(path=PIPELINES)
    self.spider_mids = self._auto_import_instances(path=SPIDER_MIDDLEWARES)
    self.downloader_mids = self._auto_import_instances(path=DOWNLOADER_MIDDLEWARES)
    self.total_response_nums = 0
    self.total_request_nums = 0
    self.pool = Pool(5)  # e.g. os.cpu_count() or 1
    self.is_running = True

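# A hedged sketch of how the pool and is_running flag above could drive
# asynchronous execution, assuming Pool is multiprocessing.dummy.Pool (a thread
# pool) and _execute_request_response_item is the hypothetical per-request
# worker. At start-up the engine would prime the pool with several
# pool.apply_async(self._execute_request_response_item, callback=self._callback)
# calls, one per worker; each completed task then re-submits itself.
def _callback(self, _result):
    if self.is_running:
        # keep re-submitting work until the engine is told to stop
        self.pool.apply_async(self._execute_request_response_item, callback=self._callback)
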
def __init__(self):
    """Instantiate the other components; the engine drives them by calling their methods."""
    self.scheduler = Scheduler()      # initialize the scheduler object
    self.downloader = Downloader()    # initialize the downloader object
    self.spiders = self._auto_import_instances(SPIDERS, is_spider=True)          # dict of spider objects
    self.pipelines = self._auto_import_instances(PIPELINES)                      # list of pipeline objects
    self.spider_mids = self._auto_import_instances(SPIDER_MIDDLEWARES)           # list of spider middlewares
    self.downloader_mids = self._auto_import_instances(DOWNLOADER_MIDDLEWARES)   # list of downloader middlewares
    self.total_request_nums = 0
    self.total_response_nums = 0
    self.pool = Pool()          # instantiate the thread pool object
    self.is_running = False     # flag marking whether the engine is running

def __init__(self):
    self.spiders = self.__auto_import(settings.SPIDERS, is_spider=True)  # a dict of {spider_name: spider_object}
    # create the stats collector object
    self.stats_collector = StatsCollector()
    # pass the stats collector object to the scheduler
    self.scheduler = Scheduler(self.stats_collector)
    self.downloader = Downloader()
    self.pipelines = self.__auto_import(settings.PIPELINES)
    self.spider_middlewares = self.__auto_import(settings.SPIDER_MIDDLEWARES)
    self.downloader_middlewares = self.__auto_import(settings.DOWNLOADER_MIDDLEWARES)
    self.pool = Pool()  # create the thread pool object
    # variable recording how many spiders have finished issuing their start requests
    self.start_request_finished_spider_count = 0

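# A hedged sketch of a minimal in-memory StatsCollector; the real class may use
# different method names. It only needs to hold shared counters so the engine
# and the scheduler can both read and update them.
class StatsCollector:
    def __init__(self):
        self._stats = {}

    def incr(self, key, count=1):
        # increment a named counter, creating it on first use
        self._stats[key] = self._stats.get(key, 0) + count

    def get(self, key):
        return self._stats.get(key, 0)

    @property
    def request_nums(self):
        return self.get('request_nums')

    @property
    def response_nums(self):
        return self.get('response_nums')
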
def __init__(self):
    self.spiders = self._auto_import_instances(path=SPIDERS, isspider=True)  # dict of spider objects
    self.pipelines = self._auto_import_instances(path=PIPELINES)
    self.spider_mids = self._auto_import_instances(path=SPIDER_MIDDLEWARES)
    self.downloader_mids = self._auto_import_instances(path=DOWNLOADER_MIDDLEWARES)
    if SCHEDULER_PERSIST:
        self.collector = ReidsStatsCollector()
    else:
        self.collector = NormalStatsCollector()
    # self.total_response_nums = 0
    # self.total_request_nums = 0
    self.scheduler = Scheduler(self.collector)
    self.downloader = Downloader()
    self.pool = Pool(5)  # e.g. os.cpu_count() or 1
    self.is_running = True

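# When SCHEDULER_PERSIST is on, the collector chosen above is presumably
# Redis-backed so the counters survive restarts and can be shared across
# processes. A hedged sketch using redis-py; the class name, host, port and key
# prefix here are assumptions, not values from the original settings.
import redis

class RedisStatsCollector:
    def __init__(self, host='127.0.0.1', port=6379, db=0, prefix='framework:stats'):
        self._redis = redis.StrictRedis(host=host, port=port, db=db)
        self._prefix = prefix

    def incr(self, key, count=1):
        # INCRBY is atomic, so several worker processes can update the same counter safely
        self._redis.incrby('{}:{}'.format(self._prefix, key), count)

    def get(self, key):
        value = self._redis.get('{}:{}'.format(self._prefix, key))
        return int(value) if value is not None else 0

    def clear(self):
        # drop all counters before a fresh crawl
        for key in self._redis.keys('{}:*'.format(self._prefix)):
            self._redis.delete(key)
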
def __init__(self):
    self.spider = Spider()
    self.scheduler = Scheduler()
    self.downloader = Downloader()
    self.pipeline = Pipeline()

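# A hedged sketch of how this simplest engine version might chain its four
# components for a single request; the component method names (start_requests,
# add_request, get_request, get_response, parse, process_item) follow common
# crawler-framework conventions and are assumptions, not taken from the snippet.
def start(self):
    start_request = self.spider.start_requests()       # 1. ask the spider for its start request
    self.scheduler.add_request(start_request)          # 2. queue it in the scheduler
    request = self.scheduler.get_request()             # 3. pull a request back out
    response = self.downloader.get_response(request)   # 4. download it
    result = self.spider.parse(response)               # 5. let the spider extract data
    self.pipeline.process_item(result)                 # 6. hand the data to the pipeline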