def from_spider(cls, spider):
     """Build an instance of ``cls`` from a spider's settings.

     The key template is looked up under the ``SCHEDULER_NOT_VISIT_KEY``
     setting, falling back to ``defaults.SCHEDULER_NOT_VISIT_KEY``, and is
     then %-formatted with the mapping ``{'spider': spider.name}``.

     Args:
         spider: Object exposing ``settings`` and ``name`` attributes.

     Returns:
         The result of ``cls(server, key=key)``, where ``server`` comes
         from ``get_redis_from_settings`` (presumably a Redis client --
         confirm against that helper).
     """
     spider_settings = spider.settings
     redis_server = get_redis_from_settings(spider_settings)
     key_template = spider_settings.get(
         "SCHEDULER_NOT_VISIT_KEY",
         defaults.SCHEDULER_NOT_VISIT_KEY,
     )
     return cls(redis_server, key=key_template % {'spider': spider.name})
 def from_spider(cls, spider):
     """Build an instance of ``cls`` from a spider's settings.

     The key template is looked up under the ``SCHEDULER_NOT_DOWNLOAD_KEY``
     setting, falling back to ``defaults.SCHEDULER_NOT_DOWNLOAD_KEY``, and
     is then %-formatted with the mapping ``{'spider': spider.name}``.

     Args:
         spider: Object exposing ``settings`` and ``name`` attributes.

     Returns:
         The result of ``cls(server, key=key)``, where ``server`` comes
         from ``get_redis_from_settings`` (presumably a Redis client --
         confirm against that helper).
     """
     spider_settings = spider.settings
     redis_server = get_redis_from_settings(spider_settings)
     key_template = spider_settings.get(
         "SCHEDULER_NOT_DOWNLOAD_KEY",
         defaults.SCHEDULER_NOT_DOWNLOAD_KEY,
     )
     return cls(redis_server, key=key_template % {'spider': spider.name})
 def __init__(self, settings):
     """Store the settings and read the proxy-related configuration.

     Args:
         settings: Settings object providing ``get`` and ``getint``; it is
             also passed to ``get_redis_from_settings`` to obtain
             ``self.server``.
     """
     self.settings = settings
     self.server = get_redis_from_settings(settings)

     # The proxy pool URL defaults to http://127.0.0.1:5010.
     pool_url = settings.get('PROXY_POOL_URL', 'http://127.0.0.1:5010')
     self.proxy_pool_url = pool_url

     # By default a proxy is treated as invalid after 5 failed requests.
     banned_limit = settings.getint('PROXY_TIMES_BANNED_MAX', 5)
     self.proxy_times_banned_max = banned_limit
Exemple #4
0
 def compete_key(self):
     """Claim the lowest integer key not yet present in the compete set.

     Sets ``self.server``, ``self.redis_compete`` and ``self.redis_wait``
     from ``self.settings`` (the ``REDIS_COMPETE`` and ``REDIS_WAIT``
     templates are %-formatted with ``{'spider': self.name}``). Starting
     from 1, it then calls ``sadd`` on the compete set with ``self.key``,
     incrementing ``self.key`` for as long as ``sadd`` returns 0, and logs
     the key it ended up with.
     """
     self.server = get_redis_from_settings(self.settings)

     name_mapping = {'spider': self.name}
     self.redis_compete = self.settings.get('REDIS_COMPETE') % name_mapping
     self.redis_wait = self.settings.get('REDIS_WAIT') % name_mapping

     self.key = 1
     while True:
         # A non-zero result from sadd ends the search with the current key.
         if self.server.sadd(self.redis_compete, self.key) != 0:
             break
         self.key += 1

     message = "get key %s" % self.key
     self.logger.info(message)
 def from_settings(cls, settings):
     """Build an instance of ``cls`` from a settings object.

     The key is ``'<name>:notvisiturl'``, where ``<name>`` is the
     ``SPIDER_NAME`` setting, or ``"HfutSpider"`` when it is not set.

     Args:
         settings: Settings object providing ``get``; it is also passed to
             ``get_redis_from_settings``.

     Returns:
         The result of ``cls(server, key=key)``.
     """
     redis_server = get_redis_from_settings(settings)
     spider_name = settings.get("SPIDER_NAME", "HfutSpider")
     not_visit_key = '%s:notvisiturl' % spider_name
     return cls(redis_server, key=not_visit_key)