def from_spider(cls, spider):
    """Create an instance configured from a spider's settings.

    The key template is read from the ``SCHEDULER_NOT_VISIT_KEY`` setting,
    falling back to ``defaults.SCHEDULER_NOT_VISIT_KEY``, and the spider's
    name is substituted into it through the ``%(spider)s`` placeholder.

    Args:
        spider: Object exposing ``settings`` and ``name`` attributes.

    Returns:
        The result of ``cls(server, key=key)``, where ``server`` is whatever
        ``get_redis_from_settings`` returns for the spider's settings
        (presumably a Redis client -- confirm against that helper).
    """
    conf = spider.settings
    redis_conn = get_redis_from_settings(conf)
    key_template = conf.get(
        "SCHEDULER_NOT_VISIT_KEY",
        defaults.SCHEDULER_NOT_VISIT_KEY,
    )
    return cls(redis_conn, key=key_template % {'spider': spider.name})
def from_spider(cls, spider):
    """Create an instance configured from a spider's settings.

    The key template is read from the ``SCHEDULER_NOT_DOWNLOAD_KEY``
    setting, falling back to ``defaults.SCHEDULER_NOT_DOWNLOAD_KEY``, and
    the spider's name is substituted into it through the ``%(spider)s``
    placeholder.

    Args:
        spider: Object exposing ``settings`` and ``name`` attributes.

    Returns:
        The result of ``cls(server, key=key)``, where ``server`` is whatever
        ``get_redis_from_settings`` returns for the spider's settings
        (presumably a Redis client -- confirm against that helper).
    """
    conf = spider.settings
    redis_conn = get_redis_from_settings(conf)
    key_template = conf.get(
        "SCHEDULER_NOT_DOWNLOAD_KEY",
        defaults.SCHEDULER_NOT_DOWNLOAD_KEY,
    )
    return cls(redis_conn, key=key_template % {'spider': spider.name})
def __init__(self, settings):
    """Store the settings and derive the proxy-related configuration.

    Args:
        settings: Settings object providing ``get`` and ``getint``; it is
            also handed to ``get_redis_from_settings`` to obtain
            ``self.server``.
    """
    self.settings = settings
    self.server = get_redis_from_settings(settings)

    # Proxy pool URL; defaults to http://127.0.0.1:5010 when not configured.
    pool_url = settings.get('PROXY_POOL_URL', 'http://127.0.0.1:5010')
    self.proxy_pool_url = pool_url

    # A proxy is considered invalid after this many failed requests
    # (5 by default).
    banned_max = settings.getint('PROXY_TIMES_BANNED_MAX', 5)
    self.proxy_times_banned_max = banned_max
def compete_key(self):
    """Claim the first integer key that can be added to the compete set.

    Sets ``self.server``, ``self.redis_compete`` and ``self.redis_wait``
    from ``self.settings`` (the ``REDIS_COMPETE`` and ``REDIS_WAIT``
    templates are filled with ``self.name``), then tries keys 1, 2, 3, ...
    until ``sadd`` on ``self.redis_compete`` returns something other than 0.
    The winning value is left in ``self.key`` and logged.

    NOTE(review): presumably ``self.server`` is a Redis client whose
    ``sadd`` returns 0 when the member is already in the set, so each
    caller ends up with a distinct key -- confirm against the client used.
    """
    self.server = get_redis_from_settings(self.settings)

    compete_template = self.settings.get('REDIS_COMPETE')
    self.redis_compete = compete_template % {'spider': self.name}
    wait_template = self.settings.get('REDIS_WAIT')
    self.redis_wait = wait_template % {'spider': self.name}

    self.key = 1
    while True:
        added = self.server.sadd(self.redis_compete, self.key)
        if added != 0:
            break
        # This candidate was rejected; move on to the next integer.
        self.key += 1

    self.logger.info("get key %s" % self.key)
def from_settings(cls, settings):
    """Create an instance from a settings object.

    The key is ``"<name>:notvisiturl"``, where ``<name>`` is the
    ``SPIDER_NAME`` setting, or ``"HfutSpider"`` when that setting is
    absent.

    Args:
        settings: Settings object providing ``get``; it is also handed to
            ``get_redis_from_settings`` to obtain the server.

    Returns:
        The result of ``cls(server, key=key)``.
    """
    redis_conn = get_redis_from_settings(settings)
    spider_name = settings.get("SPIDER_NAME", "HfutSpider")
    url_key = '%s:notvisiturl' % spider_name
    return cls(redis_conn, key=url_key)