# RedisClient, Crawler and the POOL_* thresholds are defined elsewhere in the
# project (database wrapper, crawler module and settings respectively).
class SaveIp():

    def __init__(self):
        self.redis = RedisClient()
        self.crawler = Crawler()

    def is_over_threshold(self):
        """Check whether the proxy pool has reached its upper limit."""
        return self.redis.count() >= POOL_UPPER_THRESHOLD

    def run(self):
        print('Getter is running')
        if not self.is_over_threshold():
            proxies = self.crawler.run()
            for proxy in proxies:
                print(proxy, 'saved to Redis')
                self.redis.add(proxy)
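# These saver/getter classes assume a RedisClient that can at least count the
# stored proxies and add new ones. The sketch below is a minimal, assumed
# implementation for illustration only: the key name 'proxies', the initial
# score and the connection parameters are not taken from the original project.
import redis

REDIS_KEY = 'proxies'   # assumed sorted-set key holding the proxies
INITIAL_SCORE = 10      # assumed score given to newly added proxies


class RedisClient():

    def __init__(self, host='localhost', port=6379, password=None):
        # decode_responses=True makes redis-py return str instead of bytes
        self.db = redis.StrictRedis(host=host, port=port, password=password,
                                    decode_responses=True)

    def add(self, proxy, score=INITIAL_SCORE):
        """Store a proxy with an initial score unless it already exists."""
        if self.db.zscore(REDIS_KEY, proxy) is None:
            return self.db.zadd(REDIS_KEY, {proxy: score})

    def count(self):
        """Return the number of proxies currently in the pool."""
        return self.db.zcard(REDIS_KEY)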
import time


class Save():

    def __init__(self):
        # connect to Redis
        self.redis = RedisClient()
        # initialize the crawler module
        self.crawler = Crawler()

    def proxies2redis(self):
        # check how many proxies are currently in Redis; keep crawling
        # while the count is below the threshold (10000)
        if self.redis.count() < POOL_THRESHOLD:
            start = time.time()
            # call every crawl function to fetch proxies
            for index in range(self.crawler.__FuncCount__):
                func = self.crawler.__Funcs__[index]
                proxies = self.crawler.get_proxies(func)
                # the last crawl function is stored separately as high-quality proxies
                if index == self.crawler.__FuncCount__ - 1:
                    self.redis.add_highly_proxies(proxies)
                else:
                    self.redis.add_proxies(proxies)
            end = time.time()
            diff = end - start
            print("save proxies to redis consumed:", diff)
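# Save (and the Getter below) iterate over the crawler's crawl functions
# through attributes such as __FuncCount__/__Funcs__ or
# __CrawlFuncCount__/__CrawlFunc__. A common way to build such attributes is a
# metaclass that collects every method whose name starts with 'crawl_'. The
# sketch below assumes that pattern; the method names and the placeholder
# crawl function are illustrative only.


class ProxyMetaclass(type):

    def __new__(mcs, name, bases, attrs):
        # collect the names of all crawl_* methods defined on the class
        attrs['__CrawlFunc__'] = [k for k in attrs if k.startswith('crawl_')]
        attrs['__CrawlFuncCount__'] = len(attrs['__CrawlFunc__'])
        return super().__new__(mcs, name, bases, attrs)


class Crawler(metaclass=ProxyMetaclass):

    def get_proxies(self, callback):
        """Run one crawl function by name and collect the proxies it yields."""
        return list(getattr(self, callback)())

    def crawl_example_site(self):
        # placeholder crawl function; a real one would fetch and parse a page
        yield '127.0.0.1:8080'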
class Getter():

    def __init__(self):
        self.redis = RedisClient()
        self.crawler = Crawler()

    def is_over_threshold(self):
        """
        Check whether the proxy pool has reached its upper limit.
        :return: True if the pool is full, False otherwise
        """
        return self.redis.count() >= POOL_UPPER_THRESHOLD

    def run(self):
        print("Getter is running")
        if not self.is_over_threshold():
            # iterate over every crawl function collected by the Crawler
            for callback_label in range(self.crawler.__CrawlFuncCount__):
                callback = self.crawler.__CrawlFunc__[callback_label]
                proxies = self.crawler.get_proxies(callback)
                for proxy in proxies:
                    self.redis.add(proxy)
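# None of the classes above schedule themselves; they are meant to be invoked
# periodically so the pool stays topped up. Below is a minimal scheduling
# sketch; the 20-second interval and the run_getter name are assumptions for
# illustration, not values from the original project.
import time


def run_getter(cycle=20):
    getter = Getter()
    while True:
        getter.run()          # fetch and store proxies if the pool is not full
        time.sleep(cycle)     # wait before the next crawl round


if __name__ == '__main__':
    run_getter()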