Example #1
0
class SaveIp():
    """Fetch proxies with the crawler and store them in Redis.

    Refuses to fetch once the pool size reaches POOL_UPPER_THRESHOLD.
    """

    def __init__(self):
        self.redis = RedisClient()
        self.crawler = Crawler()

    def is_over_threshold(self):
        """Return True when the proxy pool has reached its capacity limit."""
        # Direct boolean return replaces the redundant if/else True/False.
        return self.redis.count() >= POOL_UPPER_THRESHOLD

    def run(self):
        """Run the crawler and add every fetched proxy to Redis, unless full."""
        print(' 获取器开始执行 ')
        if not self.is_over_threshold():
            proxies = self.crawler.run()
            for proxy in proxies:
                print(proxy, '存入')
                self.redis.add(proxy)
Example #2
0
class Save():
    """Persist crawled proxies into Redis, refilling while the pool is small."""

    def __init__(self):
        # Redis connection
        self.redis = RedisClient()
        # Crawler providing the proxy-fetching functions
        self.crawler = Crawler()

    def proxies2redis(self):
        """Run every crawler function and store its proxies in Redis.

        Only refills when the pool holds fewer than POOL_THRESHOLD proxies.
        The last crawler function is treated as the high-quality source and
        its proxies are stored via add_highly_proxies; all others go through
        add_proxies.
        """
        # Guard clause: skip refilling once the pool is large enough
        # (flattens the original nested if-body).
        if self.redis.count() >= POOL_THRESHOLD:
            return
        start = time.time()
        # Hoist the loop-invariant index of the high-quality crawler.
        last = self.crawler.__FuncCount__ - 1
        for index in range(self.crawler.__FuncCount__):
            func = self.crawler.__Funcs__[index]
            proxies = self.crawler.get_proxies(func)
            if index == last:
                self.redis.add_highly_proxies(proxies)
            else:
                self.redis.add_proxies(proxies)
        end = time.time()
        diff = end - start
        print("save proxies 2 redis consuming:", diff)
class Getter():
    """Run every registered crawl function and add the proxies to Redis."""

    def __init__(self):
        self.redis = RedisClient()
        self.crawler = Crawler()

    def is_over_theshold(self):
        """Return True when the proxy pool has reached POOL_UPPER_THRESHOLD.

        NOTE(review): the method name keeps the original 'theshold' typo so
        existing callers remain unbroken; fix in a coordinated rename later.
        """
        # Direct boolean return replaces the redundant if/else True/False.
        return self.redis.count() >= POOL_UPPER_THRESHOLD

    def run(self):
        """Fetch proxies from each crawl function unless the pool is full."""
        print("获取器开始执行")
        # Guard clause flattens the original nested if-body.
        if self.is_over_theshold():
            return
        # __CrawlFunc__ presumably lists the crawler's crawl-method names,
        # indexed 0..__CrawlFuncCount__-1 — verify against Crawler.
        for callback_label in range(self.crawler.__CrawlFuncCount__):
            callback = self.crawler.__CrawlFunc__[callback_label]
            proxies = self.crawler.get_proxies(callback)
            for proxy in proxies:
                self.redis.add(proxy)