Example #1
from multiprocessing.pool import ThreadPool

# RedisClient, fuzz_all, proxy_is_useful and AsyncTask are project-specific
# helpers defined elsewhere in the source project.


class ProxyRefresh:

    def __init__(self, proxy_type='https'):
        if proxy_type == 'https':
            self.redis_handler = RedisClient('https_proxy')
        elif proxy_type == 'http':
            self.redis_handler = RedisClient('http_proxy')
        else:
            raise ValueError('proxy_type must be https or http')
        self.proxy_type = proxy_type
        # Seed the candidate pool with fuzzed addresses plus everything
        # already stored in Redis.
        self.proxy_pool = set([*fuzz_all(), *self.redis_handler.get_all()])

    def refresh(self, pool_num=10):
        # Validate every candidate concurrently with a small thread pool.
        pool = ThreadPool(pool_num)
        pool.map(self.valid_ip, self.proxy_pool)
        pool.close()
        pool.join()

    def refresh_in_async(self):
        # Same validation, but queued through the project's AsyncTask helper.
        asynctask = AsyncTask()
        for ip in self.proxy_pool:
            asynctask.add_task(self.valid_ip, ip)
        asynctask.run()

    def valid_ip(self, ip):
        # Keep proxies that still work; evict dead ones from Redis.
        if proxy_is_useful(ip, self.proxy_type):
            self.redis_handler.add(ip)
            print('ok', ip)
        else:
            self.redis_handler.delete(ip)
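
The example above leans on several project-specific helpers whose definitions are not shown: RedisClient, fuzz_all, proxy_is_useful and AsyncTask. As a reading aid only, here is a minimal sketch of what the Redis-backed parts might look like; every name and signature below is an assumption, not the source project's actual API.

# Hypothetical stand-ins for the helpers used above. The real project ships
# its own implementations, so treat these bodies purely as assumptions.
import redis
import requests

class RedisClient:
    def __init__(self, key='https_proxy'):
        # Assumed storage scheme: one Redis set per proxy type.
        self.key = key
        self.db = redis.StrictRedis(host='localhost', port=6379,
                                    decode_responses=True)

    def add(self, ip):
        self.db.sadd(self.key, ip)

    def delete(self, ip):
        self.db.srem(self.key, ip)

    def get_all(self):
        return self.db.smembers(self.key)

def proxy_is_useful(ip, proxy_type='https'):
    # Assumed check: route a probe request through the proxy and see
    # whether it answers in time.
    proxies = {proxy_type: '%s://%s' % (proxy_type, ip)}
    try:
        resp = requests.get('https://httpbin.org/ip',
                            proxies=proxies, timeout=5)
        return resp.status_code == 200
    except requests.RequestException:
        return False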
Example #2
class SaveIp:
    def __init__(self):
        self.redis = RedisClient()
        self.crawler = Crawler()

    def is_over_threshold(self):
        """Check whether the proxy pool has reached its size limit."""
        return self.redis.count() >= POOL_UPPER_THRESHOLD

    def run(self):
        print('Getter starts running')
        if not self.is_over_threshold():
            proxies = self.crawler.run()
            for proxy in proxies:
                print(proxy, 'saving to pool')
                self.redis.add(proxy)
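
SaveIp is the simpler getter: a single crawler.run() call yields all proxies at once. A one-line usage sketch, assuming POOL_UPPER_THRESHOLD comes from the project's settings:

SaveIp().run()  # fetch new proxies once, unless the pool is already full

The Getter class below does the same job but enumerates the crawler's registered crawl functions one by one.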
class Getter:
    def __init__(self):
        self.redis = RedisClient()
        self.crawler = Crawler()

    def is_over_threshold(self):
        """Check whether the proxy pool has reached its size limit."""
        return self.redis.count() >= POOL_UPPER_THRESHOLD

    def run(self):
        print("Getter starts running")
        if not self.is_over_threshold():
            # __CrawlFuncCount__ and __CrawlFunc__ are registered on the
            # crawler by its metaclass; iterate every crawl function.
            for callback_label in range(self.crawler.__CrawlFuncCount__):
                callback = self.crawler.__CrawlFunc__[callback_label]
                proxies = self.crawler.get_proxies(callback)
                for proxy in proxies:
                    self.redis.add(proxy)
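
Finally, a hedged scheduling sketch: in proxy-pool designs like this one, the getter is typically re-run on a fixed interval so the pool stays topped up. The constant name GETTER_CYCLE below is an assumption for illustration, not part of the snippets above.

import time

GETTER_CYCLE = 300  # seconds between runs; name and value are assumptions

def schedule_getter(cycle=GETTER_CYCLE):
    # Re-run the getter forever; each run is a no-op once the pool
    # has reached POOL_UPPER_THRESHOLD.
    getter = Getter()
    while True:
        getter.run()
        time.sleep(cycle)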