예제 #1
0
class Getter():
    """Fetch proxies through the crawler and store them in Redis."""

    def __init__(self):
        """Create the Redis client and the crawler this getter works with."""
        self.redis = RedisClient()
        self.crawler = Crawler()

    def is_over_threshold(self):
        """
        Report whether the proxy pool has exceeded its size limit.

        :return: True when ``self.redis.count()`` is strictly greater than
            ``POOL_UPPER_THRESHOLD``, otherwise False.
        """
        return self.redis.count() > POOL_UPPER_THRESHOLD

    def run(self):
        """
        Call every registered crawl function once and store what it yields.

        Prints a start-up message, then returns immediately when the pool is
        already over the threshold. Otherwise each entry of
        ``self.crawler.__CrawlFunc__`` (the first ``__CrawlFuncCount__`` of
        them) is passed to ``self.crawler.get_proxies`` and every proxy it
        produces is handed to ``self.redis.add``.

        :return: None
        """
        print('获取器开始')
        # The threshold is checked once, up front; it is not re-checked
        # between crawl functions.
        if self.is_over_threshold():
            return
        for index in range(self.crawler.__CrawlFuncCount__):
            callback = self.crawler.__CrawlFunc__[index]
            # Fetch the proxies produced by this crawl function.
            for proxy in self.crawler.get_proxies(callback):
                self.redis.add(proxy)
예제 #2
0
class Getter(object):
    """
    Companion to the Crawler class.

    Dynamically invokes the crawl functions registered on the crawler
    (the methods whose names start with ``crawl_``) and saves the proxies
    they return to the database.
    """
    def __init__(self):
        """Create the Redis client and the crawler this getter works with."""
        self.redis = RedisClient()
        self.crawler = Crawler()

    def is_over_threshold(self):
        """
        Report whether the proxy pool is full.

        :return: True when ``self.redis.count()`` is greater than or equal
            to ``POOL_UPPER_THRESHOLD``, otherwise False.
        """
        return self.redis.count() >= POOL_UPPER_THRESHOLD

    def _crawl_and_store(self, callbacks, count):
        """
        Run the first ``count`` entries of ``callbacks`` and store the results.

        :param callbacks: indexable collection of crawl callbacks accepted
            by ``self.crawler.get_proxies``.
        :param count: how many entries of ``callbacks`` to run.
        :return: None
        """
        for callback_label in range(count):
            callback = callbacks[callback_label]
            proxies = self.crawler.get_proxies(callback)
            # Flush right after the crawl call and before storing, exactly
            # where the original loops flushed stdout.
            sys.stdout.flush()
            for proxy in proxies:
                self.redis.add(proxy)

    def run(self):
        """
        Fill the pool from one crawler family, unless it is already full.

        When ``CRAWLER_FREE_ENABLED`` is truthy the ``__CrawlFunc__``
        callbacks are used; only otherwise, and only if
        ``CRAWLER_MONEY_ENABLED`` is truthy, the ``__MoneyFunc__`` callbacks
        are used. The two families are never both run in the same call.

        :return: None
        """
        print("获取器开始执行...")
        if self.is_over_threshold():
            return
        if CRAWLER_FREE_ENABLED:
            self._crawl_and_store(self.crawler.__CrawlFunc__,
                                  self.crawler.__CrawlFuncCount__)
        elif CRAWLER_MONEY_ENABLED:
            self._crawl_and_store(self.crawler.__MoneyFunc__,
                                  self.crawler.__MoneyFuncCount__)
예제 #3
0
class Tester(object):
    """定义一个检测模块类"""
    def __init__(self):
        """初始化"""
        self.redis = RedisClient()

    async def test_single_proxy(self, proxy):
        """
        检测单个代理
        :param proxy: 被检测单个代理
        :return: None
        """
        conn = aiohttp.TCPConnector(verify_ssl=False)
        async with aiohttp.ClientSession(connector=conn) as session:
            try:
                if isinstance(proxy, bytes):
                    proxy = proxy.decode("utf-8")
                real_proxy = "http://" + proxy
                print("正在测试代理", proxy)
                async with session.get(TEST_URL, proxy=real_proxy, timeout=15, allow_redirects=False) as response:
                    if response.status in VALID_STATUS_CODES:
                        self.redis.max(proxy)
                        print("代理可用", proxy)
                    else:
                        self.redis.decrease(proxy)
                        print('请求响应码不合法 ', response.status, 'IP', proxy)
            except (ClientError, asyncio.TimeoutError, AttributeError, ConnectionError):
                self.redis.decrease(proxy)
                print("代理请求失败", proxy)

    def run(self):
        """
        测试主函数
        :return: None
        """
        print("测试器开始启动......")
        try:
            proxies = self.redis.all()
            loop = asyncio.get_event_loop()
            # 采用批量测试避免一次性测试全部代理导致内存开销过大问题
            for i in range(0, len(proxies), BATCH_TEST_SIZE):
                test_proxies = proxies[i: i+BATCH_TEST_SIZE]
                tasks = [self.test_single_proxy(proxy) for proxy in test_proxies]
                loop.run_until_complete(asyncio.wait(tasks))
                sys.stdout.flush()
                time.sleep(5)
        except Exception as e:
            print("测试器出错了,%s" % e.args)
예제 #4
0
 def __init__(self):
     """Initialise the instance with a Redis client and a crawler."""
     # RedisClient and Crawler are defined outside this snippet.
     self.redis = RedisClient()
     self.crawler = Crawler()
예제 #5
0
파일: api.py 프로젝트: maxnoodles/ip_pool
def get_conn():
    """
    Return the Redis client cached on ``g``, creating it on first use.

    ``g`` is presumably Flask's application-context global - confirm against
    this module's imports.

    :return: the ``RedisClient`` instance stored as ``g.redis``.
    """
    if not hasattr(g, 'redis'):
        g.redis = RedisClient()
    # Hand back the cached client; constructing a second RedisClient here
    # would make the cache above pointless.
    return g.redis
예제 #6
0
 def __init__(self):
     """Initialise the instance with a Redis client."""
     # RedisClient is defined outside this snippet.
     self.redis = RedisClient()