예제 #1
0
class Getter():
    """Proxy getter: collects proxies from the crawler and adds them to the pool."""

    def __init__(self, proxy_key=setting.REDIS_KEY):
        """Create the storage client for ``proxy_key`` and the crawler."""
        self.redis = RedisClient(proxy_key=proxy_key)
        self.crawler = Crawler()

    def is_over_threshold(self):
        """Return True when the stored proxy count has reached the pool limit.

        The limit is ``setting.POOL_UPPER_THRESHOLD``; a count equal to the
        limit already counts as "over".
        """
        return bool(self.redis.get_count() >= setting.POOL_UPPER_THRESHOLD)

    def run(self):
        """Run every registered crawl function and store the proxies it yields.

        Nothing is crawled when the pool is already at or above its limit.
        Each proxy is added with ``setting.INITIAL_SCORE``.
        """
        logger.info('代理获取器开始执行')
        if self.is_over_threshold():
            return

        for index in range(self.crawler.__CrawlFuncCount__):
            crawl_func = self.crawler.__CrawlFunc__[index]

            # Fetch the proxies produced by this crawl function.
            fetched = self.crawler.get_proxies(crawl_func)
            sys.stdout.flush()
            for proxy in fetched:
                self.redis.add_proxy(proxy, setting.INITIAL_SCORE)
예제 #2
0
class TestRedisClient(unittest.TestCase):
    """Tests for RedisClient, run against the key ``TEST_REDIS_KEY``."""

    def __init__(self, *args, **kwargs):
        super(TestRedisClient, self).__init__(*args, **kwargs)
        self.redis = RedisClient(proxy_key=TEST_REDIS_KEY)

    def setUp(self):
        """Clear the test key before each test and emit a blank line."""
        self.redis.clear()
        print('')

    def tearDown(self):
        """Clear the test key after each test."""
        self.redis.clear()

    def test_add_proxy(self):
        """Of the two adds, only the ``host:port`` entry is expected to be stored."""
        self.redis.add_proxy('0.0.0.0:0', 10)
        self.redis.add_proxy('0.0.0.0', 10)
        self.assertTrue(self.redis.exist_proxy('0.0.0.0:0'))
        self.assertEqual(self.redis.get_count(), 1)

    def test_get_random(self):
        """Check get_random with and without a max-score proxy present."""
        total = 10
        for port in range(total):
            self.redis.add_proxy('0.0.0.0:{}'.format(port), port)
        self.redis.set_proxy_max('0.0.0.0:0')

        # The proxy returned is the one holding the maximum score.
        self.assertEqual(self.redis.get_random(), '0.0.0.0:0')

        # With no proxy at the maximum score, a random proxy from the
        # top 100 is expected to be returned (printed here, not asserted).
        sample_count = 5
        self.redis.decrease_proxy('0.0.0.0:0')
        print('不存在分数最大代理时,随机获取', sample_count, '个代理:')
        for _ in range(sample_count):
            print(self.redis.get_random())