class Getter:
    """Proxy getter: pulls proxies from the crawler and stores them in Redis."""

    def __init__(self, proxy_key=setting.REDIS_KEY):
        self.crawler = Crawler()
        self.redis = RedisClient(proxy_key=proxy_key)

    def is_over_threshold(self):
        """Return True when the stored proxy count has reached the pool limit.

        The limit is ``setting.POOL_UPPER_THRESHOLD``; the current count is
        read from ``self.redis.get_count()``.
        """
        return self.redis.get_count() >= setting.POOL_UPPER_THRESHOLD

    def run(self):
        """Fetch proxies with every crawl callback and add them to Redis.

        Does nothing beyond logging when the pool is already at its limit.
        Each proxy is added with ``setting.INITIAL_SCORE``.
        """
        logger.info('代理获取器开始执行')
        if self.is_over_threshold():
            return

        crawler = self.crawler
        for index in range(crawler.__CrawlFuncCount__):
            crawl_func = crawler.__CrawlFunc__[index]
            # Fetch the proxies produced by this crawl callback.
            fetched = crawler.get_proxies(crawl_func)
            sys.stdout.flush()
            for item in fetched:
                self.redis.add_proxy(item, setting.INITIAL_SCORE)
class TestRedisClient(unittest.TestCase):
    """Tests for RedisClient, run against the TEST_REDIS_KEY key."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.redis = RedisClient(proxy_key=TEST_REDIS_KEY)

    def setUp(self):
        """Clear the test key before each test and print a blank line."""
        self.redis.clear()
        print('')

    def tearDown(self):
        """Clear the test key after each test."""
        self.redis.clear()

    def test_add_proxy(self):
        """Only one of the two added strings is expected to end up stored."""
        with_port = '0.0.0.0:0'
        without_port = '0.0.0.0'
        self.redis.add_proxy(with_port, 10)
        self.redis.add_proxy(without_port, 10)

        self.assertTrue(self.redis.exist_proxy(with_port))
        self.assertEqual(self.redis.get_count(), 1)

    def test_get_random(self):
        """Check get_random() with, and then without, a max-score proxy."""
        target = '0.0.0.0:0'
        total = 10
        for port in range(total):
            self.redis.add_proxy('0.0.0.0:%d' % port, port)
        self.redis.set_proxy_max(target)

        # The proxy returned is the one with the maximum score.
        self.assertEqual(self.redis.get_random(), target)

        # When no proxy holds the maximum score, one of the top-100 proxies
        # is picked at random (output is printed, not asserted).
        times = 5
        self.redis.decrease_proxy(target)
        print('不存在分数最大代理时,随机获取', times, '个代理:')
        for _ in range(times):
            print(self.redis.get_random())