class Tester(object): def __init__(self): self.redis = RedisClient() self.headers = { 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36', } async def test_single_proxy(self, proxy): """ 测试单个代理 :param proxy: :return: """ conn = aiohttp.TCPConnector(verify_ssl=False) async with aiohttp.ClientSession(connector=conn) as session: try: if isinstance(proxy, bytes): proxy = proxy.decode('utf-8') real_proxy = 'http://' + proxy # real_proxy = 'https://' + proxy print('正在测试', proxy) async with session.get(url=TEST_URL, proxy=real_proxy, headers=self.headers, timeout=15, allow_redirects=False) as response: if response.status in VALID_STATUS_CODES: self.redis.max(proxy) print('代理可用', proxy) else: self.redis.decrease(proxy) print('请求响应码不合法 ', response.status, 'IP', proxy) except (ClientError, aiohttp.ClientConnectorError, asyncio.TimeoutError, AttributeError): self.redis.decrease(proxy) print('代理请求失败', proxy) def run(self): """ 测试主函数 :return: """ print('测试器开始运行') count = self.redis.count() print('当前剩余', count, '个代理') # 每次运行本测试单元,应该先将库存里满分的代理取出来测试,剔除无效代理,保证开启线程池后提供的代理即是可用的; useful_ip = self.redis.all_useful() if useful_ip: count_usefully = len(useful_ip) print('第一个有用的代理: {}'.format(useful_ip[0]), '共{}个'.format(count_usefully)) for i in range(0, count_usefully, BATCH_TEST_SIZE): start = i stop = min(i + BATCH_TEST_SIZE, count_usefully) print('正在测试第', start + 1, '-', stop, '个代理(usefully)') self.batch_proxies(useful_ip[start: stop + 1]) else: print('当前无可用代理,请等待...') for i in range(0, count, BATCH_TEST_SIZE): start = i stop = min(i + BATCH_TEST_SIZE, count) print('正在测试第', start + 1, '-', stop, '个代理(normally)') test_proxies = self.redis.batch(start, stop) self.batch_proxies(test_proxies) def batch_proxies(self, test_proxies): try: loop = asyncio.get_event_loop() tasks = [self.test_single_proxy(proxy) for proxy in test_proxies] loop.run_until_complete(asyncio.wait(tasks)) sys.stdout.flush() time.sleep(5) except Exception as e: print('测试器发生错误', e.args)