class Detection(object):
    """Check the proxies stored in Redis and delete the ones that fail the check."""

    def __init__(self):
        self.redis = RedisDB()

    @staticmethod
    def _get_loop():
        """Return the current event loop, creating and installing one if none is set."""
        try:
            return asyncio.get_event_loop()
        except RuntimeError:
            # Raised when no loop is set for this thread (always the case on
            # recent Python versions until one is installed explicitly).
            loop = asyncio.new_event_loop()
            asyncio.set_event_loop(loop)
            return loop

    async def detection_proxy(self, proxy, semaphore):
        """Request ``TEST_URL`` through ``proxy`` and delete the proxy if the check fails.

        The proxy is kept only when the response status is 200 and the body
        does not contain the anti-bot marker text. Any other outcome,
        including any exception raised while requesting, removes the proxy
        from ``redis_key``.

        Args:
            proxy: Proxy address without a scheme; ``"http://"`` is prepended
                (presumably ``host:port`` -- confirm against the producer).
            semaphore: Async context manager held for the whole check; used by
                ``main`` to cap the number of concurrent checks.
        """
        async with semaphore:
            # Certificate verification is disabled and the connector is
            # restricted to IPv4.
            # NOTE(review): newer aiohttp releases spell this option
            # ``ssl=False`` -- confirm against the installed version.
            con = aiohttp.TCPConnector(verify_ssl=False, family=socket.AF_INET, limit=60)
            async with aiohttp.ClientSession(connector=con) as session:
                try:
                    test_proxy = "http://" + proxy
                    log.debug("正在测试代理:" + test_proxy)
                    async with session.get(TEST_URL, proxy=test_proxy, timeout=7) as response:
                        html = await response.text()
                        if response.status == 200 and '检测到有异常请求' not in html:
                            log.debug("\n" + proxy + " 代理可用")
                        else:
                            self.redis.delete_value(redis_key, proxy)
                            log.debug("已清除失效的代理:" + proxy)
                except Exception:
                    # Best effort: every failure (timeout, connection error,
                    # decoding error, ...) counts as a dead proxy.
                    self.redis.delete_value(redis_key, proxy)
                    log.debug("\n" + proxy + ' 代理请求失败')
                    log.debug("已清除失效的代理:" + proxy)

    def run(self):
        """Load every proxy under ``redis_key`` and check them in batches of ``BATCH_SIZE``."""
        try:
            proxies = self.redis.get_all(redis_key)
            for start in range(0, len(proxies), BATCH_SIZE):
                self.main(proxies[start:start + BATCH_SIZE])
        except Exception as e:
            # NOTE(review): if ``log`` is a stdlib logger, the extra positional
            # argument needs a ``%s`` placeholder in the message -- confirm
            # the ``log`` API before changing this call.
            log.debug("测试发生错误", e.args)

    def main(self, test_proxies):
        """Check one batch of proxies concurrently and block until all checks finish.

        At most five checks run at the same time. An empty batch is a no-op.

        Args:
            test_proxies: Iterable of proxy addresses to check.
        """
        test_proxies = list(test_proxies)
        if not test_proxies:
            # asyncio.wait() raises ValueError on an empty collection.
            return
        loop = self._get_loop()
        semaphore = asyncio.Semaphore(5)
        # asyncio.wait() no longer accepts bare coroutines on current Python
        # versions, so each check is wrapped in a task first.
        tasks = [
            asyncio.ensure_future(self.detection_proxy(proxy, semaphore), loop=loop)
            for proxy in test_proxies
        ]
        loop.run_until_complete(asyncio.wait(tasks))
class Detection(object):
    """Check the proxies stored in Redis against a test URL and delete the failing ones."""

    # Upper bound on proxy checks that may hold the semaphore at once.
    MAX_CONCURRENCY = 10
    # Seconds allowed for one wave of MAX_CONCURRENCY checks
    # (the 5 s request timeout used in get_html plus slack).
    WAVE_TIMEOUT = 6

    def __init__(self):
        self.redis = RedisDB()
        self.test_url = 'https://m.weibo.cn/'
        # Semaphore shared by the tasks of one doing_main() call; None outside of it.
        self._semaphore = None

    @staticmethod
    def _get_loop():
        """Return the current event loop, creating and installing one if none is set."""
        try:
            return asyncio.get_event_loop()
        except RuntimeError:
            # Raised when no loop is set for this thread.
            loop = asyncio.new_event_loop()
            asyncio.set_event_loop(loop)
            return loop

    @tools.debug
    async def get_html(self, root_url, proxy, semaphore):
        """Fetch ``root_url`` through ``proxy`` while holding ``semaphore``.

        Args:
            root_url: URL to request.
            proxy: Proxy address without a scheme; ``"http://"`` is prepended.
            semaphore: Async context manager held for the duration of the request.

        Returns:
            ``(response, html)`` on success, or ``([], [])`` when the request
            times out. Any other exception propagates to the caller.
        """
        try:
            test_proxy = "http://" + proxy
            log.debug("正在测试代理:" + test_proxy)
            async with semaphore:
                response = await requests.get(root_url, proxy=test_proxy, timeout=5)
                html = await response.text()
                return response, html
        except asyncio.TimeoutError:
            return [], []

    @tools.debug
    async def run(self, content_info):
        """Check the proxy ``content_info`` and delete it from Redis if it fails.

        The proxy is kept only when the response status is 200 and the body
        does not contain the anti-bot marker text. A timeout, an empty body,
        or any exception removes it from ``redis_key``.

        Args:
            content_info: Proxy address without a scheme.
        """
        # Inside doing_main() all tasks share one semaphore so the concurrency
        # cap is real; a standalone call gets its own, as before.
        semaphore = getattr(self, "_semaphore", None)
        if semaphore is None:
            semaphore = asyncio.Semaphore(self.MAX_CONCURRENCY)
        try:
            response, html = await self.get_html(self.test_url, content_info, semaphore)
            if html and response:
                if response.status == 200 and '检测到有异常请求' not in html:
                    log.debug("\n" + content_info + " 代理可用")
                else:
                    self.redis.delete_value(redis_key, content_info)
                    log.debug("已清除失效的代理:" + content_info)
            else:
                self.redis.delete_value(redis_key, content_info)
                log.debug("已清除失效的代理:" + content_info)
        except Exception as e:
            # Best effort: any failure counts as a dead proxy.
            print(e)
            self.redis.delete_value(redis_key, content_info)
            log.debug("\n" + content_info + ' 代理请求失败')
            log.debug("已清除失效的代理:" + content_info)

    def doing_main(self):
        """Check every proxy stored under ``redis_key`` and block until done or timed out.

        Checks run at most ``MAX_CONCURRENCY`` at a time. The overall deadline
        is ``WAVE_TIMEOUT`` seconds per wave of ``MAX_CONCURRENCY`` proxies, so
        it equals the original 6 seconds for pools of up to 10 proxies. Checks
        still unfinished at the deadline are cancelled. An empty pool is a no-op.
        """
        task_list = self.redis.get_all(redis_key)
        log.debug('数据库中IP总数{}'.format(len(task_list)))
        if not task_list:
            # asyncio.wait() raises ValueError on an empty collection.
            return

        loop = self._get_loop()
        self._semaphore = asyncio.Semaphore(self.MAX_CONCURRENCY)
        try:
            tasks = [asyncio.ensure_future(self.run(data), loop=loop) for data in task_list]
            # Ceiling division: number of sequential waves the cap produces.
            waves = (len(tasks) + self.MAX_CONCURRENCY - 1) // self.MAX_CONCURRENCY
            _, pending = loop.run_until_complete(
                asyncio.wait(tasks, timeout=self.WAVE_TIMEOUT * waves)
            )
            if pending:
                # Do not leave unfinished tasks dangling on the loop: cancel
                # them and let the loop process the cancellations.
                for leftover in pending:
                    leftover.cancel()
                loop.run_until_complete(asyncio.wait(pending))
        finally:
            self._semaphore = None