class Tester(object):
    """Check stored proxies against ``test_url`` and update their scores.

    Relies on names defined elsewhere in the module: ``RedisClient``,
    ``aiohttp``, ``test_url``, ``status_code`` and ``batch_size``.
    """

    def __init__(self):
        """Create the ``RedisClient`` used to read and score proxies."""
        self.redis = RedisClient()

    async def test_one_proxy(self, proxy):
        """Request ``test_url`` through ``proxy`` and record the outcome.

        Calls ``self.redis.max(proxy)`` when the response status is in
        ``status_code``; calls ``self.redis.decrease(proxy)`` when it is
        not, or when the request raises.

        Args:
            proxy: Proxy address without the ``http://`` prefix. ``bytes``
                values are decoded as UTF-8 before use.
        """
        # NOTE(review): aiohttp 3.x documents ``verify_ssl`` as deprecated in
        # favour of ``ssl=False`` -- confirm the installed version before
        # switching.
        conn = aiohttp.TCPConnector(verify_ssl=False)
        async with aiohttp.ClientSession(connector=conn) as session:
            try:
                if isinstance(proxy, bytes):
                    proxy = proxy.decode('utf-8')
                real_proxy = 'http://' + proxy
                async with session.get(test_url, proxy=real_proxy, timeout=15) as response:
                    if response.status in status_code:
                        self.redis.max(proxy)
                    else:
                        self.redis.decrease(proxy)
            except Exception:
                # ``Exception`` instead of a bare ``except`` so that task
                # cancellation and KeyboardInterrupt/SystemExit propagate
                # instead of being counted as a proxy failure.
                self.redis.decrease(proxy)
                print('false:', proxy)

    def run(self):
        """Test every stored proxy in batches of ``batch_size``.

        Each batch runs concurrently on a private event loop, followed by a
        5 second pause. Any ``Exception`` is printed rather than raised.
        """
        loop = None
        try:
            proxies = self.redis.all()
            loop = asyncio.new_event_loop()
            asyncio.set_event_loop(loop)
            for i in range(0, len(proxies), batch_size):
                test_proxies = proxies[i:i + batch_size]
                # asyncio.wait() no longer accepts bare coroutine objects
                # (TypeError on Python 3.11+), so wrap each one in a Task.
                tasks = [
                    loop.create_task(self.test_one_proxy(proxy))
                    for proxy in test_proxies
                ]
                loop.run_until_complete(asyncio.wait(tasks))
                time.sleep(5)
        except Exception as e:
            print('except:', e.args)
        finally:
            # A new loop is created on every call; close it so repeated
            # runs do not leak the loop's selector and sockets.
            if loop is not None:
                loop.close()
class Getter():
    """Add crawled proxies to Redis while the pool is below ``pool_upper``.

    Relies on ``RedisClient``, ``Crawler`` and ``pool_upper`` defined
    elsewhere in the module.
    """

    def __init__(self):
        """Create the Redis client and the crawler used by ``run``."""
        self.redis = RedisClient()
        self.crawler = Crawler()

    def is_over(self):
        """Return True when ``self.redis.count()`` has reached ``pool_upper``."""
        return self.redis.count() >= pool_upper

    def run(self):
        """Run every registered crawl function and store the proxies found.

        Does nothing when ``is_over()`` is already true. The limit is
        checked once, before crawling starts.
        """
        if self.is_over():
            return
        # Crawl functions are looked up by position in ``__CrawlFunc__``,
        # for positions 0 .. ``__CrawlFuncCount__`` - 1.
        for index in range(self.crawler.__CrawlFuncCount__):
            crawl_func = self.crawler.__CrawlFunc__[index]
            for proxy in self.crawler.get_proxies(crawl_func):
                self.redis.add(proxy)
class Tester(object):
    """Check stored proxies against ``test_url`` and update their scores.

    Relies on names defined elsewhere in the module: ``RedisClient``,
    ``aiohttp``, ``test_url``, ``status_code`` and ``batch_size``.
    """

    def __init__(self):
        """Create the ``RedisClient`` used to read and score proxies."""
        self.redis = RedisClient()

    async def test_one_proxy(self, proxy):
        """Test a single proxy and record the outcome in Redis.

        Calls ``self.redis.max(proxy)`` when the response status is in
        ``status_code``; calls ``self.redis.decrease(proxy)`` when it is
        not, or when the request raises. Progress is printed at each step.

        Args:
            proxy: Proxy address without the ``http://`` prefix. ``bytes``
                values are decoded as UTF-8 before use.
        """
        # Connector for the session, created with certificate checks off.
        # NOTE(review): aiohttp 3.x documents ``verify_ssl`` as deprecated in
        # favour of ``ssl=False`` -- confirm the installed version before
        # switching.
        conn = aiohttp.TCPConnector(verify_ssl=False)
        async with aiohttp.ClientSession(connector=conn) as session:
            try:
                if isinstance(proxy, bytes):
                    proxy = proxy.decode('utf-8')
                real_proxy = 'http://' + proxy
                print('正在测试', proxy)
                async with session.get(test_url, proxy=real_proxy, timeout=15) as response:
                    if response.status in status_code:
                        self.redis.max(proxy)
                        print('代理可用', proxy)
                    else:
                        self.redis.decrease(proxy)
                        print('请求响应码不合法', proxy)
            except Exception:
                # ``Exception`` instead of a bare ``except`` so that task
                # cancellation and KeyboardInterrupt/SystemExit propagate
                # instead of being counted as a proxy failure.
                self.redis.decrease(proxy)
                print('代理请求失败', proxy)

    def run(self):
        """Main entry point: test every stored proxy in batches.

        Each batch of ``batch_size`` proxies runs concurrently on a private
        event loop, followed by a 5 second pause. Any ``Exception`` is
        printed rather than raised.
        """
        print('测试器开始运行')
        loop = None
        try:
            proxies = self.redis.all()
            # Use a fresh loop rather than the shared default one, to avoid
            # conflicts with an existing event loop.
            loop = asyncio.new_event_loop()
            asyncio.set_event_loop(loop)
            for i in range(0, len(proxies), batch_size):
                test_proxies = proxies[i:i + batch_size]
                # asyncio.wait() no longer accepts bare coroutine objects
                # (TypeError on Python 3.11+), so wrap each one in a Task.
                tasks = [
                    loop.create_task(self.test_one_proxy(proxy))
                    for proxy in test_proxies
                ]
                loop.run_until_complete(asyncio.wait(tasks))
                time.sleep(5)
        except Exception as e:
            print('测试器发生异常', e.args)
        finally:
            # A new loop is created on every call; close it so repeated
            # runs do not leak the loop's selector and sockets.
            if loop is not None:
                loop.close()
def get_conn():
    """Return the ``RedisClient`` stored on ``g``, creating it on first use."""
    if hasattr(g, 'redis'):
        return g.redis
    # No client attached to ``g`` yet: create one and keep it for reuse.
    g.redis = RedisClient()
    return g.redis
def __init__(self):
    """Create a ``RedisClient`` and keep it on the instance as ``self.redis``."""
    client = RedisClient()
    self.redis = client
def __init__(self):
    """Create the instance's ``RedisClient`` and ``Crawler``.

    They are stored as ``self.redis`` and ``self.crawler``; the Redis
    client is constructed first.
    """
    redis_client = RedisClient()
    self.redis = redis_client
    crawler = Crawler()
    self.crawler = crawler