class ValidityTester(object):
    """Probe raw proxies against ``test_api`` and store the ones that work.

    Proxies are supplied through :meth:`set_raw_proxies`; :meth:`test` then
    checks all of them concurrently and saves every proxy that answers with
    HTTP 200 through ``self._conn.put``.
    """

    # URL requested through each proxy to decide whether the proxy works.
    test_api = TEST_API

    def __init__(self):
        # Proxies waiting to be tested; stays None until set_raw_proxies().
        self._raw_proxies = None
        # Not read or written by any method of this class.
        self._usable_proxies = []

    def set_raw_proxies(self, proxies):
        """Register the proxies to test and create the storage client.

        :param proxies: iterable of proxy addresses without a URL scheme,
            each given as ``str`` or UTF-8 encoded ``bytes``.
        """
        self._raw_proxies = proxies
        self._conn = RedisClient()

    async def test_single_proxy(self, proxy):
        """Test one proxy and, if it is valid, store it via ``self._conn``.

        :param proxy: proxy address without scheme (``str`` or ``bytes``).
        """
        try:
            async with aiohttp.ClientSession() as session:
                try:
                    if isinstance(proxy, bytes):
                        proxy = proxy.decode('utf-8')
                    real_proxy = 'http://' + proxy
                    print('Testing', proxy)
                    async with session.get(self.test_api, proxy=real_proxy,
                                           timeout=get_proxy_timeout) as response:
                        if response.status == 200:
                            self._conn.put(proxy)
                            print('Valid proxy', proxy)
                # NOTE(review): TimeoutError must be the class aiohttp raises
                # (asyncio.TimeoutError before Python 3.11) - confirm import.
                except (ProxyConnectionError, TimeoutError, ValueError):
                    print('Invalid proxy', proxy)
        except (ServerDisconnectedError, ClientResponseError, ClientConnectorError) as s:
            print(s)

    def test(self):
        """Test all raw proxies concurrently on the asyncio event loop.

        Does nothing beyond the start-up message when no proxies are set.
        """
        print('ValidityTester is working')
        try:
            if self._raw_proxies:
                loop = asyncio.get_event_loop()
                # asyncio.wait() no longer accepts bare coroutines (deprecated
                # in 3.8, TypeError from 3.11), so wrap each one in a Task.
                tasks = [loop.create_task(self.test_single_proxy(proxy))
                         for proxy in self._raw_proxies]
                loop.run_until_complete(asyncio.wait(tasks))
        except ValueError:
            print('Async Error')
class ValidityTester(object):
    """Probe raw proxies against ``test_api`` and store the ones that work."""

    # URL requested through each proxy to decide whether the proxy works.
    test_api = TEST_API

    def __init__(self):
        # Proxies waiting to be tested; stays None until set_raw_proxies().
        self._raw_proxies = None
        # Not read or written by any method of this class.
        self._usable_proxies = []

    def set_raw_proxies(self, proxies):
        """Register the proxies to test and create the storage client.

        :param proxies: iterable of proxy addresses without a URL scheme,
            each given as ``str`` or UTF-8 encoded ``bytes``.
        """
        self._raw_proxies = proxies
        self._conn = RedisClient()

    async def test_single_proxy(self, proxy):
        """Test one proxy and, if it is valid, store it via ``self._conn``.

        Only proxy-connection, timeout and value errors are handled here;
        any other exception propagates out of the coroutine.

        :param proxy: proxy address without scheme (``str`` or ``bytes``).
        """
        async with aiohttp.ClientSession() as session:
            try:
                if isinstance(proxy, bytes):
                    proxy = proxy.decode('utf-8')
                real_proxy = 'http://' + proxy
                print('Testing ' + real_proxy)
                # Fetch the test URL through the proxy; a 200 response means
                # the proxy is usable. (The original note says the test URL
                # is Baidu - TODO confirm against the TEST_API setting.)
                async with session.get(self.test_api, proxy=real_proxy,
                                       timeout=15) as response:
                    if response.status == 200:
                        self._conn.put(proxy)
                        print('Vaild proxy', proxy)
            except (ProxyConnectionError, TimeoutError, ValueError):
                print('Invaild proxy', proxy)

    def test(self):
        """Test all raw proxies concurrently on the asyncio event loop.

        Returns immediately when no proxies have been set, instead of
        failing while iterating ``None``.
        """
        if not self._raw_proxies:
            return
        try:
            loop = asyncio.get_event_loop()
            # asyncio.wait() no longer accepts bare coroutines (deprecated
            # in 3.8, TypeError from 3.11), so wrap each one in a Task.
            tasks = [loop.create_task(self.test_single_proxy(proxy))
                     for proxy in self._raw_proxies]
            loop.run_until_complete(asyncio.wait(tasks))
        except ValueError:
            print('asyncio error')
class ValidityTester(object):
    """Check proxies through ``test_api`` and keep those that respond.

    Call :meth:`set_raw_proxies` first, then :meth:`test`; every proxy that
    returns HTTP 200 is saved with ``self._conn.put``.
    """

    # URL fetched through each candidate proxy.
    test_api = TEST_API

    def __init__(self):
        # Candidates to test; None until set_raw_proxies() is called.
        self._raw_proxies = None
        # Not read or written by any method of this class.
        self._usable_proxies = []

    def set_raw_proxies(self, proxies):
        """Store the candidates and create the ``RedisClient`` used to save hits.

        :param proxies: iterable of proxy addresses without a URL scheme
            (``str`` or UTF-8 ``bytes``).
        """
        self._raw_proxies = proxies
        self._conn = RedisClient()

    async def test_single_proxy(self, proxy):
        """Test one proxy; a 200 response saves it via ``self._conn.put``.

        :param proxy: proxy address without scheme (``str`` or ``bytes``).
        """
        try:
            async with aiohttp.ClientSession() as session:
                try:
                    if isinstance(proxy, bytes):
                        proxy = proxy.decode('utf-8')
                    real_proxy = 'http://' + proxy
                    print('Testing', proxy)
                    async with session.get(self.test_api, proxy=real_proxy,
                                           timeout=get_proxy_timeout) as response:
                        if response.status == 200:
                            self._conn.put(proxy)
                            print('Valid proxy', proxy)
                except (ProxyConnectionError, TimeoutError, ValueError):
                    print('Invalid proxy', proxy)
        except (ServerDisconnectedError, ClientResponseError, ClientConnectorError) as s:
            # Connection-level failures are only reported.
            print(s)

    def test(self):
        """Run :meth:`test_single_proxy` for every raw proxy concurrently.

        When no proxies have been set (``None`` or empty) only the start-up
        message is printed.
        """
        print('ValidityTester is working')
        if not self._raw_proxies:
            return
        try:
            loop = asyncio.get_event_loop()
            # Bare coroutines are rejected by asyncio.wait() on Python 3.11+
            # (deprecated since 3.8); schedule them as Tasks explicitly.
            tasks = [loop.create_task(self.test_single_proxy(proxy))
                     for proxy in self._raw_proxies]
            loop.run_until_complete(asyncio.wait(tasks))
        except ValueError:
            print('Async Error')
class ValidityTester(object):
    """Check proxies through ``test_api`` and keep those that respond."""

    # URL fetched through each candidate proxy.
    test_api = TEST_API

    def __init__(self):
        # Candidates to test; None until set_raw_proxies() is called.
        self._raw_proxies = None
        # Not read or written by any method of this class.
        self._usable_proxies = []

    def set_raw_proxies(self, proxies):
        """Store the candidates and create the ``RedisClient`` used to save hits.

        :param proxies: iterable of proxy addresses without a URL scheme
            (``str`` or UTF-8 ``bytes``).
        """
        self._raw_proxies = proxies
        self._conn = RedisClient()

    async def test_single_proxy(self, proxy):
        """Coroutine: test one proxy; on HTTP 200 save it via ``self._conn.put``.

        :param proxy: proxy address without scheme (``str`` or ``bytes``).
        """
        try:
            # aiohttp session: the asynchronous counterpart of
            # requests.Session(), used to issue the request.
            async with aiohttp.ClientSession() as session:
                try:
                    # Proxies may arrive as bytes; normalise to str.
                    if isinstance(proxy, bytes):
                        proxy = proxy.decode('utf-8')
                    real_proxy = 'http://' + proxy
                    print('Testing', proxy)
                    # Fetch the test URL through the proxy under test.
                    async with session.get(self.test_api, proxy=real_proxy,
                                           timeout=get_proxy_timeout) as response:
                        if response.status == 200:
                            self._conn.put(proxy)
                            print('Valid proxy', proxy)
                except (ProxyConnectionError, TimeoutError, ValueError):
                    print('Invalid proxy', proxy)
        except (ServerDisconnectedError, ClientResponseError, ClientConnectorError) as s:
            print(s)

    def test(self):
        """Test all raw proxies concurrently on the asyncio event loop.

        When no proxies have been set (``None`` or empty) only the start-up
        message is printed; previously this case raised from inside the
        method.
        """
        print('ValidityTester is working')
        if not self._raw_proxies:
            return
        # Obtain the event loop that will drive the coroutines.
        loop = asyncio.get_event_loop()
        # One Task per proxy. asyncio.wait() needs Tasks/Futures: bare
        # coroutines are deprecated since 3.8 and rejected from 3.11.
        tasks = [loop.create_task(self.test_single_proxy(proxy))
                 for proxy in self._raw_proxies]
        # Run every task to completion, interleaved on the loop.
        loop.run_until_complete(asyncio.wait(tasks))
class ApiTestCase(unittest.TestCase):
    """Tests for the ``/get`` and ``/count`` endpoints of ``app``.

    Each test seeds the store through a ``RedisClient`` and inspects the
    endpoint response body; the store is flushed after every test.
    """

    def setUp(self):
        # Test client for the web app plus a direct handle on the store.
        self._app = app.test_client()
        self._conn = RedisClient()

    def tearDown(self):
        # Empty the store so tests do not see each other's data.
        self._conn.flush()

    def test_get(self):
        """Successive ``/get`` calls are expected to yield 'aaa' then 'bbb'."""
        self._conn.put('aaa')
        self._conn.put('bbb')
        r = self._app.get('/get')
        # unittest assertions are used instead of bare ``assert`` so the
        # checks still run under ``python -O`` and report both operands.
        self.assertIn('aaa', str(r.data))
        r = self._app.get('/get')
        self.assertIn('bbb', str(r.data))

    def test_count(self):
        """``/count`` is expected to track puts and pops on the store."""
        self._conn.put('aaa')
        self._conn.put('bbb')
        r = self._app.get('/count')
        self.assertIn('2', str(r.data))
        self._conn.put('ccc')
        self._conn.put('ddd')
        r = self._app.get('/count')
        self.assertIn('4', str(r.data))
        # Remove one entry; only the effect on the count matters here.
        self._conn.pop()
        r = self._app.get('/count')
        self.assertIn('3', str(r.data))
class RedisClientTestCase(unittest.TestCase):
    """Tests for ``RedisClient``: put/pop ordering, length and batch get.

    The store is flushed after every test.
    """

    def setUp(self):
        self._conn = RedisClient()

    def tearDown(self):
        # Empty the store so tests do not see each other's data.
        self._conn.flush()

    def test_put_and_pop(self):
        """A single value that was put is expected back from ``pop``."""
        self._conn.put("label")
        # unittest assertions are used instead of bare ``assert`` so the
        # checks still run under ``python -O`` and report both operands.
        self.assertEqual(self._conn.pop(), "label")

    def test_put_many(self):
        """Values from ``put_many`` are expected to pop in insertion order."""
        self._conn.put_many(['a', 'b'])
        self.assertEqual(self._conn.pop(), "a")
        self.assertEqual(self._conn.pop(), "b")

    def test_len(self):
        """``queue_len`` is expected to equal the number of stored values."""
        self._conn.put_many(['a', 'b', 'c'])
        self.assertEqual(self._conn.queue_len, 3)

    def test_get(self):
        """``get(2)`` is expected to remove two of the four stored values."""
        self._conn.put_many(['a', 'b', 'c', 'd'])
        # The returned values are not inspected; only the length is checked.
        self._conn.get(2)
        self.assertEqual(self._conn.queue_len, 2)
class ValidityTester(object):
    """Test raw proxies through ``test_api`` and save the working ones."""

    # URL fetched through each candidate proxy.
    test_api = TEST_API

    def __init__(self):
        self._raw_proxies = None  # raw (untested) proxies
        self._usable_proxies = []  # usable proxies; unused by this class

    def set_raw_proxies(self, proxies):
        """Keep the received proxies on the instance and create the store client.

        :param proxies: iterable of proxy addresses without a URL scheme
            (``str`` or UTF-8 ``bytes``).
        """
        self._raw_proxies = proxies
        self._conn = RedisClient()

    async def test_single_proxy(self, proxy):
        """Coroutine: test one proxy; on HTTP 200 save it via ``self._conn.put``.

        :param proxy: proxy address without scheme (``str`` or ``bytes``).
        """
        try:
            # Session object used to send the request.
            async with aiohttp.ClientSession() as session:
                try:
                    # Proxies may arrive as bytes; decode them as UTF-8.
                    if isinstance(proxy, bytes):
                        proxy = proxy.decode('utf-8')
                    # Build the full proxy URL.
                    real_proxy = 'http://' + proxy
                    print('Testing', proxy)
                    # GET the test URL through the proxy under test.
                    async with session.get(
                            self.test_api,
                            proxy=real_proxy,
                            timeout=get_proxy_timeout) as response:
                        # Status 200 means the proxy works: store it.
                        if response.status == 200:
                            self._conn.put(proxy)
                            print('Valid proxy', proxy)
                # Connection failure, timeout or bad value: report only.
                except (ProxyConnectionError, TimeoutError, ValueError):
                    print('Invalid proxy', proxy)
        # Errors that escape the inner handler are printed and dropped.
        except (ServerDisconnectedError, ClientResponseError,
                ClientConnectorError) as s:
            print(s)

    def test(self):
        """Test all raw proxies concurrently on the asyncio event loop.

        When no proxies have been set (``None`` or empty) only the start-up
        message is printed.
        """
        print('ValidityTester is working')
        if not self._raw_proxies:
            return
        try:
            # Coroutines cannot run by themselves; an event loop is needed.
            loop = asyncio.get_event_loop()
            # One Task per proxy. asyncio.wait() needs Tasks/Futures: bare
            # coroutines are deprecated since 3.8 and rejected from 3.11.
            tasks = [
                loop.create_task(self.test_single_proxy(proxy))
                for proxy in self._raw_proxies
            ]
            # Drive the loop until every task has finished.
            loop.run_until_complete(asyncio.wait(tasks))
        except ValueError:
            print('Async Error')
class ValidityTester(object):
    """Test raw proxies one after another with ``requests`` and save the working ones."""

    # URL fetched through each candidate proxy.
    test_api = TEST_API

    def __init__(self):
        # Candidates to test; None until set_raw_proxies() is called.
        self._raw_proxies = None
        # Not read or written by any method of this class.
        self._usable_proxies = []

    def set_raw_proxies(self, proxies):
        """Store the candidates and create the ``RedisClient`` used to save hits.

        :param proxies: iterable of proxy URLs of the form
            ``scheme://host:port`` (``str`` or UTF-8 ``bytes``).
        """
        self._raw_proxies = proxies
        self._conn = RedisClient()

    def test_single_proxy(self, proxy):
        """Test one proxy synchronously; on HTTP 200 save it via ``self._conn.put``.

        Any exception (malformed proxy string, network failure, ...) is
        printed and the proxy is reported as unusable.

        :param proxy: proxy URL ``scheme://host:port`` (``str`` or ``bytes``).
        """
        # The former outer handler for aiohttp errors could never run,
        # because the handler below already catches every Exception.
        try:
            if isinstance(proxy, bytes):
                proxy = proxy.decode('utf-8')
            print('正在测试:', proxy)
            # 'scheme://host:port' splits into [scheme, '', '', host, port];
            # a string with fewer parts raises IndexError, handled below.
            pro = re.split(r'[\:\/\/]', proxy)
            proxy_dict = {pro[0]: pro[3] + ':' + pro[4]}
            # Use the class-level test URL so it has a single source.
            response = requests.get(url=self.test_api, proxies=proxy_dict, timeout=5)
            if response.status_code == 200:
                self._conn.put(proxy)
                print('可用代理:', proxy)
        except Exception as e:
            print(e)
            print('不可用代理:', proxy)

    def test(self):
        """Test every raw proxy sequentially.

        Errors raised while iterating the proxies (for example when none
        have been set) are printed rather than propagated.
        """
        print('进行代理可用性测试:')
        try:
            for proxy in self._raw_proxies:
                self.test_single_proxy(proxy)
        except Exception as e:
            print(e)
            print('Async Error!')
class ValidityTester(object):
    """Validate new proxies and periodically re-check the stored ones.

    * :meth:`test` probes the raw proxies set with :meth:`set_raw_proxies`
      and stores those that answer with HTTP 200.
    * :meth:`TimingCheck` probes the proxies loaded by
      :meth:`set_timing_params` and deletes those that fail.
    """

    # URL fetched through each new candidate proxy.
    test_api = TEST_API

    def __init__(self):
        # Candidates to test; None until set_raw_proxies() is called.
        self._raw_proxies = None
        # Not read or written by any method of this class.
        self._usable_proxies = []

    def set_raw_proxies(self, proxies):
        """Store the candidates and create the ``RedisClient`` used to save hits.

        :param proxies: iterable of proxy addresses without a URL scheme
            (``str`` or UTF-8 ``bytes``).
        """
        self._raw_proxies = proxies
        self._conn = RedisClient()

    def set_timing_params(self):
        """Prepare a periodic re-check: load the stored proxies and target URL."""
        self._conn = RedisClient()
        # Fetch every proxy currently held by the store for checking.
        self._all_ips_item = self._conn.getAll()
        self._post_url = ALIE_API

    async def test_single_proxy(self, proxy):
        """Test one proxy; on HTTP 200 save it via ``self._conn.put``.

        :param proxy: proxy address without scheme (``str`` or ``bytes``).
        """
        try:
            async with aiohttp.ClientSession() as session:
                try:
                    if isinstance(proxy, bytes):
                        proxy = proxy.decode('utf-8')
                    real_proxy = 'http://' + proxy
                    print('Testing', proxy)
                    async with session.get(
                            self.test_api,
                            proxy=real_proxy,
                            timeout=get_proxy_timeout) as response:
                        if response.status == 200:
                            self._conn.put(proxy)
                            print('Valid proxy', proxy)
                except (ProxyConnectionError, TimeoutError, ValueError):
                    print('Invalid proxy', proxy)
        except (ServerDisconnectedError, ClientResponseError,
                ClientConnectorError) as s:
            print(s)

    def test(self):
        """Test all raw proxies concurrently on the asyncio event loop.

        When no proxies have been set (``None`` or empty) only the start-up
        message is printed.
        """
        print('ValidityTester is working')
        if not self._raw_proxies:
            return
        try:
            loop = asyncio.get_event_loop()
            # One Task per proxy. asyncio.wait() needs Tasks/Futures: bare
            # coroutines are deprecated since 3.8 and rejected from 3.11.
            tasks = [
                loop.create_task(self.test_single_proxy(proxy))
                for proxy in self._raw_proxies
            ]
            loop.run_until_complete(asyncio.wait(tasks))
        except ValueError:
            print('Async Error')

    async def TimingCheckFunction(self, proxy):
        """Re-check one stored proxy and delete it when it no longer works.

        The proxy is deleted on a non-200 response and on proxy-connection,
        timeout or value errors. On server-disconnect, client-response and
        connector errors it is kept and the error is only printed.

        :param proxy: proxy address without scheme (``str`` or ``bytes``).
        """
        try:
            async with aiohttp.ClientSession() as session:
                try:
                    # Proxies may arrive as bytes; normalise to str.
                    if isinstance(proxy, bytes):
                        proxy = proxy.decode('utf-8')
                    real_proxy = 'http://' + proxy
                    # Pick a User-Agent at random from AGENTS per request.
                    headers = {'User-Agent': choice(AGENTS)}
                    print('Timing Check Async Ip:' + str(proxy))
                    async with session.get(self._post_url,
                                           proxy=real_proxy,
                                           timeout=get_proxy_timeout,
                                           headers=headers) as response:
                        if response.status != 200:
                            self._conn.delete(proxy)
                            print('Delete Old Invalid Proxy', proxy)
                        else:
                            print('Keep Save IP', proxy)
                except (ProxyConnectionError, TimeoutError, ValueError):
                    print('Foreach Delete Invalid Proxy Error', proxy)
                    self._conn.delete(proxy)
        except (ServerDisconnectedError, ClientResponseError,
                ClientConnectorError) as s:
            # The proxy is not deleted on these errors.
            print('-------')
            print(s)

    def TimingCheck(self):
        """Re-check every proxy loaded by :meth:`set_timing_params` concurrently.

        Returns immediately when the loaded list is empty.
        """
        if not self._all_ips_item:
            return
        try:
            loop = asyncio.get_event_loop()
            # TimingCheckFunction re-validates each stored proxy; wrap the
            # coroutines in Tasks because asyncio.wait() requires them.
            tasks = [
                loop.create_task(self.TimingCheckFunction(proxy))
                for proxy in self._all_ips_item
            ]
            loop.run_until_complete(asyncio.wait(tasks))
        except ValueError:
            print('Timing Check Error')
class ValidityTester(object):
    """Proxy validity tester."""

    # URL used for testing.
    test_api = TEST_API

    def __init__(self):
        # Proxies whose validity has not been tested yet.
        self._raw_proxies = None
        # Usable proxies; not read or written by any method of this class.
        self._usable_proxies = []

    def set_raw_proxies(self, proxies):
        """
        Set the list of proxies to be tested and create the storage client.

        :param proxies: iterable of proxy addresses without a URL scheme
            (``str`` or UTF-8 ``bytes``)
        :return: None
        """
        self._raw_proxies = proxies
        self._conn = RedisClient()

    async def test_single_proxy(self, proxy):
        """
        Test one proxy; if it is valid, store it via ``self._conn.put``.

        :param proxy: proxy address without scheme (``str`` or ``bytes``)
        :return: None
        """
        try:
            async with aiohttp.ClientSession() as session:
                try:
                    if isinstance(proxy, bytes):
                        proxy = proxy.decode('utf-8')
                    real_proxy = 'http://' + proxy
                    print('Testing', proxy)
                    async with session.get(
                            self.test_api,
                            proxy=real_proxy,
                            timeout=get_proxy_timeout) as response:
                        if response.status == 200:
                            self._conn.put(proxy)
                            print('Valid proxy', proxy)
                except (ProxyConnectionError, TimeoutError, ValueError,
                        ClientOSError):
                    print('Invalid proxy', proxy)
        # Errors that escape the inner handler also mark the proxy invalid.
        except (ServerDisconnectedError, ClientResponseError,
                ClientConnectorError):
            print('Invalid proxy', proxy)

    def test(self):
        """
        Test all raw proxies concurrently on the asyncio event loop.

        When no proxies have been set (``None`` or empty) only the start-up
        message is printed.
        """
        print('ValidityTester is working')
        if not self._raw_proxies:
            return
        try:
            loop = asyncio.get_event_loop()
            # One Task per proxy. asyncio.wait() needs Tasks/Futures: bare
            # coroutines are deprecated since 3.8 and rejected from 3.11.
            tasks = [
                loop.create_task(self.test_single_proxy(proxy))
                for proxy in self._raw_proxies
            ]
            loop.run_until_complete(asyncio.wait(tasks))
        except ValueError:
            print('Async Error')