class Test_ip(object):
    """Re-score every proxy held by the Redis client by probing a test URL."""

    def __init__(self):
        self.db = RedisClient()
        self.headers = headers
        self.url = test_url

    def get_url(self, proxy, timeout=10):
        """Report whether the test URL answers 200 through ``proxy``.

        :param proxy: ``requests``-style proxies mapping.
        :param timeout: seconds to wait before giving up; without a timeout
            a dead proxy can block its worker thread indefinitely.
        :return: True on HTTP 200, False on any other status or any failure.
        """
        try:
            con = requests.get(self.url, headers=self.headers,
                               proxies=proxy, timeout=timeout)
        except Exception:
            # Any failure means "unusable". Unlike a bare ``except`` this no
            # longer swallows KeyboardInterrupt / SystemExit.
            return False
        return con.status_code == 200

    def test(self, ip):
        """Probe one proxy and record the outcome in the database.

        :param ip: proxy address as ``host:port`` (bytes or str).
        """
        if isinstance(ip, bytes):
            ip = ip.decode('utf-8')
        proxy = {'http': 'http://' + ip}
        if self.get_url(proxy):
            self.db.max(ip)
        else:
            self.db.decrease(ip)

    def run(self):
        """Start one daemon thread per stored proxy, throttling the spawn rate."""
        proxies = self.db.all()
        for i, ip in enumerate(proxies):
            t = threading.Thread(target=self.test, args=(ip,), daemon=True)
            t.start()
            random_time()
            # Pause on every 100th index (this also fires for the first one).
            if i % 100 == 0:
                time.sleep(5)
class PoolTester(object):
    """Checks every proxy in the pool using a thread pool."""

    def __init__(self):
        self.redis = RedisClient()

    def test_single_proxy(self, proxy):
        """Validate one proxy and update the store accordingly.

        :param proxy: the proxy to validate.
        :return: None
        """
        if not test_proxy_vaild(proxy):
            self.redis.drop(proxy)
            print("[-] 代理不可用", proxy)
            return
        self.redis.max(proxy)
        print("[+] 代理可用", proxy)

    def run(self):
        """Entry point: report the pool size, then validate all proxies.

        :return: None
        """
        print("测试器开始运行.......")
        try:
            remaining = self.redis.count()
            print("当前剩余%d个代理" % remaining)
            # A thread pool is used so proxies are checked concurrently.
            executor = ThreadPoolExecutor(FilterTreadCount)
            with executor:
                executor.map(self.test_single_proxy, self.redis.all())
        except Exception as err:
            print("测试器发生错误", err)
class PoolTester(object):
    """Checks every proxy in the pool using a thread pool, with colored output."""

    def __init__(self):
        self.redis = RedisClient()

    def testSingleProxy(self, proxy):
        """Validate one proxy and update the store accordingly.

        :param proxy: the proxy to validate.
        :return: None
        """
        usable = testProxyVaild(proxy)
        if usable:
            self.redis.max(proxy)
            print(Fore.GREEN + "[+] 代理可用", proxy)
            return
        self.redis.drop(proxy)
        print(Fore.RED + "[-] 代理不可用", proxy)

    def run(self):
        """Entry point: report the pool size, then validate all proxies.

        :return: None
        """
        print(Fore.GREEN + "测试器开始运行.......")
        try:
            remaining = self.redis.count()
            print(Fore.GREEN + "当前剩余%d个代理" % remaining)
            # A thread pool is used so proxies are checked concurrently.
            executor = ThreadPoolExecutor(FILTER_THREAD_COUNT)
            with executor:
                executor.map(self.testSingleProxy, self.redis.all())
        except Exception as err:
            print(Fore.RED + "测试器发生错误", err)
class Tester():
    """Validates the proxies stored in Redis, one at a time."""

    def __init__(self):
        self.redis = RedisClient()

    def test_single_proxy(self, proxy):
        """Request TEST_URL through ``proxy`` and update its score.

        The original request never passed the proxy to ``requests.get``, so
        it only checked that TEST_URL was reachable directly; the request is
        now routed through the proxy under test.

        :param proxy: proxy address as ``host:port`` (bytes or str).
        """
        address = proxy.decode('utf-8') if isinstance(proxy, bytes) else proxy
        # Plain HTTP proxies tunnel HTTPS traffic too, hence the http:// scheme
        # for both keys.
        proxies = {'http': 'http://' + address, 'https': 'http://' + address}
        try:
            response = requests.get(url=TEST_URL, proxies=proxies, timeout=5)
            if response.status_code in VALID_STATUS_CODES:
                # Success: promote the proxy.
                self.redis.max(proxy)
                print('测试成功', proxy, time.strftime('%Y-%m-%d %H:%M'))
            else:
                self.redis.decrease(proxy)
        except RequestException:
            print('代理测试请求异常', proxy)
            self.redis.decrease(proxy)

    def run(self):
        """Fetch every stored proxy and test them sequentially."""
        print('测试器开始 测试代理%d个' % self.redis.count(),
              time.strftime('%Y-%m-%d %H-%M'))
        for proxy in self.redis.all():
            self.test_single_proxy(proxy)
class Tester(object):
    """Asynchronously validates the proxies stored in Redis, in batches."""

    def __init__(self):
        self.redis = RedisClient()

    async def test_single_proxy(self, proxy):
        """Request TEST_URL through ``proxy`` and update its score.

        :param proxy: proxy address as ``host:port`` (bytes or str).
        :return: None
        """
        conn = aiohttp.TCPConnector(verify_ssl=False)
        async with aiohttp.ClientSession(connector=conn) as session:
            try:
                if isinstance(proxy, bytes):
                    proxy = proxy.decode('utf-8')
                real_proxy = 'http://' + proxy
                print('正在测试', proxy)
                async with session.get(TEST_URL, proxy=real_proxy, timeout=15) as response:
                    if response.status in VALID_STATUS_CODES:
                        self.redis.max(proxy)
                        print("代理可用", proxy)
                    else:
                        self.redis.decrease(proxy)
                        print("请求响应码不合法", proxy)
            except Exception:
                # Any failure while probing counts against the proxy.
                self.redis.decrease(proxy)
                print("代理请求失败", proxy)

    def run(self):
        """Test all stored proxies, BATCH_TEST_SIZE at a time, pausing between batches."""
        print("测试器开始运行")
        try:
            proxies = self.redis.all()
            loop = asyncio.get_event_loop()
            for i in range(0, len(proxies), BATCH_TEST_SIZE):
                batch = proxies[i:i + BATCH_TEST_SIZE]
                # asyncio.wait() rejects bare coroutines on Python 3.11+, so
                # each one is wrapped in a Task first.
                tasks = [loop.create_task(self.test_single_proxy(proxy)) for proxy in batch]
                loop.run_until_complete(asyncio.wait(tasks))
                time.sleep(5)
        except Exception as e:
            print("测试器发生错误", e.args)
class Tester(object):
    """Asynchronously validates stored proxies against every URL in TEST_URL."""

    def __init__(self):
        self.redis = RedisClient()

    async def test_single_proxy(self, proxy):
        """Request each test URL through ``proxy`` and update its score.

        :param proxy: proxy address as ``host:port`` (bytes or str).
        :return: None
        """
        conn = aiohttp.TCPConnector(verify_ssl=False)
        async with aiohttp.ClientSession(connector=conn) as session:
            try:
                if isinstance(proxy, bytes):
                    proxy = proxy.decode('utf-8')
                print('正在测试', proxy)
                # The proxy URL does not depend on the target, so build it once.
                real_proxy = 'http://' + proxy
                for url in TEST_URL:
                    async with session.get(url, proxy=real_proxy, timeout=15,
                                           allow_redirects=False) as response:
                        if response.status in VALID_STATUS_CODES:
                            # Pass the target URL's scheme (http / https) along.
                            scheme = re.match(r'http[s]?', url).group()
                            self.redis.max(proxy, http=scheme)
                            print('代理可用', proxy)
                        else:
                            self.redis.decrease(proxy)
                            print('请求响应码不合法 ', response.status, 'IP', proxy)
            except (ClientError, aiohttp.client_exceptions.ClientConnectorError,
                    asyncio.TimeoutError, AttributeError):
                self.redis.decrease(proxy)
                print('代理请求失败', proxy)

    def run(self):
        """Test the stored proxies batch by batch.

        :return: None
        """
        print('测试器开始运行')
        try:
            count = self.redis.count()
            print('当前剩余', count, '个代理')
            loop = asyncio.get_event_loop()
            for start in range(0, count, BATCH_TEST_SIZE):
                stop = min(start + BATCH_TEST_SIZE, count)
                print('正在测试第', start + 1, '-', stop, '个代理')
                test_proxies = self.redis.batch(start, stop)
                # asyncio.wait() rejects bare coroutines on Python 3.11+, so
                # each one is wrapped in a Task first.
                tasks = [loop.create_task(self.test_single_proxy(proxy))
                         for proxy in test_proxies]
                if not tasks:
                    # asyncio.wait() raises ValueError on an empty set.
                    continue
                loop.run_until_complete(asyncio.wait(tasks))
                sys.stdout.flush()
                time.sleep(5)
        except Exception as e:
            print('测试器发生错误', e.args)
class Tester(object):
    """Asynchronously validates the proxies stored in Redis, in batches."""

    def __init__(self):
        self.redis = RedisClient()

    async def test_single_proxy(self, proxy):
        """Request ``test_url`` through ``proxy`` and update its score.

        :param proxy: proxy address as ``host:port`` (bytes or str).
        :return: None
        """
        conn = aiohttp.TCPConnector(verify_ssl=False)
        async with aiohttp.ClientSession(connector=conn) as session:
            try:
                if isinstance(proxy, bytes):
                    proxy = proxy.decode('utf-8')
                real_proxy = 'http://' + proxy
                print('正在测试...')
                async with session.get(test_url, proxy=real_proxy, timeout=15) as response:
                    if response.status in valid_status_codes:
                        # Usable proxy: promote it.
                        self.redis.max(proxy)
                        print('代理可用')
                    else:
                        self.redis.decrease(proxy)
                        print('请求响应码不合法,代理检测失败')
            except Exception:
                self.redis.decrease(proxy)
                print('代理请求失败', proxy)

    def run(self):
        """Test all stored proxies, ``batch_test_size`` at a time.

        :return: None
        """
        print('测试器开始运行>>>>>>')
        try:
            proxie = self.redis.all()
            loop = asyncio.get_event_loop()
            # Each batch runs concurrently on the event loop; while one probe
            # awaits network I/O the others make progress.
            for i in range(0, len(proxie), batch_test_size):
                test_proxies = proxie[i:i + batch_test_size]
                # asyncio.wait() rejects bare coroutines on Python 3.11+, so
                # each one is wrapped in a Task first.
                tasks = [loop.create_task(self.test_single_proxy(proxy))
                         for proxy in test_proxies]
                loop.run_until_complete(asyncio.wait(tasks))
                time.sleep(5)
            print('检测完成')
        except Exception as e:
            print('测试发生错误!!', e)
class Tester(object):
    """Checks whether the stored proxies are usable."""

    def __init__(self):
        self.redis = RedisClient()

    async def test_single_proxy(self, proxy):
        """Request ``test_url`` through ``proxy`` and update its score.

        :param proxy: proxy address as ``host:port`` (bytes or str).
        :return: None
        """
        # Certificate verification is disabled to avoid SSL errors via proxies.
        conn = aiohttp.TCPConnector(verify_ssl=False)
        async with aiohttp.ClientSession(connector=conn) as session:
            try:
                if isinstance(proxy, bytes):
                    proxy = proxy.decode('utf-8')
                real_proxy = "http://" + proxy
                print("正在尝试:", real_proxy)
                async with session.get(test_url, proxy=real_proxy,
                                       timeout=15) as response:
                    if response.status in VALID_STATUS_CODES:
                        self.redis.max(proxy)
                        print("代理可用", proxy)
                    else:
                        self.redis.decrease(proxy)
                        print("请求响应不合法", proxy)
            except Exception as e:
                self.redis.decrease(proxy)
                print("代理请求失败", proxy, e.args)

    def run(self):
        """Test the stored proxies batch by batch.

        :return: None
        """
        print('测试器开始运行')
        try:
            count = self.redis.count()
            print('当前剩余', count, '个代理')
            loop = asyncio.get_event_loop()
            for start in range(0, count, BATCH_TEST_SIZE):
                stop = min(start + BATCH_TEST_SIZE, count)
                print('正在测试第', start + 1, '-', stop, '个代理')
                test_proxies = self.redis.batch(start, stop)
                # asyncio.wait() rejects bare coroutines on Python 3.11+, so
                # each one is wrapped in a Task first.
                tasks = [loop.create_task(self.test_single_proxy(proxy))
                         for proxy in test_proxies]
                if not tasks:
                    # asyncio.wait() raises ValueError on an empty set.
                    continue
                loop.run_until_complete(asyncio.wait(tasks))
                sys.stdout.flush()
                time.sleep(5)
        except Exception as e:
            print('测试器发生错误', e.args)
class Tester(object):
    """Asynchronously validates the proxies stored in Redis, in batches."""

    def __init__(self):
        self.redis = RedisClient()

    async def test_single_proxy(self, proxy):
        """Request TEST_URL through ``proxy`` and update its score.

        :param proxy: proxy address as ``host:port`` (bytes or str).
        :return: None
        """
        async with aiohttp.ClientSession() as session:
            try:
                if isinstance(proxy, bytes):
                    proxy = proxy.decode('utf-8')
                real_proxy = 'http://' + proxy
                print('正在测试', proxy)
                async with session.get(TEST_URL, proxy=real_proxy, verify_ssl=False,
                                       timeout=60, allow_redirects=False) as response:
                    if response.status in VALID_STATUS_CODES:
                        self.redis.max(proxy)
                        print('代理可用', proxy)
                    else:
                        self.redis.decrease(proxy)
                        print('请求响应码不合法', proxy)
            except (ClientError, aiohttp.client_exceptions.ClientProxyConnectionError,
                    asyncio.TimeoutError, AttributeError) as e:
                print(e.args)
                self.redis.decrease(proxy)
                print('代理请求失败', proxy)

    def run(self):
        """Test the stored proxies batch by batch.

        :return: None
        """
        print('测试器开始运行')
        try:
            count = self.redis.count()
            print('当前剩余', count, '个代理')
            loop = asyncio.get_event_loop()
            for start in range(0, count, BATCH_TEST_SIZE):
                stop = min(start + BATCH_TEST_SIZE, count)
                print('正在测试第', start + 1, '-', stop, '个代理')
                test_proxies = self.redis.batch(start, stop)
                # asyncio.wait() rejects bare coroutines on Python 3.11+, so
                # each one is wrapped in a Task first.
                tasks = [loop.create_task(self.test_single_proxy(proxy))
                         for proxy in test_proxies]
                if not tasks:
                    # asyncio.wait() raises ValueError on an empty set.
                    continue
                loop.run_until_complete(asyncio.wait(tasks))
                # Flush so progress shows up batch by batch.
                sys.stdout.flush()
                time.sleep(5)
        except Exception as e:
            print('测试台发生错误', e.args)
class Tester(object):
    """Asynchronously validates the proxies stored in Redis, in batches."""

    def __init__(self):
        self.redis = RedisClient()

    async def test_single_proxy(self, proxy):
        """Request TEST_URL through ``proxy`` and update its score.

        :param proxy: proxy address as ``host:port`` (bytes or str).
        :return: None
        """
        conn = aiohttp.TCPConnector(verify_ssl=False)
        async with aiohttp.ClientSession(connector=conn) as session:
            try:
                if isinstance(proxy, bytes):
                    proxy = proxy.decode('utf-8')
                real_proxy = 'http://' + proxy
                print('正在测试', proxy)
                async with session.get(TEST_URL, proxy=real_proxy, timeout=15) as response:
                    if response.status in VALID_STATUS_CODES:
                        self.redis.max(proxy)
                    else:
                        self.redis.decrease(proxy)
                        print('请求响应码不合法', proxy)
            except (ClientError, aiohttp.client_exceptions.ClientConnectorError,
                    asyncio.TimeoutError, AttributeError):
                self.redis.decrease(proxy)
                print('代理请求失败', proxy)

    def run(self):
        """Test all stored proxies, BATCH_TEST_SIZE at a time.

        :return: None
        """
        print('测试器开始运行')
        try:
            proxies = self.redis.all()
            loop = asyncio.get_event_loop()
            # Size the loop from the snapshot being sliced: a separate
            # redis.count() call can disagree with it if the pool changes
            # between the two reads.
            count = len(proxies)
            print(count)
            for i in range(0, count, BATCH_TEST_SIZE):
                test_proxies = proxies[i:i + BATCH_TEST_SIZE]
                # asyncio.wait() rejects bare coroutines on Python 3.11+, so
                # each one is wrapped in a Task first.
                tasks = [loop.create_task(self.test_single_proxy(proxy))
                         for proxy in test_proxies]
                loop.run_until_complete(asyncio.wait(tasks))
                time.sleep(5)
        except Exception as e:
            print('测试器发生错误', e.args)
class Tester:
    """Asynchronously validates the proxies stored in Redis, in batches."""

    def __init__(self):
        self.redis = RedisClient()

    async def test_single_proxy(self, proxy):
        """Request ``settings.target_url`` through ``proxy`` and update its score.

        A non-200 answer used to decrement the proxy and then raise, which the
        surrounding ``except Exception`` caught and decremented a second time.
        Each failure now costs exactly one decrement, and the response is
        released through ``async with``.

        :param proxy: proxy address as ``host:port`` (bytes or str).
        :return: None
        """
        if isinstance(proxy, bytes):
            proxy = proxy.decode('utf-8')
        real_proxy = 'http://' + proxy
        conn = aiohttp.TCPConnector(verify_ssl=False)
        try:
            async with aiohttp.ClientSession(headers=settings.headers,
                                             connector=conn) as session:
                print('正在测试', proxy)
                async with session.get(settings.target_url, proxy=real_proxy,
                                       timeout=5) as rsp:
                    if rsp.status == 200:
                        self.redis.max(proxy)
                        print('代理可用', proxy)
                    else:
                        self.redis.decrease(proxy)
                        print('代理请求失败', proxy)
        except Exception as e:
            self.redis.decrease(proxy)
            print('代理请求失败', proxy)
            # Show the error itself; __cause__ is None unless explicitly chained.
            print(repr(e))

    def run(self):
        """Test all stored proxies, ``settings.test_request_count`` at a time.

        :return: None
        """
        print('开始测试...')
        try:
            proxies = self.redis.all()
            loop = asyncio.get_event_loop()
            step = settings.test_request_count
            for i in range(0, len(proxies), step):
                test_proxies = proxies[i:i + step]
                # asyncio.wait() rejects bare coroutines on Python 3.11+, so
                # each one is wrapped in a Task first.
                task = [loop.create_task(self.test_single_proxy(proxy))
                        for proxy in test_proxies]
                loop.run_until_complete(asyncio.wait(task))
                time.sleep(5)
        except Exception as e:
            print('测试出现异常', e.args)
class Tester(object):
    """Validates stored proxies, asynchronously (``run``) or one by one (``new_run``)."""

    def __init__(self):
        self.redis = RedisClient()

    async def test_single_proxy(self, proxy):
        """Request TEST_URL through ``proxy`` with aiohttp and update its score.

        :param proxy: proxy address as ``host:port`` (bytes or str).
        """
        conn = aiohttp.TCPConnector(verify_ssl=False)
        async with aiohttp.ClientSession(connector=conn) as session:
            try:
                if isinstance(proxy, bytes):
                    proxy = proxy.decode('utf-8')
                real_proxy = 'http://' + proxy
                print('正在测试', proxy)
                async with session.get(TEST_URL, proxy=real_proxy, timeout=10) as response:
                    if response.status in VALID_STATUS_CODES:
                        self.redis.max(proxy)
                    else:
                        self.redis.decrease(proxy)
                        print('请求响应不合法', proxy)
            except (ClientConnectionError, ClientError, ConnectTimeout,
                    asyncio.TimeoutError):
                # asyncio.TimeoutError is what an aiohttp timeout raises; it
                # was not caught before, so timed-out proxies kept their score.
                self.redis.decrease(proxy)
                print('代理请求失败', proxy)

    def run(self):
        """Test all stored proxies asynchronously, BATCH_TEST_SIZE at a time."""
        print('测试器开始运行')
        try:
            proxies = self.redis.all()
            loop = asyncio.get_event_loop()
            for i in range(0, len(proxies), BATCH_TEST_SIZE):
                test_proxies = proxies[i:i + BATCH_TEST_SIZE]
                # asyncio.wait() rejects bare coroutines on Python 3.11+, so
                # each one is wrapped in a Task first.
                tasks = [loop.create_task(self.test_single_proxy(proxy))
                         for proxy in test_proxies]
                loop.run_until_complete(asyncio.wait(tasks))
                time.sleep(5)
        except Exception as e:
            print('测试器发生错误', e.args)

    def test_single_tread(self, proxy):
        """Request TEST_URL through ``proxy`` with requests and update its score.

        :param proxy: proxy address as ``host:port`` (bytes or str).
        """
        if isinstance(proxy, bytes):
            # Concatenating bytes to the scheme string would raise TypeError.
            proxy = proxy.decode('utf-8')
        real_proxy = {'https': 'https://' + proxy}
        print('测试', real_proxy)
        try:
            res = requests.get(TEST_URL, proxies=real_proxy, timeout=10)
            if res.status_code in VALID_STATUS_CODES:
                self.redis.max(proxy)
            else:
                self.redis.decrease(proxy)
        except (requests.RequestException, ConnectionError, ConnectTimeout):
            # RequestException also covers read timeouts, which used to
            # escape and abort new_run().
            self.redis.decrease(proxy)
            print('代理请求失败', proxy)

    def new_run(self):
        """Test every stored proxy sequentially."""
        for ip in self.redis.all():
            self.test_single_tread(ip)
class Tester(object):
    """Asynchronously validates the proxies stored in Redis, in batches."""

    def __init__(self):
        self.redis = RedisClient()

    async def test_single_proxy(self, proxy):
        """Request TEST_URL through ``proxy`` and update its score.

        :param proxy: proxy address as ``host:port`` (bytes or str).
        """
        conn = aiohttp.TCPConnector(verify_ssl=False)
        async with aiohttp.ClientSession(connector=conn) as session:
            try:
                if isinstance(proxy, bytes):
                    proxy = proxy.decode('utf-8')
                real_proxy = 'http://' + proxy
                print('正在测试', proxy)
                async with session.get(TEST_URL, proxy=real_proxy, timeout=15) as response:
                    if response.status in VALID_STATUS_CODES:
                        self.redis.max(proxy)
                        print('代理可用', proxy)
                    else:
                        self.redis.decrease(proxy)
                        print('IP', proxy, '请求响应码不合法 ')
            except (ClientError, aiohttp.client_exceptions.ClientConnectorError,
                    asyncio.TimeoutError, AttributeError):
                self.redis.decrease(proxy)
                print('代理请求失败', proxy)

    def run(self):
        """Test all stored proxies, BATCH_TEST_SIZE at a time."""
        print('测试器运行')
        try:
            proxies = self.redis.all()
            loop = asyncio.get_event_loop()
            for i in range(0, len(proxies), BATCH_TEST_SIZE):
                test_proxies = proxies[i:i + BATCH_TEST_SIZE]
                # asyncio.wait() rejects bare coroutines on Python 3.11+, so
                # each one is wrapped in a Task first.
                tasks = [loop.create_task(self.test_single_proxy(proxy))
                         for proxy in test_proxies]
                loop.run_until_complete(asyncio.wait(tasks))
                time.sleep(5)
        except Exception as e:
            print(e.args)
class Tester(object):
    """Tester that judges each proxy by requesting TEST_URL through it."""

    def __init__(self):
        self.redis = RedisClient()

    async def test_single_proxy(self, proxy):
        """Request TEST_URL through ``proxy`` and update its score.

        :param proxy: proxy address as ``host:port`` (bytes or str).
        :return: None
        """
        conn = aiohttp.TCPConnector(verify_ssl=False)
        async with aiohttp.ClientSession(connector=conn) as session:
            try:
                if isinstance(proxy, bytes):
                    # '%s' on bytes would yield "http://b'...'" instead of a URL.
                    proxy = proxy.decode('utf-8')
                real_proxy = 'http://%s' % proxy
                async with session.get(TEST_URL, proxy=real_proxy, timeout=2) as response:
                    if response.status in VALID_STATUS_CODES:
                        self.redis.max(proxy)
                    else:
                        self.redis.decrease(proxy)
            except (ClientError, ClientConnectorError, asyncio.TimeoutError,
                    TimeoutError, AttributeError):
                # Before Python 3.11 asyncio.TimeoutError is not the builtin
                # TimeoutError, so it is listed explicitly.
                self.redis.decrease(proxy)

    def run(self):
        """Test all stored proxies, BATCH_TEST_SIZE at a time."""
        print('Starting test')
        try:
            proxies = self.redis.all()
            loop = asyncio.get_event_loop()
            for i in range(0, len(proxies), BATCH_TEST_SIZE):
                test_proxies = proxies[i:i + BATCH_TEST_SIZE]
                # asyncio.wait() rejects bare coroutines on Python 3.11+, so
                # each one is wrapped in a Task first.
                tasks = [loop.create_task(self.test_single_proxy(proxy))
                         for proxy in test_proxies]
                loop.run_until_complete(asyncio.wait(tasks))
                time.sleep(5)
        except Exception as e:
            print('Tester errors', e.args)
class Tester(object):
    """Asynchronously validates the proxies stored in Redis, in batches."""

    def __init__(self):
        self.db = RedisClient()

    async def test_single_proxy(self, proxy):
        """Request TEST_URL through ``proxy`` with aiohttp and update its score.

        :param proxy: proxy address as ``host:port`` (bytes or str).
        :return: None
        """
        conn = aiohttp.TCPConnector(verify_ssl=False)
        async with aiohttp.ClientSession(connector=conn) as session:
            try:
                if isinstance(proxy, bytes):
                    proxy = proxy.decode('utf-8')
                real_proxy = 'http://' + proxy
                print('正在测试', proxy)
                async with session.get(TEST_URL, proxy=real_proxy, timeout=15,
                                       allow_redirects=False) as r:
                    if r.status == 200:
                        self.db.max(proxy)
                        print('代理可用:', proxy)
                    else:
                        # This branch used to call db.exists(), a read-only
                        # check, so a bad status never lowered the score.
                        self.db.decrease(proxy)
                        print('状态码不合法:', proxy)
            except Exception:
                self.db.decrease(proxy)
                print('代理请求异常:', proxy)

    def run(self):
        """Test the stored proxies batch by batch."""
        try:
            count = self.db.count()
            print('当前剩余{}个代理'.format(count))
            loop = asyncio.get_event_loop()
            for start in range(0, count, BATCH_TEST_SIZE):
                stop = min(start + BATCH_TEST_SIZE, count)
                test_proxies = self.db.batch(start, stop)
                # asyncio.wait() rejects bare coroutines on Python 3.11+, so
                # each one is wrapped in a Task first.
                tasks = [loop.create_task(self.test_single_proxy(proxy))
                         for proxy in test_proxies]
                if not tasks:
                    # asyncio.wait() raises ValueError on an empty set.
                    continue
                loop.run_until_complete(asyncio.wait(tasks))
                sys.stdout.flush()
                time.sleep(5)
        except Exception as e:
            print('测试器发生错误', e.args)
class Tester(object):
    """Asynchronously validates the proxies stored in Redis, in batches."""

    def __init__(self):
        self.redis = RedisClient()

    async def single_proxy(self, proxy):
        """Request TEST_URL through ``proxy`` and update its score.

        :param proxy: proxy address as ``host:port`` (bytes or str).
        """
        async with aiohttp.ClientSession() as session:
            try:
                if isinstance(proxy, bytes):
                    # Concatenating bytes to the scheme string would raise an
                    # uncaught TypeError and leave the proxy unscored.
                    proxy = proxy.decode('utf-8')
                # NOTE(review): an https:// proxy URL means TLS to the proxy
                # itself; confirm the pooled proxies and the aiohttp version
                # in use support that.
                real_proxy = 'https://' + proxy
                print('正在测试:', proxy)
                async with session.get(TEST_URL, proxy=real_proxy, timeout=10,
                                       allow_redirects=False, verify_ssl=False) as resp:
                    if 200 == resp.status:
                        self.redis.max(proxy)
                    else:
                        self.redis.decrease(proxy)
                        print('请求错误:', resp.status, 'IP', proxy)
            except (ClientError, ClientProxyConnectionError,
                    aiohttp.client_exceptions.ClientConnectorError,
                    asyncio.TimeoutError, AttributeError):
                self.redis.decrease(proxy)
                print('代理请求失败', proxy)

    def run(self):
        """Test the stored proxies batch by batch."""
        print('开始检测ip:....')
        try:
            count = self.redis.count()
            print('当前剩余', count, '个代理')
            loop = asyncio.get_event_loop()
            for start in range(0, count, BATCH_TEST_SIZE):
                end = min(start + BATCH_TEST_SIZE, count)
                print('正在测试第', start + 1, '-', end, '个代理')
                test_proxies = self.redis.batch(start, end)
                # asyncio.wait() rejects bare coroutines on Python 3.11+, so
                # each one is wrapped in a Task first.
                tasks = [loop.create_task(self.single_proxy(proxy))
                         for proxy in test_proxies]
                if not tasks:
                    # asyncio.wait() raises ValueError on an empty set.
                    continue
                loop.run_until_complete(asyncio.wait(tasks))
                sys.stdout.flush()
                time.sleep(5)
        except Exception as e:
            print('检测异常:', e.args)
class Tester(object):
    """Sequentially validates stored proxies with a scheme-specific test URL."""

    def __init__(self):
        self.redis = RedisClient()
        self.headers = {
            "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36"
        }
        self.test_url = ""

    def parse_url(self, proxy):
        """Probe one proxy and update its score.

        The text before the first ':' of ``proxy`` selects both the proxies
        mapping key and the URL stored in ``self.test_url``.

        :param proxy: proxy URL including its scheme.
        """
        print("即将开始测试代理: ", proxy)
        is_http = proxy.split(":")[0] == "http"
        if is_http:
            self.test_url = "http://www.xinhuanet.com"
        else:
            self.test_url = "https://www.baidu.com"
        proxies = {"http" if is_http else "https": proxy}
        try:
            print(self.test_url)
            response = requests.get(self.test_url, proxies=proxies,
                                    headers=self.headers, timeout=3)
            status = response.status_code
            if status != 200:
                self.redis.decrease(proxy)
                print("响应状态玛不合法: ", status, " proxy: ", proxy)
            else:
                self.redis.max(proxy)
                print("发现可用代理: ", proxy)
        except Exception as err:
            self.redis.decrease(proxy)
            print("请求发生错误: ", err, " proxy: ", proxy)

    def run(self):
        """Walk the stored proxies in BATCH_TEST_SIZE windows, testing each one."""
        print("开始测试代理ip......")
        total = self.redis.count()
        print("当前剩余 ", total, " 个代理")
        for start in range(0, total, BATCH_TEST_SIZE):
            stop = min(start + BATCH_TEST_SIZE, total)
            print("正在测试第 ", start + 1, " - ", stop, "个代理")
            for candidate in self.redis.batch(start, stop):
                self.parse_url(candidate)
class Tester(object):
    """Asynchronously validates the proxies stored in Redis, in batches."""

    def __init__(self):
        # One shared Redis client used by every method of this tester.
        self.redis = RedisClient()

    async def test_single_proxy(self, proxy):
        """Request TEST_URL through ``proxy`` and update its score.

        The ``proxy`` parameter was missing from the signature although the
        body and ``run`` both use it, so every call raised TypeError.

        :param proxy: proxy address as ``host:port`` (bytes or str).
        :return: None
        """
        conn = aiohttp.TCPConnector(verify_ssl=False)
        async with aiohttp.ClientSession(connector=conn) as session:
            try:
                if isinstance(proxy, bytes):
                    proxy = proxy.decode('utf-8')
                real_proxy = 'http://' + proxy
                print('正在测试', proxy)
                # TEST_URL is a module constant; point it at the site the
                # proxies are meant to be used against.
                async with session.get(TEST_URL, proxy=real_proxy, timeout=15) as response:
                    if response.status in VALID_STATUS_CODES:
                        # Valid status: promote the proxy.
                        self.redis.max(proxy)
                        print('代理可用', proxy)
                    else:
                        # Invalid status: penalise the proxy.
                        self.redis.decrease(proxy)
                        print('请求相应码不合法', proxy)
            except (ClientError, ClientConnectorError, asyncio.TimeoutError,
                    TimeoutError, AttributeError):
                # Before Python 3.11 asyncio.TimeoutError is not the builtin
                # TimeoutError, so it is listed explicitly.
                self.redis.decrease(proxy)
                print('代理请求失败', proxy)

    def run(self):
        """Test all stored proxies, BATCH_TEST_SIZE at a time.

        :return: None
        """
        print('测试器开始运行')
        try:
            proxies = self.redis.all()
            loop = asyncio.get_event_loop()
            for i in range(0, len(proxies), BATCH_TEST_SIZE):
                test_proxies = proxies[i:i + BATCH_TEST_SIZE]
                # asyncio.wait() rejects bare coroutines on Python 3.11+, so
                # each one is wrapped in a Task first.
                tasks = [loop.create_task(self.test_single_proxy(proxy))
                         for proxy in test_proxies]
                loop.run_until_complete(asyncio.wait(tasks))
                time.sleep(5)
        except Exception as e:
            print('测试器发生错误', e.args)
class Tester(object):
    """Asynchronously validates the proxies stored in Redis, in batches."""

    def __init__(self):
        self.redis = RedisClient()

    async def test_single_proxy(self, proxy):
        """Request TEST_URL through ``proxy`` and update its score.

        :param proxy: proxy address as ``host:port`` (bytes or str).
        """
        conn = aiohttp.TCPConnector(ssl=False)
        async with aiohttp.ClientSession(connector=conn) as session:
            try:
                if isinstance(proxy, bytes):
                    proxy = proxy.decode("utf-8")
                real_proxy = "http://" + proxy
                print("正在测试", proxy)
                async with session.get(TEST_URL, proxy=real_proxy, timeout=15) as response:
                    if response.status in VALID_STATUS_CODES:
                        self.redis.max(proxy)
                        print("代理可用", proxy)
                    else:
                        self.redis.decrease(proxy)
                        print("请求响应码不合法", proxy)
            except (aiohttp.ClientError, aiohttp.ClientConnectorError,
                    asyncio.TimeoutError, AttributeError):
                self.redis.decrease(proxy)
                print("代理请求失败", proxy)

    def run(self):
        """Test all stored proxies, BATCH_TEST_SIZE at a time."""
        print("测试器开始执行")
        try:
            proxies = self.redis.all()
            # Size the loop from the snapshot being sliced: a separate
            # redis.count() call can disagree with it if the pool changes
            # between the two reads.
            count = len(proxies)
            loop = asyncio.get_event_loop()
            for i in range(0, count, BATCH_TEST_SIZE):
                test_proxies = proxies[i:i + BATCH_TEST_SIZE]
                # asyncio.wait() rejects bare coroutines on Python 3.11+, so
                # each one is wrapped in a Task first.
                tasks = [loop.create_task(self.test_single_proxy(proxy))
                         for proxy in test_proxies]
                loop.run_until_complete(asyncio.wait(tasks))
                sys.stdout.flush()
                time.sleep(5)
        except Exception as e:
            print("测试器发生错误", e.args)
class My_Tester():
    """Tests freshly crawled proxies and promotes the working ones in Redis."""

    def __init__(self):
        self.redis = RedisClient()

    async def test_single_proxy(self, proxy):
        """Request TEST_URL through ``proxy``; promote it when the answer is 200.

        Failing proxies are only reported, never decremented.

        :param proxy: proxy address as ``host:port`` (bytes or str).
        """
        async with aiohttp.ClientSession() as session:
            try:
                if isinstance(proxy, bytes):
                    proxy = proxy.decode('utf-8')
                real_proxy = 'http://' + proxy
                async with session.get(url=TEST_URL, proxy=real_proxy, timeout=5) as response:
                    if response.status == 200:
                        print('代理有效', proxy)
                        print('当前代理数', self.redis.count())
                        self.redis.max(proxy)
                    else:
                        print('代理无效', proxy, '状态码', response.status)
            except Exception as e:
                print('Error', e.args)

    def run(self):
        """Crawl candidate proxies from both sources and test them ten at a time.

        A private event loop is used and always closed. The original closed
        the shared default loop, and only on success, which broke any later
        use of that loop and leaked it on failure.
        """
        print('测试器开始运行')
        crawler = Crawler()
        proxies = list(crawler.crawl_ip3366())
        proxies.extend(crawler.crawl_xicidaili())
        loop = asyncio.new_event_loop()
        try:
            for i in range(0, len(proxies), 10):
                # asyncio.wait() rejects bare coroutines on Python 3.11+, so
                # each one is wrapped in a Task first.
                tasks = [loop.create_task(self.test_single_proxy(proxy))
                         for proxy in proxies[i:i + 10]]
                loop.run_until_complete(asyncio.wait(tasks))
                time.sleep(2)
        except Exception as e:
            print('测试错误', e.args)
        finally:
            loop.close()
class Tester:
    """Checks pooled proxies: working ones are promoted, failing ones penalised."""

    def __init__(self):
        self.redis = RedisClient()

    async def single_test(self, proxy):
        """Request TEST_URL through ``proxy`` and update its score.

        :param proxy: proxy address as ``host:port`` (bytes or str).
        """
        conn = aiohttp.TCPConnector(verify_ssl=False)
        async with aiohttp.ClientSession(connector=conn) as session:
            try:
                if isinstance(proxy, bytes):
                    proxy = proxy.decode('utf-8')
                real_proxy = "http://" + proxy
                print("正在测试:", proxy)
                async with session.get(TEST_URL, proxy=real_proxy, timeout=15) as response:
                    # aiohttp exposes the code as ``status``. ``status_code``
                    # raised AttributeError, which the handler below swallowed,
                    # so every proxy was penalised.
                    if response.status in VALID_STATUS_CODES:
                        self.redis.max(proxy)
                        print(proxy, '代理可用')
                    else:
                        self.redis.decrease(proxy)
                        print(proxy, 'IP 请求响应码不合法')
            except (ClientError, asyncio.TimeoutError, AttributeError):
                # asyncio.TimeoutError is what an aiohttp timeout raises.
                self.redis.decrease(proxy)
                print('代理请求失败', proxy)

    def run(self):
        """Test all stored proxies in batches with a random pause between them."""
        print('开始测试代理')
        try:
            count = self.redis.count()
            print('当前剩余', count, '个代理')
            proxies = self.redis.all()
            loop = asyncio.get_event_loop()
            # Step over the fetched snapshot so batches match what was read.
            for start in range(0, len(proxies), BATCH_TEST_SIZE):
                test_proxies = proxies[start:start + BATCH_TEST_SIZE]
                # asyncio.wait() rejects bare coroutines on Python 3.11+, so
                # each one is wrapped in a Task first.
                tasks = [loop.create_task(self.single_test(proxy))
                         for proxy in test_proxies]
                loop.run_until_complete(asyncio.wait(tasks))
                time.sleep(randint(1, 5))
        except Exception as e:
            print('测试器发生错误', e.args)
class Tester():
    """Asynchronously validates the proxies stored in Redis, in batches."""

    def __init__(self):
        self.redis = RedisClient()

    async def test_single_proxy(self, proxy):
        """Request TEST_URL through ``proxy`` and update its score.

        :param proxy: proxy address as ``host:port`` (bytes or str).
        """
        conn = aiohttp.TCPConnector(verify_ssl=False)
        async with aiohttp.ClientSession(connector=conn) as session:
            try:
                if isinstance(proxy, bytes):
                    proxy = proxy.decode('utf-8')
                real_proxy = 'http://' + proxy
                print('正在测试', proxy)
                async with session.get(url=TEST_URL, proxy=real_proxy, timeout=5) as response:
                    if response.status in VALID_STATUS_CODE:
                        self.redis.max(proxy)
                        print('代理可用', proxy)
                    else:
                        self.redis.decrease(proxy)
                        print('请求响应码不合法', proxy)
            except (ClientError, asyncio.TimeoutError, TimeoutError,
                    AttributeError) as e:
                # Before Python 3.11 asyncio.TimeoutError is not the builtin
                # TimeoutError, so it is listed explicitly.
                print(e.args)
                self.redis.decrease(proxy)
                print('代理请求失败', proxy)

    def run(self):
        """Test the stored proxies batch by batch."""
        print('测试器开始运行')
        try:
            count = self.redis.count()
            print('当前代理剩余个数', count)
            loop = asyncio.get_event_loop()
            for start in range(0, count, BATCH_TEST_SIZE):
                stop = min(count, start + BATCH_TEST_SIZE)
                test_proxies = self.redis.batch(start, stop)
                print('正在测试', start + 1, '-', stop, '个代理')
                # asyncio.wait() rejects bare coroutines on Python 3.11+, so
                # each one is wrapped in a Task first.
                tasks = [loop.create_task(self.test_single_proxy(proxy))
                         for proxy in test_proxies]
                if not tasks:
                    # asyncio.wait() raises ValueError on an empty set.
                    continue
                loop.run_until_complete(asyncio.wait(tasks))
                time.sleep(2)
        except Exception as e:
            print('测试器发生错误', e.args)
class Tester:
    """Asynchronously validates the proxies stored in Redis, in batches."""

    def __init__(self):
        """Create the database client."""
        self.redis = RedisClient()

    async def test_one_proxy(self, proxy):
        """Request TEST_URL through ``proxy`` and update its score.

        :param proxy: proxy address as ``host:port`` (bytes or str).
        """
        conn = aiohttp.TCPConnector(ssl=False)
        async with aiohttp.ClientSession(connector=conn) as session:
            try:
                if isinstance(proxy, bytes):
                    proxy = proxy.decode('utf-8')
                real_proxy = 'http://' + proxy
                async with session.get(TEST_URL, proxy=real_proxy, timeout=30) as response:
                    if response.status in TRUE_STATUS_CODE:
                        self.redis.max(proxy)
                        print(proxy, 100, '可用')
                    else:
                        self.redis.decrease(proxy)
                        print(proxy, -1, "状态码错误")
            except Exception as e:
                self.redis.decrease(proxy)
                print(proxy, -1, e.args)

    async def start(self):
        """Test every stored proxy, BATCH_TEST_SIZE at a time."""
        try:
            proxies = self.redis.all()
            for i in range(0, len(proxies), BATCH_TEST_SIZE):
                test_proxies = proxies[i:i + BATCH_TEST_SIZE]
                tasks = [self.test_one_proxy(proxy) for proxy in test_proxies]
                await asyncio.gather(*tasks)
                # Pause without blocking the event loop (was time.sleep).
                await asyncio.sleep(5)
        except Exception as e:
            print('测试器发生错误', e.args)

    def run(self):
        """Synchronous entry point: run ``start`` to completion."""
        asyncio.run(self.start())
class Tester(object):
    """Asynchronously validates the proxies stored in Redis, in batches."""

    def __init__(self):
        self.redis = RedisClient()

    async def test_single_proxy(self, proxy):
        """Request TEST_URL through ``proxy`` and update its score.

        :param proxy: proxy address as ``host:port`` (bytes or str).
        """
        conn = aiohttp.TCPConnector(ssl=False)
        async with aiohttp.ClientSession(connector=conn) as session:
            try:
                if isinstance(proxy, bytes):
                    proxy = proxy.decode('utf-8')
                real_proxy = 'http://' + proxy
                async with session.get(TEST_URL, proxy=real_proxy, timeout=15,
                                       allow_redirects=False) as response:
                    if response.status in VALID_STATUS_CODES:
                        self.redis.max(proxy)
                    else:
                        self.redis.decrease(proxy)
            except (aiohttp.ClientError, aiohttp.client_exceptions.ClientConnectorError,
                    asyncio.TimeoutError, AttributeError):
                self.redis.decrease(proxy)

    def run(self):
        """Test all stored proxies, BATCH_TEST_SIZE at a time."""
        print('测试器开始运行!')
        try:
            proxies = self.redis.all()
            count = len(proxies)
            print('当前共有{0}个代理'.format(count))
            loop = asyncio.get_event_loop()
            for start in range(0, count, BATCH_TEST_SIZE):
                end = min(start + BATCH_TEST_SIZE, count)
                test_proxies = proxies[start:end]
                # asyncio.wait() rejects bare coroutines on Python 3.11+, so
                # each one is wrapped in a Task first.
                tasks = [loop.create_task(self.test_single_proxy(proxy))
                         for proxy in test_proxies]
                loop.run_until_complete(asyncio.wait(tasks))
                time.sleep(5)
        except Exception as e:
            print('测试器发生错误', e.args)
class Tester(object):
    """Asynchronously validates the proxies stored in Redis, in batches."""

    def __init__(self):
        self.redis = RedisClient()

    async def test_single_proxy(self, proxy):
        """Request TEST_URL through ``proxy`` and update its score.

        :param proxy: proxy address as ``host:port`` (bytes or str).
        """
        try:
            conn = TCPConnector(verify_ssl=False)
            async with ClientSession(connector=conn) as session:
                if isinstance(proxy, bytes):
                    proxy = proxy.decode('utf-8')
                real_proxy = 'http://' + proxy
                print('正在测试: ', proxy)
                async with session.get(TEST_URL, proxy=real_proxy,
                                       timeout=10) as response:
                    if response.status in VALID_STATUS_CODES:
                        self.redis.max(proxy)
                        print('代理可用: ', proxy)
                    else:
                        # Was misspelled ``derease``; the AttributeError was
                        # caught below, so the wrong message was printed.
                        self.redis.decrease(proxy)
                        print('请求响应不合法: ', proxy)
        except (ClientError, ClientConnectorError, asyncio.TimeoutError,
                TimeoutError, AttributeError):
            # Before Python 3.11 asyncio.TimeoutError is not the builtin
            # TimeoutError, so it is listed explicitly.
            self.redis.decrease(proxy)
            print('代理请求失败: ', proxy)

    def run(self):
        """Test all stored proxies, BATCH_TEST_SIZE at a time."""
        print('开始测试...')
        try:
            proxies = self.redis.all()
            loop = asyncio.get_event_loop()
            for index in range(0, len(proxies), BATCH_TEST_SIZE):
                test_proxies = proxies[index:index + BATCH_TEST_SIZE]
                # asyncio.wait() rejects bare coroutines on Python 3.11+, so
                # each one is wrapped in a Task first.
                tasks = [loop.create_task(self.test_single_proxy(proxy))
                         for proxy in test_proxies]
                loop.run_until_complete(asyncio.wait(tasks))
                time.sleep(5)
        except Exception as e:
            print('测试发生错误', e.args)
class Tester_2:
    """Sequential, requests-based tester that checks at most 15 proxies per run."""

    def __init__(self):
        self.redis = RedisClient()
        self.headers = {
            'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) '
                          'Chrome/73.0.3683.103 Safari/537.36'
        }

    def single_test(self, proxy, timeout=10):
        """Request TEST_URL through ``proxy`` and update its score.

        :param proxy: proxy address as ``host:port`` (bytes or str).
        :param timeout: seconds to wait before giving up; without a timeout
            an unresponsive proxy can block the run indefinitely.
        """
        if isinstance(proxy, bytes):
            # Concatenating bytes to the scheme string would raise TypeError.
            proxy = proxy.decode('utf-8')
        proxies = {'http': 'http://' + proxy, 'https': 'https://' + proxy}
        try:
            resp = requests.get(TEST_URL, proxies=proxies,
                                headers=self.headers, timeout=timeout)
            if resp.status_code == 200:
                print(proxy, '代理可用')
                self.redis.max(proxy)
            else:
                self.redis.decrease(proxy)
                print(proxy, 'IP 请求响应码不合法')
        except (requests.RequestException, ConnectionError):
            # RequestException also covers timeouts and proxy errors, which
            # previously escaped and aborted the whole run.
            print('代理请求失败', proxy)
            self.redis.decrease(proxy)

    def run(self):
        """Test the first 15 stored proxies with a random pause after each."""
        count = self.redis.count()
        print('共有', count, '个代理')
        print('开始检测代理')
        proxies = self.redis.all()
        try:
            for tested, proxy in enumerate(proxies, start=1):
                print(proxy)
                self.single_test(proxy)
                time.sleep(randint(1, 5))
                if tested == 15:
                    break
        except Exception as e:
            print('测试器发生错误', e)
class Tester:
    """Asynchronously validates the proxies stored in Redis, in batches."""

    def __init__(self):
        self.redis = RedisClient()

    async def test_single_proxy(self, proxy):
        """Request TEST_URL through ``proxy`` and update its score.

        :param proxy: proxy address as ``host:port`` (bytes or str).
        """
        conn = aiohttp.TCPConnector(verify_ssl=False)
        async with aiohttp.ClientSession(connector=conn) as session:
            try:
                if isinstance(proxy, bytes):
                    proxy = proxy.decode("utf8")
                real_proxy = "http://" + proxy
                print(f"testing {proxy}")
                async with session.get(TEST_URL, proxy=real_proxy, timeout=15,
                                       allow_redirects=False) as response:
                    if response.status in VALID_STATUS_CODES:
                        self.redis.max(proxy)
                        print(f"{proxy} can use.")
                    else:
                        self.redis.decrease(proxy)
                        print(f"request is invalid {response.status} by proxy ip {proxy}")
            except (ClientError, aiohttp.client_exceptions.ClientConnectionError,
                    asyncio.TimeoutError, AttributeError):
                self.redis.decrease(proxy)
                print(f"request is failed by proxy ip {proxy}")

    def run(self):
        """Test the stored proxies batch by batch."""
        print("tester is begining")
        try:
            count = self.redis.count()
            print(f"hold {count} proxies")
            loop = asyncio.get_event_loop()
            for start in range(0, count, BATCH_TEST_SIZE):
                stop = min(start + BATCH_TEST_SIZE, count)
                print(f"will test {start+1} - {stop} proxies")
                test_proxies = self.redis.batch(start, stop)
                # asyncio.wait() rejects bare coroutines on Python 3.11+, so
                # each one is wrapped in a Task first.
                tasks = [loop.create_task(self.test_single_proxy(proxy))
                         for proxy in test_proxies]
                if not tasks:
                    # asyncio.wait() raises ValueError on an empty set.
                    continue
                loop.run_until_complete(asyncio.wait(tasks))
                sys.stdout.flush()
                time.sleep(5)
        except Exception as e:
            print(f"test error, {e.args}")
class Tester():
    """Asynchronously validates the proxies stored in Redis, in batches."""

    def __init__(self):
        self.redis = RedisClient()

    async def single_test(self, proxy):
        """Request TEST_URL through ``proxy`` and update its score.

        :param proxy: proxy address as ``host:port`` (bytes or str).
        """
        conn = aiohttp.TCPConnector(ssl=False)
        async with aiohttp.ClientSession(connector=conn) as session:
            try:
                # Neither str nor bytes has a ``.string()`` method. The call
                # raised AttributeError, which the handler below swallowed,
                # so every proxy was penalised without being contacted.
                if isinstance(proxy, bytes):
                    proxy = proxy.decode("utf-8")
                # NOTE(review): an https:// proxy URL means TLS to the proxy
                # itself; confirm the pooled proxies and the aiohttp version
                # in use support that.
                real_proxy = "https://" + proxy
                print("testing", proxy)
                async with session.get(TEST_URL, allow_redirects=False,
                                       proxy=real_proxy, timeout=15) as response:
                    if response.status in VALID_STATUS_CODE:
                        self.redis.max(proxy)
                        print("Available proxy:", proxy)
                    else:
                        self.redis.decrease(proxy)
                        print("Not Available Status:", proxy, " Score -1")
            except (aiohttp.ClientError, aiohttp.ClientConnectorError,
                    asyncio.TimeoutError, TimeoutError, AttributeError,
                    aiohttp.ClientOSError, aiohttp.ClientHttpProxyError):
                # Before Python 3.11 asyncio.TimeoutError is not the builtin
                # TimeoutError, so it is listed explicitly.
                self.redis.decrease(proxy)
                print("Error detected!", proxy)

    def run(self):
        """Test all stored proxies, BATCH_TEST_SIZE at a time."""
        print("Starts running tester")
        try:
            entries = self.redis.all()
            loop = asyncio.get_event_loop()
            # Step by the same size used for slicing; the hard-coded 200
            # skipped or re-tested entries when BATCH_TEST_SIZE differed.
            for i in range(0, len(entries), BATCH_TEST_SIZE):
                test_proxies = entries[i:i + BATCH_TEST_SIZE]
                # asyncio.wait() rejects bare coroutines on Python 3.11+, so
                # each one is wrapped in a Task first.
                tasks = [loop.create_task(self.single_test(proxy))
                         for proxy in test_proxies]
                loop.run_until_complete(asyncio.wait(tasks))
                time.sleep(5)
        except Exception as e:
            print(' Error with tester ', e.args)
class Tester(object):
    """Asynchronously validates the proxies stored in Redis, in batches."""

    def __init__(self):
        self.redis = RedisClient()

    async def test_single_proxy(self, proxy):
        """Request TEST_URL through ``proxy`` and update its score.

        :param proxy: proxy address as ``host:port`` (bytes or str).
        """
        conn = aiohttp.TCPConnector(verify_ssl=False)
        async with aiohttp.ClientSession(connector=conn) as session:
            try:
                if isinstance(proxy, bytes):
                    proxy = proxy.decode('utf-8')
                real_proxy = 'http://' + proxy
                print('正在测试', proxy)
                async with session.get(TEST_URL, headers=base_header, proxy=real_proxy,
                                       timeout=3, allow_redirects=False) as response:
                    if response.status in VALID_STATUS_CODES:
                        self.redis.max(proxy)
                        print('代理可用', proxy)
                    else:
                        self.redis.decrease(proxy)
                        print('请求错误', response.status, proxy)
            except (asyncio.TimeoutError,
                    aiohttp.client_exceptions.ClientProxyConnectionError,
                    aiohttp.ClientError):
                self.redis.decrease(proxy)
                print('代理请求失败', proxy)

    def run(self):
        """Test the stored proxies batch by batch."""
        print('测试器开始运行')
        try:
            count = self.redis.count()
            print('当前剩下代理数量:', count)
            loop = asyncio.get_event_loop()
            for start in range(0, count, BATCH_TEST_SIZE):
                stop = min(start + BATCH_TEST_SIZE, count)
                print('正在测试第', start + 1, '-', stop, '的代理')
                test_proxies = self.redis.batch(start, stop)
                # asyncio.wait() rejects bare coroutines on Python 3.11+, so
                # each one is wrapped in a Task first.
                tasks = [loop.create_task(self.test_single_proxy(proxy))
                         for proxy in test_proxies]
                if not tasks:
                    # asyncio.wait() raises ValueError on an empty set.
                    continue
                loop.run_until_complete(asyncio.wait(tasks))
                time.sleep(5)
        except Exception as e:
            print('测试器发生错误', e.args)
class Tester(object):
    """Asynchronously validates the proxies stored in Redis, in batches."""

    def __init__(self):
        self.redis = RedisClient()

    async def test_single_proxy(self, proxy):
        """Request TEST_URL through ``proxy`` and update its score.

        :param proxy: proxy address as ``host:port`` (bytes or str).
        """
        conn = aiohttp.TCPConnector(verify_ssl=False)
        async with aiohttp.ClientSession(connector=conn) as session:
            try:
                if isinstance(proxy, bytes):
                    proxy = proxy.decode('utf-8')
                real_proxy = 'http://' + proxy
                print('正在测试', proxy)
                async with session.get(TEST_URL, proxy=real_proxy, timeout=15) as response:
                    if response.status in VALID_STATUS_CODES:
                        self.redis.max(proxy)
                        print('代理可用', proxy)
                    else:
                        self.redis.decrease(proxy)
                        print('请求响应码不合法', proxy)
            except (ClientProxyConnectionError, ServerDisconnectedError, ClientOSError,
                    ClientHttpProxyError, asyncio.TimeoutError, TimeoutError,
                    AttributeError):
                # Before Python 3.11 asyncio.TimeoutError is not the builtin
                # TimeoutError, so it is listed explicitly.
                self.redis.decrease(proxy)
                print('代理请求失败', proxy)

    def run(self):
        """Test all stored proxies, BATCH_TEST_SIZE at a time."""
        print('测试器开始运行')
        try:
            proxies = self.redis.all()
            loop = asyncio.get_event_loop()
            # Start at index 0: starting at 1 skipped the first proxy and
            # tested nothing when only one proxy was stored.
            for i in range(0, len(proxies), BATCH_TEST_SIZE):
                test_proxies = proxies[i:i + BATCH_TEST_SIZE]
                # asyncio.wait() rejects bare coroutines on Python 3.11+, so
                # each one is wrapped in a Task first.
                tasks = [loop.create_task(self.test_single_proxy(proxy))
                         for proxy in test_proxies]
                loop.run_until_complete(asyncio.wait(tasks))
                time.sleep(5)
        except Exception as e:
            print('测试器发生错误', e.args)