class Tester(object): def __init__(self): self.redis = RedisClient() async def test_single_proxy(self, proxy): """ 测试单个代理 :param proxy: :return: """ conn = aiohttp.TCPConnector(verify_ssl=False) async with aiohttp.ClientSession(connector=conn) as session: try: if isinstance(proxy, bytes): proxy = proxy.decode('utf-8') print('正在测试', proxy) for url in TEST_URL: real_proxy = 'http://' + proxy async with session.get(url, proxy=real_proxy, timeout=15, allow_redirects=False) as response: if response.status in VALID_STATUS_CODES: self.redis.max(proxy, http=re.match(r'http[s]?', url).group()) print('代理可用', proxy) else: self.redis.decrease(proxy) print('请求响应码不合法 ', response.status, 'IP', proxy) except (ClientError, aiohttp.client_exceptions.ClientConnectorError, asyncio.TimeoutError, AttributeError): self.redis.decrease(proxy) print('代理请求失败', proxy) def run(self): """ 测试主函数 :return: """ print('测试器开始运行') try: count = self.redis.count() print('当前剩余', count, '个代理') for i in range(0, count, BATCH_TEST_SIZE): start = i stop = min(i + BATCH_TEST_SIZE, count) print('正在测试第', start + 1, '-', stop, '个代理') test_proxies = self.redis.batch(start, stop) loop = asyncio.get_event_loop() tasks = [ self.test_single_proxy(proxy) for proxy in test_proxies ] loop.run_until_complete(asyncio.wait(tasks)) sys.stdout.flush() time.sleep(5) except Exception as e: print('测试器发生错误', e.args)
class Tester(object): def __init__(self): self.redis = RedisClient() async def test_single_proxy(self,proxy): ''' 测试单个代理 :param proxy: :return: None ''' #连接 # 创建aiohttp的ClientSession对象,类似requests的session对象 async with aiohttp.ClientSession() as session: try: if isinstance(proxy,bytes): proxy = proxy.decode('utf-8') real_proxy = 'http://' + proxy print('正在测试',proxy) async with session.get(TEST_URL,proxy=real_proxy,verify_ssl=False,timeout=60,allow_redirects=False) as response: if response.status in VALID_STATUS_CODES: self.redis.max(proxy) print('代理可用',proxy) else: self.redis.decrease(proxy) print('请求响应码不合法',proxy) except (ClientError, aiohttp.client_exceptions.ClientProxyConnectionError, asyncio.TimeoutError, AttributeError) as e: print(e.args) self.redis.decrease(proxy) print('代理请求失败',proxy) def run(self): ''' 测试主函数 :param self: :return: None ''' print('测试器开始运行') try: count = self.redis.count() print('当前剩余', count, '个代理') for i in range(0, count, BATCH_TEST_SIZE): start = i stop = min(i + BATCH_TEST_SIZE, count) print('正在测试第', start + 1, '-', stop, '个代理') test_proxies = self.redis.batch(start, stop) loop = asyncio.get_event_loop() '''对指定数量内的代理进行测试''' tasks = [self.test_single_proxy(proxy) for proxy in test_proxies] loop.run_until_complete(asyncio.wait(tasks)) # 输出刷新 每次返回一点 sys.stdout.flush() time.sleep(5) except Exception as e: print('测试台发生错误', e.args)
class Tester(object): """检测代理是否能行""" def __init__(self): self.redis = RedisClient() async def test_single_proxy(self, proxy): """ 测试当个代理 :param proxy:代理 :return: None """ conn = aiohttp.TCPConnector(verify_ssl=False) # 防止ssl报错 async with aiohttp.ClientSession( connector=conn) as session: # 创建session try: if isinstance(proxy, bytes): proxy = proxy.decode('utf-8') real_proxy = "http://" + proxy print("正在尝试:", real_proxy) async with session.get(test_url, proxy=real_proxy, timeout=15) as response: if response.status in VALID_STATUS_CODES: self.redis.max(proxy) print("代理可用", proxy) else: self.redis.decrease(proxy) print("请求响应不合法", proxy) except Exception as e: self.redis.decrease(proxy) print("代理请求失败", proxy, e.args) def run(self): """ 测试主函数 :return: """ print('测试器开始运行') try: count = self.redis.count() print('当前剩余', count, '个代理') for i in range(0, count, BATCH_TEST_SIZE): start = i stop = min(i + BATCH_TEST_SIZE, count) print('正在测试第', start + 1, '-', stop, '个代理') test_proxies = self.redis.batch(start, stop) loop = asyncio.get_event_loop() tasks = [ self.test_single_proxy(proxy) for proxy in test_proxies ] loop.run_until_complete(asyncio.wait(tasks)) sys.stdout.flush() time.sleep(5) except Exception as e: print('测试器发生错误', e.args)
class Tester(object): def __init__(self): self.db = RedisClient() async def test_single_proxy(self, proxy): """ 使用异步请求库aiohttp对代理进行测试 :param proxy: :return: """ conn = aiohttp.TCPConnector(verify_ssl=False) async with aiohttp.ClientSession(connector=conn) as session: try: if isinstance(proxy, bytes): proxy = proxy.decode('utf-8') real_proxy = 'http://' + proxy print('正在测试', proxy) async with session.get(TEST_URL, proxy=real_proxy, timeout=15, allow_redirects=False) as r: if r.status == 200: self.db.max(proxy) print('代理可用:', proxy) else: self.db.exists(proxy) print('状态码不合法:', proxy) except Exception as e: self.db.decrease(proxy) print('代理请求异常:', proxy) def run(self): try: count = self.db.count() print('当前剩余{}个代理'.format(count)) for i in range(0, count, BATCH_TEST_SIZE): start = i stop = min(i + BATCH_TEST_SIZE, count) test_proxies = self.db.batch(start, stop) loop = asyncio.get_event_loop() tasks = [ self.test_single_proxy(proxy) for proxy in test_proxies ] loop.run_until_complete(asyncio.wait(tasks)) sys.stdout.flush() # 刷新缓冲区 time.sleep(5) except Exception as e: print('测试器发生错误', e.args)
class Tester(object):
    """Check the proxies held by ``RedisClient`` by fetching TEST_URL through them."""

    def __init__(self):
        self.redis = RedisClient()

    async def single_proxy(self, proxy):
        """
        Test a single proxy.

        :param proxy: proxy address as ``str`` or UTF-8 encoded ``bytes``
        :return: None
        """
        async with aiohttp.ClientSession() as session:
            try:
                # Accept raw bytes too, so the concatenation below cannot
                # fail with an unhandled TypeError.
                if isinstance(proxy, bytes):
                    proxy = proxy.decode('utf-8')
                # NOTE(review): the proxy URL uses the https scheme here —
                # confirm the installed aiohttp accepts https proxies.
                real_proxy = 'https://' + proxy
                print('正在测试:', proxy)
                async with session.get(TEST_URL, proxy=real_proxy, timeout=10,
                                       allow_redirects=False,
                                       verify_ssl=False) as resp:
                    if 200 == resp.status:
                        self.redis.max(proxy)
                    else:
                        self.redis.decrease(proxy)
                        print('请求错误:', resp.status, 'IP', proxy)
            except (ClientError, ClientProxyConnectionError,
                    aiohttp.client_exceptions.ClientConnectorError,
                    asyncio.TimeoutError, AttributeError):
                self.redis.decrease(proxy)
                print('代理请求失败', proxy)

    async def _run_batch(self, proxies):
        """Run ``single_proxy`` concurrently for every proxy in *proxies*."""
        # asyncio.wait() rejects bare coroutine objects on Python 3.11+ and
        # raises ValueError for an empty batch; gather() handles both.
        # return_exceptions=True keeps one unexpected error from aborting the
        # remaining checks of the batch.
        await asyncio.gather(
            *(self.single_proxy(proxy) for proxy in proxies),
            return_exceptions=True
        )

    def run(self):
        """Test all stored proxies in batches of ``BATCH_TEST_SIZE``."""
        print('开始检测ip:....')
        try:
            count = self.redis.count()
            print('当前剩余', count, '个代理')
            for i in range(0, count, BATCH_TEST_SIZE):
                start = i
                end = min(i + BATCH_TEST_SIZE, count)
                print('正在测试第', start + 1, '-', end, '个代理')
                test_proxies = self.redis.batch(start, end)
                loop = asyncio.get_event_loop()
                loop.run_until_complete(self._run_batch(test_proxies))
                sys.stdout.flush()
                time.sleep(5)
        except Exception as e:
            print('检测异常:', e.args)
class Tester(object):
    """Synchronously check stored proxies with ``requests``."""

    def __init__(self):
        self.redis = RedisClient()
        self.headers = {
            "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36"
        }
        self.test_url = ""

    def parse_url(self, proxy):
        """
        Request a test page through *proxy* and update its score.

        The text before the first ":" of *proxy* selects the target: "http"
        picks the plain-HTTP page, anything else the HTTPS page. The chosen
        URL is stored on ``self.test_url``.

        :param proxy: proxy URL string
        :return: None
        """
        print("即将开始测试代理: ", proxy)
        scheme = proxy.split(":")[0]
        if scheme != "http":
            proxy_key, target = "https", "https://www.baidu.com"
        else:
            proxy_key, target = "http", "http://www.xinhuanet.com"
        proxies = {proxy_key: proxy}
        self.test_url = target
        try:
            print(self.test_url)
            response = requests.get(
                self.test_url,
                proxies=proxies,
                headers=self.headers,
                timeout=3,
            )
            status = response.status_code
            if status != 200:
                self.redis.decrease(proxy)
                print("响应状态玛不合法: ", status, " proxy: ", proxy)
            else:
                self.redis.max(proxy)
                print("发现可用代理: ", proxy)
        except Exception as e:
            self.redis.decrease(proxy)
            print("请求发生错误: ", e, " proxy: ", proxy)

    def run(self):
        """Walk through all stored proxies in batches of ``BATCH_TEST_SIZE``."""
        print("开始测试代理ip......")
        total = self.redis.count()
        print("当前剩余 ", total, " 个代理")
        for offset in range(0, total, BATCH_TEST_SIZE):
            upper = min(offset + BATCH_TEST_SIZE, total)
            print("正在测试第 ", offset + 1, " - ", upper, "个代理")
            for candidate in self.redis.batch(offset, upper):
                self.parse_url(candidate)
class Tester(): def __init__(self): self.redis = RedisClient() async def test_single_proxy(self, proxy): conn = aiohttp.TCPConnector(verify_ssl=False) async with aiohttp.ClientSession(connector=conn) as session: try: if isinstance(proxy, bytes): proxy = proxy.decode('utf-8') real_proxy = 'http://' + proxy print('正在测试', proxy) async with session.get(url=TEST_URL, proxy=real_proxy, timeout=5) as response: if response.status in VALID_STATUS_CODE: self.redis.max(proxy) print('代理可用', proxy) else: self.redis.decrease(proxy) print('请求响应码不合法', proxy) except (ClientError, TimeoutError, AttributeError) as e: print(e.args) self.redis.decrease(proxy) print('代理请求失败', proxy) def run(self): print('测试器开始运行') try: count = self.redis.count() print('当前代理剩余个数', count) loop = asyncio.get_event_loop() for i in range(0, count, BATCH_TEST_SIZE): start = i stop = min(count, i + BATCH_TEST_SIZE) test_proxies = self.redis.batch(start, stop) print('正在测试', start + 1, '-', stop, '个代理') tasks = [ self.test_single_proxy(proxy) for proxy in test_proxies ] loop.run_until_complete(asyncio.wait(tasks)) time.sleep(2) except Exception as e: print('测试器发生错误', e.args)
class Tester: def __init__(self): self.redis = RedisClient() async def test_single_proxy(self, proxy): conn = aiohttp.TCPConnector(verify_ssl=False) async with aiohttp.ClientSession(connector=conn) as session: try: if isinstance(proxy, bytes): proxy = proxy.decode("utf8") real_proxy = "http://"+proxy print(f"testing {proxy}") async with session.get(TEST_URL, proxy=real_proxy, timeout=15, allow_redirects=False) as response: if response.status in VALID_STATUS_CODES: self.redis.max(proxy) print(f"{proxy} can use.") else: self.redis.decrease(proxy) print(f"request is invalid {response.status} by proxy ip {proxy}") except (ClientError, aiohttp.client_exceptions.ClientConnectionError, asyncio.TimeoutError, AttributeError): self.redis.decrease(proxy) print(f"request is failed by proxy ip {proxy}") def run(self): print("tester is begining") try: count = self.redis.count() print(f"hold {count} proxies") for i in range(0, count, BATCH_TEST_SIZE): start = i stop = min(i+BATCH_TEST_SIZE, count) print(f"will test {start+1} - {stop} proxies") test_proxies = self.redis.batch(start, stop) loop = asyncio.get_event_loop() tasks = [self.test_single_proxy(proxy) for proxy in test_proxies] loop.run_until_complete(asyncio.wait(tasks)) sys.stdout.flush() time.sleep(5) except Exception as e: print(f"test error, {e.args}")
class Tester(object): def __init__(self): self.redis = RedisClient() async def test_single_proxy(self, proxy): conn = aiohttp.TCPConnector(verify_ssl=False) async with aiohttp.ClientSession(connector=conn) as session: try: if isinstance(proxy,bytes): proxy = proxy.decode('utf-8') real_proxy = 'http://' + proxy print('正在测试', proxy) async with session.get(TEST_URL, headers = base_header, proxy=real_proxy, timeout=3, allow_redirects=False) as response: if response.status in VALID_STATUS_CODES: self.redis.max(proxy) print('代理可用',proxy) else: self.redis.decrease(proxy) print('请求错误',response.status,proxy) except (asyncio.TimeoutError,aiohttp.client_exceptions.ClientProxyConnectionError,aiohttp.ClientError): self.redis.decrease(proxy) print('代理请求失败',proxy) def run(self): print('测试器开始运行') try: count = self.redis.count() print('当前剩下代理数量:',count) for i in range(0, count, BATCH_TEST_SIZE): start = i stop = min(i + BATCH_TEST_SIZE ,count) print('正在测试第', start + 1, '-', stop, '的代理') test_proxies = self.redis.batch(start,stop) loop = asyncio.get_event_loop() tasks = [self.test_single_proxy(proxy) for proxy in test_proxies] loop.run_until_complete(asyncio.wait(tasks)) time.sleep(5) except Exception as e: print('测试器发生错误', e.args)
class Tester(object): def __init__(self): self.db = RedisClient() async def test_single_proxy(self, proxy): conn = aiohttp.TCPConnector(verify_ssl=False) async with aiohttp.ClientSession(connector=conn) as session: try: if isinstance(proxy, bytes): proxy = proxy.decode("utf-8") real_proxy = "http://" + proxy async with session.get(url=TEST_URL, proxy=real_proxy, timeout=15, allow_redirects=False) as response: if response.status in VALUE_CODE: print("代理", proxy, "有效,分数置为100") self.db.max(proxy) else: print("代理", proxy, "响应码,分数减1") self.db.decrease(proxy) except Exception: print("代理", proxy, "请求出错,分数减1") self.db.decrease(proxy) def run(self): count = self.db.count() print('当前剩余', count, '个代理') for i in range(0, count, BATCH): start = i end = min(i + BATCH, count) proxies = self.db.batch(start, end) print('正在测试第', start + 1, '-', end, '个代理') loop = asyncio.get_event_loop() tasks = [self.test_single_proxy(proxy) for proxy in proxies] loop.run_until_complete(asyncio.wait(tasks)) time.sleep(5)
class Tester(object): def __init__(self): self.redis = RedisClient() #测试单个代理 async def test_single_proxy(self, proxy): conn = aiohttp.TCPConnector(verify_ssl=False) async with aiohttp.ClientSession(connector=conn) as session: try: if isinstance(proxy, bytes): proxy = proxy.decode('utf-8') real_proxy = 'http://' + proxy print('正在测试', proxy) async with session.get( TEST_URL, proxy=real_proxy, timeout=15, allow_redirects=False, headers= { 'Host': 'xueqiu.com', 'Referer': 'https://xueqiu.com/u/8205178197', 'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36', 'X-Requested-With': 'XMLHttpRequest', 'Cookie': 'device_id=5d27463e2df6a534e7ecba029eb95e29; xq_a_token=f89219d7e7ee863a5773244ad9d2db6e3dc5ea38; xq_r_token=8bdf53186f54b2c5c885621e64fd4d728f3111e0;', }) as response: if response.status in VALID_STATUS_CODES: self.redis.max(proxy) print('代理可用', proxy) else: self.redis.decrease(proxy) print('请求响应码不合法', response.status, 'IP', proxy) except (ClientError, aiohttp.client_exceptions.ClientConnectorError, asyncio.TimeoutError, AttributeError): self.redis.decrease(proxy) print('代理请求失败', proxy) #测试启动 def run(self): print('测试器开始运行') try: count = self.redis.count() print('当前剩余', count, '个代理') #批量测试 for i in range(0, count, BATCH_TEST_SIZE): start = i stop = min(i + BATCH_TEST_SIZE, count) print('正在测试第', start + 1, '-', stop, '个代理') test_proxies = self.redis.batch(start, stop) loop = asyncio.get_event_loop() tasks = [ self.test_single_proxy(proxy) for proxy in test_proxies ] loop.run_until_complete(asyncio.wait(tasks)) sys.stdout.flush() time.sleep(5) except Exception as e: print('测试器发生错误', e.args)
class Tester(object): def __init__(self): self.redis = RedisClient() #函数前面加 async 表示此函数是异步的 async def test_single_proxy(self, proxy): ''' 测试单个代理 :param proxy: :return: ''' conn = aiohttp.TCPConnector( verify_ssl=False) #获取请求,verify_ssl=False防止ssl证书报错 async with aiohttp.ClientSession( connector=conn ) as session: #创建一个session对象(session用于存储特定对话所需信息) try: if isinstance(proxy, bytes): #判断proxy是不是bytes类型 proxy = proxy.decode('utf-8') real_proxy = 'http://' + proxy print('正在测试', proxy) async with session.get( TEST_URL, proxy=real_proxy, timeout=15, allow_redirects=False ) as response: #allow_redirects=False禁止重定向 if response.status in VALID_STATUS_CODES: self.redis.max(proxy) #调用db的max()方法将score设为100 print('代理可用', proxy) else: self.redis.decrease(proxy) #调用db的decrease方法将score减一 print('请求响应码不合法', response.status, 'IP', proxy) except (ClientError, aiohttp.client_exceptions.ClientConnectorError, asyncio.TimeoutError, AttributeError): self.redis.decrease(proxy) print('代理请求失败', proxy) def run(self): ''' 测试主函数 :return: ''' print('测试器开始运行') try: count = self.redis.count() print('当前剩余', count, '个代理') for i in range(0, count, BATCH_TEST_SIZE): # 步长为100 start = i stop = min(i + BATCH_TEST_SIZE, count) print('正在测试第', start + 1, '-', stop, '个代理') test_proxies = self.redis.batch( start, stop) # 调用db的batch()获取100个代理列表从高到低排列 loop = asyncio.get_event_loop() # 获取EventLoop tasks = [ self.test_single_proxy(proxy) for proxy in test_proxies ] loop.run_until_complete(asyncio.wait( tasks)) #执行异步任务tasks。在等待网站返回的时候去执行另一个任务,网站返回后跳回任务继续执行原任务 sys.stdout.flush() #输出实时信息,而不是等待运行完毕后输出 time.sleep(5) except Exception as e: print('测试器发送错误', e.args)
class Tester(object):
    """Synchronously check stored proxies with ``http.client``."""

    def __init__(self):
        self.redis = RedisClient()
        self.headers = {
            'User-agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.87 Safari/537.36',
        }

    def test_single_proxy(self, proxy):
        """
        Test a single proxy.

        Opens a connection to the proxy's host and port and issues a GET
        request. Any response counts as success (the status code is not
        inspected); any error counts as failure.

        :param proxy: ``host:port`` as ``str`` or UTF-8 encoded ``bytes``
        :return: None
        """
        if isinstance(proxy, bytes):
            proxy = proxy.decode('utf-8')
        print('正在测试', proxy)
        conn = None
        try:
            # Parsed inside the try so a malformed entry is scored as a
            # failure instead of aborting the whole run.
            parts = proxy.split(':')
            ip = parts[0]
            port = parts[1]
            conn = http.client.HTTPConnection(ip, port, timeout=5.0)
            # NOTE(review): `TETS_URL` looks like a typo of `TEST_URL`. If the
            # name is undefined, the NameError is caught below and every
            # proxy is scored as failed — confirm against the settings module.
            conn.request(method='GET', url=TETS_URL, headers=self.headers)
            conn.getresponse()
            print("+++Success:" + proxy)
            self.redis.max(proxy)
        except Exception:
            print("---Failure:" + proxy)
            self.redis.decrease(proxy)
        finally:
            # Always release the socket, on success and on failure alike.
            if conn is not None:
                conn.close()

    def run(self):
        """
        Test all stored proxies in batches of ``BATCH_TEST_SIZE``.

        :return: None
        """
        print('测试器开始运行')
        try:
            count = self.redis.count()
            print('当前剩余', count, '个代理')
            for i in range(0, count, BATCH_TEST_SIZE):
                start = i
                stop = min(i + BATCH_TEST_SIZE, count)
                print('正在测试第', start + 1, '-', stop, '个代理')
                test_proxies = self.redis.batch(start, stop)
                for proxy in test_proxies:
                    self.test_single_proxy(proxy)
                    time.sleep(0.5)
        except Exception as e:
            print('测试器发生错误', e.args)
class Tester(): def __init__(self): self.redis = RedisClient() async def test_single_proxy(self, proxy): """ 测试单个代理 """ # 如果proxy是字节类型的,以utf-8格式解码 if isinstance(proxy, bytes): proxy = proxy.decode('utf-8') real_proxy = 'http://' + proxy # 不验证SSL conn = aiohttp.TCPConnector(verify_ssl=False) async with aiohttp.ClientSession(connector=conn) as session: try: # 访问httpbin async with session.get(TEST_URL, proxy=real_proxy, timeout=7, allow_redirects=False) as req: # # 获取相应内容 # response_content = await req.json() # ip_response = response_content['origin'] # # 获取访问IP # juege_proxy = re.search('(.*):', proxy).group(1) # # 判断访问IP是否与代理一致 # if ip_response == juege_proxy: # # 代理分值设置为最高 # self.redis.max(proxy) # # print('代理可用', proxy) if req.status in VALID_STATUS_CODES: self.redis.max(proxy) else: self.redis.decrease(proxy) except Exception as e: # print(e.args) # 有异常则代理分数减一 self.redis.decrease(proxy) # print('代理不可用,分值-1', proxy) def run(self): """ 批量测试代理 """ try: # 获取当前代理池代理数量 count = self.redis.count() print('当前共有', count, '个代理!') # 批量测试代理 for i in range(0, count, BATCH_SIZE): start = i stop = min(i + BATCH_SIZE, count - 1) print('正在测试第', start + 1, '-', stop, '个代理!') proxies_list = self.redis.batch(start, stop) # 启用一个事件循环 loop = asyncio.get_event_loop() # 把携程对象封装为task task = [ self.test_single_proxy(proxy) for proxy in proxies_list ] # 运行 loop.run_until_complete(asyncio.wait(task)) sys.stdout.flush() time.sleep(5) except Exception as e: print('测试器发生错误', e.args)
class Tester(object): def __init__(self): self.redis = RedisClient() #这是一个异步的方法 async def test_single_proxy(self, proxy): """ 测试单个代理 :param proxy: :return: """ conn = aiohttp.TCPConnector(ssl=False) #建立一个session对象 #session可以进行多项操作,比如post, get, put, head等 async with aiohttp.ClientSession(connector=conn) as session: #检查如果是字节类型就解码 try: if isinstance(proxy, bytes): proxy = proxy.decode('utf-8') real_proxy = 'http://' + proxy print('正在测试', proxy) #利用session对象去get async with session.get(TEST_URL, proxy=real_proxy, timeout=15, allow_redirects=False) as response: #如果状态码有效 if response.status in VALID_STATUS_CODES: #状态码值设置为最大 self.redis.max(proxy) print('代理可用', proxy) else: self.redis.decrease(proxy) print('请求响应码不合法 ', response.status, 'IP', proxy) except (ClientError, aiohttp.client_exceptions.ClientConnectorError, asyncio.TimeoutError, AttributeError): self.redis.decrease(proxy) print('代理请求失败', proxy) def run(self): """ 测试主函数 :return: """ print('测试器开始运行') try: count = self.redis.count() print('当前剩余', count, '个代理') #取出数量为BATCH_TEST_SIZE的proxy for i in range(0, count, BATCH_TEST_SIZE): start = i #这样可以取到最后一个proxy stop = min(i + BATCH_TEST_SIZE, count) print('正在测试第', start + 1, '-', stop, '个代理') test_proxies = self.redis.batch(start, stop) #调用这个方法可以避免“进程已经运行”这个错误 nest_asyncio.apply() #主线程调用asyncio.get_event_loop()时会创建事件循环 loop = asyncio.get_event_loop() # #tasks为异步的任务,列表里面生成的为coroutine(协程)元素 tasks = [ self.test_single_proxy(proxy) for proxy in test_proxies ] #把异步的任务丢给这个循环的run_until_complete()方法 loop.run_until_complete(asyncio.wait(tasks)) sys.stdout.flush() time.sleep(5) except Exception as e: print('测试器发生错误', e.args)
class Verify:
    """Verify stored proxies by requesting ``setting.TEST_URL`` through them."""

    def __init__(self):
        self.db = RedisClient()

    async def verify_proxy(self, redis_key, proxy):
        """
        Verify one proxy.

        A 200 or 302 response calls ``self.db.max``; any other status, or a
        handled request error, calls ``self.db.decrease``.

        :param redis_key: pool key, passed through to the db calls
        :param proxy: proxy address as ``str`` or UTF-8 encoded ``bytes``
        :return: None
        """
        if isinstance(proxy, bytes):
            proxy = proxy.decode('utf-8')
        re_proxy = 'http://' + proxy
        conn = aiohttp.TCPConnector(ssl=False)
        async with aiohttp.ClientSession(connector=conn) as session:
            try:
                async with session.get(setting.TEST_URL, proxy=re_proxy,
                                       timeout=6,
                                       allow_redirects=False) as resp:
                    if resp.status in [200, 302]:
                        print("{}||{}池:{}: ok 100点".format(
                            time.ctime(), redis_key, proxy))
                        self.db.max(redis_key, proxy)
                    else:
                        print("{}||{}池:{}: fail -1点".format(
                            time.ctime(), redis_key, proxy))
                        self.db.decrease(redis_key, proxy)
            except (aiohttp.ClientError, aiohttp.ClientConnectorError,
                    asyncio.TimeoutError):
                print("{}||{}池:{}: error -1点".format(time.ctime(), redis_key,
                                                     proxy))
                self.db.decrease(redis_key, proxy)

    async def _verify_batch(self, redis_key, proxies):
        """Run ``verify_proxy`` concurrently for every proxy in *proxies*."""
        # asyncio.wait() rejects bare coroutine objects on Python 3.11+ and
        # raises ValueError for an empty batch; gather() handles both.
        # return_exceptions=True keeps one unexpected error from aborting the
        # remaining checks of the batch.
        await asyncio.gather(
            *(self.verify_proxy(redis_key, proxy) for proxy in proxies),
            return_exceptions=True
        )

    def run_verify_http(self, part):
        """
        Verify one quarter of the HTTP pool.

        The pool is split into four equal slices; slice 3 also takes the
        remainder left by the integer division.

        :param part: slice index, 0 to 3
        :return: None
        """
        stime = time.time()
        count = self.db.count(setting.REDIS_KEY_HTTP)
        start = part * (count // 4)
        stop = start + (count // 4)
        if part == 3:
            stop = count
        try:
            logger.info("{}开始验证{}-{}".format(setting.REDIS_KEY_HTTP, start,
                                             stop))
            for i in range(start, stop, setting.HTTP_VERIFY_SIZE):
                # Cap at `stop` so the last batch does not run into the range
                # of the next slice.
                # NOTE(review): whether `batch` treats its end index as
                # inclusive is not visible here — confirm against
                # RedisClient.batch.
                upper = min(i + setting.HTTP_VERIFY_SIZE, stop)
                proxies = self.db.batch(setting.REDIS_KEY_HTTP, i, upper)
                loop = asyncio.get_event_loop()
                loop.run_until_complete(
                    self._verify_batch(setting.REDIS_KEY_HTTP, proxies))
            logger.info("{}验证完成{}-{}耗时:{}".format(setting.REDIS_KEY_HTTP,
                                                  start, stop,
                                                  time.time() - stime))
        except Exception as e:
            # Logged as a warning, matching run_verify_https.
            logger.warning('{}验证报错{}-{}:{}'.format(setting.REDIS_KEY_HTTP,
                                                   start, stop, e))

    def run_verify_https(self):
        """
        Verify the whole HTTPS pool in batches of ``setting.HTTP_VERIFY_SIZE``.

        :return: None
        """
        stime = time.time()
        try:
            logger.info("{}开始验证".format(setting.REDIS_KEY_HTTPS))
            count = self.db.count(setting.REDIS_KEY_HTTPS)
            for i in range(0, count, setting.HTTP_VERIFY_SIZE):
                proxies = self.db.batch(setting.REDIS_KEY_HTTPS, i,
                                        i + setting.HTTP_VERIFY_SIZE)
                loop = asyncio.get_event_loop()
                loop.run_until_complete(
                    self._verify_batch(setting.REDIS_KEY_HTTPS, proxies))
            logger.info("{}验证完成,耗时:{}".format(setting.REDIS_KEY_HTTPS,
                                               time.time() - stime))
        except Exception as e:
            logger.warning('{}验证报错:{}'.format(setting.REDIS_KEY_HTTPS, e))