class Tester(object):
    """Score the proxies stored in Redis by requesting ``TEST_URL`` through them.

    A proxy answering with a status in ``VALID_STATUS_CODES`` is passed to
    ``self.redis.max``; every other outcome is passed to
    ``self.redis.decrease``.
    """

    def __init__(self):
        self.redis = RedisClient()

    async def test_single_proxy(self, proxy):
        """Check one proxy asynchronously with aiohttp.

        :param proxy: proxy address as ``str`` or ``bytes`` (bytes are decoded
            as UTF-8 before use)
        :return: None
        """
        conn = aiohttp.TCPConnector(verify_ssl=False)
        async with aiohttp.ClientSession(connector=conn) as session:
            try:
                if isinstance(proxy, bytes):
                    proxy = proxy.decode('utf-8')
                real_proxy = 'http://' + proxy
                print('正在测试', proxy)
                async with session.get(TEST_URL, proxy=real_proxy, timeout=10) as response:
                    if response.status in VALID_STATUS_CODES:
                        self.redis.max(proxy)
                    else:
                        self.redis.decrease(proxy)
                        print('请求响应不合法', proxy)
            # aiohttp reports request timeouts as asyncio.TimeoutError, which is
            # not an aiohttp ClientError, so it has to be listed explicitly.
            except (ClientConnectionError, ClientError, ConnectTimeout,
                    asyncio.TimeoutError):
                self.redis.decrease(proxy)
                print('代理请求失败', proxy)

    def run(self):
        """Test every stored proxy asynchronously, ``BATCH_TEST_SIZE`` at a time."""
        print('测试器开始运行')
        try:
            proxies = self.redis.all()
            loop = asyncio.get_event_loop()
            for i in range(0, len(proxies), BATCH_TEST_SIZE):
                test_proxies = proxies[i:i + BATCH_TEST_SIZE]
                # asyncio.wait() rejects bare coroutine objects on
                # Python 3.11+, so each coroutine is wrapped in a Task first.
                tasks = [loop.create_task(self.test_single_proxy(proxy))
                         for proxy in test_proxies]
                loop.run_until_complete(asyncio.wait(tasks))
                time.sleep(5)
        except Exception as e:
            print('测试器发生错误', e.args)

    def test_single_tread(self, proxy):
        """Check one proxy synchronously with ``requests``.

        :param proxy: proxy address as ``str`` or ``bytes``
        :return: None
        """
        # Mirror test_single_proxy: values coming from Redis may be bytes.
        if isinstance(proxy, bytes):
            proxy = proxy.decode('utf-8')
        real_proxy = {'https': 'https://' + proxy}
        print('测试', real_proxy)
        try:
            res = requests.get(TEST_URL, proxies=real_proxy, timeout=10)
            if res.status_code in VALID_STATUS_CODES:
                self.redis.max(proxy)
            else:
                self.redis.decrease(proxy)
        # RequestException also covers read timeouts and proxy errors, so one
        # bad proxy cannot abort new_run().
        except (ConnectionError, ConnectTimeout, requests.RequestException):
            self.redis.decrease(proxy)
            print('代理请求失败', proxy)

    def new_run(self):
        """Test every stored proxy sequentially with ``test_single_tread``."""
        for ip in self.redis.all():
            self.test_single_tread(ip)
class ValidTester(object):
    """Base class for cookie validators; subclasses provide ``test``."""

    def __init__(self, website='default'):
        """
        Set up the Redis clients used by the validator.
        :param website: name of the website whose cookies are validated
        """
        self.website = website
        self.cookies_db = RedisClient('cookies', self.website)
        self.accounts_db = RedisClient('accounts', self.website)

    def test(self, username, cookies):
        """
        Check whether the cookies are still valid; must be overridden.
        :param username: account name
        :param cookies: cookies stored for that account
        """
        raise NotImplementedError

    def run(self):
        """
        Run ``test`` on every (username, cookies) pair held in ``cookies_db``.
        """
        stored = self.cookies_db.all()
        for entry in stored.items():
            self.test(*entry)
class Test_ip(object):
    """Thread-based proxy checker that scores proxies held in Redis."""

    def __init__(self):
        self.db = RedisClient()
        self.headers = headers
        self.url = test_url

    def get_url(self, proxy, timeout=10):
        """Request ``self.url`` through ``proxy``.

        :param proxy: ``proxies`` mapping passed to ``requests.get``
        :param timeout: seconds before the request is abandoned; without it a
            dead proxy could block its worker thread indefinitely
        :return: True when the response status is 200, otherwise False
        """
        try:
            con = requests.get(self.url, headers=self.headers,
                               proxies=proxy, timeout=timeout)
        # Best-effort check: any failure means "not usable".  ``Exception``
        # (not a bare except) lets KeyboardInterrupt/SystemExit propagate.
        except Exception:
            return False
        return con.status_code == 200

    def test(self, ip):
        """Score one proxy: ``db.max`` on success, ``db.decrease`` otherwise.

        :param ip: proxy address as ``bytes`` or ``str``
        """
        if isinstance(ip, bytes):
            ip = ip.decode('utf-8')
        proxy = {'http': 'http://' + ip}
        if self.get_url(proxy):
            self.db.max(ip)
        else:
            self.db.decrease(ip)

    def run(self):
        """Start one daemon thread per stored proxy, pausing between starts."""
        for i, ip in enumerate(self.db.all()):
            t = threading.Thread(target=self.test, args=(ip,))
            t.daemon = True  # Thread.setDaemon() is deprecated
            t.start()
            random_time()
            # Pause after every 100 started threads (not after the first one).
            if (i + 1) % 100 == 0:
                time.sleep(5)
class Tester(object):
    """Score the proxies stored in Redis by requesting ``TEST_URL`` through them."""

    def __init__(self):
        self.redis = RedisClient()

    async def test_single_proxy(self, proxy):
        """Check one proxy asynchronously.

        :param proxy: proxy address as ``str`` or ``bytes``
        :return: None
        """
        conn = aiohttp.TCPConnector(verify_ssl=False)
        async with aiohttp.ClientSession(connector=conn) as session:
            try:
                if isinstance(proxy, bytes):
                    proxy = proxy.decode('utf-8')
                real_proxy = 'http://' + proxy
                print('正在测试', proxy)
                async with session.get(TEST_URL, proxy=real_proxy, timeout=15) as response:
                    if response.status in VALID_STATUS_CODES:
                        self.redis.max(proxy)
                        print("代理可用", proxy)
                    else:
                        self.redis.decrease(proxy)
                        print("请求响应码不合法", proxy)
            # Any failure while talking through the proxy counts against it.
            except Exception:
                self.redis.decrease(proxy)
                print("代理请求失败", proxy)

    def run(self):
        """Test every stored proxy, ``BATCH_TEST_SIZE`` at a time."""
        print("测试器开始运行")
        try:
            proxies = self.redis.all()
            loop = asyncio.get_event_loop()
            for i in range(0, len(proxies), BATCH_TEST_SIZE):
                test_proxies = proxies[i:i + BATCH_TEST_SIZE]
                # asyncio.wait() rejects bare coroutine objects on
                # Python 3.11+, so each coroutine is wrapped in a Task first.
                tasks = [loop.create_task(self.test_single_proxy(proxy))
                         for proxy in test_proxies]
                loop.run_until_complete(asyncio.wait(tasks))
                time.sleep(5)
        except Exception as e:
            print("测试器发生错误", e.args)
class PoolTester(object):
    """Thread-pool based checker for the proxies stored in Redis."""

    def __init__(self):
        self.redis = RedisClient()

    def test_single_proxy(self, proxy):
        """
        Check one proxy and record the outcome in Redis.
        :param proxy: proxy to check
        :return: None
        """
        usable = test_proxy_vaild(proxy)
        if not usable:
            self.redis.drop(proxy)
            print("[-] 代理不可用", proxy)
            return
        self.redis.max(proxy)
        print("[+] 代理可用", proxy)

    def run(self):
        """
        Entry point: check every stored proxy using a thread pool.
        :return: None
        """
        print("测试器开始运行.......")
        try:
            remaining = self.redis.count()
            print("当前剩余%d个代理" % (remaining))
            # Fan the checks out over worker threads; leaving the ``with``
            # block waits for all of them to finish.
            with ThreadPoolExecutor(FilterTreadCount) as executor:
                executor.map(self.test_single_proxy, self.redis.all())
        except Exception as err:
            print("测试器发生错误", err)
class PoolTester(object):
    """Thread-pool based checker for the proxies stored in Redis (coloured output)."""

    def __init__(self):
        self.redis = RedisClient()

    def testSingleProxy(self, proxy):
        """
        Check one proxy and record the outcome in Redis.
        :param proxy: proxy to check
        :return: None
        """
        usable = testProxyVaild(proxy)
        if not usable:
            self.redis.drop(proxy)
            print(Fore.RED + "[-] 代理不可用", proxy)
            return
        self.redis.max(proxy)
        print(Fore.GREEN + "[+] 代理可用", proxy)

    def run(self):
        """
        Entry point: check every stored proxy using a thread pool.
        :return: None
        """
        print(Fore.GREEN + "测试器开始运行.......")
        try:
            remaining = self.redis.count()
            print(Fore.GREEN + "当前剩余%d个代理" % remaining)
            # Fan the checks out over worker threads; leaving the ``with``
            # block waits for all of them to finish.
            with ThreadPoolExecutor(FILTER_THREAD_COUNT) as executor:
                executor.map(self.testSingleProxy, self.redis.all())
        except Exception as err:
            print(Fore.RED + "测试器发生错误", err)
class Tester():
    """
    Validate the proxies stored in Redis.
    """

    def __init__(self):
        self.redis = RedisClient()

    def test_single_proxy(self, proxy):
        """
        Test one proxy by requesting ``TEST_URL`` through it.

        :param proxy: proxy address as ``str`` or ``bytes``
        """
        if isinstance(proxy, bytes):
            proxy = proxy.decode('utf-8')
        # The request must actually be routed through the proxy; otherwise the
        # result only reflects whether TEST_URL is reachable directly.
        proxies = {'http': 'http://' + proxy, 'https': 'http://' + proxy}
        try:
            response = requests.get(url=TEST_URL, proxies=proxies, timeout=5)
            if response.status_code in VALID_STATUS_CODES:
                # Test succeeded: hand the proxy to redis.max()
                self.redis.max(proxy)
                print('测试成功', proxy, time.strftime('%Y-%m-%d %H:%M', time.localtime(time.time())))
            else:
                self.redis.decrease(proxy)
        except RequestException:
            print('代理测试请求异常', proxy)
            self.redis.decrease(proxy)

    def run(self):
        """Test every proxy currently stored in Redis, one after another."""
        print('测试器开始 测试代理%d个' % self.redis.count(), time.strftime('%Y-%m-%d %H-%M', time.localtime(time.time())))
        # Fetch all proxies from the database
        proxies = self.redis.all()
        for proxy in proxies:
            self.test_single_proxy(proxy)
class Tester(object):
    """Score the proxies stored in Redis by requesting ``test_url`` through them."""

    def __init__(self):
        self.redis = RedisClient()

    async def test_single_proxy(self, proxy):
        '''
        Test a single proxy.
        :param proxy: proxy address as ``str`` or ``bytes``
        :return: None
        '''
        conn = aiohttp.TCPConnector(verify_ssl=False)
        async with aiohttp.ClientSession(connector=conn) as session:
            try:
                # Values read from Redis may be bytes; normalise to str
                if isinstance(proxy, bytes):
                    proxy = proxy.decode('utf-8')
                real_proxy = 'http://' + proxy
                print('正在测试...')
                async with session.get(test_url, proxy=real_proxy, timeout=15) as response:
                    if response.status in valid_status_codes:
                        # Proxy works: hand it to redis.max()
                        self.redis.max(proxy)
                        print('代理可用')
                    else:
                        self.redis.decrease(proxy)
                        print('请求响应码不合法,代理检测失败')
            # Any failure while talking through the proxy counts against it.
            except Exception:
                self.redis.decrease(proxy)
                print('代理请求失败', proxy)

    def run(self):
        '''
        Main entry point: test all proxies in batches of ``batch_test_size``.
        :return: None
        '''
        print('测试器开始运行>>>>>>')
        try:
            proxie = self.redis.all()  # fetch every stored proxy
            loop = asyncio.get_event_loop()
            # Each batch is a list of tasks registered on the event loop; while
            # one task awaits network I/O the others keep running.
            for i in range(0, len(proxie), batch_test_size):
                test_proxies = proxie[i:i + batch_test_size]
                # asyncio.wait() rejects bare coroutine objects on
                # Python 3.11+, so each coroutine is wrapped in a Task first.
                tasks = [
                    loop.create_task(self.test_single_proxy(proxy))
                    for proxy in test_proxies
                ]
                loop.run_until_complete(asyncio.wait(tasks))
                time.sleep(5)
                print('检测完成')
        except Exception as e:
            print('测试发生错误!!', e)
class Checker(object):
    """Periodically check stored proxies and remove those that keep failing."""

    def __init__(self):
        self.db = RedisClient()
        # proxy -> number of failed checks seen so far
        self.counts = defaultdict(int)

    def check(self, proxy):
        """
        Test a proxy and return the result.
        :param proxy: proxy address
        :return: True when the test request answered 200, otherwise False
        """
        try:
            response = requests.get(settings.TEST_URL, proxies={
                'http': 'http://' + proxy,
                'https': 'https://' + proxy
            }, timeout=settings.TEST_TIMEOUT)
        except (ConnectionError, ReadTimeout):
            return False
        logger.debug(f'Using {proxy} to test {settings.TEST_URL}...')
        # Always return a bool (the non-200 case used to fall through to None).
        return response.status_code == 200

    def run(self):
        """
        Run one round of checks over every stored proxy.
        :return: None
        """
        proxies = self.db.all()
        logger.info(f'Try to get all proxies {proxies}')
        for name, proxy in proxies.items():
            # Record a failure for proxies that do not pass the check
            if not self.check(proxy):
                logger.info(f'Proxy {proxy} invalid')
                self.counts[proxy] += 1
            else:
                logger.info(f'Proxy {proxy} valid')
            count = self.counts.get(proxy) or 0
            logger.debug(
                f'Count {count}, TEST_MAX_ERROR_COUNT {settings.TEST_MAX_ERROR_COUNT}'
            )
            if count >= settings.TEST_MAX_ERROR_COUNT:
                self.db.remove(name)
                # Forget the counter once the proxy is removed; otherwise a
                # proxy that is added again later would be removed on its next
                # round even when it works, and the dict would grow forever.
                self.counts.pop(proxy, None)

    def loop(self):
        """
        Run check rounds forever, sleeping ``settings.TEST_CYCLE`` seconds between them.
        :return: never returns
        """
        while True:
            logger.info('Check for infinite')
            self.run()
            logger.info(f'Tested, sleeping for {settings.TEST_CYCLE}s...')
            time.sleep(settings.TEST_CYCLE)
class Tester(object):
    """Score the proxies stored in Redis by requesting ``TEST_URL`` through them."""

    def __init__(self):
        self.redis = RedisClient()

    async def test_single_proxy(self, proxy):
        """
        Test a single proxy.
        :param proxy: proxy address as ``str`` or ``bytes``
        :return: None
        """
        conn = aiohttp.TCPConnector(verify_ssl=False)
        async with aiohttp.ClientSession(connector=conn) as session:
            try:
                if isinstance(proxy, bytes):
                    proxy = proxy.decode('utf-8')
                real_proxy = 'http://' + proxy
                print('正在测试', proxy)
                async with session.get(TEST_URL, proxy=real_proxy, timeout=15) as response:
                    if response.status in VALID_STATUS_CODES:
                        self.redis.max(proxy)
                    else:
                        self.redis.decrease(proxy)
                        print('请求响应码不合法', proxy)
            except (ClientError, aiohttp.client_exceptions.ClientConnectorError,
                    asyncio.TimeoutError, AttributeError):
                self.redis.decrease(proxy)
                print('代理请求失败', proxy)

    def run(self):
        """
        Main entry point: test all proxies in batches of ``BATCH_TEST_SIZE``.
        :return: None
        """
        print('测试器开始运行')
        try:
            proxies = self.redis.all()
            loop = asyncio.get_event_loop()
            count = self.redis.count()
            print(count)
            # Batch over the list that was actually fetched: count() is a
            # separate query and may disagree with it, which could produce an
            # empty batch (asyncio.wait() raises ValueError on an empty set).
            for i in range(0, len(proxies), BATCH_TEST_SIZE):
                test_proxies = proxies[i:i + BATCH_TEST_SIZE]
                # asyncio.wait() rejects bare coroutine objects on
                # Python 3.11+, so each coroutine is wrapped in a Task first.
                tasks = [
                    loop.create_task(self.test_single_proxy(proxy))
                    for proxy in test_proxies
                ]
                loop.run_until_complete(asyncio.wait(tasks))
                time.sleep(5)
        except Exception as e:
            print('测试器发生错误', e.args)
class Tester:
    """Score the proxies stored in Redis by requesting ``settings.target_url`` through them."""

    def __init__(self):
        self.redis = RedisClient()

    async def test_single_proxy(self, proxy):
        """
        Test a single proxy.
        :param proxy: proxy address as ``str`` or ``bytes``
        :return: None
        """
        if isinstance(proxy, bytes):
            proxy = proxy.decode('utf-8')
        real_proxy = 'http://' + proxy
        conn = aiohttp.TCPConnector(verify_ssl=False)
        try:
            async with aiohttp.ClientSession(headers=settings.headers, connector=conn) as session:
                print('正在测试', proxy)
                # ``async with`` releases the response/connection when done.
                async with session.get(settings.target_url, proxy=real_proxy, timeout=5) as rsp:
                    if rsp.status == 200:
                        self.redis.max(proxy)
                        print('代理可用', proxy)
                    else:
                        # Penalise once only.  Raising here would land in the
                        # handler below and decrease the same proxy twice.
                        self.redis.decrease(proxy)
                        print('代理请求失败', proxy)
        except Exception as e:
            self.redis.decrease(proxy)
            print('代理请求失败', proxy)
            # Print the error itself; ``__cause__`` is None unless the
            # exception was raised with ``raise ... from ...``.
            print(e)

    def run(self):
        """
        Main entry point: test all proxies in batches of ``settings.test_request_count``.
        :return: None
        """
        print('开始测试...')
        try:
            proxies = self.redis.all()
            loop = asyncio.get_event_loop()
            for i in range(0, len(proxies), settings.test_request_count):
                test_proxies = proxies[i:i + settings.test_request_count]
                # asyncio.wait() rejects bare coroutine objects on
                # Python 3.11+, so each coroutine is wrapped in a Task first.
                task = [
                    loop.create_task(self.test_single_proxy(proxy))
                    for proxy in test_proxies
                ]
                loop.run_until_complete(asyncio.wait(task))
                time.sleep(5)
        except Exception as e:
            print('测试出现异常', e.args)
class ValidTester(object):
    """Base class for cookie validators; subclasses provide ``test``."""

    def __init__(self, website='default'):
        """
        :param website: name of the website whose cookies are validated
        """
        self.website = website
        self.cookies_db = RedisClient('cookies', self.website)
        self.accounts_db = RedisClient('accounts', self.website)

    def test(self, username, cookies):
        """
        Check whether the cookies are still valid; must be overridden.
        :param username: account name
        :param cookies: cookies stored for that account
        """
        raise NotImplementedError

    def run(self):
        """Run ``test`` on every (username, cookies) pair held in ``cookies_db``."""
        cookies_group = self.cookies_db.all()
        # Iterating a mapping directly yields only its keys, so the two-name
        # unpacking below would fail; use items() when the store returns a
        # mapping and fall back to the value itself for an iterable of pairs.
        if hasattr(cookies_group, 'items'):
            pairs = cookies_group.items()
        else:
            pairs = cookies_group
        for username, cookies in pairs:
            self.test(username, cookies)
class Tester():
    """Base class for cookie validators; subclasses provide ``test``."""

    def __init__(self, website="default"):
        """
        :param website: name of the website whose cookies are validated
        """
        self.website = website
        self.cookie_db = RedisClient('cookies', self.website)
        self.account_db = RedisClient('accounts', self.website)

    def test(self, username, cookie):
        """Check one account's cookie; must be overridden by subclasses."""
        raise NotImplementedError

    def run(self):
        """Run ``test`` on every (username, cookie) pair held in ``cookie_db``."""
        for pair in self.cookie_db.all().items():
            self.test(*pair)
class Tester(object):
    """Score the proxies stored in Redis by requesting ``TEST_URL`` through them."""

    def __init__(self):
        self.redis = RedisClient()

    async def test_single_proxy(self, proxy):
        """Check one proxy asynchronously.

        :param proxy: proxy address as ``str`` or ``bytes``
        :return: None
        """
        conn = aiohttp.TCPConnector(verify_ssl=False)
        async with aiohttp.ClientSession(connector=conn) as session:
            try:
                if isinstance(proxy, bytes):
                    proxy = proxy.decode('utf-8')
                real_proxy = 'http://' + proxy
                print('正在测试', proxy)
                async with session.get(TEST_URL, proxy=real_proxy, timeout=15) as response:
                    if response.status in VALID_STATUS_CODES:
                        self.redis.max(proxy)
                        print('代理可用', proxy)
                    else:
                        self.redis.decrease(proxy)
                        print('IP', proxy, '请求响应码不合法 ')
            except (ClientError, aiohttp.client_exceptions.ClientConnectorError,
                    asyncio.TimeoutError, AttributeError):
                self.redis.decrease(proxy)
                print('代理请求失败', proxy)

    def run(self):
        """Test every stored proxy, ``BATCH_TEST_SIZE`` at a time."""
        print('测试器运行')
        try:
            proxies = self.redis.all()
            loop = asyncio.get_event_loop()
            for i in range(0, len(proxies), BATCH_TEST_SIZE):
                test_proxies = proxies[i:i + BATCH_TEST_SIZE]
                # asyncio.wait() rejects bare coroutine objects on
                # Python 3.11+, so each coroutine is wrapped in a Task first.
                tasks = [loop.create_task(self.test_single_proxy(proxy))
                         for proxy in test_proxies]
                loop.run_until_complete(asyncio.wait(tasks))
                time.sleep(5)
        except Exception as e:
            print(e.args)
class Tester(object):
    """
    Tester: decides whether a proxy is usable by requesting ``TEST_URL`` through it.
    """

    def __init__(self):
        self.redis = RedisClient()

    async def test_single_proxy(self, proxy):
        """
        Test a single proxy.
        @param proxy: proxy address as ``str`` or ``bytes``
        return None
        """
        conn = aiohttp.TCPConnector(verify_ssl=False)
        async with aiohttp.ClientSession(connector=conn) as session:
            try:
                # ``'%s' % b'1.2.3.4:80'`` would yield "b'1.2.3.4:80'", so
                # bytes coming from Redis are decoded first.
                if isinstance(proxy, bytes):
                    proxy = proxy.decode('utf-8')
                real_proxy = 'http://%s' % proxy
                async with session.get(TEST_URL, proxy=real_proxy, timeout=2) as response:
                    if response.status in VALID_STATUS_CODES:
                        self.redis.max(proxy)
                    else:
                        self.redis.decrease(proxy)
            # asyncio.TimeoutError is listed explicitly in case ``TimeoutError``
            # here is the builtin, which is a different class before
            # Python 3.11 and would not match aiohttp's timeout.
            except (ClientError, ClientConnectorError, TimeoutError,
                    asyncio.TimeoutError, AttributeError):
                self.redis.decrease(proxy)

    def run(self):
        """
        Main entry point: test all proxies in batches of ``BATCH_TEST_SIZE``.
        """
        print('Starting test')
        try:
            # Fetch every stored proxy
            proxies = self.redis.all()
            loop = asyncio.get_event_loop()
            # Test in batches
            for i in range(0, len(proxies), BATCH_TEST_SIZE):
                test_proxies = proxies[i:i + BATCH_TEST_SIZE]
                # asyncio.wait() rejects bare coroutine objects on
                # Python 3.11+, so each coroutine is wrapped in a Task first.
                tasks = [
                    loop.create_task(self.test_single_proxy(proxy))
                    for proxy in test_proxies
                ]
                loop.run_until_complete(asyncio.wait(tasks))
                time.sleep(5)
        except Exception as e:
            print('Tester errors', e.args)
class Tester(object):
    """Score the proxies stored in Redis by requesting ``TEST_URL`` through them."""

    def __init__(self):
        # Shared RedisClient used by the other methods of this object
        self.redis = RedisClient()

    async def test_single_proxy(self, proxy):
        """
        Test a single proxy.

        The ``proxy`` parameter was missing from the signature although both
        the body and ``run`` use it, so every call raised TypeError.

        :param proxy: proxy address as ``str`` or ``bytes``
        :return: None
        """
        conn = aiohttp.TCPConnector(verify_ssl=False)
        # aiohttp ClientSession used to issue the test request
        async with aiohttp.ClientSession(connector=conn) as session:
            try:
                if isinstance(proxy, bytes):
                    proxy = proxy.decode('utf-8')
                real_proxy = 'http://' + proxy
                print('正在测试', proxy)
                # TEST_URL is a constant; point it at the target site when the
                # proxies are meant for crawling one specific website.
                async with session.get(TEST_URL, proxy=real_proxy, timeout=15) as response:
                    # VALID_STATUS_CODES lists the status codes treated as success
                    if response.status in VALID_STATUS_CODES:
                        self.redis.max(proxy)
                        print('代理可用', proxy)
                    else:
                        self.redis.decrease(proxy)
                        print('请求相应码不合法', proxy)
            # asyncio.TimeoutError is listed explicitly in case ``TimeoutError``
            # here is the builtin, which is a different class before
            # Python 3.11 and would not match aiohttp's timeout.
            except (ClientError, ClientConnectorError, TimeoutError,
                    asyncio.TimeoutError, AttributeError):
                # Any request failure also counts against the proxy
                self.redis.decrease(proxy)
                print('代理请求失败', proxy)

    def run(self):
        """
        Main entry point: test all proxies in batches of ``BATCH_TEST_SIZE``.
        :return: None
        """
        print('测试器开始运行')
        try:
            proxies = self.redis.all()
            loop = asyncio.get_event_loop()
            # Test in batches
            for i in range(0, len(proxies), BATCH_TEST_SIZE):
                test_proxies = proxies[i:i + BATCH_TEST_SIZE]
                # asyncio.wait() rejects bare coroutine objects on
                # Python 3.11+, so each coroutine is wrapped in a Task first.
                tasks = [loop.create_task(self.test_single_proxy(proxy))
                         for proxy in test_proxies]
                loop.run_until_complete(asyncio.wait(tasks))
                time.sleep(5)
        except Exception as e:
            print('测试器发生错误', e.args)
class Tester(object):
    """Score the proxies stored in Redis by requesting ``TEST_URL`` through them."""

    def __init__(self):
        self.redis = RedisClient()

    async def test_single_proxy(self, proxy):
        """Check one proxy asynchronously.

        :param proxy: proxy address as ``str`` or ``bytes``
        :return: None
        """
        conn = aiohttp.TCPConnector(ssl=False)
        async with aiohttp.ClientSession(connector=conn) as session:
            try:
                if isinstance(proxy, bytes):
                    proxy = proxy.decode("utf-8")
                real_proxy = "http://" + proxy
                print("正在测试", proxy)
                async with session.get(TEST_URL, proxy=real_proxy, timeout=15) as response:
                    if response.status in VALID_STATUS_CODES:
                        self.redis.max(proxy)
                        print("代理可用", proxy)
                    else:
                        self.redis.decrease(proxy)
                        print("请求响应码不合法", proxy)
            except (aiohttp.ClientError, aiohttp.ClientConnectorError,
                    asyncio.TimeoutError, AttributeError):
                self.redis.decrease(proxy)
                print("代理请求失败", proxy)

    def run(self):
        """Test every stored proxy, ``BATCH_TEST_SIZE`` at a time."""
        print("测试器开始执行")
        try:
            proxies = self.redis.all()
            loop = asyncio.get_event_loop()
            # Batch over the list that was actually fetched rather than a
            # separate count() query: if the two disagree, an empty slice
            # would make asyncio.wait() raise ValueError.
            for i in range(0, len(proxies), BATCH_TEST_SIZE):
                test_proxies = proxies[i:i + BATCH_TEST_SIZE]
                # asyncio.wait() rejects bare coroutine objects on
                # Python 3.11+, so each coroutine is wrapped in a Task first.
                tasks = [
                    loop.create_task(self.test_single_proxy(proxy))
                    for proxy in test_proxies
                ]
                loop.run_until_complete(asyncio.wait(tasks))
                sys.stdout.flush()
                time.sleep(5)
        except Exception as e:
            print("测试器发生错误", e.args)
class Sync(object):
    """
    Two-way synchronisation between the MySQL proxy table and Redis.
    """

    def __init__(self):
        self.redis = RedisClient()
        self.pool = ProxyPool.objects.filter(is_exsist=True)

    def sync_start(self):
        """Copy MySQL rows into Redis, then push every Redis proxy back to MySQL."""
        # MySQL -> Redis
        for row in self.pool:
            self.redis.add(row.proxy, row.score, mysql_save=False)
        # Redis -> MySQL
        for entry in self.redis.all():
            self.redis.mysql_add(entry)
class Tester:
    """Check whether pooled proxies work: ``redis.max`` on success, ``redis.decrease`` otherwise."""

    def __init__(self):
        self.redis = RedisClient()

    async def single_test(self, proxy):
        """Test a single proxy.

        :param proxy: proxy address as ``str`` or ``bytes``
        :return: None
        """
        conn = aiohttp.TCPConnector(verify_ssl=False)
        async with aiohttp.ClientSession(connector=conn) as session:
            try:
                if isinstance(proxy, bytes):
                    proxy = proxy.decode('utf-8')
                real_proxy = "http://" + proxy
                print("正在测试:", proxy)
                async with session.get(TEST_URL, proxy=real_proxy, timeout=15) as response:
                    # aiohttp responses expose ``status``; ``status_code`` is
                    # the requests spelling and raised AttributeError here, so
                    # every proxy ended up penalised.
                    if response.status in VALID_STATUS_CODES:
                        self.redis.max(proxy)
                        print(proxy, '代理可用')
                    else:
                        self.redis.decrease(proxy)
                        print(proxy, 'IP 请求响应码不合法')
            # aiohttp reports request timeouts as asyncio.TimeoutError, which
            # is not a ClientError subclass.
            except (ClientError, asyncio.TimeoutError, AttributeError):
                self.redis.decrease(proxy)
                print('代理请求失败', proxy)

    def run(self):
        """Test every stored proxy asynchronously, ``BATCH_TEST_SIZE`` at a time."""
        print('开始测试代理')
        try:
            count = self.redis.count()
            print('当前剩余', count, '个代理')
            proxies = self.redis.all()
            loop = asyncio.get_event_loop()
            # Batch over the fetched list: count() is a separate query, and a
            # mismatch could yield an empty batch (asyncio.wait() would raise).
            for start in range(0, len(proxies), BATCH_TEST_SIZE):
                test_proxies = proxies[start:start + BATCH_TEST_SIZE]
                # asyncio.wait() rejects bare coroutine objects on
                # Python 3.11+, so each coroutine is wrapped in a Task first.
                tasks = [loop.create_task(self.single_test(proxy))
                         for proxy in test_proxies]
                loop.run_until_complete(asyncio.wait(tasks))
                time.sleep(randint(1, 5))
        except Exception as e:
            print('测试器发生错误', e.args)
class Tester:
    """Score the proxies stored in Redis by requesting ``TEST_URL`` through them."""

    def __init__(self):
        """Create the database client."""
        self.redis = RedisClient()

    async def test_one_proxy(self, proxy):
        """Test whether one proxy can reach the target site.

        :param proxy: proxy address as ``str`` or ``bytes``
        :return: None
        """
        conn = aiohttp.TCPConnector(ssl=False)
        async with aiohttp.ClientSession(connector=conn) as session:
            try:
                if isinstance(proxy, bytes):
                    # Decode to str
                    proxy = proxy.decode('utf-8')
                real_proxy = 'http://' + proxy
                async with session.get(TEST_URL, proxy=real_proxy, timeout=30) as response:
                    if response.status in TRUE_STATUS_CODE:
                        # Proxy is usable
                        self.redis.max(proxy)
                        print(proxy, 100, '可用')
                    else:
                        # Proxy is not usable
                        self.redis.decrease(proxy)
                        print(proxy, -1, "状态码错误")
            except Exception as e:
                self.redis.decrease(proxy)
                print(proxy, -1, e.args)

    async def start(self):
        """Run the coroutines that test all proxies, ``BATCH_TEST_SIZE`` at a time."""
        try:
            proxies = self.redis.all()
            for i in range(0, len(proxies), BATCH_TEST_SIZE):
                test_proxies = proxies[i:i + BATCH_TEST_SIZE]
                tasks = [self.test_one_proxy(proxy) for proxy in test_proxies]
                await asyncio.gather(*tasks)
                # Pause between batches without blocking the event loop
                # (time.sleep() would stall every coroutine on it).
                await asyncio.sleep(5)
        except Exception as e:
            print('测试器发生错误', e.args)

    def run(self):
        """Synchronous entry point: run ``start`` to completion."""
        asyncio.run(self.start())
class Tester(object):
    """Score the proxies stored in Redis by requesting ``TEST_URL`` through them."""

    def __init__(self):
        self.redis = RedisClient()

    async def test_single_proxy(self, proxy):
        """Check one proxy asynchronously (redirects are not followed).

        :param proxy: proxy address as ``str`` or ``bytes``
        :return: None
        """
        conn = aiohttp.TCPConnector(ssl=False)
        async with aiohttp.ClientSession(connector=conn) as session:
            try:
                if isinstance(proxy, bytes):
                    proxy = proxy.decode('utf-8')
                real_proxy = 'http://' + proxy
                async with session.get(TEST_URL, proxy=real_proxy, timeout=15,
                                       allow_redirects=False) as response:
                    if response.status in VALID_STATUS_CODES:
                        self.redis.max(proxy)
                    else:
                        self.redis.decrease(proxy)
            except (aiohttp.ClientError, aiohttp.client_exceptions.ClientConnectorError,
                    asyncio.TimeoutError, AttributeError):
                self.redis.decrease(proxy)

    def run(self):
        """Test every stored proxy, ``BATCH_TEST_SIZE`` at a time."""
        print('测试器开始运行!')
        try:
            proxies = self.redis.all()
            count = len(proxies)
            print('当前共有{0}个代理'.format(count))
            # The event loop does not change between batches; look it up once.
            loop = asyncio.get_event_loop()
            for start in range(0, count, BATCH_TEST_SIZE):
                test_proxies = proxies[start:start + BATCH_TEST_SIZE]
                # asyncio.wait() rejects bare coroutine objects on
                # Python 3.11+, so each coroutine is wrapped in a Task first.
                tasks = [
                    loop.create_task(self.test_single_proxy(proxy))
                    for proxy in test_proxies
                ]
                loop.run_until_complete(asyncio.wait(tasks))
                time.sleep(5)
        except Exception as e:
            print('测试器发生错误', e.args)
class Tester_2:
    """Synchronous proxy checker built on ``requests``."""

    # Upper bound on proxies tested per run() call.
    MAX_TESTS_PER_RUN = 15
    # Seconds before a test request is abandoned; without a timeout one dead
    # proxy could block the run indefinitely.
    REQUEST_TIMEOUT = 15

    def __init__(self):
        self.redis = RedisClient()
        self.headers = {
            'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) '
                          'Chrome/73.0.3683.103 Safari/537.36'
        }

    def single_test(self, proxy):
        """Test one proxy and record the outcome in Redis.

        :param proxy: proxy address as ``str`` or ``bytes``
        :return: None
        """
        # Values read from Redis may be bytes; ``'http://' + bytes`` would
        # raise TypeError and abort the whole run.
        if isinstance(proxy, bytes):
            proxy = proxy.decode('utf-8')
        proxies = {'http': 'http://' + proxy, 'https': 'https://' + proxy}
        try:
            resp = requests.get(TEST_URL, proxies=proxies, headers=self.headers,
                                timeout=self.REQUEST_TIMEOUT)
            if resp.status_code == 200:
                print(proxy, '代理可用')
                self.redis.max(proxy)
            else:
                self.redis.decrease(proxy)
                print(proxy, 'IP 请求响应码不合法')
        # RequestException also covers timeouts and proxy errors, so a single
        # failing proxy is penalised instead of stopping run().
        except (ConnectionError, requests.RequestException):
            print('代理请求失败', proxy)
            self.redis.decrease(proxy)

    def run(self):
        """Test up to ``MAX_TESTS_PER_RUN`` stored proxies, pausing 1-5 s between tests."""
        count = self.redis.count()
        print('共有', count, '个代理')
        print('开始检测代理')
        proxies = self.redis.all()
        try:
            for tested, proxy in enumerate(proxies, start=1):
                print(proxy)
                self.single_test(proxy)
                if tested == self.MAX_TESTS_PER_RUN:
                    break
                time.sleep(randint(1, 5))
        except Exception as e:
            print('测试器发生错误', e)
class Tester(object):
    """Score the proxies stored in Redis by requesting ``TEST_URL`` through them."""

    def __init__(self):
        self.redis = RedisClient()

    async def test_single_proxy(self, proxy):
        """Test single proxy.

        :param proxy: proxy address as ``str`` or ``bytes``
        :return: None
        """
        try:
            conn = TCPConnector(verify_ssl=False)
            async with ClientSession(connector=conn) as session:
                if isinstance(proxy, bytes):
                    proxy = proxy.decode('utf-8')
                real_proxy = 'http://' + proxy
                print('正在测试: ', proxy)
                async with session.get(TEST_URL, proxy=real_proxy, timeout=10) as response:
                    if response.status in VALID_STATUS_CODES:
                        self.redis.max(proxy)
                        print('代理可用: ', proxy)
                    else:
                        # Was misspelled ``derease``: the AttributeError fell
                        # into the handler below and this message never printed.
                        self.redis.decrease(proxy)
                        print('请求响应不合法: ', proxy)
        # asyncio.TimeoutError is listed explicitly in case ``TimeoutError``
        # here is the builtin, which is a different class before Python 3.11
        # and would not match aiohttp's timeout.
        except (ClientError, ClientConnectorError, TimeoutError,
                asyncio.TimeoutError, AttributeError):
            self.redis.decrease(proxy)
            print('代理请求失败: ', proxy)

    def run(self):
        """Test every stored proxy, ``BATCH_TEST_SIZE`` at a time."""
        print('开始测试...')
        try:
            proxies = self.redis.all()
            loop = asyncio.get_event_loop()
            for index in range(0, len(proxies), BATCH_TEST_SIZE):
                test_proxies = proxies[index:index + BATCH_TEST_SIZE]
                # asyncio.wait() rejects bare coroutine objects on
                # Python 3.11+, so each coroutine is wrapped in a Task first.
                tasks = [
                    loop.create_task(self.test_single_proxy(proxy))
                    for proxy in test_proxies
                ]
                loop.run_until_complete(asyncio.wait(tasks))
                time.sleep(5)
        except Exception as e:
            print('测试发生错误', e.args)
class Getter(object):
    """Fetch proxies with the crawler and store them in Redis."""

    def __init__(self):
        self.redis = RedisClient()
        self.crawler = Crawler()

    def is_over_threshhold(self):
        """Return True when the pool already holds ``POOL_UPPER_THRESHOLD`` or more proxies."""
        if self.redis.count() < POOL_UPPER_THRESHOLD:
            return False
        return True

    def run(self):
        """Crawl new proxies unless the pool is full, then print every proxy's score."""
        print('获取器开始执行')
        pool_is_full = self.is_over_threshhold()
        if not pool_is_full:
            for ip in self.crawler.run():
                self.redis.add(ip)
                print('已抓取', ip)
        print('结束,共抓取', self.redis.count())
        for stored in self.redis.all():
            print(stored, '当前分数', self.redis.score(stored))
class Tester(object):
    """Score the proxies stored in Redis by requesting ``TEST_URL`` through them."""

    def __init__(self):
        self.redis = RedisClient()

    async def test_single_proxy(self, proxy):
        """Test a single proxy.

        :param proxy: proxy address as ``str`` or ``bytes``
        :return: None
        """
        conn = aiohttp.TCPConnector(verify_ssl=False)
        async with aiohttp.ClientSession(connector=conn) as session:
            try:
                if isinstance(proxy, bytes):
                    proxy = proxy.decode('utf-8')
                real_proxy = 'http://' + proxy
                print('正在测试', proxy)
                async with session.get(TEST_URL, proxy=real_proxy, timeout=15) as response:
                    if response.status in VALID_STATUS_CODES:
                        self.redis.max(proxy)
                        print('代理可用', proxy)
                    else:
                        self.redis.decrease(proxy)
                        print('请求响应码不合法', proxy)
            # asyncio.TimeoutError is listed explicitly in case ``TimeoutError``
            # here is the builtin, which is a different class before
            # Python 3.11 and would not match aiohttp's timeout.
            except (ClientProxyConnectionError, ServerDisconnectedError, ClientOSError,
                    ClientHttpProxyError, TimeoutError, asyncio.TimeoutError,
                    AttributeError):
                self.redis.decrease(proxy)
                print('代理请求失败', proxy)

    def run(self):
        """Main entry point: test all proxies in batches of ``BATCH_TEST_SIZE``."""
        print('测试器开始运行')
        try:
            proxies = self.redis.all()
            loop = asyncio.get_event_loop()
            # Start at index 0: starting at 1 silently skipped the first proxy.
            for i in range(0, len(proxies), BATCH_TEST_SIZE):
                test_proxies = proxies[i:i + BATCH_TEST_SIZE]
                # asyncio.wait() rejects bare coroutine objects on
                # Python 3.11+, so each coroutine is wrapped in a Task first.
                tasks = [loop.create_task(self.test_single_proxy(proxy))
                         for proxy in test_proxies]
                loop.run_until_complete(asyncio.wait(tasks))
                time.sleep(5)
        except Exception as e:
            print('测试器发生错误', e.args)
class Tester():
    """Score the proxies stored in Redis by requesting ``TEST_URL`` through them."""

    def __init__(self):
        self.redis = RedisClient()

    async def single_test(self, proxy):
        """Try connecting through a single proxy.

        :param proxy: proxy address as ``str`` or ``bytes``
        :return: None
        """
        conn = aiohttp.TCPConnector(ssl=False)
        async with aiohttp.ClientSession(connector=conn) as session:
            try:
                # Neither str nor bytes has a ``.string()`` method; the old
                # call raised AttributeError, so every proxy was penalised
                # without being tested.  Decode bytes instead.
                if isinstance(proxy, bytes):
                    proxy = proxy.decode('utf-8')
                # Plain HTTP proxy URL, the form aiohttp's ``proxy`` argument
                # documents; HTTPS targets are tunnelled through it.
                real_proxy = "http://" + proxy
                print("testing", proxy)
                async with session.get(TEST_URL, allow_redirects=False,
                                       proxy=real_proxy, timeout=15) as response:
                    if response.status in VALID_STATUS_CODE:
                        self.redis.max(proxy)
                        print("Available proxy:", proxy)
                    else:
                        self.redis.decrease(proxy)
                        print("Not Available Status:", proxy, " Score -1")
            # asyncio.TimeoutError is listed explicitly in case ``TimeoutError``
            # here is the builtin, which is a different class before
            # Python 3.11 and would not match aiohttp's timeout.
            except (aiohttp.ClientError, aiohttp.ClientConnectorError, TimeoutError,
                    asyncio.TimeoutError, AttributeError, aiohttp.ClientOSError,
                    aiohttp.ClientHttpProxyError):
                self.redis.decrease(proxy)
                print("Error detected!", proxy)

    def run(self):
        """Test every stored proxy, ``BATCH_TEST_SIZE`` at a time."""
        print("Starts running tester")
        try:
            entries = self.redis.all()
            loop = asyncio.get_event_loop()
            # Step by the same size used for slicing; a hard-coded step of 200
            # skips or re-tests proxies whenever BATCH_TEST_SIZE != 200.
            for i in range(0, len(entries), BATCH_TEST_SIZE):
                test_proxies = entries[i:i + BATCH_TEST_SIZE]
                # asyncio.wait() rejects bare coroutine objects on
                # Python 3.11+, so each coroutine is wrapped in a Task first.
                tasks = [loop.create_task(self.single_test(proxy))
                         for proxy in test_proxies]
                loop.run_until_complete(asyncio.wait(tasks))
                time.sleep(5)
        except Exception as e:
            print(' Error with tester ', e.args)
def main():
    """Endlessly re-check all pooled proxies with five ``TestProxy`` threads per cycle."""
    redis_cli = RedisClient()
    while True:
        proxy_list = redis_cli.all()  # fetch every stored proxy
        if not proxy_list:
            print('代理池为空')
        else:
            # Build all five workers first, then start them, then wait for
            # every one of them to finish.
            workers = [TestProxy(proxy_list) for _ in range(5)]
            for worker in workers:
                worker.start()
            for worker in workers:
                worker.join()
        print(f'检测完毕,暂停{TEST_CYCLE}秒')
        time.sleep(TEST_CYCLE)
class Tester(object):
    """Score the proxies stored in Redis by requesting ``TEST_URL`` through them."""

    def __init__(self):
        self.redis = RedisClient()

    async def testSingleProxy(self, proxy):
        """Check one proxy asynchronously.

        :param proxy: proxy address as ``str`` or ``bytes``
        :return: None
        """
        conn = aiohttp.TCPConnector(verify_ssl=False)
        async with aiohttp.ClientSession(connector=conn) as session:
            try:
                if isinstance(proxy, bytes):
                    proxy = proxy.decode('utf-8')
                real = 'http://' + proxy
                print('Testing')
                async with session.get(TEST_URL, proxy=real, timeout=10) as resp:
                    if resp.status in VAILD_STATUS_CODE:
                        self.redis.max(proxy)
                    else:
                        self.redis.decrease(proxy)
            # asyncio.TimeoutError is listed explicitly in case ``TimeoutError``
            # here is the builtin, which is a different class before
            # Python 3.11 and would not match aiohttp's timeout.
            except (ClientError, ClientConnectorError, TimeoutError,
                    asyncio.TimeoutError, AttributeError):
                self.redis.decrease(proxy)

    def run(self):
        """Test every stored proxy, ``BATCH_TEST_SIZE`` at a time."""
        try:
            proxies = self.redis.all()
            loop = asyncio.get_event_loop()
            for i in range(0, len(proxies), BATCH_TEST_SIZE):
                testProxies = proxies[i:i + BATCH_TEST_SIZE]
                # asyncio.wait() rejects bare coroutine objects on
                # Python 3.11+, so each coroutine is wrapped in a Task first.
                tasks = [loop.create_task(self.testSingleProxy(proxy))
                         for proxy in testProxies]
                loop.run_until_complete(asyncio.wait(tasks))
                time.sleep(5)
        except Exception as e:
            print(e.args)
class Tester:
    """Validate the cookies stored for one website by requesting ``TEST_URL`` with them."""

    def __init__(self, website='tianyancha'):
        """Create the database client for the ``accounts`` store of ``website``."""
        self.website = website
        self.redis = RedisClient('accounts', self.website)

    async def test_one_proxy(self, key, proxy):
        """Test whether one stored cookie string is still usable on the target site.

        :param key: Redis key the cookie is stored under
        :param proxy: cookie string; its last character is dropped before it
            is sent as the ``Cookie`` header
        :return: None
        """
        try:
            headers = {
                "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
                "Accept-Encoding": "gzip, deflate, br",
                "Accept-Language": "zh-CN,zh;q=0.9",
                "Cache-Control": "max-age=0",
                "Connection": "keep-alive",
                "Cookie": proxy[:-1],
                "Host": "www.tianyancha.com",
                "Upgrade-Insecure-Requests": "1",
                "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.102 Safari/537.36"
            }
        except Exception as e:
            print(key, '测试错误', -20, e)
            return
        try:
            # requests is blocking; run it in the default executor so the
            # coroutines of one batch really overlap instead of stalling the
            # event loop one after another.
            loop = asyncio.get_running_loop()
            response = await loop.run_in_executor(
                None,
                lambda: requests.get(TEST_URL, headers=headers, timeout=30),
            )
            result = response.text
            html = etree.HTML(result)
            # Text extracted by the ni-sp-name xpath; an empty string is
            # treated as "cookie not usable" below.
            user = "".join(
                html.xpath('//span[@class="ni-sp-name"]//text()'))
            print(user, '*' * 20)
            if response.status_code in TRUE_STATUS_CODE and user:
                # Cookie is usable
                self.redis.max(key, proxy)
                print(key, 100, '可用')
            else:
                # Cookie is not usable
                self.redis.decrease(key, proxy)
                print(key, -20, "状态码错误")
        except Exception as e:
            print(key, '请求错误', -20, e)

    async def start(self):
        """Run the coroutines that test all cookies, ``BATCH_TEST_SIZE`` at a time."""
        try:
            keys = self.redis.get()
            for key in keys:
                if "tianyancha" in key:
                    continue
                proxies = self.redis.all(key)
                print(key)
                # Step by the batch size: a step of 1 produced overlapping
                # batches that re-tested almost every cookie many times.
                for i in range(0, len(proxies), BATCH_TEST_SIZE):
                    test_proxies = proxies[i:i + BATCH_TEST_SIZE]
                    tasks = [
                        self.test_one_proxy(key, proxy)
                        for proxy in test_proxies
                    ]
                    # gather() must be awaited, otherwise none of the
                    # coroutines is ever executed.
                    await asyncio.gather(*tasks)
                    # Pause between batches without blocking the event loop.
                    await asyncio.sleep(5)
        except Exception as e:
            print('测试器发生错误', e.args)

    def run(self):
        """Synchronous entry point: run ``start`` to completion."""
        asyncio.run(self.start())
except Exception as e: print('测试器发生错误', e.args) def test_single_tread(self, proxy): real_proxy = {'https': 'https://' + proxy} print('测试', real_proxy) try: res = requests.get(TEST_URL, proxies=real_proxy, timeout=10) if res.status_code in VALID_STATUS_CODES: self.redis.max(proxy) else: self.redis.decrease(proxy) except (ConnectionError, ConnectTimeout): self.redis.decrease(proxy) print('代理请求失败', proxy) def new_run(self): for ip in self.redis.all(): self.test_single_tread(ip) if __name__ == '__main__': db = RedisClient() tester = Tester() # while True: # print('测试器开始') # tester.run() # time.sleep(5) for i in db.all(): tester.test_single_tread(i)