class Checker(object):
    """
    Periodically test every proxy held in the Redis store and evict bad ones.

    Failed checks are tallied per proxy address in ``self.counts``; an entry
    is removed from the store once its tally reaches
    ``settings.TEST_MAX_ERROR_COUNT``.
    """

    def __init__(self):
        self.db = RedisClient()
        # proxy address -> number of failed checks recorded so far
        self.counts = defaultdict(int)

    def check(self, proxy):
        """
        Test a proxy by requesting ``settings.TEST_URL`` through it.

        :param proxy: proxy address; it is appended to ``http://`` and
            ``https://`` to build the proxy URLs
        :return: True if the response status is 200, otherwise False
        """
        # Logged before the request so that failed attempts are traceable too.
        logger.debug(f'Using {proxy} to test {settings.TEST_URL}...')
        try:
            response = requests.get(settings.TEST_URL, proxies={
                'http': 'http://' + proxy,
                'https': 'https://' + proxy
            }, timeout=settings.TEST_TIMEOUT)
        except (ConnectionError, ReadTimeout):
            return False
        return response.status_code == 200

    def run(self):
        """
        Run one round of checks over every proxy currently in the store.

        :return: None
        """
        proxies = self.db.all()
        logger.info(f'Try to get all proxies {proxies}')
        evicted = set()
        for name, proxy in proxies.items():
            if self.check(proxy):
                logger.info(f'Proxy {proxy} valid')
            else:
                logger.info(f'Proxy {proxy} invalid')
                self.counts[proxy] += 1
            count = self.counts.get(proxy, 0)
            logger.debug(
                f'Count {count}, TEST_MAX_ERROR_COUNT {settings.TEST_MAX_ERROR_COUNT}'
            )
            if count >= settings.TEST_MAX_ERROR_COUNT:
                self.db.remove(name)
                evicted.add(proxy)
        # Drop the tallies of evicted proxies. Without this, an address that
        # is published again later would be evicted on its first check even
        # if it is valid, and ``counts`` would grow without bound. This is
        # done after the loop so every name sharing an address is still
        # judged against the same tally within this round.
        for proxy in evicted:
            self.counts.pop(proxy, None)

    def loop(self):
        """
        Run check rounds forever, sleeping ``settings.TEST_CYCLE`` seconds
        between rounds.

        :return: never returns
        """
        while True:
            logger.info('Check for infinite')
            self.run()
            logger.info(f'Tested, sleeping for {settings.TEST_CYCLE}s...')
            time.sleep(settings.TEST_CYCLE)
class Sender(object):
    """
    Re-dial the ADSL line and publish the resulting proxy to Redis.
    """

    def __init__(self, server_ip, port, user, pwd, clent_name):
        # ``clent_name`` (sic) is kept unchanged so keyword callers keep working.
        self.CLIENT_NAME = clent_name
        self.ADSLHOST = server_ip
        self.ADSLPORT = port
        self.ADSLUSER = user
        self.ADSLPWD = pwd

    def test_proxy(self, proxy):
        """
        Test a proxy by requesting ``TEST_URL`` through it.

        :param proxy: proxy address; it is appended to ``http://`` and
            ``https://`` to build the proxy URLs
        :return: True if the response status is 200, otherwise False
        """
        headers = {
            "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.116 Safari/537.36"
        }
        try:
            response = requests.get(
                TEST_URL,
                proxies={
                    'http': 'http://' + proxy,
                    'https': 'https://' + proxy,
                },
                timeout=TEST_TIMEOUT,
                headers=headers,
                verify=False)
        except (ConnectionError, ReadTimeout):
            return False
        return response.status_code == 200

    def parseIfconfig(self, data):
        """
        Extract the ``inet`` address of the ``ppp0`` interface.

        :param data: text split into blank-line-separated sections; only
            sections starting with ``ppp0`` are considered
        :return: the address captured after ``inet `` in the last ``ppp0``
            section, or an empty string when there is none
        """
        sections = [part for part in data.split('\n\n')
                    if part.startswith('ppp0')]
        if not sections:
            return ''
        # When several sections match, the last one decides the result.
        match = re.search(r'inet ([\d\.]{7,15})', sections[-1])
        return match.group(1) if match else ''

    @retry(retry_on_result=lambda x: x is not True, stop_max_attempt_number=5)
    def remove_proxy(self):
        """
        Remove this client's proxy from Redis.

        The ``retry`` decorator calls this again, up to 5 attempts, whenever
        the result is not ``True``.

        :return: True on success, False when Redis raised a connection error
        """
        logger.info(f'Removing {self.CLIENT_NAME}...')
        try:
            # Dialing drops the connection, so a fresh client is created
            # every time.
            if getattr(self, 'redis', None):
                self.redis.close()
            self.redis = RedisClient()
            self.redis.remove(self.CLIENT_NAME)
        except redis.ConnectionError:
            logger.info(f'Remove {self.CLIENT_NAME} failed')
            return False
        logger.info(f'Removed {self.CLIENT_NAME} successfully')
        return True

    def set_proxy(self, proxy):
        """
        Store ``proxy`` in Redis under this client's name.

        :param proxy: proxy address to publish
        :return: None
        """
        self.redis = RedisClient()
        if self.redis.set(self.CLIENT_NAME, proxy):
            logger.info(f'Successfully set proxy {proxy}')

    def loop(self):
        """
        Dial forever, sleeping ``DIAL_CYCLE`` seconds between runs.

        :return: never returns
        """
        while True:
            logger.info('Starting dial...')
            self.run()
            time.sleep(DIAL_CYCLE)

    @retry(stop_max_attempt_number=3)
    def run(self):
        """
        Perform one dial: withdraw the old proxy, re-dial, then test and
        publish the new address.

        :return: True when a working proxy was published, otherwise False
        """
        logger.info('Dial started, remove proxy')
        try:
            self.remove_proxy()
        except RetryError:
            logger.error('Retried for max times, continue')
        # Dial
        m = Monitor(self.ADSLHOST, self.ADSLPORT, self.ADSLUSER, self.ADSLPWD)
        try:
            # "adsl-stop" is issued twice, exactly as before.
            # NOTE(review): presumably to make sure the line is down - confirm.
            m.link_server("adsl-stop")
            m.link_server("adsl-stop")
            m.link_server("adsl-start")
            content = m.link_server("ifconfig")
        finally:
            # Release the monitor even when a command fails; otherwise every
            # retry attempt would leave another one open.
            m.close_net()
        ip = self.parseIfconfig(content)
        if not ip:
            logger.error('Get IP failed')
            return False
        proxy = "%s:%s" % (ip, PROXY_PORT)
        if not self.test_proxy(proxy):
            logger.error(f'Proxy invalid {proxy}')
            return False
        logger.info(f'Valid proxy {proxy}')
        # Publish the proxy to the database
        self.set_proxy(proxy)
        return True
class maintain_proxy(object):
    """
    Re-validate pooled proxies concurrently and keep the Redis pool in sync.

    Each round merges the proxies already in Redis with those returned by the
    ``get_proxy_url`` endpoint, tests every one against ``test_url`` and
    stores or removes it depending on the outcome.
    """

    num: int = 0  # proxies processed in the current round
    sleep_time: int = 60
    time_out: int = 5  # timeout handed to the httpx client
    test_url: str = "http://www.baidu.com"
    max_keepalive_connections: int = 50
    max_connections: int = 50
    get_proxy_url: str = "proxy——server"
    name: str = "ppio"
    redis = RedisClient()

    def _redis_init(self):
        """
        Replace ``self.redis`` with a fresh client, closing the previous one.

        :return: None
        """
        try:
            if getattr(self, 'redis', None):
                self.redis.close()
            self.redis = RedisClient()
        except redis.ConnectionError:
            self.redis = RedisClient()
            logger.warning("redis ConnectionError")

    async def get_html(self, name, proxy):
        """
        Test one proxy and record the outcome in Redis.

        :param name: not used by this method; kept for caller compatibility
        :param proxy: proxy address, used both as the proxy target and as the
            Redis key/value
        :return: None
        """
        proxy_url = "http://{proxy}".format(proxy=proxy)
        proxies = {
            "http://": proxy_url,
            "https://": proxy_url,
        }
        # Cap the keep-alive and total connection counts of the client.
        limits = httpx.Limits(
            max_keepalive_connections=self.max_keepalive_connections,
            max_connections=self.max_connections)
        try:
            async with httpx.AsyncClient(limits=limits, proxies=proxies,
                                         timeout=self.time_out,
                                         verify=False) as client:
                resp = await client.get(self.test_url)
                # Explicit check instead of ``assert`` (which is stripped
                # under ``python -O``); the status ends up in the error log.
                if resp.status_code != 200:
                    raise ValueError(
                        f"unexpected status code {resp.status_code}")
                if self.redis.set(proxy, proxy):
                    logger.info(f"{proxy}, 校验成功")
                else:
                    self.redis.remove(proxy)
                    logger.error(f"{proxy}, 校验失败,不可用代理")
        except Exception as err:
            # ``resp`` must not be touched here: it is unbound whenever the
            # request itself failed.
            self.redis.remove(proxy)
            logger.error(f"{proxy}, err : {err} 校验失败,不可用代理")
        finally:
            self.num += 1

    @debug
    async def maintain_proxies_init(self):
        """
        Validate the proxy pool in an endless loop.

        :return: never returns
        """
        name = "adsl1"
        while True:
            try:
                self._redis_init()
                candidates = set(self.redis.proxies())
                # NOTE(review): ``requests.get`` is a blocking call inside a
                # coroutine; nothing else is scheduled at this point in the
                # visible code, so it is left as is.
                response = requests.get(self.get_proxy_url, headers={},
                                        timeout=10)
                api_entries = response.json()["proxy_list"]
                if api_entries:
                    candidates.update(
                        f"{entry['ip']}:{entry['port']}"
                        for entry in api_entries)
                else:
                    logger.info("当前api——接口暂时无IP列表")
                if not candidates:
                    logger.info(f"api——接口ip 和 {name}池子, ip都为空,即将睡眠,稍后重启")
                    await asyncio.sleep(TEST_CYCLE)
                    continue
            except Exception as err:
                # The error is interpolated into the message; it used to be
                # passed as a stray positional argument with no placeholder.
                logger.error(f"maintain_proxies_init: {err}")
                await asyncio.sleep(TEST_CYCLE)
                continue
            await asyncio.gather(
                *[self.get_html(name, candidate) for candidate in candidates])
            logger.info(f"此轮校验总共{self.num}个ip,完成,剩余{self.redis.count()}暂停{TEST_CYCLE}秒, 进行下一轮的检验")
            self.num = 0
            await asyncio.sleep(TEST_CYCLE)
class Sender(object):
    """
    Re-dial the ADSL line and publish the resulting proxy to Redis.
    """

    def test_proxy(self, proxy):
        """
        Test a proxy by requesting ``TEST_URL`` through it.

        :param proxy: proxy address; it is appended to ``http://`` and
            ``https://`` to build the proxy URLs
        :return: True if the response status is 200, otherwise False
        """
        try:
            response = requests.get(TEST_URL, proxies={
                'http': 'http://' + proxy,
                'https': 'https://' + proxy
            }, timeout=TEST_TIMEOUT)
        except (ConnectionError, ReadTimeout):
            return False
        return response.status_code == 200

    def parseIfconfig(self, data):
        """
        Extract the ``inet`` address of the ``ppp0`` interface.

        :param data: text split into blank-line-separated sections; only
            sections starting with ``ppp0`` are considered
        :return: the address captured after ``inet `` in the last ``ppp0``
            section, or an empty string when there is none
        """
        sections = [part for part in data.split('\n\n')
                    if part.startswith('ppp0')]
        if not sections:
            return ''
        # When several sections match, the last one decides the result.
        match = re.search(r'inet ([\d\.]{7,15})', sections[-1])
        return match.group(1) if match else ''

    @retry(retry_on_result=lambda x: x is not True, stop_max_attempt_number=10)
    def remove_proxy(self):
        """
        Remove this client's proxy from Redis.

        The ``retry`` decorator calls this again, up to 10 attempts, whenever
        the result is not ``True``.

        :return: True on success, False when Redis raised a connection error
        """
        logger.info(f'Removing {CLIENT_NAME}...')
        try:
            # Dialing drops the connection, so a fresh client is created
            # every time.
            if getattr(self, 'redis', None):
                self.redis.close()
            self.redis = RedisClient()
            self.redis.remove(CLIENT_NAME)
        except redis.ConnectionError:
            logger.info(f'Remove {CLIENT_NAME} failed')
            return False
        logger.info(f'Removed {CLIENT_NAME} successfully')
        return True

    def set_proxy(self, proxy):
        """
        Store ``proxy`` in Redis under ``CLIENT_NAME``.

        :param proxy: proxy address to publish
        :return: None
        """
        self.redis = RedisClient()
        if self.redis.set(CLIENT_NAME, proxy):
            logger.info(f'Successfully set proxy {proxy}')

    def loop(self):
        """
        Dial forever, sleeping ``DIAL_CYCLE`` seconds between runs.

        :return: never returns
        """
        while True:
            logger.info('Starting dial...')
            self.run()
            time.sleep(DIAL_CYCLE)

    def run(self):
        """
        Dial until an IP address is obtained, then test and publish it.

        Failing to read an IP triggers another dial. This is done in a loop
        rather than by recursion so that a long outage cannot exhaust the
        call stack.

        :return: None
        """
        while True:
            logger.info('Dial started, remove proxy')
            try:
                self.remove_proxy()
            except RetryError:
                logger.error('Retried for max times, continue')
            # Dial
            m = Monitor(ADSLHOST, ADSLPORT, ADSLUSER, ADSLPWD)
            try:
                m.link_server("adsl-stop")
                m.link_server("adsl-start")
                content = m.link_server("ifconfig")
            finally:
                # Release the monitor even when a command fails.
                m.close_net()
            ip = self.parseIfconfig(content)
            if not ip:
                # No IP obtained: dial again
                logger.error('Get IP failed, re-dialing')
                continue
            proxy = "%s:%s" % (ip, PROXY_PORT)
            if self.test_proxy(proxy):
                logger.info(f'Valid proxy {proxy}')
                # Publish the proxy to the database
                self.set_proxy(proxy)
                # NOTE(review): loop() sleeps DIAL_CYCLE again after this
                # returns, so a valid proxy stays published for two cycles -
                # confirm this is intended.
                time.sleep(DIAL_CYCLE)
            else:
                logger.error(f'Proxy invalid {proxy}')
            return
class Sender:
    """
    Run the dial command periodically and publish the new IP as a proxy in
    Redis.
    """

    def __init__(self):
        # Creating the RedisClient here made later calls block on the VPS for
        # reasons not yet understood, so every method creates its own client.
        self.timer = time.time()

    def get_ip(self, ifname=ADSL_IFNAME):
        """
        Read this machine's IP address from ``ifconfig`` output.

        :param ifname: network interface name
        :return: the first dotted-quad found between ``inet`` and ``netmask``
            after ``ifname``, or None when the command fails or nothing
            matches
        """
        status, output = subprocess.getstatusoutput('ifconfig')
        if status != 0:
            return None
        pattern = re.compile(
            ifname + r'.*?inet.*?(\d+\.\d+\.\d+\.\d+).*?netmask', re.S)
        result = pattern.search(output)
        return result.group(1) if result else None

    def test_proxy(self, proxy):
        """
        Test a proxy by requesting ``TEST_URL`` through it.

        :param proxy: proxy address; the configured user and password are
            prepended to build the proxy URLs
        :return: True if the response status is 200, otherwise False
        """
        proxies = {
            'http': f'http://{PROXY_USER}:{PROXY_PASSWORD}@{proxy}',
            'https': f'https://{PROXY_USER}:{PROXY_PASSWORD}@{proxy}'
        }
        try:
            response = requests.get(TEST_URL, proxies=proxies,
                                    timeout=TEST_TIMEOUT)
        except (ConnectionError, ReadTimeout):
            return False
        return response.status_code == 200

    def remove_proxy(self):
        """
        Remove this client's proxy from Redis.

        :return: None
        """
        self.redis = RedisClient()
        self.redis.remove(CLIENT_NAME)
        print('Successfully Removed Proxy')

    def set_proxy(self, proxy):
        """
        Store ``proxy`` in Redis under ``CLIENT_NAME``.

        :param proxy: proxy address to publish
        :return: None
        """
        self.redis = RedisClient()
        if self.redis.set(CLIENT_NAME, proxy):
            print('Successfully Set Proxy', proxy)

    def exists_proxy(self):
        """
        Tell whether Redis currently holds a proxy for ``CLIENT_NAME``.

        :return: the result of ``RedisClient.exists``
        """
        self.redis = RedisClient()
        return self.redis.exists(CLIENT_NAME)

    def count_time_interval(self):
        """
        :return: seconds elapsed since this object was created
        """
        return time.time() - self.timer

    def adsl(self):
        """
        Main dial loop: dial, test the new IP, publish it for ``ADSL_CYCLE``
        seconds, withdraw it, and dial again.

        :return: never returns
        """
        while True:
            print('ADSL Start, Remove Proxy, Please wait')
            status, _output = subprocess.getstatusoutput(ADSL_BASH)
            if status != 0:
                print('ADSL Failed, Please Check')
                time.sleep(ADSL_ERROR_CYCLE)
                continue
            print('ADSL Successfully')
            ip = self.get_ip()
            if not ip:
                print('Get IP Failed, Re Dialing')
                time.sleep(ADSL_ERROR_CYCLE)
                continue
            print('Now IP', ip)
            print('Testing Proxy, Please Wait')
            proxy = '{ip}:{port}'.format(ip=ip, port=PROXY_PORT)
            if not self.test_proxy(proxy):
                print('Invalid Proxy')
                continue
            print('Valid Proxy')
            self.set_proxy(proxy)
            print('Sleeping')
            time.sleep(ADSL_CYCLE)
            # Withdraw the proxy before the next dial changes the IP.
            try:
                self.remove_proxy()
            except ConnectionError:
                # NOTE(review): this catches whichever ``ConnectionError`` is
                # in scope for the file; confirm it is the one the Redis
                # client actually raises.
                print('redis链接错误')
class Sender():
    """
    Dial periodically and, once the new IP passes the test, publish it as
    this machine's proxy in the remote Redis store.
    """

    def get_ip(self, ifname=ADSL_IFNAME):
        """
        Read this machine's IP address from ``ifconfig`` output.

        :param ifname: network interface name
        :return: the first dotted-quad found between ``inet`` and ``netmask``
            after ``ifname``, or None when the command fails or nothing
            matches
        """
        status, output = subprocess.getstatusoutput('ifconfig')
        if status != 0:
            return None
        # Raw string: ``\d`` and ``\.`` are regex escapes, not string escapes.
        pattern = re.compile(
            ifname + r'.*?inet.*?(\d+\.\d+\.\d+\.\d+).*?netmask', re.S)
        result = pattern.search(output)
        return result.group(1) if result else None

    def test_proxy(self, proxy):
        """
        Test a proxy by requesting ``TEST_URL`` through it.

        :param proxy: proxy address; it is appended to ``http://`` and
            ``https://`` to build the proxy URLs
        :return: True if the response status is 200, otherwise False
        """
        try:
            response = requests.get(TEST_URL, proxies={
                'http': 'http://' + proxy,
                'https': 'https://' + proxy
            }, timeout=TEST_TIMEOUT)
        except (ConnectionError, ReadTimeout):
            return False
        return response.status_code == 200

    def remove_proxy(self):
        """
        Remove this machine's proxy from Redis.

        :return: None
        """
        self.redis = RedisClient()
        self.redis.remove(CLIENT_NAME)
        print('Successfully Removed Proxy')

    def set_proxy(self, proxy):
        """
        Store ``proxy`` in Redis under ``CLIENT_NAME``.

        ``CLIENT_NAME`` is this machine's identifier; it can be chosen freely
        as long as no two dialing hosts share one.

        :param proxy: the proxy address obtained after dialing
        :return: None
        """
        self.redis = RedisClient()
        if self.redis.set(CLIENT_NAME, proxy):
            print('Successfully Set Proxy', proxy)

    def adsl(self):
        """
        Main dial loop: withdraw the old proxy, dial, test the new IP and
        publish it for ``ADSL_CYCLE`` seconds.

        :return: never returns
        """
        while True:
            print('ADSL Start, Remove Proxy, Please wait')
            # Withdraw this machine's proxy first so that a stale address
            # cannot be handed out while the line is being re-dialed.
            self.remove_proxy()
            # Run the dial script.
            status, _output = subprocess.getstatusoutput(ADSL_BASH)
            if status != 0:
                print('ADSL Failed, Please Check')
                time.sleep(ADSL_ERROR_CYCLE)
                continue
            print('ADSL Successfully')
            ip = self.get_ip()
            if not ip:
                print('Get IP Failed, Re Dialing')
                time.sleep(ADSL_ERROR_CYCLE)
                continue
            print('Now IP', ip)
            print('Testing Proxy, Please Wait')
            # NOTE(review): assumes the port constant is named PROXY_PORT, as
            # used elsewhere in this file - confirm against the settings.
            proxy = '{ip}:{port}'.format(ip=ip, port=PROXY_PORT)
            if self.test_proxy(proxy):
                print('Valid Proxy')
                self.set_proxy(proxy)
                print('Sleeping')
                time.sleep(ADSL_CYCLE)
            else:
                print('Invalid Proxy')


def run():
    """Create a ``Sender`` and start its dial loop."""
    sender = Sender()
    sender.adsl()
class Checker(object):
    """
    Periodically test every proxy held in the Redis store and evict bad ones.

    Failed checks are tallied per proxy address in ``self.counts``; an entry
    is removed from the store once its tally reaches
    ``settings.TEST_MAX_ERROR_COUNT``.
    """

    def __init__(self):
        self.db = RedisClient()
        # proxy address -> number of failed checks recorded so far
        self.counts = defaultdict(int)

    def check(self, proxy):
        """
        Test a proxy by requesting ``settings.TEST_URL`` through it.

        The request carries a browser ``User-Agent`` header and is made with
        certificate verification turned off.

        :param proxy: proxy address; it is appended to ``http://`` and
            ``https://`` to build the proxy URLs
        :return: True if the response status is 200, otherwise False
        """
        headers = {
            "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.116 Safari/537.36"
        }
        # Logged before the request so that failed attempts are traceable too.
        logger.debug(f'Using {proxy} to test {settings.TEST_URL}...')
        try:
            response = requests.get(settings.TEST_URL, proxies={
                'http': 'http://' + proxy,
                'https': 'https://' + proxy
            }, timeout=settings.TEST_TIMEOUT, headers=headers, verify=False)
        except (ConnectionError, ReadTimeout):
            return False
        return response.status_code == 200

    def run(self):
        """
        Run one round of checks over every proxy currently in the store.

        :return: None
        """
        proxies = self.db.all()
        logger.info(f'Try to get all proxies {proxies}')
        evicted = set()
        for name, proxy in proxies.items():
            if self.check(proxy):
                logger.info(f'Proxy {proxy} valid')
            else:
                logger.info(f'Proxy {proxy} invalid')
                self.counts[proxy] += 1
            count = self.counts.get(proxy, 0)
            logger.debug(
                f'Count {count}, TEST_MAX_ERROR_COUNT {settings.TEST_MAX_ERROR_COUNT}'
            )
            if count >= settings.TEST_MAX_ERROR_COUNT:
                self.db.remove(name)
                evicted.add(proxy)
        # Drop the tallies of evicted proxies. Without this, an address that
        # is published again later would be evicted on its first check even
        # if it is valid, and ``counts`` would grow without bound. This is
        # done after the loop so every name sharing an address is still
        # judged against the same tally within this round.
        for proxy in evicted:
            self.counts.pop(proxy, None)

    def loop(self):
        """
        Run check rounds forever, sleeping ``settings.TEST_CYCLE`` seconds
        between rounds.

        :return: never returns
        """
        while True:
            logger.info('Check for infinite')
            self.run()
            logger.info(f'Tested, sleeping for {settings.TEST_CYCLE}s...')
            time.sleep(settings.TEST_CYCLE)