class ProxyManager(object):
    """
    Fetch proxies into the raw table and serve proxies from the useful table.
    """

    def __init__(self):
        self.db = DbClient()
        self.raw_proxy_queue = 'raw_proxy'
        self.log = LogHandler('proxy_manager')
        self.useful_proxy_queue = 'useful_proxy'

    def fetch(self):
        """
        Run every configured getter and store new, well-formed proxies.

        Each name in ``config.proxy_getter_functions`` is looked up on
        ``GetFreeProxy`` and called. Proxies are de-duplicated across all
        getters of this run; a getter that raises is logged and skipped.

        :return: None
        """
        self.db.changeTable(self.raw_proxy_queue)
        proxy_set = set()
        self.log.info("ProxyFetch : start")
        for proxyGetter in config.proxy_getter_functions:
            self.log.info(
                "ProxyFetch - {func}: start".format(func=proxyGetter))
            try:
                for proxy in getattr(GetFreeProxy, proxyGetter.strip())():
                    proxy = proxy.strip()
                    if not proxy or not verifyProxyFormat(proxy):
                        self.log.error(
                            'ProxyFetch - {func}: {proxy} illegal'.format(
                                func=proxyGetter, proxy=proxy.ljust(20)))
                        continue
                    if proxy in proxy_set:
                        self.log.info(
                            'ProxyFetch - {func}: {proxy} exist'.format(
                                func=proxyGetter, proxy=proxy.ljust(20)))
                        continue
                    self.log.info(
                        'ProxyFetch - {func}: {proxy} success'.format(
                            func=proxyGetter, proxy=proxy.ljust(20)))
                    self.db.put(Proxy(proxy, source=proxyGetter))
                    proxy_set.add(proxy)
            except Exception as e:
                # Best effort: one broken getter must not stop the others.
                self.log.error(
                    "ProxyFetch - {func}: error".format(func=proxyGetter))
                self.log.error(str(e))

    def get(self):
        """
        Return a random proxy from the useful table.

        :return: a ``Proxy`` built from the stored JSON, or None when the
            table is empty
        """
        self.db.changeTable(self.useful_proxy_queue)
        item_list = self.db.getAll()
        if item_list:
            return Proxy.newProxyFromJson(random.choice(item_list))
        return None

    def _get_by_scheme(self, scheme):
        """
        Pick a random stored record whose proxy scheme equals *scheme*.

        The scheme is the text before ``://`` in the record's ``proxy`` field.
        Candidates are filtered first, so a matching record is always found
        when one exists (random probing with replacement could miss it).

        :param scheme: scheme to match, e.g. ``'http'``
        :return: the stored JSON string, or None when nothing matches
        """
        self.db.changeTable(self.useful_proxy_queue)
        item_list = self.db.getAll()
        candidates = [
            item for item in item_list or []
            if json.loads(item)['proxy'].split("://")[0] == scheme
        ]
        return random.choice(candidates) if candidates else None

    def get_http(self):
        """
        Return a random proxy whose scheme is ``http``.

        :return: a ``Proxy``, or None when the pool has no http proxy
        """
        record = self._get_by_scheme('http')
        return Proxy.newProxyFromJson(record) if record is not None else None

    def get_socks(self):
        """
        Return a random proxy whose scheme is ``socks4``.

        :return: a ``Proxy``, or None when the pool has no socks4 proxy
        """
        record = self._get_by_scheme('socks4')
        return Proxy.newProxyFromJson(record) if record is not None else None

    def delete(self, proxy_str):
        """
        Delete a proxy from the useful table.

        :param proxy_str: key of the proxy to delete
        :return: None
        """
        self.db.changeTable(self.useful_proxy_queue)
        self.db.delete(proxy_str)

    def getAll(self):
        """
        Return every proxy in the useful table.

        :return: list of ``Proxy`` objects (empty when the table is empty)
        """
        self.db.changeTable(self.useful_proxy_queue)
        item_list = self.db.getAll()
        # Tolerate a falsy result the same way get() does.
        return [Proxy.newProxyFromJson(item) for item in item_list or []]

    def getNumber(self):
        """
        Count the entries of both tables.

        :return: dict with keys ``raw_proxy`` and ``useful_proxy``
        """
        self.db.changeTable(self.raw_proxy_queue)
        total_raw_proxy = self.db.getNumber()
        self.db.changeTable(self.useful_proxy_queue)
        total_useful_queue = self.db.getNumber()
        return {
            'raw_proxy': total_raw_proxy,
            'useful_proxy': total_useful_queue
        }
class ProxyManager(object):
    """
    Fetch proxies into the raw table and serve proxies from the useful table.
    """

    def __init__(self):
        self.db = DbClient()
        self.config = GetConfig()
        self.raw_proxy_queue = 'raw_proxy'
        self.log = LogHandler('proxy_manager')
        self.useful_proxy_queue = 'useful_proxy'

    def refresh(self):
        """
        Call every configured getter and store the well-formed proxies.

        Getter names come from ``self.config.proxy_getter_functions`` and are
        resolved on ``GetFreeProxy``. A getter that raises is logged and the
        loop moves on to the next one.

        :return: None
        """
        self.db.changeTable(self.raw_proxy_queue)
        for getter_name in self.config.proxy_getter_functions:
            try:
                self.log.info(
                    "{func}: fetch proxy start".format(func=getter_name))
                fetch = getattr(GetFreeProxy, getter_name.strip())
                for candidate in fetch():
                    candidate = candidate.strip()
                    if not candidate or not verifyProxyFormat(candidate):
                        self.log.error(
                            '{func}: fetch proxy {proxy} error'.format(
                                func=getter_name, proxy=candidate))
                        continue
                    self.log.info('{func}: fetch proxy {proxy}'.format(
                        func=getter_name, proxy=candidate))
                    self.db.put(candidate)
            except Exception:
                self.log.error(
                    "{func}: fetch proxy fail".format(func=getter_name))

    def get(self):
        """
        Return a random key from the useful table.

        :return: a proxy key, or None when the table is empty
        """
        self.db.changeTable(self.useful_proxy_queue)
        pool = self.db.getAll()
        if not pool:
            return None
        candidates = pool.keys()
        if EnvUtil.PY3:
            # random.choice needs a sequence, not a dict view.
            candidates = list(candidates)
        return random.choice(candidates)

    def delete(self, proxy):
        """
        Delete a proxy from the useful table.

        :param proxy: key of the proxy to delete
        :return: None
        """
        self.db.changeTable(self.useful_proxy_queue)
        self.db.delete(proxy)

    def getAll(self):
        """
        Return all keys of the useful table.

        :return: list of proxy keys (empty list when the table is empty)
        """
        self.db.changeTable(self.useful_proxy_queue)
        pool = self.db.getAll()
        as_list = list if EnvUtil.PY3 else (lambda keys: keys)
        return as_list(pool.keys()) if pool else list()

    def getNumber(self):
        """
        Count the entries of both tables.

        :return: dict with keys ``raw_proxy`` and ``useful_proxy``
        """
        counts = {}
        tables = (
            ('raw_proxy', self.raw_proxy_queue),
            ('useful_proxy', self.useful_proxy_queue),
        )
        for label, table in tables:
            self.db.changeTable(table)
            counts[label] = self.db.getNumber()
        return counts
class ProxyManager(object):
    """
    Fetch proxies into the raw table and serve proxies from the useful table.
    """

    def __init__(self):
        self.db = DbClient()
        self.config = GetConfig()
        self.raw_proxy_queue = 'raw_proxy'
        self.log = LogHandler('proxy_manager')
        self.useful_proxy_queue = 'useful_proxy'

    def _collect_proxies(self, proxyGetter, proxies):
        """
        Build the set of usable proxy strings yielded by one getter.

        Each entry is stripped *before* the emptiness check, so blank or
        whitespace-only entries are dropped instead of being kept as ``''``.

        :param proxyGetter: getter name, used only in the log message
        :param proxies: iterable of proxy strings (falsy entries are skipped)
        :return: set of stripped, non-empty proxy strings
        """
        proxy_set = set()
        for proxy in proxies:
            proxy = proxy.strip() if proxy else proxy
            if proxy:
                self.log.info('{func}: fetch proxy {proxy}'.format(
                    func=proxyGetter, proxy=proxy))
                proxy_set.add(proxy)
        return proxy_set

    def refresh(self):
        """
        Call every configured getter and store proxies that are not yet useful.

        Getter names come from ``self.config.proxy_getter_functions`` and are
        resolved on ``GetFreeProxy``. A proxy already present in the useful
        table is skipped; every other one is put into the raw table.

        :return: None
        """
        for proxyGetter in self.config.proxy_getter_functions:
            # fetch raw proxy
            proxy_set = self._collect_proxies(
                proxyGetter, getattr(GetFreeProxy, proxyGetter.strip())())

            # store raw proxy
            for proxy in proxy_set:
                self.db.changeTable(self.useful_proxy_queue)
                if self.db.exists(proxy):
                    continue
                self.db.changeTable(self.raw_proxy_queue)
                self.db.put(proxy)

    def get(self):
        """
        Return a random key from the useful table.

        :return: a proxy key, or None when the table is empty
        """
        self.db.changeTable(self.useful_proxy_queue)
        item_dict = self.db.getAll()
        if item_dict:
            # list() gives random.choice a sequence on Python 2 and 3 alike.
            return random.choice(list(item_dict.keys()))
        return None

    def delete(self, proxy):
        """
        Delete a proxy from the useful table.

        :param proxy: key of the proxy to delete
        :return: None
        """
        self.db.changeTable(self.useful_proxy_queue)
        self.db.delete(proxy)

    def getAll(self):
        """
        Return all keys of the useful table.

        :return: list of proxy keys (empty list when the table is empty)
        """
        self.db.changeTable(self.useful_proxy_queue)
        item_dict = self.db.getAll()
        return list(item_dict.keys()) if item_dict else list()

    def getNumber(self):
        """
        Count the entries of both tables.

        :return: dict with keys ``raw_proxy`` and ``useful_proxy``
        """
        self.db.changeTable(self.raw_proxy_queue)
        total_raw_proxy = self.db.getNumber()
        self.db.changeTable(self.useful_proxy_queue)
        total_useful_queue = self.db.getNumber()
        return {
            'raw_proxy': total_raw_proxy,
            'useful_proxy': total_useful_queue
        }
class ProxyManager(object):
    """
    Fetch proxies into the raw table and serve proxies from the useful table.
    """

    def __init__(self):
        self.db = DbClient()
        self.config = GetConfig()
        self.raw_proxy_queue = 'raw_proxy'
        self.log = LogHandler('proxy_manager')
        self.useful_proxy_queue = 'useful_proxy'

    def refresh(self):
        """
        Fetch proxy addresses and store them in the DB.

        Getter names come from ``self.config.proxy_getter_functions`` and are
        resolved on ``GetFreeProxy``. A getter that raises is logged and
        skipped. Well-formed proxies that are not already in the useful table
        are put into the raw table.

        :return: None
        """
        for proxyGetter in self.config.proxy_getter_functions:
            proxy_set = set()
            try:
                self.log.info("{func}:fetch proxy start".format(func=proxyGetter))
                # Materialise inside the try so a failing generator is caught here.
                proxy_iter = list(getattr(GetFreeProxy, proxyGetter.strip())())
            except Exception:
                self.log.error("{func}:fetch proxy fail".format(func=proxyGetter))
                continue
            for proxy in proxy_iter:
                proxy = proxy.strip()
                if proxy and verifyProxyFormat(proxy):
                    self.log.info("{func}:fetch proxy {proxy}".format(
                        func=proxyGetter, proxy=proxy))
                    proxy_set.add(proxy)
                else:
                    self.log.info("{func}:fetch proxy {proxy} error".format(
                        func=proxyGetter, proxy=proxy))

            # store to DB
            for proxy in proxy_set:
                self.db.changeTable(self.useful_proxy_queue)
                if self.db.exists(proxy):
                    continue
                self.db.changeTable(self.raw_proxy_queue)
                self.db.put(proxy)

    def get(self):
        """
        Return a random key from the useful table.

        :return: a proxy key, or None when the table is empty
        """
        self.db.changeTable(self.useful_proxy_queue)
        item_dict = self.db.getAll()
        if item_dict:
            # list() gives random.choice a sequence on Python 2 and 3 alike.
            return random.choice(list(item_dict.keys()))
        return None

    def delete(self, proxy):
        """
        Delete a proxy from the useful table.

        :param proxy: key of the proxy to delete
        :return: None
        """
        self.db.changeTable(self.useful_proxy_queue)
        self.db.delete(proxy)

    def getAll(self):
        """
        Return all keys of the useful table.

        The previous non-PY3 branch called ``items.key()``, which does not
        exist on dicts; a single ``list(items.keys())`` covers both versions.

        :return: list of proxy keys (empty list when the table is empty)
        """
        self.db.changeTable(self.useful_proxy_queue)
        items = self.db.getAll()
        return list(items.keys()) if items else list()

    def getNumber(self):
        """
        Count the entries of both tables.

        :return: dict with keys ``raw_proxy`` and ``useful_proxy``
        """
        self.db.changeTable(self.raw_proxy_queue)
        total_raw_proxy = self.db.getNumber()
        self.db.changeTable(self.useful_proxy_queue)
        total_useful_proxy = self.db.getNumber()
        return {
            'raw_proxy': total_raw_proxy,
            'useful_proxy': total_useful_proxy
        }
class ProxyManager(object):
    """
    Fetch proxies into the raw table, serve proxies from the useful table and
    drive the ADSL nodes listed in the ``adsl`` table.
    """

    def __init__(self):
        self.db = DbClient()
        self.raw_proxy_queue = 'raw_proxy'
        self.log = LogHandler('proxy_manager')
        self.useful_proxy_queue = 'useful_proxy'
        self.adsl_queue = 'adsl'

    def refresh(self):
        """
        Call every configured getter and store the well-formed proxies.

        Getter names come from ``config.proxy_getter_functions`` and are
        resolved on ``GetFreeProxy``. A getter that raises is logged and
        skipped.

        :return: None
        """
        self.db.changeTable(self.raw_proxy_queue)
        for proxyGetter in config.proxy_getter_functions:
            # fetch
            try:
                self.log.info(
                    "{func}: fetch proxy start".format(func=proxyGetter))
                for proxy in getattr(GetFreeProxy, proxyGetter.strip())():
                    # Store directly; no de-duplication is done in code
                    # (original note: the hash structure de-duplicates).
                    proxy = proxy.strip()
                    if proxy and verifyProxyFormat(proxy):
                        self.log.info('{func}: fetch proxy {proxy}'.format(
                            func=proxyGetter, proxy=proxy))
                        self.db.put(proxy)
                    else:
                        self.log.error(
                            '{func}: fetch proxy {proxy} error'.format(
                                func=proxyGetter, proxy=proxy))
            except Exception:
                self.log.error(
                    "{func}: fetch proxy fail".format(func=proxyGetter))
                continue

    def get(self):
        """
        Return a random key from the useful table.

        :return: a proxy key, or None when the table is empty
        """
        self.db.changeTable(self.useful_proxy_queue)
        item_dict = self.db.getAll()
        if item_dict:
            # list() gives random.choice a sequence on Python 2 and 3 alike.
            return random.choice(list(item_dict.keys()))
        return None

    def delete(self, proxy):
        """
        Delete a proxy from the useful table.

        :param proxy: key of the proxy to delete
        :return: None
        """
        self.db.changeTable(self.useful_proxy_queue)
        self.db.delete(proxy)

    def getAll(self):
        """
        Return all keys of the useful table.

        :return: list of proxy keys (empty list when the table is empty)
        """
        self.db.changeTable(self.useful_proxy_queue)
        item_dict = self.db.getAll()
        return list(item_dict.keys()) if item_dict else list()

    def getNumber(self):
        """
        Count the entries of both tables.

        :return: dict with keys ``raw_proxy`` and ``useful_proxy``
        """
        self.db.changeTable(self.raw_proxy_queue)
        total_raw_proxy = self.db.getNumber()
        self.db.changeTable(self.useful_proxy_queue)
        total_useful_queue = self.db.getNumber()
        return {
            'raw_proxy': total_raw_proxy,
            'useful_proxy': total_useful_queue
        }

    def initProxyPool(self):
        """
        Empty the useful table and return the values of the ``adsl`` table.

        Intended to be called on first start-up.

        :return: list of the values stored in the ``adsl`` table
        """
        self.deleteAll()
        self.db.changeTable(self.adsl_queue)
        item_dict = self.db.getAll()
        return list(item_dict.values()) if item_dict else list()

    def deleteAll(self):
        """
        Delete every proxy currently in the useful table.

        :return: None
        """
        for proxy in self.getAll():
            self.delete(proxy)

    def refreshADSL(self, proxy):
        """
        Ask the node behind *proxy* to redial.

        Sends ``GET http://<ip>:8000/refresh`` where ``<ip>`` is the part of
        *proxy* before the first colon. Errors are printed, never raised.

        :param proxy: ``ip:port`` as ``str`` or UTF-8 ``bytes``
        :return: None
        """
        if isinstance(proxy, bytes):
            proxy = proxy.decode('utf8')
        ip = proxy.split(':')[0]
        try:
            # call the redial endpoint
            refreshApi = "http://{ip}:8000/refresh".format(ip=ip)
            r = requests.get(refreshApi, timeout=5, verify=False)
            if r.status_code == 200:
                # The message used to be printed with a literal "{proxy}".
                print('{proxy} refresh done'.format(proxy=proxy))
        except Exception as e:
            print(str(e))
class ProxyManager(object):
    """
    Fetch proxies into the raw table and serve proxies from the useful table.
    """

    def __init__(self):
        self.db = DbClient()
        self.config = GetConfig()
        self.raw_proxy_queue = 'raw_proxy'
        self.log = LogHandler('proxy_manager')
        self.useful_proxy_queue = 'useful_proxy'

    def _collect_proxies(self, proxyGetter, proxies):
        """
        Build the set of usable proxy strings yielded by one getter.

        Entries are stripped before they are tested for emptiness, so a
        whitespace-only entry is discarded rather than stored as ``''``.

        :param proxyGetter: getter name, used only in the log message
        :param proxies: iterable of proxy strings (falsy entries are skipped)
        :return: set of stripped, non-empty proxy strings
        """
        cleaned = set()
        for proxy in proxies:
            proxy = proxy.strip() if proxy else proxy
            if not proxy:
                continue
            self.log.info('{func}: fetch proxy {proxy}'.format(
                func=proxyGetter, proxy=proxy))
            cleaned.add(proxy)
        return cleaned

    def refresh(self):
        """
        Call every configured getter and store proxies that are not yet useful.

        Getter names come from ``self.config.proxy_getter_functions`` and are
        resolved on ``GetFreeProxy``. A proxy already present in the useful
        table is skipped; every other one is put into the raw table.

        :return: None
        """
        for proxyGetter in self.config.proxy_getter_functions:
            # fetch raw proxy
            getter = getattr(GetFreeProxy, proxyGetter.strip())
            proxy_set = self._collect_proxies(proxyGetter, getter())

            # store raw proxy
            for proxy in proxy_set:
                self.db.changeTable(self.useful_proxy_queue)
                if self.db.exists(proxy):
                    continue
                self.db.changeTable(self.raw_proxy_queue)
                self.db.put(proxy)

    def get(self):
        """
        Return a random key from the useful table.

        :return: a proxy key, or None when the table is empty
        """
        self.db.changeTable(self.useful_proxy_queue)
        item_dict = self.db.getAll()
        if not item_dict:
            return None
        # list() gives random.choice a sequence on Python 2 and 3 alike.
        return random.choice(list(item_dict.keys()))

    def delete(self, proxy):
        """
        Delete a proxy from the useful table.

        :param proxy: key of the proxy to delete
        :return: None
        """
        self.db.changeTable(self.useful_proxy_queue)
        self.db.delete(proxy)

    def getAll(self):
        """
        Return all keys of the useful table.

        :return: list of proxy keys (empty list when the table is empty)
        """
        self.db.changeTable(self.useful_proxy_queue)
        item_dict = self.db.getAll()
        return list(item_dict.keys()) if item_dict else list()

    def getNumber(self):
        """
        Count the entries of both tables.

        :return: dict with keys ``raw_proxy`` and ``useful_proxy``
        """
        self.db.changeTable(self.raw_proxy_queue)
        total_raw_proxy = self.db.getNumber()
        self.db.changeTable(self.useful_proxy_queue)
        total_useful_queue = self.db.getNumber()
        return {'raw_proxy': total_raw_proxy, 'useful_proxy': total_useful_queue}
class ProxyManager(object):
    """
    Fetch proxies into the raw table, serve proxies from the useful table and
    find replacement proxies located in the same city.
    """

    def __init__(self):
        self.db = DbClient()
        self.config = GetConfig()
        self.raw_proxy_queue = 'raw_proxy'
        self.log = LogHandler('proxy_manager')
        self.useful_proxy_queue = 'useful_proxy'

    def refresh(self):
        """
        Call every configured getter and store proxies that are not yet useful.

        Getter names come from ``self.config.proxy_getter_functions`` and are
        resolved on ``GetFreeProxy``. A getter that raises is logged and
        skipped. Well-formed proxies missing from the useful table are put
        into the raw table.

        :return: None
        """
        for proxyGetter in self.config.proxy_getter_functions:
            # fetch
            proxy_set = set()
            try:
                self.log.info(
                    "{func}: fetch proxy start".format(func=proxyGetter))
                # Materialise inside the try so a failing generator is caught here.
                proxy_iter = list(getattr(GetFreeProxy, proxyGetter.strip())())
            except Exception:
                self.log.error(
                    "{func}: fetch proxy fail".format(func=proxyGetter))
                continue
            for proxy in proxy_iter:
                proxy = proxy.strip()
                if proxy and verifyProxyFormat(proxy):
                    self.log.info('{func}: fetch proxy {proxy}'.format(
                        func=proxyGetter, proxy=proxy))
                    proxy_set.add(proxy)
                else:
                    self.log.error('{func}: fetch proxy {proxy} error'.format(
                        func=proxyGetter, proxy=proxy))

            # store
            for proxy in proxy_set:
                self.db.changeTable(self.useful_proxy_queue)
                if self.db.exists(proxy):
                    continue
                self.db.changeTable(self.raw_proxy_queue)
                self.db.put(proxy)

    def get(self):
        """
        Return a random key from the useful table.

        :return: a proxy key, or None when the table is empty
        """
        self.db.changeTable(self.useful_proxy_queue)
        item_dict = self.db.getAll()
        if item_dict:
            # list() gives random.choice a sequence on Python 2 and 3 alike.
            return random.choice(list(item_dict.keys()))
        return None

    def delete(self, proxy):
        """
        Delete a proxy from the useful table.

        :param proxy: key of the proxy to delete
        :return: None
        """
        self.db.changeTable(self.useful_proxy_queue)
        self.db.delete(proxy)

    def getAll(self):
        """
        Return all keys of the useful table.

        :return: list of proxy keys (empty list when the table is empty)
        """
        self.db.changeTable(self.useful_proxy_queue)
        item_dict = self.db.getAll()
        return list(item_dict.keys()) if item_dict else list()

    def validateProxy(self, proxy):
        """
        Check a proxy with ``validUsefulProxy``.

        :param proxy: proxy to check
        :return: whatever ``validUsefulProxy`` returns
        """
        return validUsefulProxy(proxy)

    def referProxy(self, proxy, city):
        """
        Return *proxy* if it still validates, otherwise a pool proxy from the
        same city.

        When *proxy* is given but fails validation, *city* is replaced by
        element 1 of that proxy's location info. Up to 14 random pool proxies
        are then tried; the first one that validates and whose location
        element 1 equals the city is returned.

        :param proxy: current proxy, or None to search by *city* only
        :param city: wanted city, used when *proxy* is None
        :return: a proxy key, or None when no candidate matched
        """
        print(proxy, city)
        if proxy is not None:
            if self.validateProxy(proxy):
                return proxy
            old_proxy_loc = GetProxyLocInfo(proxy).get_proxy_loc_info()
            city = old_proxy_loc[1]

        for _ in range(14):
            tmp_proxy = self.get()
            print(tmp_proxy)
            if tmp_proxy is None:
                # Empty pool: there is nothing to validate on this attempt.
                continue
            if not self.validateProxy(tmp_proxy):
                continue
            try:
                tmp_proxy_loc = GetProxyLocInfo(tmp_proxy).get_proxy_loc_info()
                if tmp_proxy_loc[1] == city:
                    return tmp_proxy
            except Exception:
                # Best effort: a failed location lookup just disqualifies
                # this candidate. KeyboardInterrupt/SystemExit still propagate.
                pass
        return None

    def getNumber(self):
        """
        Count the entries of both tables.

        :return: dict with keys ``raw_proxy`` and ``useful_proxy``
        """
        self.db.changeTable(self.raw_proxy_queue)
        total_raw_proxy = self.db.getNumber()
        self.db.changeTable(self.useful_proxy_queue)
        total_useful_queue = self.db.getNumber()
        return {
            'raw_proxy': total_raw_proxy,
            'useful_proxy': total_useful_queue
        }
class ProxyManager(object):
    """
    Fetch proxies through a configurable getter class and serve proxies from
    the useful table.
    """

    def __init__(self):
        self.db = DbClient()
        self.raw_proxy_queue = 'raw_proxy'
        self.log = LogHandler('proxy_manager')
        self.useful_proxy_queue = 'useful_proxy'

    @staticmethod
    def __dynamic_import__(name):
        """
        Resolve a dotted path such as ``package.module.Class`` to an object.

        The longest importable module prefix is imported with
        ``importlib.import_module`` and the remaining components are resolved
        with ``getattr``. Importing the prefix explicitly makes submodules
        resolvable even when their parent package does not import them itself.

        :param name: dotted path to a module or to an attribute of a module
        :return: the module or attribute named by *name*
        :raises ImportError: when not even the first component can be imported
        :raises AttributeError: when a trailing component is missing
        """
        import importlib

        components = name.split('.')
        for split in range(len(components), 0, -1):
            try:
                target = importlib.import_module('.'.join(components[:split]))
            except ImportError:
                # Not a module at this depth; retry with a shorter prefix.
                continue
            for comp in components[split:]:
                target = getattr(target, comp)
            return target
        raise ImportError('No module named %s' % components[0])

    def refresh(self):
        """
        Call every configured getter on the user-defined getter class and
        store the well-formed proxies.

        The class is resolved from ``config.proxy_getter_lib``; getter names
        come from ``config.proxy_getter_functions``. A getter that raises is
        logged and skipped.

        :return: None
        :raises Exception: when ``config.proxy_getter_lib`` cannot be resolved
        """
        self.db.changeTable(self.raw_proxy_queue)
        try:
            proxy_getter_class = self.__dynamic_import__(
                config.proxy_getter_lib)
        except Exception as e:
            # Keep the underlying cause visible before re-raising.
            self.log.error(str(e))
            raise Exception('%s not found in ProxyGetter' %
                            config.proxy_getter_lib)

        for proxyGetter in config.proxy_getter_functions:
            # fetch
            try:
                self.log.info(
                    "{func}: fetch proxy start".format(func=proxyGetter))
                for proxy in getattr(proxy_getter_class,
                                     proxyGetter.strip())():
                    # Store directly; no de-duplication is done in code
                    # (original note: the hash structure de-duplicates).
                    proxy = proxy.strip()
                    if proxy and verifyProxyFormat(proxy):
                        self.log.info('{func}: fetch proxy {proxy}'.format(
                            func=proxyGetter, proxy=proxy))
                        self.db.put(proxy)
                    else:
                        self.log.error(
                            '{func}: fetch proxy {proxy} error'.format(
                                func=proxyGetter, proxy=proxy))
            except Exception:
                self.log.error(
                    "{func}: fetch proxy fail".format(func=proxyGetter))
                continue

    def get(self):
        """
        Return a random key from the useful table.

        :return: a proxy key, or None when the table is empty
        """
        self.db.changeTable(self.useful_proxy_queue)
        item_dict = self.db.getAll()
        if item_dict:
            # list() gives random.choice a sequence on Python 2 and 3 alike.
            return random.choice(list(item_dict.keys()))
        return None

    def delete(self, proxy):
        """
        Delete a proxy from the useful table.

        :param proxy: key of the proxy to delete
        :return: None
        """
        self.db.changeTable(self.useful_proxy_queue)
        self.db.delete(proxy)

    def getAll(self):
        """
        Return all keys of the useful table.

        :return: list of proxy keys (empty list when the table is empty)
        """
        self.db.changeTable(self.useful_proxy_queue)
        item_dict = self.db.getAll()
        return list(item_dict.keys()) if item_dict else list()

    def getNumber(self):
        """
        Count the entries of both tables.

        :return: dict with keys ``raw_proxy`` and ``useful_proxy``
        """
        self.db.changeTable(self.raw_proxy_queue)
        total_raw_proxy = self.db.getNumber()
        self.db.changeTable(self.useful_proxy_queue)
        total_useful_queue = self.db.getNumber()
        return {
            'raw_proxy': total_raw_proxy,
            'useful_proxy': total_useful_queue
        }
class ProxyManager(object):
    """
    Fetch proxies into the raw table and serve proxies from the useful table.
    """

    def __init__(self):
        self.db = DbClient()
        self.raw_proxy_queue = 'raw_proxy'
        # NOTE(review): this was spelled ``Loghandler``; every other variant
        # of this class uses ``LogHandler`` -- confirm against the import.
        self.log = LogHandler('proxy_manager')
        self.useful_proxy_queue = 'useful_proxy'

    @staticmethod
    def _pick(items):
        """
        Return a random element of *items*, or a random key if it is a mapping.

        :param items: sequence or mapping returned by the store; may be falsy
        :return: one element/key, or None when *items* is empty or None
        """
        if not items:
            return None
        if hasattr(items, 'keys'):
            # random.choice indexes by position, which a mapping cannot do.
            items = list(items.keys())
        return random.choice(items)

    def refresh(self):
        """
        Call every configured getter and store the well-formed proxies.

        Getter names come from ``config.proxy_getter_functions`` and are
        resolved on ``GetFreeProxy``. A yielded ``False`` is skipped silently;
        other rejected values are logged as errors. A getter that raises is
        logged and skipped.

        :return: None
        """
        self.db.changeTable(self.raw_proxy_queue)
        for proxyGetter in config.proxy_getter_functions:
            try:
                self.log.info(
                    "{func}: fetch proxy start".format(func=proxyGetter))
                for proxy in getattr(GetFreeProxy, proxyGetter.strip())():
                    if proxy is False:
                        continue
                    if proxy and verifyProxyFormat(proxy):
                        self.log.info("{func}: fetch proxy {proxy}".format(
                            func=proxyGetter, proxy=proxy))
                        self.db.put(proxy)
                    else:
                        self.log.error(
                            "{func}: fetch proxy {proxy} error".format(
                                func=proxyGetter, proxy=proxy))
            except Exception as s:
                self.log.error("refresh: {}".format(s))
                self.log.error(
                    "{func}: fetch proxy fail".format(func=proxyGetter))
                continue

    def get(self):
        """
        Return a random proxy from the useful table.

        :return: a proxy key/element, or None when the table is empty
        """
        self.db.changeTable(self.useful_proxy_queue)
        return self._pick(self.db.getAll())

    def get_new(self):
        """
        Return a random proxy from the useful table.

        Works whether the store returns a sequence or a mapping; calling
        ``random.choice`` directly on a mapping raised ``KeyError``.

        :return: a proxy key/element, or None when the table is empty
        """
        self.db.changeTable(self.useful_proxy_queue)
        return self._pick(self.db.getAll())

    def delete(self, proxy):
        """
        Delete a proxy from the useful table.

        :param proxy: key of the proxy to delete
        :return: None
        """
        self.db.changeTable(self.useful_proxy_queue)
        self.db.delete(proxy)

    def getAll(self):
        """
        Return the content of the useful table exactly as the store gives it.

        :return: whatever ``self.db.getAll()`` returns
        """
        self.db.changeTable(self.useful_proxy_queue)
        return self.db.getAll()

    def getNumber(self):
        """
        Count the entries of both tables.

        :return: dict with keys ``raw_proxy`` and ``useful_proxy``
        """
        self.db.changeTable(self.raw_proxy_queue)
        total_raw_proxy = self.db.getNumber()
        self.db.changeTable(self.useful_proxy_queue)
        total_useful_proxy = self.db.getNumber()
        return {
            'raw_proxy': total_raw_proxy,
            'useful_proxy': total_useful_proxy
        }

    def getNumber_new(self):
        """
        Count both tables with a single store call and print the result.

        NOTE(review): passes two table names to ``self.db.getNumber`` while
        ``getNumber`` above calls it without arguments -- confirm the store
        supports both call forms.

        :return: tuple ``(raw, useful)``
        """
        raw, useful = self.db.getNumber(self.raw_proxy_queue,
                                        self.useful_proxy_queue)
        print('{}---,{}'.format(raw, useful))
        return raw, useful
class ProxyManager(object):
    """
    Fetch proxies into the raw table and serve proxies from the useful table.

    The ``mode`` given at construction is handed to ``DbClient`` and echoed
    in the log messages of ``refresh``.
    """

    def __init__(self, mode):
        self.mode = mode
        self.db = DbClient(mode)
        self.raw_proxy_queue = 'raw_proxy'
        self.log = LogHandler('proxy_manager')
        self.useful_proxy_queue = 'useful_proxy'

    def refresh(self):
        """
        Call every configured getter and store the well-formed proxies.

        Getter names come from ``config.proxy_getter_functions`` and are
        resolved on ``GetFreeProxy``. A getter that raises is logged and
        skipped.

        :return: None
        """
        self.db.changeTable(self.raw_proxy_queue)
        for getter_name in config.proxy_getter_functions:
            try:
                self.log.info(
                    "{func}: fetch proxy start".format(func=getter_name))
                fetch = getattr(GetFreeProxy, getter_name.strip())
                for candidate in fetch():
                    # Stored as-is; no de-duplication is done in code
                    # (original note: the hash structure de-duplicates).
                    candidate = candidate.strip()
                    if not candidate or not verifyProxyFormat(candidate):
                        self.log.error(
                            'Mode:{mode} {func}: fetch proxy {proxy} error'.format(
                                mode=self.mode, func=getter_name,
                                proxy=candidate))
                        continue
                    self.log.info(
                        'Mode:{mode} {func}: fetch proxy {proxy}'.format(
                            mode=self.mode, func=getter_name, proxy=candidate))
                    self.db.put(candidate)
            except Exception:
                self.log.error("Mode:{mode} {func}: fetch proxy fail".format(
                    mode=self.mode, func=getter_name))

    def _random_useful_proxy(self):
        """
        Return a random key from the useful table, or None when it is empty.
        """
        self.db.changeTable(self.useful_proxy_queue)
        pool = self.db.getAll()
        if not pool:
            return None
        candidates = pool.keys()
        if EnvUtil.PY3:
            # random.choice needs a sequence, not a dict view.
            candidates = list(candidates)
        return random.choice(candidates)

    def get_http(self):
        """
        Return a random key from the useful table (http entry point).

        :return: a proxy key, or None when the table is empty
        """
        return self._random_useful_proxy()

    def get_https(self):
        """
        Return a random key from the useful table (https entry point).

        :return: a proxy key, or None when the table is empty
        """
        return self._random_useful_proxy()

    def delete(self, proxy):
        """
        Delete a proxy from the useful table.

        :param proxy: key of the proxy to delete
        :return: None
        """
        self.db.changeTable(self.useful_proxy_queue)
        self.db.delete(proxy)

    def getAll(self):
        """
        Return all keys of the useful table.

        :return: list of proxy keys (empty list when the table is empty)
        """
        self.db.changeTable(self.useful_proxy_queue)
        pool = self.db.getAll()
        as_list = list if EnvUtil.PY3 else (lambda keys: keys)
        return as_list(pool.keys()) if pool else list()

    def getNumber(self):
        """
        Count the entries of both tables.

        :return: dict with keys ``raw_proxy`` and ``useful_proxy``
        """
        counts = {}
        tables = (
            ('raw_proxy', self.raw_proxy_queue),
            ('useful_proxy', self.useful_proxy_queue),
        )
        for label, table in tables:
            self.db.changeTable(table)
            counts[label] = self.db.getNumber()
        return counts
class ProxyManager(object):
    """
    Fetch proxy records into the raw table, tag them with a country and serve
    records from the useful table.
    """

    def __init__(self):
        self.db = DbClient()
        self.config = GetConfig()
        self.raw_proxy_queue = 'raw_proxy'
        self.log = LogHandler('proxy_manager')
        self.useful_proxy_queue = 'useful_proxy'

    def refresh(self):
        """
        Call every configured getter and store records that are not yet useful.

        Getter names come from ``self.config.proxy_getter_functions`` and are
        resolved on ``GetFreeProxy``. Each truthy record is looked up by its
        ``'ip'`` entry in the useful table; unknown ones get a ``'country'``
        entry and are put into the raw table.

        :return: None
        """
        for getter_name in self.config.proxy_getter_functions:
            fetch = getattr(GetFreeProxy, getter_name.strip())
            # fetch raw proxy
            for record in fetch():
                if not record:
                    continue
                ip = record['ip']
                self.log.info('{func}: fetch proxy {proxy}'.format(
                    func=getter_name, proxy=ip))
                self.db.changeTable(self.useful_proxy_queue)
                if self.db.exists(record['ip']):
                    continue
                self.db.changeTable(self.raw_proxy_queue)
                record['country'] = self.get_ip_country(record['ip'])
                self.db.put(record)

    def get_ip_country(self, ip):
        """
        Look up the country of *ip* with ``geolite2``.

        :param ip: address passed to ``geolite2.lookup``
        :return: the ``country`` attribute of the match, or None without a match
        """
        found = geolite2.lookup(ip)
        if found:
            return found.country
        return None

    def get(self, filters):
        """
        Return one random record from the useful table.

        :param filters: passed through to ``self.db.random_one``
        :return: whatever ``self.db.random_one(filters)`` returns
        """
        self.db.changeTable(self.useful_proxy_queue)
        chosen = self.db.random_one(filters)
        return chosen

    def delete(self, proxy):
        """
        Delete a proxy from the useful table.

        :param proxy: proxy to delete
        :return: None
        """
        self.db.changeTable(self.useful_proxy_queue)
        self.db.delete(proxy)

    def getAll(self):
        """
        Return the content of the useful table exactly as the store gives it.

        :return: whatever ``self.db.getAll()`` returns
        """
        self.db.changeTable(self.useful_proxy_queue)
        everything = self.db.getAll()
        return everything

    def clean(self):
        """
        Delegate to ``self.db.clean()``.

        :return: None
        """
        self.db.clean()

    def getNumber(self):
        """
        Count the entries of both tables.

        :return: dict with keys ``raw_proxy`` and ``useful_proxy``
        """
        counts = {}
        tables = (
            ('raw_proxy', self.raw_proxy_queue),
            ('useful_proxy', self.useful_proxy_queue),
        )
        for label, table in tables:
            self.db.changeTable(table)
            counts[label] = self.db.getNumber()
        return counts
class ProxyManager(object):
    """
    Fetch proxies into the raw table, serve proxies from the useful table and
    keep per-consumer failure lists in ``useful_proxy_fail_<name>`` tables.
    """

    def __init__(self):
        self.db = DbClient()
        self.raw_proxy_queue = 'raw_proxy'
        self.log = LogHandler('proxy_manager')
        self.useful_proxy_queue = 'useful_proxy'

    def fetch(self):
        """
        Run every configured getter and store new, well-formed proxies.

        Each name in ``config.proxy_getter_functions`` is looked up on
        ``GetFreeProxy`` and called. Proxies are de-duplicated across all
        getters of this run; a getter that raises is logged and skipped.

        :return: None
        """
        self.db.changeTable(self.raw_proxy_queue)
        proxy_set = set()
        self.log.info("ProxyFetch : start")
        for proxyGetter in config.proxy_getter_functions:
            self.log.info(
                "ProxyFetch - {func}: start".format(func=proxyGetter))
            try:
                for proxy in getattr(GetFreeProxy, proxyGetter.strip())():
                    proxy = proxy.strip()
                    if not proxy or not verifyProxyFormat(proxy):
                        self.log.error(
                            'ProxyFetch - {func}: {proxy} illegal'.format(
                                func=proxyGetter, proxy=proxy.ljust(20)))
                        continue
                    if proxy in proxy_set:
                        self.log.info(
                            'ProxyFetch - {func}: {proxy} exist'.format(
                                func=proxyGetter, proxy=proxy.ljust(20)))
                        continue
                    self.log.info(
                        'ProxyFetch - {func}: {proxy} success'.format(
                            func=proxyGetter, proxy=proxy.ljust(20)))
                    self.db.put(Proxy(proxy, source=proxyGetter))
                    proxy_set.add(proxy)
            except Exception as e:
                # Best effort: one broken getter must not stop the others.
                self.log.error(
                    "ProxyFetch - {func}: error".format(func=proxyGetter))
                self.log.error(str(e))

    def get(self):
        """
        Return a random proxy from the useful table.

        :return: a ``Proxy`` built from the stored JSON, or None when the
            table is empty
        """
        self.db.changeTable(self.useful_proxy_queue)
        item_list = self.db.getAll()
        if item_list:
            return Proxy.newProxyFromJson(random.choice(item_list))
        return None

    def delete(self, proxy_str):
        """
        Delete a proxy from the useful table.

        :param proxy_str: key of the proxy to delete
        :return: None
        """
        self.db.changeTable(self.useful_proxy_queue)
        self.db.delete(proxy_str)

    def getAll(self):
        """
        Return every proxy in the useful table.

        :return: list of ``Proxy`` objects (empty when the table is empty)
        """
        self.db.changeTable(self.useful_proxy_queue)
        item_list = self.db.getAll()
        # Tolerate a falsy result the same way get() does.
        return [Proxy.newProxyFromJson(item) for item in item_list or []]

    def getNumber(self):
        """
        Count the entries of both tables.

        :return: dict with keys ``raw_proxy`` and ``useful_proxy``
        """
        self.db.changeTable(self.raw_proxy_queue)
        total_raw_proxy = self.db.getNumber()
        self.db.changeTable(self.useful_proxy_queue)
        total_useful_queue = self.db.getNumber()
        return {
            'raw_proxy': total_raw_proxy,
            'useful_proxy': total_useful_queue
        }

    @staticmethod
    def _filter_recent_failures(all_proxies, fail_proxies, now):
        """
        Drop the proxies that have a failure record younger than 24 hours.

        Uses one pass over the failure records plus a set lookup per proxy,
        instead of scanning every failure record for every proxy. Only
        records of proxies that are in *all_proxies* are parsed, so an
        unrelated record cannot raise here.

        :param all_proxies: objects with a ``proxy`` attribute
        :param fail_proxies: objects with ``proxy`` and ``last_time``
            (``"%Y-%m-%d %H:%M:%S"``) attributes
        :param now: ``datetime`` the 24-hour window is measured against
        :return: list of the entries of *all_proxies* that are not blocked,
            in their original order
        """
        window = timedelta(hours=24)
        pool_keys = set(proxy.proxy for proxy in all_proxies)
        blocked = set()
        for failed in fail_proxies:
            if failed.proxy not in pool_keys or failed.proxy in blocked:
                continue
            failed_date = datetime.strptime(failed.last_time,
                                            "%Y-%m-%d %H:%M:%S")
            if failed_date + window > now:
                blocked.add(failed.proxy)
        return [proxy for proxy in all_proxies if proxy.proxy not in blocked]

    def getAllByName(self, name):
        """
        Return the useful proxies that did not fail for *name* in the last
        24 hours.

        Failure records are read from the ``useful_proxy_fail_<name>`` table.

        :param name: consumer name that selects the failure table
        :return: list of ``Proxy`` objects
        """
        all_proxies = self.getAll()
        self.db.changeTable(self.useful_proxy_queue + '_fail_' + name)
        fail_list = self.db.getAll()
        fail_proxies = [Proxy.newProxyFromJson(item) for item in fail_list or []]
        return self._filter_recent_failures(all_proxies, fail_proxies,
                                            datetime.now())

    def deleteByName(self, name, proxy):
        """
        Record that *proxy* failed for *name* at the current time.

        The proxy stays in the useful table; a record with the current
        timestamp is put into the ``useful_proxy_fail_<name>`` table.

        :param name: consumer name that selects the failure table
        :param proxy: proxy string to mark as failed
        :return: None
        """
        failed_proxy = Proxy(
            proxy=proxy,
            last_time=datetime.now().strftime("%Y-%m-%d %H:%M:%S"))
        self.db.changeTable(self.useful_proxy_queue + '_fail_' + name)
        self.db.put(failed_proxy)

    def getByName(self, name):
        """
        Return a random proxy that did not recently fail for *name*.

        :param name: consumer name that selects the failure table
        :return: a ``Proxy``, or None when no proxy is available
        """
        proxies = self.getAllByName(name)
        if proxies:
            return random.choice(proxies)
        return None
class ProxyManager(object):
    """
    Fetch proxies into the raw table and serve proxies from the useful table.

    This variant works on the ``raw_proxy_test`` / ``useful_proxy_test``
    tables and stores a JSON metadata blob next to every raw proxy.
    """

    def __init__(self):
        self.db = DbClient()
        self.raw_proxy_queue = 'raw_proxy_test'
        self.log = LogHandler('proxy_manager')
        self.useful_proxy_queue = 'useful_proxy_test'

    def refresh(self):
        """
        Fetch proxies from the configured getters and store them in the raw
        table.

        Getter names come from ``config.proxy_getter_functions`` and are
        resolved on ``GetFreeProxy``. Each well-formed ``host:port`` proxy is
        stored with JSON metadata ``{"host", "port", "max_conn"}``. A getter
        that raises is logged and skipped.

        :return: None
        """
        max_conn = 100
        self.db.changeTable(self.raw_proxy_queue)
        for proxyGetter in config.proxy_getter_functions:
            # fetch
            try:
                self.log.info(
                    "{func}: fetch proxy start".format(func=proxyGetter))
                for proxy in getattr(GetFreeProxy, proxyGetter.strip())():
                    # Store directly; no de-duplication is done in code
                    # (original note: the hash structure de-duplicates).
                    proxy = proxy.strip()
                    if proxy and verifyProxyFormat(proxy):
                        self.log.info('{func}: fetch proxy {proxy}'.format(
                            func=proxyGetter, proxy=proxy))
                        host, port = proxy.split(":")
                        # A fresh dict per proxy: nothing leaks between entries.
                        meta = {"host": host, "port": port,
                                "max_conn": max_conn}
                        self.db.put(proxy, json.dumps(meta))
                    else:
                        self.log.error(
                            '{func}: fetch proxy {proxy} error'.format(
                                func=proxyGetter, proxy=proxy))
            except Exception as e:
                self.log.error(e)
                self.log.error(
                    "{func}: fetch proxy fail".format(func=proxyGetter))
                continue

    def get(self):
        """
        Return a random key from the useful table.

        :return: a proxy key, or None when the table is empty
        """
        self.db.changeTable(self.useful_proxy_queue)
        item_dict = self.db.getAll()
        if item_dict:
            # This file already needs Python 3 (f-strings), so no
            # version switch is required here.
            return random.choice(list(item_dict.keys()))
        return None

    def delete(self, proxy):
        """
        Delete a proxy from the useful table.

        :param proxy: key of the proxy to delete
        :return: None
        """
        self.db.changeTable(self.useful_proxy_queue)
        self.db.delete(proxy)

    def getAll(self):
        """
        Return all keys of the useful table.

        :return: list of proxy keys (empty list when the table is empty)
        """
        self.db.changeTable(self.useful_proxy_queue)
        item_dict = self.db.getAll()
        return list(item_dict.keys()) if item_dict else list()

    def getNumber(self):
        """
        Count the entries of both tables.

        :return: dict with keys ``raw_proxy`` and ``useful_proxy``
        """
        self.db.changeTable(self.raw_proxy_queue)
        total_raw_proxy = self.db.getNumber()
        self.db.changeTable(self.useful_proxy_queue)
        total_useful_queue = self.db.getNumber()
        return {
            'raw_proxy': total_raw_proxy,
            'useful_proxy': total_useful_queue
        }


# Default proxy-center endpoint; both helpers below previously hard-coded it.
_PROXY_CENTER_API = 'http://10.143.55.90:9381/api/proxies'


def _proxy_center_url(proxy, base_url):
    """Build the proxy-center resource URL for a ``host:port`` proxy."""
    host, port = proxy.split(":")
    return f'{base_url}/{host}%3A{port}/'


def add_proxy(proxy, meta, base_url=_PROXY_CENTER_API, timeout=10):
    """
    Register a proxy node with proxy-center and print the response body.

    :param proxy: ``host:port`` string
    :param meta: JSON-serialisable metadata sent as the request body
    :param base_url: proxy-center collection URL, without trailing slash
    :param timeout: seconds before the request is abandoned; without one a
        stalled server would block the caller indefinitely
    :return: None
    """
    url = _proxy_center_url(proxy, base_url)
    jmeta = json.dumps(meta)
    r = requests.post(url, data=jmeta, timeout=timeout)
    print(r.text)


def delete_proxy(proxy, base_url=_PROXY_CENTER_API, timeout=10):
    """
    Remove a proxy node from proxy-center.

    :param proxy: ``host:port`` string
    :param base_url: proxy-center collection URL, without trailing slash
    :param timeout: seconds before the request is abandoned
    :return: None
    """
    url = _proxy_center_url(proxy, base_url)
    requests.delete(url, timeout=timeout)