class ProxyManager(object):
    """
    ProxyManager

    Collects proxies from the configured getter functions and queues the
    ones that are not already in the ``useful_proxy`` table into the
    ``raw_proxy`` table.
    """

    def __init__(self):
        self.db = DbClient()
        self.config = GetConfig()
        self.raw_proxy_queue = 'raw_proxy'
        self.log = LogHandler('proxy_manager')
        self.useful_proxy_queue = 'useful_proxy'

    def refresh(self):
        """
        fetch proxy into Db by ProxyGetter

        Every name in ``self.config.proxy_getter_functions`` is looked up on
        ``GetFreeProxy`` and called. The non-blank results are de-duplicated
        and each one that does not already exist in the useful table is put
        into the raw table. Any exception raised while processing one getter
        is logged and the loop moves on to the next getter (best effort).

        :return: None
        """
        for proxyGetter in self.config.proxy_getter_functions:
            try:
                proxy_set = set()
                # fetch raw proxy
                for proxy in getattr(GetFreeProxy, proxyGetter.strip())():
                    if not proxy:
                        continue
                    proxy = proxy.strip()
                    if not proxy:
                        # whitespace-only entry: nothing worth storing
                        continue
                    self.log.info('{func}: fetch proxy {proxy}'.format(
                        func=proxyGetter, proxy=proxy))
                    proxy_set.add(proxy)

                # store raw proxy
                for proxy in proxy_set:
                    # the table has to be switched before every check/put
                    # because the client works on one table at a time
                    self.db.changeTable(self.useful_proxy_queue)
                    if self.db.exists(proxy):
                        continue
                    self.db.changeTable(self.raw_proxy_queue)
                    self.db.put(proxy)
            except Exception as e:
                # keep the original best-effort behaviour, but report the
                # failure through the logger instead of a bare print
                self.log.error('{func}: fetch proxy fail: {err}'.format(
                    func=proxyGetter, err=e))
                continue
class ProxyManager(object):
    """
    Manage the ``raw_proxy`` and ``useful_proxy`` tables behind ``DbClient``.
    """

    def __init__(self):
        self.db = DbClient()
        self.config = GetConfig()
        self.raw_proxy_queue = 'raw_proxy'
        self.log = LogHandler('proxy_manager')
        self.useful_proxy_queue = 'useful_proxy'

    def refresh(self):
        """
        Fetch proxy addresses and store them in the DB.

        Each name in ``self.config.proxy_getter_functions`` is looked up on
        ``GetFreeProxy`` and called. A getter that raises is logged and
        skipped. Results that pass ``verifyProxyFormat`` are de-duplicated
        and put into the raw table unless they already exist in the useful
        table.

        :return: None
        """
        for proxyGetter in self.config.proxy_getter_functions:
            proxy_set = set()
            try:
                self.log.info("{func}:fetch proxy start".format(func=proxyGetter))
                # materialise inside the try so errors raised lazily by a
                # generator-based getter are caught here as well
                proxy_iter = list(getattr(GetFreeProxy, proxyGetter.strip())())
            except Exception:
                self.log.error("{func}:fetch proxy fail".format(func=proxyGetter))
                continue

            for proxy in proxy_iter:
                # a getter may yield None; treat it like an empty string
                proxy = (proxy or '').strip()
                if proxy and verifyProxyFormat(proxy):
                    self.log.info("{func}:fetch proxy {proxy}".format(
                        func=proxyGetter, proxy=proxy))
                    proxy_set.add(proxy)
                else:
                    self.log.info("{func}:fetch proxy {proxy} error".format(
                        func=proxyGetter, proxy=proxy))

            # store in the DB
            for proxy in proxy_set:
                self.db.changeTable(self.useful_proxy_queue)
                if self.db.exists(proxy):
                    continue
                self.db.changeTable(self.raw_proxy_queue)
                self.db.put(proxy)

    def get(self):
        """
        Return one useful proxy picked at random.

        :return: a key of the useful table, or None when the table is empty
        """
        self.db.changeTable(self.useful_proxy_queue)
        item_dict = self.db.getAll()
        if not item_dict:
            return None
        # list(...) is required on Python 3 (keys() is a view) and harmless
        # on Python 2, so no interpreter check is needed
        return random.choice(list(item_dict.keys()))

    def delete(self, proxy):
        """
        Delete ``proxy`` from the useful table.

        :param proxy: key to remove
        :return: None
        """
        self.db.changeTable(self.useful_proxy_queue)
        self.db.delete(proxy)

    def getAll(self):
        """
        Return every key of the useful table as a list.

        :return: list of keys, empty when the table is empty
        """
        self.db.changeTable(self.useful_proxy_queue)
        items = self.db.getAll()
        return list(items.keys()) if items else []

    def getNumber(self):
        """
        Count the entries of both tables.

        :return: dict with the keys ``raw_proxy`` and ``useful_proxy``
        """
        self.db.changeTable(self.raw_proxy_queue)
        total_raw_proxy = self.db.getNumber()
        self.db.changeTable(self.useful_proxy_queue)
        total_useful_proxy = self.db.getNumber()
        return {
            'raw_proxy': total_raw_proxy,
            'useful_proxy': total_useful_proxy
        }
class ProxyManager(object):
    """
    ProxyManager

    Front end for the two proxy tables kept by ``DbClient``: ``raw_proxy``
    (freshly fetched) and ``useful_proxy`` (the pool handed out by ``get``).
    """

    def __init__(self):
        self.db = DbClient()
        self.config = GetConfig()
        self.raw_proxy_queue = 'raw_proxy'
        self.log = LogHandler('proxy_manager')
        self.useful_proxy_queue = 'useful_proxy'

    def refresh(self):
        """
        fetch proxy into Db by ProxyGetter

        Each name in ``self.config.proxy_getter_functions`` is looked up on
        ``GetFreeProxy`` and called. A getter that raises is logged and
        skipped so the remaining getters still run. Non-blank results are
        stripped, de-duplicated and put into the raw table unless they
        already exist in the useful table.

        :return: None
        """
        for proxyGetter in self.config.proxy_getter_functions:
            # fetch raw proxy
            try:
                # materialise inside the try so errors raised lazily by a
                # generator-based getter are caught here as well
                fetched = list(getattr(GetFreeProxy, proxyGetter.strip())())
            except Exception as e:
                self.log.error('{func}: fetch proxy fail: {err}'.format(
                    func=proxyGetter, err=e))
                continue

            proxy_set = set()
            for proxy in fetched:
                if not proxy:
                    continue
                proxy = proxy.strip()
                if not proxy:
                    # whitespace-only entry: do not store an empty key
                    continue
                self.log.info('{func}: fetch proxy {proxy}'.format(
                    func=proxyGetter, proxy=proxy))
                proxy_set.add(proxy)

            # store raw proxy
            for proxy in proxy_set:
                self.db.changeTable(self.useful_proxy_queue)
                if self.db.exists(proxy):
                    continue
                self.db.changeTable(self.raw_proxy_queue)
                self.db.put(proxy)

    def get(self):
        """
        return a useful proxy

        :return: a random key of the useful table, or None when it is empty
        """
        self.db.changeTable(self.useful_proxy_queue)
        item_dict = self.db.getAll()
        if not item_dict:
            return None
        # list(...) is needed on Python 3 and is a no-op copy on Python 2,
        # so the interpreter check is unnecessary
        return random.choice(list(item_dict.keys()))

    def delete(self, proxy):
        """
        delete proxy from pool

        :param proxy: key to remove from the useful table
        :return: None
        """
        self.db.changeTable(self.useful_proxy_queue)
        self.db.delete(proxy)

    def getAll(self):
        """
        get all proxy from pool as list

        :return: list of keys of the useful table, empty when it is empty
        """
        self.db.changeTable(self.useful_proxy_queue)
        item_dict = self.db.getAll()
        return list(item_dict.keys()) if item_dict else []

    def getNumber(self):
        """
        count the entries of both tables

        :return: dict with the keys ``raw_proxy`` and ``useful_proxy``
        """
        self.db.changeTable(self.raw_proxy_queue)
        total_raw_proxy = self.db.getNumber()
        self.db.changeTable(self.useful_proxy_queue)
        total_useful_proxy = self.db.getNumber()
        return {
            'raw_proxy': total_raw_proxy,
            'useful_proxy': total_useful_proxy
        }
class ProxyManager(object):
    """
    ProxyManager

    Fills the ``raw_proxy`` table from the configured getters and serves
    proxies from the ``useful_proxy`` table.
    """

    def __init__(self):
        self.db = DbClient()
        self.config = GetConfig()
        self.raw_proxy_queue = 'raw_proxy'
        self.log = LogHandler('proxy_manager')
        self.useful_proxy_queue = 'useful_proxy'

    def refresh(self):
        """
        fetch proxy into Db by ProxyGetter

        For every getter name in ``self.config.proxy_getter_functions`` the
        matching ``GetFreeProxy`` attribute is called. If the getter raises,
        the error is logged and the next getter is tried. Results are
        stripped, blank ones are dropped, duplicates are collapsed, and what
        is not yet in the useful table is put into the raw table.

        :return: None
        """
        for proxyGetter in self.config.proxy_getter_functions:
            # fetch raw proxy
            try:
                # list(...) forces a lazy getter to run inside the try block
                fetched = list(getattr(GetFreeProxy, proxyGetter.strip())())
            except Exception as e:
                self.log.error('{func}: fetch proxy fail: {err}'.format(
                    func=proxyGetter, err=e))
                continue

            proxy_set = set()
            for proxy in fetched:
                # strip before the emptiness check so that a whitespace-only
                # value is not stored as an empty key
                proxy = proxy.strip() if proxy else proxy
                if not proxy:
                    continue
                self.log.info('{func}: fetch proxy {proxy}'.format(
                    func=proxyGetter, proxy=proxy))
                proxy_set.add(proxy)

            # store raw proxy
            for proxy in proxy_set:
                self.db.changeTable(self.useful_proxy_queue)
                if self.db.exists(proxy):
                    continue
                self.db.changeTable(self.raw_proxy_queue)
                self.db.put(proxy)

    def get(self):
        """
        return a useful proxy

        :return: a random key of the useful table, or None when it is empty
        """
        self.db.changeTable(self.useful_proxy_queue)
        item_dict = self.db.getAll()
        if item_dict:
            # works on Python 2 and 3 alike, no interpreter check required
            return random.choice(list(item_dict.keys()))
        return None

    def delete(self, proxy):
        """
        delete proxy from pool

        :param proxy: key to remove from the useful table
        :return: None
        """
        self.db.changeTable(self.useful_proxy_queue)
        self.db.delete(proxy)

    def getAll(self):
        """
        get all proxy from pool as list

        :return: list of keys of the useful table, empty when it is empty
        """
        self.db.changeTable(self.useful_proxy_queue)
        item_dict = self.db.getAll()
        if not item_dict:
            return []
        return list(item_dict.keys())

    def getNumber(self):
        """
        count the entries of both tables

        :return: dict with the keys ``raw_proxy`` and ``useful_proxy``
        """
        self.db.changeTable(self.raw_proxy_queue)
        total_raw_proxy = self.db.getNumber()
        self.db.changeTable(self.useful_proxy_queue)
        total_useful_proxy = self.db.getNumber()
        return {'raw_proxy': total_raw_proxy, 'useful_proxy': total_useful_proxy}
class ProxyManager(object):
    """
    ProxyManager

    Fills the ``raw_proxy`` table from the configured getters, serves
    proxies from the ``useful_proxy`` table and can pick a replacement proxy
    located in a given city.
    """

    def __init__(self):
        self.db = DbClient()
        self.config = GetConfig()
        self.raw_proxy_queue = 'raw_proxy'
        self.log = LogHandler('proxy_manager')
        self.useful_proxy_queue = 'useful_proxy'

    def refresh(self):
        """
        fetch proxy into Db by ProxyGetter

        Each name in ``self.config.proxy_getter_functions`` is looked up on
        ``GetFreeProxy`` and called. A getter that raises is logged and
        skipped. Results that pass ``verifyProxyFormat`` are de-duplicated
        and put into the raw table unless they already exist in the useful
        table.

        :return: None
        """
        for proxyGetter in self.config.proxy_getter_functions:
            # fetch
            proxy_set = set()
            try:
                self.log.info(
                    "{func}: fetch proxy start".format(func=proxyGetter))
                # materialise inside the try so errors raised lazily by a
                # generator-based getter are caught here as well
                proxy_iter = list(getattr(GetFreeProxy, proxyGetter.strip())())
            except Exception:
                self.log.error(
                    "{func}: fetch proxy fail".format(func=proxyGetter))
                continue

            for proxy in proxy_iter:
                # a getter may yield None; treat it like an empty string
                proxy = (proxy or '').strip()
                if proxy and verifyProxyFormat(proxy):
                    self.log.info('{func}: fetch proxy {proxy}'.format(
                        func=proxyGetter, proxy=proxy))
                    proxy_set.add(proxy)
                else:
                    self.log.error('{func}: fetch proxy {proxy} error'.format(
                        func=proxyGetter, proxy=proxy))

            # store
            for proxy in proxy_set:
                self.db.changeTable(self.useful_proxy_queue)
                if self.db.exists(proxy):
                    continue
                self.db.changeTable(self.raw_proxy_queue)
                self.db.put(proxy)

    def get(self):
        """
        return a useful proxy

        :return: a random key of the useful table, or None when it is empty
        """
        self.db.changeTable(self.useful_proxy_queue)
        item_dict = self.db.getAll()
        if not item_dict:
            return None
        # list(...) is needed on Python 3 and harmless on Python 2, so the
        # interpreter check is unnecessary
        return random.choice(list(item_dict.keys()))

    def delete(self, proxy):
        """
        delete proxy from pool

        :param proxy: key to remove from the useful table
        :return: None
        """
        self.db.changeTable(self.useful_proxy_queue)
        self.db.delete(proxy)

    def getAll(self):
        """
        get all proxy from pool as list

        :return: list of keys of the useful table, empty when it is empty
        """
        self.db.changeTable(self.useful_proxy_queue)
        item_dict = self.db.getAll()
        return list(item_dict.keys()) if item_dict else []

    def validateProxy(self, proxy):
        """
        check a proxy with ``validUsefulProxy``

        :param proxy: proxy to check
        :return: whatever ``validUsefulProxy(proxy)`` returns
        """
        return validUsefulProxy(proxy)

    def referProxy(self, proxy, city, max_attempts=14):
        """
        return ``proxy`` if it still validates, otherwise look for a
        validated pool proxy located in the wanted city

        When ``proxy`` is given but fails validation, the wanted city is
        taken from element 1 of that proxy's ``get_proxy_loc_info()`` result
        and the ``city`` argument is ignored.

        :param proxy: proxy currently in use, or None
        :param city: city to match when ``proxy`` is None
        :param max_attempts: how many random pool draws to try (default 14,
            the number of iterations of the original loop)
        :return: a proxy, or None when no match was found
        """
        self.log.info('referProxy: proxy={proxy} city={city}'.format(
            proxy=proxy, city=city))
        if proxy is not None:
            if self.validateProxy(proxy):
                return proxy
            old_proxy_loc = GetProxyLocInfo(proxy).get_proxy_loc_info()
            city = old_proxy_loc[1]

        for _ in range(max_attempts):
            tmp_proxy = self.get()
            if tmp_proxy is None:
                # the pool is empty; drawing again cannot succeed
                return None
            self.log.info('referProxy: try {proxy}'.format(proxy=tmp_proxy))
            if not self.validateProxy(tmp_proxy):
                continue
            try:
                tmp_proxy_loc = GetProxyLocInfo(tmp_proxy).get_proxy_loc_info()
                if tmp_proxy_loc[1] == city:
                    return tmp_proxy
            except Exception as e:
                # a failed location lookup only disqualifies this candidate
                self.log.error('referProxy: location lookup of {proxy} failed: {err}'.format(
                    proxy=tmp_proxy, err=e))
        return None

    def getNumber(self):
        """
        count the entries of both tables

        :return: dict with the keys ``raw_proxy`` and ``useful_proxy``
        """
        self.db.changeTable(self.raw_proxy_queue)
        total_raw_proxy = self.db.getNumber()
        self.db.changeTable(self.useful_proxy_queue)
        total_useful_proxy = self.db.getNumber()
        return {
            'raw_proxy': total_raw_proxy,
            'useful_proxy': total_useful_proxy
        }
class ProxyManager(object):
    """
    ProxyManager

    Works on two tables of ``DbClient``: freshly fetched proxies go to
    ``raw_proxy``; reads, deletes and random picks use ``useful_proxy``.
    """

    def __init__(self):
        self.raw_proxy_queue = 'raw_proxy'
        self.useful_proxy_queue = 'useful_proxy'
        self.db = DbClient()
        self.config = GetConfig()
        self.log = LogHandler('proxy_manager')

    def refresh(self):
        """
        fetch proxy into Db by ProxyGetter

        Calls every getter named in ``self.config.proxy_getter_functions``
        on ``GetFreeProxy``. Each truthy result is logged, skipped when its
        ``'ip'`` already exists in the useful table, and otherwise tagged
        with a ``'country'`` entry and put into the raw table.

        :return: None
        """
        for getter_name in self.config.proxy_getter_functions:
            fetch = getattr(GetFreeProxy, getter_name.strip())
            for entry in fetch():
                if not entry:
                    continue
                ip = entry['ip']
                message = '{func}: fetch proxy {proxy}'.format(
                    func=getter_name, proxy=ip)
                self.log.info(message)

                # already known as useful: nothing to queue
                self.db.changeTable(self.useful_proxy_queue)
                if self.db.exists(ip):
                    continue

                self.db.changeTable(self.raw_proxy_queue)
                entry['country'] = self.get_ip_country(ip)
                self.db.put(entry)

    def get_ip_country(self, ip):
        """
        look up the country of an ip with ``geolite2``

        :param ip: address handed to ``geolite2.lookup``
        :return: ``country`` of the lookup result, or None when the lookup
            result is falsy
        """
        found = geolite2.lookup(ip)
        if not found:
            return None
        return found.country

    def get(self, filters):
        """
        return a useful proxy

        :param filters: passed through to ``self.db.random_one``
        :return: result of ``self.db.random_one(filters)`` on the useful
            table
        """
        table = self.useful_proxy_queue
        self.db.changeTable(table)
        picked = self.db.random_one(filters)
        return picked

    def delete(self, proxy):
        """
        delete proxy from pool

        :param proxy: passed to ``self.db.delete`` on the useful table
        :return: None
        """
        table = self.useful_proxy_queue
        self.db.changeTable(table)
        self.db.delete(proxy)

    def getAll(self):
        """
        get all proxy from pool

        :return: result of ``self.db.getAll()`` on the useful table
        """
        table = self.useful_proxy_queue
        self.db.changeTable(table)
        everything = self.db.getAll()
        return everything

    def clean(self):
        """
        delegate to ``self.db.clean()``

        :return: None
        """
        self.db.clean()

    def getNumber(self):
        """
        count the entries of both tables, raw table first

        :return: dict with the keys ``raw_proxy`` and ``useful_proxy``
        """
        counts = {}
        tables = (
            ('raw_proxy', self.raw_proxy_queue),
            ('useful_proxy', self.useful_proxy_queue),
        )
        for label, table in tables:
            self.db.changeTable(table)
            counts[label] = self.db.getNumber()
        return counts