class ProxyValidSchedule(ProxyManager):
    """Endlessly re-validate every proxy stored in the useful-proxy table.

    Each proxy has a counter in the DB (read with ``db.getvalue`` and
    changed with ``db.inckey``). A passing check increments it; a failing
    check resets a non-negative counter to zero or decrements it otherwise.
    A proxy whose counter ends up below zero is deleted.
    """

    def __init__(self):
        ProxyManager.__init__(self)
        self.db = DbClient()
        self.log = LogHandler('valid_schedule')

    def _read_counter(self, proxy):
        """Return the stored counter of *proxy* as an int, or None if unset.

        The raw value is coerced with ``int`` so that string values are
        compared numerically rather than by type.
        """
        raw = self.db.getvalue(proxy)
        return None if raw is None else int(raw)

    def __validProxy(self):
        """
        Validate all proxies in the useful-proxy table, forever.

        :return: never returns
        """
        # NOTE(review): 60 * 0 * random.random() is always 0, so there is no
        # start-up delay; the expression is kept as written.
        time.sleep(60 * 0 * random.random())
        while True:
            self.db.changeTable(self.useful_proxy_queue)
            for each_proxy in self.db.getAll():
                if isinstance(each_proxy, bytes):
                    each_proxy = each_proxy.decode('utf-8')

                if validUsefulProxy(each_proxy):
                    # Success: increment the counter.
                    self.db.inckey(each_proxy, 1)
                    self.log.debug(
                        'validProxy_b: {} validation pass'.format(each_proxy))
                else:
                    # Failure: reset a non-negative counter to zero,
                    # otherwise decrement it by one.
                    current = self._read_counter(each_proxy)
                    print("原有value " + str(current))
                    if current is not None and current >= 0:
                        self.db.inckey(each_proxy, -current)
                    else:
                        self.db.inckey(each_proxy, -1)
                    self.log.info(
                        'validProxy_b: {} validation fail'.format(each_proxy))

                value = self._read_counter(each_proxy)
                print(value)
                if value is not None and value < 0:
                    # A negative counter removes the proxy from the pool.
                    print("删除" + each_proxy)
                    self.db.delete(each_proxy)
            self.log.info('validProxy_a running normal')

    def main(self):
        """Entry point: run the validation loop (blocks forever)."""
        self.__validProxy()
class ProxyManager(object):
    """
    ProxyManager

    Fetches raw proxies through the configured getter functions and exposes
    read/delete access to the useful-proxy table.
    """

    def __init__(self):
        self.db = DbClient()
        self.config = GetConfig()
        self.raw_proxy_queue = 'raw_proxy'
        self.useful_proxy_queue = 'useful_proxy_queue'

    def refresh(self):
        """
        fetch proxy into Db by ProxyGetter

        Every name in ``config.proxy_getter_functions`` is looked up on
        ``GetFreeProxy`` and called; the non-empty proxies it yields are
        de-duplicated and written to the raw-proxy table.

        :return: None
        """
        for proxyGetter in self.config.proxy_getter_functions:
            proxy_set = set()
            # fetch raw proxy; the guard belongs on each fetched item (the
            # previous `if proxy_set:` on a fresh set skipped every getter)
            for proxy in getattr(GetFreeProxy, proxyGetter.strip())():
                if proxy:
                    proxy_set.add(proxy)

            # store raw proxy
            self.db.changeTable(self.raw_proxy_queue)
            for proxy in proxy_set:
                self.db.put(proxy)

    def get(self):
        """
        return a useful proxy

        :return: whatever ``db.pop()`` yields from the useful-proxy table
        """
        self.db.changeTable(self.useful_proxy_queue)
        return self.db.pop()

    def delete(self, proxy):
        """
        delete proxy from pool

        :param proxy: key of the proxy to remove from the useful-proxy table
        :return: None
        """
        self.db.changeTable(self.useful_proxy_queue)
        self.db.delete(proxy)

    def getAll(self):
        """
        get all proxy from pool

        :return: whatever ``db.getAll()`` yields from the useful-proxy table
        """
        self.db.changeTable(self.useful_proxy_queue)
        return self.db.getAll()
class PayProxyTest(object):
    """Fetch a proxy from the paid 2808proxy API and store proxies in the DB."""

    def __init__(self):
        self.db = DbClient()
        self.db.changeTable('useful_proxy')

    def GetProxy(self):
        """Request one HTTP proxy from the paid API and print timing details.

        :return: the proxy as ``"ip:port"``, or None when the request or the
            response parsing fails (the error is printed)
        """
        try:
            time1 = time.time()
            # NOTE(review): the API token is embedded in the URL; consider
            # moving it to configuration instead of source control.
            url = 'https://api.2808proxy.com/proxy/unify/get?token=Y3AEO9WES4U3WKQAJXZO8DYM7LAZFOQN&amount=1&proxy_type=http&format=json&splitter=rn&expire=300'
            # A timeout keeps an unresponsive API from blocking forever.
            resp = requests.get(url, timeout=10)
            # Decode the body once and reuse it.
            payload = resp.json()
            first = payload.get('data')[0]
            proxy = '%s:%s' % (first.get('ip'), first.get('http_port'))
            print(proxy)
            time2 = time.time()
            print(payload)
            print('总耗时:', time2 - time1)
            return proxy
        except Exception as e:
            # Best-effort helper: report the failure instead of raising.
            print(e)
            return None

    def InsertProxy(self, proxy):
        """Store *proxy* in the currently selected DB table.

        :param proxy: proxy value passed straight to ``db.put``
        :return: None
        """
        self.db.put(proxy)
class ProxyManager(object):
    """
    ProxyManager

    Fetches proxies through the configured getter functions and stores the
    ones not already present in the useful-proxy table into the raw table.
    """

    def __init__(self):
        self.db = DbClient()
        self.config = GetConfig()
        self.raw_proxy_queue = 'raw_proxy'
        self.log = LogHandler('proxy_manager')
        self.useful_proxy_queue = 'useful_proxy'

    def refresh(self):
        """
        fetch proxy into Db by ProxyGetter

        A getter that raises is logged and skipped; the remaining getters
        still run.

        :return: None
        """
        for proxyGetter in self.config.proxy_getter_functions:
            try:
                proxy_set = set()
                # fetch raw proxy
                for proxy in getattr(GetFreeProxy, proxyGetter.strip())():
                    if proxy:
                        self.log.info('{func}: fetch proxy {proxy}'.format(
                            func=proxyGetter, proxy=proxy))
                        proxy_set.add(proxy.strip())

                # store raw proxy, skipping those already known as useful
                for proxy in proxy_set:
                    self.db.changeTable(self.useful_proxy_queue)
                    if self.db.exists(proxy):
                        continue
                    self.db.changeTable(self.raw_proxy_queue)
                    self.db.put(proxy)
            except Exception as e:
                # `except Exception, e` / `print e` only parse on Python 2;
                # report through the logger instead.
                self.log.error('{func}: fetch proxy fail: {error}'.format(
                    func=proxyGetter, error=e))
                continue
class AsdlProxyManager():
    """Add, delete and list proxies kept in the ``asdl_proxy`` table."""

    def __init__(self):
        self.asdl_proxy_queue = "asdl_proxy"
        self.db = DbClient()

    def add_asdl_proxy(self, proxy_str):
        """Wrap *proxy_str* in a ``Proxy`` stamped with the current time and store it."""
        stamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
        entry = Proxy(proxy_str, last_time=stamp)
        self.db.changeTable(self.asdl_proxy_queue)
        self.db.put(entry)

    def delete_asdl_proxy(self, proxy_str):
        """Remove *proxy_str* from the ``asdl_proxy`` table."""
        table = self.asdl_proxy_queue
        self.db.changeTable(table)
        self.db.delete(proxy_str)

    def get_all_proxy(self):
        """Return every stored item converted with ``Proxy.newProxyFromJson``."""
        self.db.changeTable(self.asdl_proxy_queue)
        proxies = []
        for raw_item in self.db.getAll():
            proxies.append(Proxy.newProxyFromJson(raw_item))
        return proxies
class ProxyManager(object):
    """
    ProxyManager

    Manages the raw, useful and ADSL proxy tables behind ``DbClient``.
    """

    def __init__(self):
        self.db = DbClient()
        self.raw_proxy_queue = 'raw_proxy'
        self.log = LogHandler('proxy_manager')
        self.useful_proxy_queue = 'useful_proxy'
        self.adsl_queue = 'adsl'

    def refresh(self):
        """
        fetch proxy into Db by ProxyGetter/getFreeProxy.py

        A getter that raises is logged (with the error text) and skipped.

        :return: None
        """
        self.db.changeTable(self.raw_proxy_queue)
        for proxyGetter in config.proxy_getter_functions:
            # fetch
            try:
                self.log.info(
                    "{func}: fetch proxy start".format(func=proxyGetter))
                for proxy in getattr(GetFreeProxy, proxyGetter.strip())():
                    # Store the proxy directly; per the original author the
                    # hash structure de-duplicates, so no check is done here.
                    proxy = proxy.strip()
                    if proxy and verifyProxyFormat(proxy):
                        self.log.info('{func}: fetch proxy {proxy}'.format(
                            func=proxyGetter, proxy=proxy))
                        self.db.put(proxy)
                    else:
                        self.log.error(
                            '{func}: fetch proxy {proxy} error'.format(
                                func=proxyGetter, proxy=proxy))
            except Exception as e:
                self.log.error(
                    "{func}: fetch proxy fail".format(func=proxyGetter))
                # Keep the cause so failures can be diagnosed from the log.
                self.log.error(str(e))
                continue

    def get(self):
        """
        return a useful proxy

        :return: a random key of the useful-proxy table, or None when empty
        """
        self.db.changeTable(self.useful_proxy_queue)
        item_dict = self.db.getAll()
        if item_dict:
            # list(...) yields a list of keys on both Python 2 and 3.
            return random.choice(list(item_dict.keys()))
        return None

    def delete(self, proxy):
        """
        delete proxy from pool

        :param proxy: key to remove from the useful-proxy table
        :return: None
        """
        self.db.changeTable(self.useful_proxy_queue)
        self.db.delete(proxy)

    def getAll(self):
        """
        get all proxy from pool as list

        :return: list of keys in the useful-proxy table (empty list if none)
        """
        self.db.changeTable(self.useful_proxy_queue)
        item_dict = self.db.getAll()
        return list(item_dict.keys()) if item_dict else list()

    def getNumber(self):
        """Return the entry counts of the raw and useful tables as a dict."""
        self.db.changeTable(self.raw_proxy_queue)
        total_raw_proxy = self.db.getNumber()
        self.db.changeTable(self.useful_proxy_queue)
        total_useful_queue = self.db.getNumber()
        return {
            'raw_proxy': total_raw_proxy,
            'useful_proxy': total_useful_queue
        }

    def initProxyPool(self):
        """
        Call this method on first start-up.

        Empties the useful-proxy table, then reads the ADSL table.

        :return: list of the values stored in the ADSL table
        """
        self.deleteAll()
        self.db.changeTable(self.adsl_queue)
        item_dict = self.db.getAll()
        return list(item_dict.values()) if item_dict else list()

    def deleteAll(self):
        """
        Empty the proxy pool (every entry of the useful-proxy table).

        :return: None
        """
        for proxy in self.getAll():
            self.delete(proxy)

    def refreshADSL(self, proxy):
        """
        Redial the ADSL host behind *proxy*.

        :param proxy: ``"ip:port"`` as str or bytes; only the ip part is used
        :return: None
        """
        if isinstance(proxy, bytes):
            proxy = proxy.decode('utf8')
        ip = proxy.split(':')[0]
        try:
            # Call the host's refresh endpoint to trigger a redial.
            refreshApi = "http://{ip}:8000/refresh".format(ip=ip)
            r = requests.get(refreshApi, timeout=5, verify=False)
            if r.status_code == 200:
                # The placeholder was previously printed verbatim.
                print('{proxy} refresh done'.format(proxy=proxy))
        except Exception as e:
            print(str(e))
class ProxyManager(object):
    """
    ProxyManager

    Thin facade over ``DbClient`` for the raw and useful proxy tables.
    """

    def __init__(self):
        self.db = DbClient()
        self.config = GetConfig()
        self.raw_proxy_queue = 'raw_proxy'
        self.log = LogHandler('proxy_manager')
        self.useful_proxy_queue = 'useful_proxy'

    def refresh(self):
        """
        Run every configured getter and store its proxies in the raw table.

        :return: None
        """
        for getter_name in self.config.proxy_getter_functions:
            fetch = getattr(GetFreeProxy, getter_name.strip())
            fetched = set()
            # Collect the stripped, de-duplicated proxies of this getter.
            for candidate in fetch():
                if not candidate:
                    continue
                message = '{func}: fetch proxy {proxy}'.format(
                    func=getter_name, proxy=candidate)
                self.log.info(message)
                fetched.add(candidate.strip())

            # Persist them into the raw-proxy table.
            self.db.changeTable(self.raw_proxy_queue)
            for item in fetched:
                self.db.put(item)

    def get(self):
        """
        Return one proxy from the useful table via ``db.get()``.

        :return: whatever ``db.get()`` yields
        """
        table = self.useful_proxy_queue
        self.db.changeTable(table)
        return self.db.get()

    def delete(self, proxy):
        """
        Remove *proxy* from the useful table.

        :param proxy: key passed to ``db.delete``
        :return: None
        """
        table = self.useful_proxy_queue
        self.db.changeTable(table)
        self.db.delete(proxy)

    def getAll(self):
        """
        Return everything ``db.getAll()`` yields for the useful table.

        :return: result of ``db.getAll()``
        """
        table = self.useful_proxy_queue
        self.db.changeTable(table)
        return self.db.getAll()

    def get_status(self):
        """Return ``db.get_status()`` of the raw and useful tables as a dict."""
        # TODO rename get_count..
        status = {}
        self.db.changeTable(self.raw_proxy_queue)
        status['raw_proxy'] = self.db.get_status()
        self.db.changeTable(self.useful_proxy_queue)
        status['useful_proxy'] = self.db.get_status()
        return status
class ProxyManager(object):
    """
    ProxyManager

    Works on the ``raw_proxy_test`` / ``useful_proxy_test`` tables.
    """

    def __init__(self):
        self.db = DbClient()
        self.raw_proxy_queue = 'raw_proxy_test'
        self.log = LogHandler('proxy_manager')
        self.useful_proxy_queue = 'useful_proxy_test'

    def refresh(self):
        """
        Scrape proxies from the configured getters (ProxyGetter/getFreeProxy.py)
        and store each one in the raw table with a JSON metadata value.

        :return: None
        """
        max_conn = 100
        self.db.changeTable(self.raw_proxy_queue)
        for getter_name in config.proxy_getter_functions:
            # fetch
            try:
                self.log.info(
                    "{func}: fetch proxy start".format(func=getter_name))
                fetch = getattr(GetFreeProxy, getter_name.strip())
                for candidate in fetch():
                    # Stored directly; the original author notes that the
                    # hash structure takes care of de-duplication.
                    candidate = candidate.strip()
                    if not (candidate and verifyProxyFormat(candidate)):
                        self.log.error(
                            '{func}: fetch proxy {proxy} error'.format(
                                func=getter_name, proxy=candidate))
                        continue
                    self.log.info('{func}: fetch proxy {proxy}'.format(
                        func=getter_name, proxy=candidate))
                    host, port = candidate.split(":")
                    meta = {"host": host, "port": port, "max_conn": max_conn}
                    self.db.put(candidate, json.dumps(meta))
            except Exception as err:
                self.log.error(err)
                self.log.error(
                    "{func}: fetch proxy fail".format(func=getter_name))
                continue

    def get(self):
        """
        Pick a random key from the useful table.

        :return: a key, or None when the table is empty
        """
        self.db.changeTable(self.useful_proxy_queue)
        pool = self.db.getAll()
        if not pool:
            return None
        candidates = list(pool.keys()) if EnvUtil.PY3 else pool.keys()
        return random.choice(candidates)

    def delete(self, proxy):
        """
        Remove *proxy* from the useful table.

        :param proxy: key passed to ``db.delete``
        :return: None
        """
        table = self.useful_proxy_queue
        self.db.changeTable(table)
        self.db.delete(proxy)

    def getAll(self):
        """
        List the keys of the useful table.

        :return: list of keys, empty when the table has no entries
        """
        self.db.changeTable(self.useful_proxy_queue)
        pool = self.db.getAll()
        is_py3 = EnvUtil.PY3
        if not pool:
            return list()
        keys = pool.keys()
        return list(keys) if is_py3 else keys

    def getNumber(self):
        """Return the entry counts of the raw and useful tables as a dict."""
        counts = {}
        self.db.changeTable(self.raw_proxy_queue)
        counts['raw_proxy'] = self.db.getNumber()
        self.db.changeTable(self.useful_proxy_queue)
        counts['useful_proxy'] = self.db.getNumber()
        return counts


def add_proxy(proxy, meta):
    """POST *meta* as JSON to the proxy-center entry of *proxy* and print the reply.

    Per the original comment, proxy-center adds the node and updates redis.
    """
    host, port = proxy.split(":")
    url = f'http://10.143.55.90:9381/api/proxies/{host}%3A{port}/'
    payload = json.dumps(meta)
    response = requests.post(url, data=payload)
    print(response.text)


def delete_proxy(proxy):
    """Send DELETE for *proxy* to proxy-center.

    Per the original comment, proxy-center removes the node and updates redis.
    """
    host, port = proxy.split(":")
    url = f'http://10.143.55.90:9381/api/proxies/{host}%3A{port}/'
    requests.delete(url)
class ProxyManager(object):
    """
    ProxyManager

    Fetches proxies into the raw table and serves ``Proxy`` objects built
    from the JSON items of the useful table.
    """

    def __init__(self):
        self.db = DbClient()
        self.raw_proxy_queue = 'raw_proxy'
        self.log = LogHandler('proxy_manager')
        self.useful_proxy_queue = 'useful_proxy'

    def fetch(self):
        """
        fetch proxy into db by ProxyGetter

        Proxies that are empty, fail ``verifyProxyFormat`` or were already
        seen during this call are logged and skipped. A getter that raises
        is logged and the next getter runs.

        :return: None
        """
        self.db.changeTable(self.raw_proxy_queue)
        proxy_set = set()
        self.log.info("ProxyFetch : start")
        for proxyGetter in config.proxy_getter_functions:
            self.log.info(
                "ProxyFetch - {func}: start".format(func=proxyGetter))
            try:
                for proxy in getattr(GetFreeProxy, proxyGetter.strip())():
                    proxy = proxy.strip()

                    if not proxy or not verifyProxyFormat(proxy):
                        self.log.error('ProxyFetch - {func}: '
                                       '{proxy} illegal'.format(
                                           func=proxyGetter,
                                           proxy=proxy.ljust(20)))
                        continue
                    elif proxy in proxy_set:
                        self.log.info('ProxyFetch - {func}: '
                                      '{proxy} exist'.format(
                                          func=proxyGetter,
                                          proxy=proxy.ljust(20)))
                        continue
                    else:
                        self.log.info('ProxyFetch - {func}: '
                                      '{proxy} success'.format(
                                          func=proxyGetter,
                                          proxy=proxy.ljust(20)))
                        self.db.put(Proxy(proxy, source=proxyGetter))
                        proxy_set.add(proxy)
            except Exception as e:
                self.log.error(
                    "ProxyFetch - {func}: error".format(func=proxyGetter))
                self.log.error(str(e))

    def get(self):
        """
        return a useful proxy

        :return: a ``Proxy`` built from a random item, or None when empty
        """
        self.db.changeTable(self.useful_proxy_queue)
        item_list = self.db.getAll()
        if item_list:
            random_choice = random.choice(item_list)
            return Proxy.newProxyFromJson(random_choice)
        return None

    def _random_by_scheme(self, scheme):
        """Return a random useful proxy whose URL scheme equals *scheme*.

        Candidates are filtered first, so a matching proxy is always found
        when one exists (repeated random draws could miss it).

        :param scheme: text before ``://`` in the item's ``proxy`` field
        :return: a ``Proxy``, or None when no item matches
        """
        self.db.changeTable(self.useful_proxy_queue)
        candidates = [
            item for item in (self.db.getAll() or [])
            if json.loads(item)['proxy'].split("://")[0] == scheme
        ]
        if candidates:
            return Proxy.newProxyFromJson(random.choice(candidates))
        return None

    def get_http(self):
        """
        return a http proxy

        :return: a ``Proxy`` with scheme ``http``, or None if there is none
        """
        return self._random_by_scheme('http')

    def get_socks(self):
        """
        return a useful socks proxy

        :return: a ``Proxy`` with scheme ``socks4``, or None if there is none
        """
        return self._random_by_scheme('socks4')

    def delete(self, proxy_str):
        """
        delete proxy from pool

        :param proxy_str: key to remove from the useful table
        :return: None
        """
        self.db.changeTable(self.useful_proxy_queue)
        self.db.delete(proxy_str)

    def getAll(self):
        """
        get all proxy from pool as list

        :return: list of ``Proxy`` objects built from every stored item
        """
        self.db.changeTable(self.useful_proxy_queue)
        item_list = self.db.getAll()
        return [Proxy.newProxyFromJson(_) for _ in item_list]

    def getNumber(self):
        """Return the entry counts of the raw and useful tables as a dict."""
        self.db.changeTable(self.raw_proxy_queue)
        total_raw_proxy = self.db.getNumber()
        self.db.changeTable(self.useful_proxy_queue)
        total_useful_queue = self.db.getNumber()
        return {
            'raw_proxy': total_raw_proxy,
            'useful_proxy': total_useful_queue
        }
class ProxyManager(object):
    """Fetch proxies into the raw table and serve keys of the useful table."""

    def __init__(self):
        self.db = DbClient()
        self.config = GetConfig()
        self.raw_proxy_queue = 'raw_proxy'
        self.log = LogHandler('proxy_manager')
        self.useful_proxy_queue = 'useful_proxy'

    def refresh(self):
        """
        Fetch proxy addresses and store them in the DB.

        A getter that raises is logged and skipped. Proxies already present
        in the useful table are not written to the raw table.

        :return: None
        """
        for proxyGetter in self.config.proxy_getter_functions:
            proxy_set = set()
            try:
                self.log.info("{func}:fetch proxy start".format(func=proxyGetter))
                # Materialise inside the try so errors raised while the
                # getter is iterated are caught as well.
                proxy_iter = list(getattr(GetFreeProxy, proxyGetter.strip())())
            except Exception as e:
                self.log.error("{func}:fetch proxy fail".format(func=proxyGetter))
                # Keep the cause so failures can be diagnosed from the log.
                self.log.error(str(e))
                continue

            for proxy in proxy_iter:
                proxy = proxy.strip()
                if proxy and verifyProxyFormat(proxy):
                    self.log.info("{func}:fetch proxy {proxy}".format(func=proxyGetter, proxy=proxy))
                    proxy_set.add(proxy)
                else:
                    # A rejected proxy is an error condition, not routine info.
                    self.log.error("{func}:fetch proxy {proxy} error".format(func=proxyGetter, proxy=proxy))

            # Store into the DB.
            for proxy in proxy_set:
                self.db.changeTable(self.useful_proxy_queue)
                if self.db.exists(proxy):
                    continue
                self.db.changeTable(self.raw_proxy_queue)
                self.db.put(proxy)

    def get(self):
        """
        Return one useful proxy.

        :return: a random key of the useful table, or None when it is empty
        """
        self.db.changeTable(self.useful_proxy_queue)
        item_dict = self.db.getAll()
        if item_dict:
            # list(...) yields a list of keys on both Python 2 and 3.
            return random.choice(list(item_dict.keys()))
        return None

    def delete(self, proxy):
        """Remove *proxy* from the useful table."""
        self.db.changeTable(self.useful_proxy_queue)
        self.db.delete(proxy)

    def getAll(self):
        """Return the keys of the useful table as a list (empty if none)."""
        self.db.changeTable(self.useful_proxy_queue)
        items = self.db.getAll()
        # The non-PY3 branch used to call the non-existent dict.key().
        return list(items.keys()) if items else list()

    def getNumber(self):
        """Return the entry counts of the raw and useful tables as a dict."""
        self.db.changeTable(self.raw_proxy_queue)
        total_raw_proxy = self.db.getNumber()
        self.db.changeTable(self.useful_proxy_queue)
        total_useful_proxy = self.db.getNumber()
        return {
            'raw_proxy': total_raw_proxy,
            'useful_proxy': total_useful_proxy
        }
class ProxyManager(object):
    """Fetch proxies into the raw table and read from the useful table."""

    def __init__(self):
        self.db = DbClient()
        self.raw_proxy_queue = 'raw_proxy'
        # NOTE(review): spelled `Loghandler` here; confirm this matches the
        # name the file actually imports.
        self.log = Loghandler('proxy_manager')
        self.useful_proxy_queue = 'useful_proxy'

    def refresh(self):
        """Run every configured getter and store valid proxies in the raw table."""
        self.db.changeTable(self.raw_proxy_queue)
        for getter_name in config.proxy_getter_functions:
            try:
                self.log.info(
                    "{func}: fetch proxy start".format(func=getter_name))
                fetch = getattr(GetFreeProxy, getter_name.strip())
                for candidate in fetch():
                    # A literal False from the getter is skipped silently.
                    if candidate is False:
                        continue
                    if candidate and verifyProxyFormat(candidate):
                        self.log.info("{func}: fetch proxy {proxy}".format(
                            func=getter_name, proxy=candidate))
                        self.db.put(candidate)
                        continue
                    self.log.error(
                        "{func}: fetch proxy {proxy} error".format(
                            func=getter_name, proxy=candidate))
            except Exception as err:
                self.log.error("refresh: {}".format(err))
                self.log.error(
                    "{func}: fetch proxy fail".format(func=getter_name))
                continue

    def get(self):
        """Return a random key of the useful table, or None when it is empty."""
        self.db.changeTable(self.useful_proxy_queue)
        pool = self.db.getAll()
        if not pool:
            return None
        candidates = list(pool.keys()) if EnvUtil.PY3 else pool.keys()
        return random.choice(candidates)

    def get_new(self):
        """Return ``random.choice`` of ``db.getAll()``; None when it is empty."""
        self.db.changeTable(self.useful_proxy_queue)
        pool = self.db.getAll()
        if not pool:
            return None
        return random.choice(pool)

    def delete(self, proxy):
        """Remove *proxy* from the useful table."""
        table = self.useful_proxy_queue
        self.db.changeTable(table)
        self.db.delete(proxy)

    def getAll(self):
        """Return ``db.getAll()`` for the useful table unchanged."""
        table = self.useful_proxy_queue
        self.db.changeTable(table)
        return self.db.getAll()

    def getNumber(self):
        """Return the entry counts of the raw and useful tables as a dict."""
        counts = {}
        self.db.changeTable(self.raw_proxy_queue)
        counts['raw_proxy'] = self.db.getNumber()
        self.db.changeTable(self.useful_proxy_queue)
        counts['useful_proxy'] = self.db.getNumber()
        return counts

    def getNumber_new(self):
        """Ask the DB for both counts in one call, print them and return them."""
        counts = self.db.getNumber(self.raw_proxy_queue,
                                   self.useful_proxy_queue)
        raw_total, useful_total = counts
        print('{}---,{}'.format(raw_total, useful_total))
        return raw_total, useful_total
class ProxyManager(object):
    """
    ProxyManager

    Mode-aware variant: *mode* is passed to ``DbClient`` and appears in the
    log messages written by ``refresh``.
    """

    def __init__(self, mode):
        self.mode = mode
        self.db = DbClient(mode)
        self.raw_proxy_queue = 'raw_proxy'
        self.log = LogHandler('proxy_manager')
        self.useful_proxy_queue = 'useful_proxy'

    def refresh(self):
        """
        Run the getters of ProxyGetter/getFreeProxy.py and store valid
        proxies in the raw table.

        :return: None
        """
        self.db.changeTable(self.raw_proxy_queue)
        for getter_name in config.proxy_getter_functions:
            # fetch
            try:
                self.log.info(
                    "{func}: fetch proxy start".format(func=getter_name))
                fetch = getattr(GetFreeProxy, getter_name.strip())
                for candidate in fetch():
                    # Stored directly; the original author notes that the
                    # hash structure takes care of de-duplication.
                    candidate = candidate.strip()
                    if not (candidate and verifyProxyFormat(candidate)):
                        self.log.error(
                            'Mode:{mode} {func}: fetch proxy {proxy} error'.
                            format(mode=self.mode,
                                   func=getter_name,
                                   proxy=candidate))
                        continue
                    self.log.info(
                        'Mode:{mode} {func}: fetch proxy {proxy}'.format(
                            mode=self.mode, func=getter_name,
                            proxy=candidate))
                    self.db.put(candidate)
            except Exception:
                self.log.error("Mode:{mode} {func}: fetch proxy fail".format(
                    mode=self.mode, func=getter_name))
                continue

    def _pick_useful_proxy(self):
        """Return a random key of the useful table, or None when it is empty."""
        self.db.changeTable(self.useful_proxy_queue)
        pool = self.db.getAll()
        if not pool:
            return None
        candidates = list(pool.keys()) if EnvUtil.PY3 else pool.keys()
        return random.choice(candidates)

    def get_http(self):
        """
        Pick a useful proxy (http).

        :return: a random key of the useful table, or None when empty
        """
        return self._pick_useful_proxy()

    def get_https(self):
        """
        Pick a useful proxy (https).

        :return: a random key of the useful table, or None when empty
        """
        return self._pick_useful_proxy()

    def delete(self, proxy):
        """
        Remove *proxy* from the useful table.

        :param proxy: key passed to ``db.delete``
        :return: None
        """
        table = self.useful_proxy_queue
        self.db.changeTable(table)
        self.db.delete(proxy)

    def getAll(self):
        """
        List the keys of the useful table.

        :return: list of keys, empty when the table has no entries
        """
        self.db.changeTable(self.useful_proxy_queue)
        pool = self.db.getAll()
        is_py3 = EnvUtil.PY3
        if not pool:
            return list()
        keys = pool.keys()
        return list(keys) if is_py3 else keys

    def getNumber(self):
        """Return the entry counts of the raw and useful tables as a dict."""
        counts = {}
        self.db.changeTable(self.raw_proxy_queue)
        counts['raw_proxy'] = self.db.getNumber()
        self.db.changeTable(self.useful_proxy_queue)
        counts['useful_proxy'] = self.db.getNumber()
        return counts
class ProxyManager(object):
    """
    ProxyManager

    Variant whose proxies are dict-like records with an ``'ip'`` entry; a
    ``'country'`` entry is added before a record is stored.
    """

    def __init__(self):
        self.db = DbClient()
        self.config = GetConfig()
        self.raw_proxy_queue = 'raw_proxy'
        self.log = LogHandler('proxy_manager')
        self.useful_proxy_queue = 'useful_proxy'

    def refresh(self):
        """
        Run every configured getter and store unseen proxies in the raw table.

        :return: None
        """
        for getter_name in self.config.proxy_getter_functions:
            fetch = getattr(GetFreeProxy, getter_name.strip())
            # fetch raw proxy
            for record in fetch():
                if not record:
                    continue
                self.log.info('{func}: fetch proxy {proxy}'.format(
                    func=getter_name, proxy=record['ip']))
                # Skip records whose ip is already in the useful table.
                self.db.changeTable(self.useful_proxy_queue)
                if self.db.exists(record['ip']):
                    continue
                self.db.changeTable(self.raw_proxy_queue)
                record['country'] = self.get_ip_country(record['ip'])
                self.db.put(record)

    def get_ip_country(self, ip):
        """Return ``geolite2.lookup(ip).country``, or None when there is no match."""
        match = geolite2.lookup(ip)
        if match:
            return match.country
        return None

    def get(self, filters):
        """
        Return ``db.random_one(filters)`` from the useful table.

        :param filters: passed through to ``db.random_one``
        :return: result of ``db.random_one``
        """
        table = self.useful_proxy_queue
        self.db.changeTable(table)
        return self.db.random_one(filters)

    def delete(self, proxy):
        """
        Remove *proxy* from the useful table.

        :param proxy: key passed to ``db.delete``
        :return: None
        """
        table = self.useful_proxy_queue
        self.db.changeTable(table)
        self.db.delete(proxy)

    def getAll(self):
        """
        Return ``db.getAll()`` for the useful table.

        :return: result of ``db.getAll()``
        """
        table = self.useful_proxy_queue
        self.db.changeTable(table)
        return self.db.getAll()

    def clean(self):
        """Delegate to ``db.clean()``."""
        self.db.clean()

    def getNumber(self):
        """Return the entry counts of the raw and useful tables as a dict."""
        counts = {}
        self.db.changeTable(self.raw_proxy_queue)
        counts['raw_proxy'] = self.db.getNumber()
        self.db.changeTable(self.useful_proxy_queue)
        counts['useful_proxy'] = self.db.getNumber()
        return counts
class ProxyManager(object):
    """
    ProxyManager

    Besides the shared pool, keeps one failure table per consumer name
    (``useful_proxy + '_fail_' + name``) so a proxy that failed for that
    consumer is hidden from it for 24 hours.
    """

    def __init__(self):
        self.db = DbClient()
        self.raw_proxy_queue = 'raw_proxy'
        self.log = LogHandler('proxy_manager')
        self.useful_proxy_queue = 'useful_proxy'

    def fetch(self):
        """
        fetch proxy into db by ProxyGetter

        Proxies that are empty, fail ``verifyProxyFormat`` or were already
        seen during this call are logged and skipped. A getter that raises
        is logged and the next getter runs.

        :return: None
        """
        self.db.changeTable(self.raw_proxy_queue)
        proxy_set = set()
        self.log.info("ProxyFetch : start")
        for proxyGetter in config.proxy_getter_functions:
            self.log.info(
                "ProxyFetch - {func}: start".format(func=proxyGetter))
            try:
                for proxy in getattr(GetFreeProxy, proxyGetter.strip())():
                    proxy = proxy.strip()

                    if not proxy or not verifyProxyFormat(proxy):
                        self.log.error('ProxyFetch - {func}: '
                                       '{proxy} illegal'.format(
                                           func=proxyGetter,
                                           proxy=proxy.ljust(20)))
                        continue
                    elif proxy in proxy_set:
                        self.log.info('ProxyFetch - {func}: '
                                      '{proxy} exist'.format(
                                          func=proxyGetter,
                                          proxy=proxy.ljust(20)))
                        continue
                    else:
                        self.log.info('ProxyFetch - {func}: '
                                      '{proxy} success'.format(
                                          func=proxyGetter,
                                          proxy=proxy.ljust(20)))
                        self.db.put(Proxy(proxy, source=proxyGetter))
                        proxy_set.add(proxy)
            except Exception as e:
                self.log.error(
                    "ProxyFetch - {func}: error".format(func=proxyGetter))
                self.log.error(str(e))

    def get(self):
        """
        return a useful proxy

        :return: a ``Proxy`` built from a random item, or None when empty
        """
        self.db.changeTable(self.useful_proxy_queue)
        item_list = self.db.getAll()
        if item_list:
            random_choice = random.choice(item_list)
            return Proxy.newProxyFromJson(random_choice)
        return None

    def delete(self, proxy_str):
        """
        delete proxy from pool

        :param proxy_str: key to remove from the useful table
        :return: None
        """
        self.db.changeTable(self.useful_proxy_queue)
        self.db.delete(proxy_str)

    def getAll(self):
        """
        get all proxy from pool as list

        :return: list of ``Proxy`` objects built from every stored item
        """
        self.db.changeTable(self.useful_proxy_queue)
        item_list = self.db.getAll()
        return [Proxy.newProxyFromJson(_) for _ in item_list]

    def getNumber(self):
        """Return the entry counts of the raw and useful tables as a dict."""
        self.db.changeTable(self.raw_proxy_queue)
        total_raw_proxy = self.db.getNumber()
        self.db.changeTable(self.useful_proxy_queue)
        total_useful_queue = self.db.getNumber()
        return {
            'raw_proxy': total_raw_proxy,
            'useful_proxy': total_useful_queue
        }

    def getAllByName(self, name):
        """Return the useful proxies that did not fail for *name* recently.

        A proxy is hidden when the failure table of *name* holds an entry
        for it whose ``last_time`` is less than 24 hours old.

        :param name: consumer name selecting the failure table
        :return: list of ``Proxy`` objects
        """
        all_proxies = self.getAll()
        self.db.changeTable(self.useful_proxy_queue + '_fail_' + name)
        fail_list = self.db.getAll()

        # Group failures by proxy once so each pool entry is a dict lookup
        # rather than a scan over the whole failure list.
        failures_by_proxy = {}
        for item in fail_list:
            failed = Proxy.newProxyFromJson(item)
            failures_by_proxy.setdefault(failed.proxy, []).append(failed)

        now = datetime.now()
        cooldown = timedelta(hours=24)

        def failed_recently(proxy):
            # Timestamps are parsed only for entries matching this proxy.
            return any(
                datetime.strptime(failed.last_time, "%Y-%m-%d %H:%M:%S")
                + cooldown > now
                for failed in failures_by_proxy.get(proxy.proxy, ()))

        return [proxy for proxy in all_proxies if not failed_recently(proxy)]

    def deleteByName(self, name, proxy):
        """Record *proxy* as failed for *name*, stamped with the current time.

        :param name: consumer name selecting the failure table
        :param proxy: proxy value stored in the new ``Proxy`` record
        :return: None
        """
        failed_proxy = Proxy(
            proxy=proxy,
            last_time=datetime.now().strftime("%Y-%m-%d %H:%M:%S"))
        self.db.changeTable(self.useful_proxy_queue + '_fail_' + name)
        self.db.put(failed_proxy)

    def getByName(self, name):
        """Return a random proxy from ``getAllByName(name)``, or None if empty."""
        proxies = self.getAllByName(name)
        if proxies:
            return random.choice(proxies)
        return None
class ProxyManager(object):
    """
    ProxyManager

    Fetches proxies into the raw table and serves keys of the useful table.
    """

    def __init__(self):
        self.db = DbClient()
        self.config = GetConfig()
        self.raw_proxy_queue = 'raw_proxy'
        self.log = LogHandler('proxy_manager')
        self.useful_proxy_queue = 'useful_proxy'

    def refresh(self):
        """
        Run every configured getter and store unseen proxies in the raw table.

        :return: None
        """
        for getter_name in self.config.proxy_getter_functions:
            fetch = getattr(GetFreeProxy, getter_name.strip())
            fetched = set()
            # fetch raw proxy
            for candidate in fetch():
                if not candidate:
                    continue
                self.log.info('{func}: fetch proxy {proxy}'.format(func=getter_name, proxy=candidate))
                fetched.add(candidate.strip())

            # store raw proxy unless it is already in the useful table
            for item in fetched:
                self.db.changeTable(self.useful_proxy_queue)
                if not self.db.exists(item):
                    self.db.changeTable(self.raw_proxy_queue)
                    self.db.put(item)

    def get(self):
        """
        Pick a random key from the useful table.

        :return: a key, or None when the table is empty
        """
        self.db.changeTable(self.useful_proxy_queue)
        pool = self.db.getAll()
        if not pool:
            return None
        candidates = list(pool.keys()) if EnvUtil.PY3 else pool.keys()
        return random.choice(candidates)

    def delete(self, proxy):
        """
        Remove *proxy* from the useful table.

        :param proxy: key passed to ``db.delete``
        :return: None
        """
        table = self.useful_proxy_queue
        self.db.changeTable(table)
        self.db.delete(proxy)

    def getAll(self):
        """
        List the keys of the useful table.

        :return: list of keys, empty when the table has no entries
        """
        self.db.changeTable(self.useful_proxy_queue)
        pool = self.db.getAll()
        is_py3 = EnvUtil.PY3
        if not pool:
            return list()
        keys = pool.keys()
        return list(keys) if is_py3 else keys

    def getNumber(self):
        """Return the entry counts of the raw and useful tables as a dict."""
        counts = {}
        self.db.changeTable(self.raw_proxy_queue)
        counts['raw_proxy'] = self.db.getNumber()
        self.db.changeTable(self.useful_proxy_queue)
        counts['useful_proxy'] = self.db.getNumber()
        return counts
class ProxyManager(object):
    """
    ProxyManager

    Collects proxies through the configured getters and exposes the keys of
    the useful table.
    """

    def __init__(self):
        self.db = DbClient()
        self.config = GetConfig()
        self.raw_proxy_queue = 'raw_proxy'
        self.log = LogHandler('proxy_manager')
        self.useful_proxy_queue = 'useful_proxy'

    def refresh(self):
        """
        Call each configured getter and put its new proxies in the raw table.

        :return: None
        """
        for name in self.config.proxy_getter_functions:
            getter = getattr(GetFreeProxy, name.strip())
            seen = set()
            # fetch raw proxy
            for raw in getter():
                if raw:
                    text = '{func}: fetch proxy {proxy}'.format(
                        func=name, proxy=raw)
                    self.log.info(text)
                    seen.add(raw.strip())

            # store raw proxy
            for entry in seen:
                self.db.changeTable(self.useful_proxy_queue)
                already_useful = self.db.exists(entry)
                if already_useful:
                    continue
                self.db.changeTable(self.raw_proxy_queue)
                self.db.put(entry)

    def get(self):
        """
        Choose one key of the useful table at random.

        :return: the chosen key, or None if the table is empty
        """
        self.db.changeTable(self.useful_proxy_queue)
        entries = self.db.getAll()
        if entries:
            keys = entries.keys()
            if EnvUtil.PY3:
                keys = list(keys)
            return random.choice(keys)
        return None

    def delete(self, proxy):
        """
        Drop *proxy* from the useful table.

        :param proxy: key handed to ``db.delete``
        :return: None
        """
        self.db.changeTable(self.useful_proxy_queue)
        self.db.delete(proxy)

    def getAll(self):
        """
        Return all keys of the useful table.

        :return: list of keys; an empty list when nothing is stored
        """
        self.db.changeTable(self.useful_proxy_queue)
        entries = self.db.getAll()
        if EnvUtil.PY3:
            if entries:
                return list(entries.keys())
            return list()
        if entries:
            return entries.keys()
        return list()

    def getNumber(self):
        """Return the entry counts of the raw and useful tables as a dict."""
        self.db.changeTable(self.raw_proxy_queue)
        raw_total = self.db.getNumber()
        self.db.changeTable(self.useful_proxy_queue)
        useful_total = self.db.getNumber()
        return dict(raw_proxy=raw_total, useful_proxy=useful_total)
class ProxyManager(object):
    """
    ProxyManager

    Thin facade over ``DbClient`` for the ``raw_proxy`` and
    ``useful_proxy_queue`` tables.
    """

    def __init__(self):
        self.db = DbClient()
        self.config = GetConfig()
        self.raw_proxy_queue = 'raw_proxy'
        self.useful_proxy_queue = 'useful_proxy_queue'

    def refresh(self):
        """
        Run every configured getter and store its proxies in the raw table.

        :return: None
        """
        for getter_name in self.config.proxy_getter_functions:
            fetch = getattr(GetFreeProxy, getter_name.strip())
            fetched = set()
            # fetch raw proxy, dropping entries that are blank once stripped
            for candidate in fetch():
                cleaned = candidate.strip()
                if cleaned:
                    fetched.add(cleaned)

            # store raw proxy
            self.db.changeTable(self.raw_proxy_queue)
            for item in fetched:
                self.db.put(item)

    def get(self):
        """
        Return one proxy from the useful table via ``db.get()``.

        :return: whatever ``db.get()`` yields
        """
        table = self.useful_proxy_queue
        self.db.changeTable(table)
        return self.db.get()

    def delete(self, proxy):
        """
        Remove *proxy* from the useful table.

        :param proxy: key passed to ``db.delete``
        :return: None
        """
        table = self.useful_proxy_queue
        self.db.changeTable(table)
        self.db.delete(proxy)

    def getAll(self):
        """
        Return everything ``db.getAll()`` yields for the useful table.

        :return: result of ``db.getAll()``
        """
        table = self.useful_proxy_queue
        self.db.changeTable(table)
        return self.db.getAll()

    def get_status(self):
        """Return ``db.get_status()`` of the raw and useful tables as a dict."""
        status = {}
        self.db.changeTable(self.raw_proxy_queue)
        status['raw_proxy'] = self.db.get_status()
        self.db.changeTable(self.useful_proxy_queue)
        status['useful_proxy_queue'] = self.db.get_status()
        return status
class ProxyManager(object):
    """
    ProxyManager

    Besides the usual pool operations, ``referProxy`` can replace a dead
    proxy with a working one located in the same city.
    """

    def __init__(self):
        self.db = DbClient()
        self.config = GetConfig()
        self.raw_proxy_queue = 'raw_proxy'
        self.log = LogHandler('proxy_manager')
        self.useful_proxy_queue = 'useful_proxy'

    def refresh(self):
        """
        fetch proxy into Db by ProxyGetter

        A getter that raises is logged and skipped. Proxies already present
        in the useful table are not written to the raw table.

        :return: None
        """
        for proxyGetter in self.config.proxy_getter_functions:
            # fetch
            proxy_set = set()
            try:
                self.log.info(
                    "{func}: fetch proxy start".format(func=proxyGetter))
                # Materialise inside the try so errors raised while the
                # getter is iterated are caught as well.
                proxy_iter = list(
                    getattr(GetFreeProxy, proxyGetter.strip())())
            except Exception as e:
                self.log.error(
                    "{func}: fetch proxy fail".format(func=proxyGetter))
                # Keep the cause so failures can be diagnosed from the log.
                self.log.error(str(e))
                continue
            for proxy in proxy_iter:
                proxy = proxy.strip()
                if proxy and verifyProxyFormat(proxy):
                    self.log.info('{func}: fetch proxy {proxy}'.format(
                        func=proxyGetter, proxy=proxy))
                    proxy_set.add(proxy)
                else:
                    self.log.error('{func}: fetch proxy {proxy} error'.format(
                        func=proxyGetter, proxy=proxy))

            # store
            for proxy in proxy_set:
                self.db.changeTable(self.useful_proxy_queue)
                if self.db.exists(proxy):
                    continue
                self.db.changeTable(self.raw_proxy_queue)
                self.db.put(proxy)

    def get(self):
        """
        return a useful proxy

        :return: a random key of the useful table, or None when it is empty
        """
        self.db.changeTable(self.useful_proxy_queue)
        item_dict = self.db.getAll()
        if item_dict:
            # list(...) yields a list of keys on both Python 2 and 3.
            return random.choice(list(item_dict.keys()))
        return None

    def delete(self, proxy):
        """
        delete proxy from pool

        :param proxy: key to remove from the useful table
        :return: None
        """
        self.db.changeTable(self.useful_proxy_queue)
        self.db.delete(proxy)

    def getAll(self):
        """
        get all proxy from pool as list

        :return: list of keys in the useful table (empty list if none)
        """
        self.db.changeTable(self.useful_proxy_queue)
        item_dict = self.db.getAll()
        return list(item_dict.keys()) if item_dict else list()

    def validateProxy(self, proxy):
        """Return the result of ``validUsefulProxy(proxy)``."""
        return validUsefulProxy(proxy)

    def referProxy(self, proxy, city):
        """Return *proxy* if it still works, else a working one in the same city.

        When *proxy* is given but fails validation, *city* is replaced by
        element ``[1]`` of that proxy's location info. Up to 14 random pool
        proxies are then tried.

        :param proxy: the caller's current proxy, or None
        :param city: city to match when *proxy* is None
        :return: a proxy key, or None when no candidate matched
        """
        print(proxy, city)
        if proxy is not None:
            if self.validateProxy(proxy):
                return proxy
            old_proxy_info = GetProxyLocInfo(proxy)
            old_proxy_loc = old_proxy_info.get_proxy_loc_info()
            city = old_proxy_loc[1]

        # Same number of attempts as the former `count = 1 .. < 15` loop.
        for _ in range(1, 15):
            tmp_proxy = self.get()
            print(tmp_proxy)
            if tmp_proxy is None:
                # Empty pool: nothing to validate on this attempt.
                continue
            if self.validateProxy(tmp_proxy):
                try:
                    tmp_proxy_info = GetProxyLocInfo(tmp_proxy)
                    tmp_proxy_loc = tmp_proxy_info.get_proxy_loc_info()
                    if tmp_proxy_loc[1] == city:
                        return tmp_proxy
                except Exception:
                    # Best-effort lookup: try the next candidate, but do not
                    # swallow KeyboardInterrupt/SystemExit as bare except did.
                    pass
        return None

    def getNumber(self):
        """Return the entry counts of the raw and useful tables as a dict."""
        self.db.changeTable(self.raw_proxy_queue)
        total_raw_proxy = self.db.getNumber()
        self.db.changeTable(self.useful_proxy_queue)
        total_useful_queue = self.db.getNumber()
        return {
            'raw_proxy': total_raw_proxy,
            'useful_proxy': total_useful_queue
        }
class ProxyManager(object):
    """
    ProxyManager

    Facade over ``self.db`` that works with two tables: ``raw_proxy``
    (freshly fetched candidates) and ``useful_proxy`` (the pool that
    ``get``/``getAll``/``delete`` operate on). The getter class used by
    ``refresh`` is resolved at run time from ``config.proxy_getter_lib``.
    """

    def __init__(self):
        self.db = DbClient()
        self.raw_proxy_queue = 'raw_proxy'
        self.log = LogHandler('proxy_manager')
        self.useful_proxy_queue = 'useful_proxy'

    @staticmethod
    def __dynamic_import__(name):
        """
        Resolve a dotted path such as ``pkg.module.Class`` to an object.

        The first component is imported; every further component is looked
        up with ``getattr``. If that lookup fails, the dotted prefix is
        imported as a sub-module and the lookup is retried, so paths through
        sub-modules that nothing has imported yet resolve as well.

        :param name: dotted path
        :return: the module, class or attribute the path points to
        :raises ImportError: when a component is neither an attribute nor an
                             importable sub-module
        """
        import importlib

        components = name.split('.')
        path = components[0]
        obj = importlib.import_module(path)
        for comp in components[1:]:
            path = '{0}.{1}'.format(path, comp)
            try:
                obj = getattr(obj, comp)
            except AttributeError:
                # Importing a sub-module binds it on its parent package,
                # which makes the retried getattr succeed.
                importlib.import_module(path)
                obj = getattr(obj, comp)
        return obj

    def refresh(self):
        """
        Fetch proxies through the user-defined getter class and store them.

        The class is loaded from ``config.proxy_getter_lib``. For each name
        in ``config.proxy_getter_functions`` the same-named attribute of that
        class is called; entries that are non-empty after stripping and pass
        ``verifyProxyFormat`` are put into the raw table. A getter that
        raises is logged and skipped.

        :return: None
        :raises Exception: when the getter class cannot be loaded
        """
        self.db.changeTable(self.raw_proxy_queue)
        try:
            proxy_getter_class = self.__dynamic_import__(
                config.proxy_getter_lib)
        except Exception as e:
            # Record the underlying cause before raising the generic error
            # (no ``raise ... from`` to stay Python 2 compatible).
            self.log.error('load {lib} fail, error: {error}'.format(
                lib=config.proxy_getter_lib, error=e))
            raise Exception('%s not found in ProxyGetter' %
                            config.proxy_getter_lib)

        for proxyGetter in config.proxy_getter_functions:
            # fetch
            try:
                self.log.info(
                    "{func}: fetch proxy start".format(func=proxyGetter))
                for proxy in getattr(proxy_getter_class,
                                     proxyGetter.strip())():
                    # Store the proxy directly; no de-duplication is needed
                    # in code because the hash structure de-duplicates on
                    # its own.
                    proxy = proxy.strip()
                    if proxy and verifyProxyFormat(proxy):
                        self.log.info('{func}: fetch proxy {proxy}'.format(
                            func=proxyGetter, proxy=proxy))
                        self.db.put(proxy)
                    else:
                        self.log.error(
                            '{func}: fetch proxy {proxy} error'.format(
                                func=proxyGetter, proxy=proxy))
            except Exception as e:
                # Keep the cause in the log; it used to be dropped silently.
                self.log.error(
                    "{func}: fetch proxy fail, error: {error}".format(
                        func=proxyGetter, error=e))
                continue

    def get(self):
        """
        Return a randomly chosen key of the useful table.

        :return: a proxy key, or None when ``self.db.getAll()`` is empty
        """
        self.db.changeTable(self.useful_proxy_queue)
        item_dict = self.db.getAll()
        if not item_dict:
            return None
        # list(...) is required on Python 3 and harmless on Python 2, so no
        # interpreter-specific branch is needed.
        return random.choice(list(item_dict.keys()))

    def delete(self, proxy):
        """
        Delete a proxy from the useful table.

        :param proxy: key to delete
        :return: None
        """
        self.db.changeTable(self.useful_proxy_queue)
        self.db.delete(proxy)

    def getAll(self):
        """
        Return all keys of the useful table as a list.

        :return: list of proxy keys, empty when the table is empty
        """
        self.db.changeTable(self.useful_proxy_queue)
        item_dict = self.db.getAll()
        return list(item_dict.keys()) if item_dict else []

    def getNumber(self):
        """
        Return ``self.db.getNumber()`` for the raw and the useful table.

        :return: dict with keys 'raw_proxy' and 'useful_proxy'
        """
        self.db.changeTable(self.raw_proxy_queue)
        total_raw_proxy = self.db.getNumber()
        self.db.changeTable(self.useful_proxy_queue)
        total_useful_queue = self.db.getNumber()
        return {
            'raw_proxy': total_raw_proxy,
            'useful_proxy': total_useful_queue
        }
class ProxyManager(object):
    """
    ProxyManager

    Facade over ``self.db`` that works with two tables: ``raw_proxy``
    (freshly fetched candidates) and ``useful_proxy`` (the pool that
    ``get``/``getAll``/``delete`` operate on).
    """

    def __init__(self):
        self.db = DbClient()
        self.config = GetConfig()
        self.raw_proxy_queue = 'raw_proxy'
        self.log = LogHandler('proxy_manager')
        self.useful_proxy_queue = 'useful_proxy'

    def refresh(self):
        """
        Fetch proxies through every configured getter and store them.

        For each name in ``self.config.proxy_getter_functions`` the
        same-named attribute of ``GetFreeProxy`` is called. Entries that are
        non-empty after stripping and pass ``verifyProxyFormat`` are put into
        the raw table as they arrive. A getter that raises is logged and
        skipped; entries it produced before failing stay stored.

        :return: None
        """
        self.db.changeTable(self.raw_proxy_queue)
        for proxyGetter in self.config.proxy_getter_functions:
            # fetch
            try:
                self.log.info(
                    "{func}: fetch proxy start".format(func=proxyGetter))
                for proxy in getattr(GetFreeProxy, proxyGetter.strip())():
                    proxy = proxy.strip()
                    if proxy and verifyProxyFormat(proxy):
                        self.log.info('{func}: fetch proxy {proxy}'.format(
                            func=proxyGetter, proxy=proxy))
                        self.db.put(proxy)
                    else:
                        self.log.error(
                            '{func}: fetch proxy {proxy} error'.format(
                                func=proxyGetter, proxy=proxy))
            except Exception as e:
                # Keep the cause in the log; it used to be dropped silently.
                self.log.error(
                    "{func}: fetch proxy fail, error: {error}".format(
                        func=proxyGetter, error=e))
                continue

    def get(self):
        """
        Return a randomly chosen key of the useful table.

        :return: a proxy key, or None when ``self.db.getAll()`` is empty
        """
        self.db.changeTable(self.useful_proxy_queue)
        item_dict = self.db.getAll()
        if not item_dict:
            return None
        # list(...) is required on Python 3 and harmless on Python 2, so no
        # interpreter-specific branch is needed.
        return random.choice(list(item_dict.keys()))

    def delete(self, proxy):
        """
        Delete a proxy from the useful table.

        :param proxy: key to delete
        :return: None
        """
        self.db.changeTable(self.useful_proxy_queue)
        self.db.delete(proxy)

    def getAll(self):
        """
        Return all keys of the useful table as a list.

        :return: list of proxy keys, empty when the table is empty
        """
        self.db.changeTable(self.useful_proxy_queue)
        item_dict = self.db.getAll()
        return list(item_dict.keys()) if item_dict else []

    def getNumber(self):
        """
        Return ``self.db.getNumber()`` for the raw and the useful table.

        :return: dict with keys 'raw_proxy' and 'useful_proxy'
        """
        self.db.changeTable(self.raw_proxy_queue)
        total_raw_proxy = self.db.getNumber()
        self.db.changeTable(self.useful_proxy_queue)
        total_useful_queue = self.db.getNumber()
        return {
            'raw_proxy': total_raw_proxy,
            'useful_proxy': total_useful_queue
        }
class ProxyManager(object):
    """
    Thin facade over ``self.db`` for the raw and the useful proxy pools.

    Most methods forward straight to the same-named ``self.db`` method;
    ``refresh`` pulls candidates from the getters listed in the
    "ProxyGetter" config section.
    """

    raw_proxy_name = "raw_proxy"
    useful_proxy_name = "useful_proxy"

    def __init__(self):
        self.db = DbClient()
        self.raw_proxy_queue = 'raw_proxy'
        self.useful_proxy_queue = 'useful_proxy'

    def refresh(self):
        """
        Run every getter named in the "ProxyGetter" config section.

        Each yielded entry is stripped; it is saved as a raw proxy when it is
        non-empty, passes ``verifyProxyFormat`` and is not yet known to
        ``checkRawProxyExists``. Otherwise it is counted as a failure.
        Per-getter totals are logged; a getter that raises is logged and
        skipped.
        """
        for getter_name in config.cf.options("ProxyGetter"):
            try:
                log.info(
                    "Fetch Proxy Start, func:{func}".format(func=getter_name))
                total = succ = fail = 0
                fetch = getattr(GetFreeProxy, getter_name.strip())
                for raw in fetch():
                    candidate = raw.strip()
                    rejected = (not candidate
                                or not verifyProxyFormat(candidate)
                                or self.checkRawProxyExists(candidate))
                    if rejected:
                        fail += 1
                        log.error('{func}: fetch proxy {proxy} error'.format(
                            func=getter_name, proxy=candidate))
                    else:
                        self.saveRawProxy(candidate)
                        succ += 1
                        log.debug('{func}: fetch proxy {proxy}'.format(
                            func=getter_name, proxy=candidate))
                    total += 1
                summary = "fetch proxy end, func:{func}, total:{total}, succ:{succ} fail:{fail}"
                log.info(summary.format(
                    func=getter_name, total=total, succ=succ, fail=fail))
            except Exception as e:
                failure = "func_name:{func_name} fetch proxy fail, error:{error}"
                log.error(failure.format(func_name=getter_name, error=e))
                continue

    def get(self):
        """
        Return a random key of the useful table, or None when it is empty.

        The pick and the number of candidates are logged at debug level.
        """
        self.db.changeTable(self.useful_proxy_queue)
        pool = self.db.getAll()
        candidates = []
        if pool:
            candidates = list(pool.keys()) if EnvUtil.PY3 else pool.keys()
        chosen = random.choice(candidates) if candidates else None
        log.debug('Get Random Proxy {item} of {total}'.format(
            item=chosen, total=len(candidates)))
        return chosen

    def getAll(self):
        """Return the keys of the useful table (empty list when empty)."""
        self.db.changeTable(self.useful_proxy_queue)
        pool = self.db.getAll()
        if not pool:
            return list()
        keys = pool.keys()
        return list(keys) if EnvUtil.PY3 else keys

    def getAllUsefulProxy(self):
        """Forward to ``self.db.getAllUsefulProxy``."""
        return self.db.getAllUsefulProxy()

    def getAllRawProxy(self):
        """Forward to ``self.db.getAllRawProxy``."""
        return self.db.getAllRawProxy()

    def checkRawProxyExists(self, proxy):
        """Forward to ``self.db.checkRawProxyExists``."""
        return self.db.checkRawProxyExists(proxy)

    def getSampleRawProxy(self):
        """Forward to ``self.db.getSampleRawProxy``."""
        return self.db.getSampleRawProxy()

    def getQualityProxy(self, **kwargs):
        """
        Return the "proxy" field of ``self.db.getQualityProxy(**kwargs)``.

        When an item is found and a truthy ``token`` keyword was passed, the
        token is registered through ``self.db.addProxyUsedToken``. Returns
        None when no item is found.
        """
        record = self.db.getQualityProxy(**kwargs)
        proxy = None
        if record:
            proxy = record["proxy"]
            token = kwargs.get("token")
            if token:
                self.db.addProxyUsedToken(proxy, token)
        log.debug("getQualityProxy, item:{item}".format(item=str(record)))
        return proxy

    def getSampleProxy(self, **kwargs):
        """
        Return the "proxy" field of ``self.db.getSampleUsefulProxy(**kwargs)``.

        Returns None when no item is found.
        """
        record = self.db.getSampleUsefulProxy(**kwargs)
        proxy = record["proxy"] if record else None
        log.debug("getSampleUsefulProxy, item:{item}".format(item=str(record)))
        return proxy

    # Slated for removal
    def getSampleUsefulProxy(self, **kwargs):
        """
        Like ``getSampleProxy`` but also registers a truthy ``token`` keyword
        through ``self.db.addProxyUsedToken`` when an item is found.
        """
        record = self.db.getSampleUsefulProxy(**kwargs)
        proxy = None
        if record:
            proxy = record["proxy"]
            token = kwargs.get("token")
            if token:
                self.db.addProxyUsedToken(proxy, token)
        log.debug("getSampleUsefulProxy, item:{item}".format(item=str(record)))
        return proxy

    def deleteRawProxy(self, proxy):
        """Forward to ``self.db.deleteRawProxy``."""
        self.db.deleteRawProxy(proxy)

    def saveRawProxy(self, proxy):
        """Forward to ``self.db.saveRawProxy``."""
        self.db.saveRawProxy(proxy)

    # TODO: the logic here is probably flawed, but that is not certain.
    # https is only recorded once http is usable; could there be proxies
    # that only offer https?
    def saveUsefulProxy(self, proxy, https=False):
        """
        Save ``proxy`` as useful with zeroed counters and the ``https`` flag.
        """
        record = dict(proxy=proxy, succ=0, fail=0, total=0, https=https)
        self.db.saveUsefulProxy(proxy, record)

    def deleteUsefulProxy(self, proxy):
        """Forward to ``self.db.deleteUsefulProxy``."""
        self.db.deleteUsefulProxy(proxy)

    def tickUsefulProxyVaildSucc(self, proxy):
        """Forward to ``self.db.tickUsefulProxyVaildSucc``."""
        self.db.tickUsefulProxyVaildSucc(proxy)

    def tickUsefulProxyVaildFail(self, proxy):
        """Forward to ``self.db.tickUsefulProxyVaildFail``."""
        self.db.tickUsefulProxyVaildFail(proxy)

    def tickUsefulProxyVaildTotal(self, proxy):
        """Forward to ``self.db.tickUsefulProxyVaildTotal``."""
        self.db.tickUsefulProxyVaildTotal(proxy)

    def tickRawProxyVaildSucc(self, proxy):
        """Forward to ``self.db.tickRawProxyVaildSucc``."""
        self.db.tickRawProxyVaildSucc(proxy)

    def tickRawProxyVaildFail(self, proxy):
        """Forward to ``self.db.tickRawProxyVaildFail``."""
        self.db.tickRawProxyVaildFail(proxy)

    def tickRawProxyVaildTotal(self, proxy):
        """Forward to ``self.db.tickRawProxyVaildTotal``."""
        self.db.tickRawProxyVaildTotal(proxy)

    def getProxyNumber(self):
        """
        Return the raw and useful proxy counts.

        :return: dict with keys 'raw_proxy' and 'useful_proxy'
        """
        raw_count = self.getRawProxyNumber()
        useful_count = self.getUsefulProxyNumber()
        return {'raw_proxy': raw_count, 'useful_proxy': useful_count}

    def getRawProxyNumber(self):
        """Return ``self.db.getProxyNum`` for ``raw_proxy_name``."""
        return self.db.getProxyNum(self.raw_proxy_name)

    def getUsefulProxyNumber(self):
        """Return ``self.db.getProxyNum`` for ``useful_proxy_name``."""
        return self.db.getProxyNum(self.useful_proxy_name)