def valid_raw_Proxy(self):
    """Validate proxies in raw_proxy_queue and move usable ones to useful_proxy_queue.

    :return: None
    """
    self.db.changeTable(self.raw_proxy_queue)
    # each popped item is a dict: {"proxy": proxy, "value": value}
    raw_proxy_item = self.db.pop()
    self.log.info('ProxyRefreshSchedule: %s start validProxy' % time.ctime())
    # Snapshot of proxies already in the pool, used to skip re-validating duplicates
    remaining_proxies = self.getAll()
    while raw_proxy_item:
        raw_proxy = raw_proxy_item.get('proxy')
        meta = raw_proxy_item.get('value')
        if isinstance(raw_proxy, bytes):
            # Py3 compatibility: decode bytes to str before comparing/validating
            raw_proxy = raw_proxy.decode('utf8')
        if (raw_proxy not in remaining_proxies) and validUsefulProxy(raw_proxy):
            self.db.changeTable(self.useful_proxy_queue)
            # self.db.put(raw_proxy, meta)
            # SECURITY NOTE(review): eval() on DB-stored metadata executes arbitrary
            # code if the store is ever tampered with — consider ast.literal_eval.
            ProxyManager.add_proxy(raw_proxy, eval(meta))
            self.log.info('ProxyRefreshSchedule: %s validation pass' % raw_proxy)
        else:
            self.log.info('ProxyRefreshSchedule: %s validation fail' % raw_proxy)
        # Switch back to the raw table before popping the next candidate
        self.db.changeTable(self.raw_proxy_queue)
        raw_proxy_item = self.db.pop()
        remaining_proxies = self.getAll()
    self.log.info('ProxyRefreshSchedule: %s validProxy complete' % time.ctime())
def __init__(self, queue, item_dict, check_urls):
    """Worker thread that validates proxies pulled from *queue* against *check_urls*."""
    ProxyManager.__init__(self)
    Thread.__init__(self)
    self.queue = queue
    self.item_dict = item_dict
    self.check_urls = check_urls
    # file=False: several threads writing one log file is unsafe
    self.log = LogHandler('proxy_check', file=False)
def __init__(self, mode):
    """Refresh scheduler for the given proxy *mode*."""
    ProxyManager.__init__(self, mode)
    self.refresh_log = LogHandler('refresh_schedule')
    # file=False: avoid concurrent writes to a single log file
    self.log = LogHandler('proxy_check', file=False)
    self.queue = Queue()
    self.proxy_item = self.item_dict = None
    self.timeout = 15
def __init__(self):
    """Load every raw proxy into a work queue for the checker threads."""
    ProxyManager.__init__(self)
    self.remaining_proxies = self.getAll()
    self.queue = Queue()
    # Enqueue each raw-proxy key for processing
    for raw_proxy in self.getAllRawProxy().keys():
        self.queue.put(raw_proxy)
def get():
    """Return a proxy, or u'NULL' when the useful pool is below the minimum size."""
    proxy = ProxyManager().get()
    status = ProxyManager().get_status()
    useful_count = int(status.pop('useful_proxy'))
    # Refuse to hand out proxies when the pool is nearly exhausted
    return u'NULL' if useful_count < mini_proxy_num else proxy
def delete():
    """Delete the given proxy from the pool; requires a matching ?token= parameter."""
    if request.args.get('token') != host_token:
        return 'token required'
    ProxyManager().delete(request.args.get('proxy'))
    return 'success'
def nameDelete():
    """Delete *proxy* under *name*; both query parameters are required."""
    name = request.args.get('name')
    proxy = request.args.get('proxy')
    # Guard clause: both parameters must be present and non-empty
    if name and proxy:
        ProxyManager().deleteByName(name, proxy)
        return {"code": 0, "src": "success"}
    return 'error'
def run(self):
    """Drain the work queue, validating each useful proxy and deleting failures.

    Runs until the queue is empty, marking each item done for join() callers.
    """
    self.db.changeTable(self.useful_proxy_queue)
    while self.queue.qsize():
        try:
            # BUG FIX: with a blocking get() the Empty handler was unreachable,
            # and a worker racing another thread for the last item would block
            # forever after the qsize() check. get_nowait() makes the intended
            # "queue drained" exit actually work.
            proxy = self.queue.get_nowait()
        except Empty:
            break
        if validUsefulProxy(proxy):
            self.log.info(f'ProxyCheck: {proxy} validation pass')
        else:
            self.log.info(
                f'ProxyCheck: {proxy} validation fail, delete it from useful_proxy!'
            )
            # self.db.delete(proxy)
            ProxyManager.delete_proxy(proxy)
        self.queue.task_done()
def get():
    """Return a proxy matching the optional 'foreign' / 'anony' filter params."""
    # BUG FIX: the original stored this value in `country` but then tested
    # `foreign`, raising NameError on every request.
    foreign = request.args.get('foreign')
    anony = request.args.get('anony')
    filters = {}
    if foreign:
        filters['foreign'] = foreign
    if anony:
        filters['anony'] = anony
    proxy = ProxyManager().get(filters)
    return jsonify(proxy) if proxy else 'no proxy!'
def __init__(self):
    """Delegate initialisation to ProxyManager; no extra state of its own."""
    ProxyManager.__init__(self)
def getAll():
    """Return the 20 most recently added proxies as JSON."""
    proxies = ProxyManager().getAll()
    # Keep only the trailing 20 entries: the most recently updated proxies
    latest_proxies = list(proxies)[-20:]
    return jsonify(latest_proxies)
def __init__(self):
    """Refresh scheduler with its own dedicated log handler."""
    ProxyManager.__init__(self)
    self.log = LogHandler('refresh_schedule')
def getAll():
    """Return every proxy currently in the pool."""
    return ProxyManager().getAll()
def getStatus():
    """Return the pool's proxy counts."""
    return ProxyManager().getNumber()
def getStatus():
    """Return the pool's proxy counts as a JSON response."""
    return jsonify(ProxyManager().getNumber())
def __init__(self):
    """Validation scheduler with its own dedicated log handler."""
    ProxyManager.__init__(self)
    self.log = LogHandler('valid_schedule')
def getStatus_http():
    """Return proxy counts for the HTTP pool."""
    return ProxyManager('http').getNumber()
def getAll():
    """Return every proxy in the pool as a JSON response."""
    return jsonify(ProxyManager().getAll())
def get_https():
    """Return an HTTPS proxy, or the string 'no proxy' when none is available."""
    https_proxy = ProxyManager('https').get_https()
    return https_proxy or 'no proxy'
def get():
    """Return a proxy, or the string 'no proxy!' when none is available."""
    return ProxyManager().get() or 'no proxy!'
def __init__(self):
    """Validation scheduler: dedicated log handler plus its own DB client."""
    ProxyManager.__init__(self)
    self.log = LogHandler('valid_schedule')
    self.db = DbClient()
def __init__(self, queue, item_dict):
    """Worker thread that checks proxies taken from *queue*."""
    ProxyManager.__init__(self)
    Thread.__init__(self)
    self.queue = queue
    self.item_dict = item_dict
    # file=False: several threads writing one log file is unsafe
    self.log = LogHandler('proxy_check', file=False)
def get():
    """Return a proxy from the pool (may be falsy when empty)."""
    return ProxyManager().get()
def __init__(self):
    """Checker state: an empty work queue and a proxy-item mapping."""
    ProxyManager.__init__(self)
    self.proxy_item = {}
    self.queue = Queue()
def refresh():
    """Trigger a proxy refresh.

    TODO: refresh already runs on a daemon schedule; invoking it directly via
    the API performs poorly, so this endpoint is not used for now.
    """
    ProxyManager().refresh()
    return 'success'
def refresh():
    """Trigger a proxy refresh and report success."""
    ProxyManager().refresh()
    return 'success'
def getAll_https():
    """Return every proxy in the HTTPS pool."""
    return ProxyManager('https').getAll()
def __init__(self):
    """Thread-based proxy checker with its own log handler."""
    ProxyManager.__init__(self)
    Thread.__init__(self)
    self.log = LogHandler('proxy_check')
def get_proxy():
    """Return a proxy from the pool, or None when it is empty."""
    return ProxyManager().get() or None
def delete_proxy(proxy):
    """Remove *proxy* from the pool."""
    ProxyManager().delete(proxy)
def delete():
    """Delete the proxy named by the ?proxy= query parameter."""
    ProxyManager().delete(request.args.get('proxy'))
    return 'success'
def get_useful_proxy():
    """Return the count of currently useful proxies."""
    return ProxyManager().getUsefulNumber()