def run(self):
    """
    Drain ``self.queue`` and validate every raw proxy taken from it.

    Items are fetched without blocking; the worker stops as soon as the
    queue raises ``Empty``. A proxy that passes ``checkProxyUseful`` is
    stored with ``self.db.put`` unless ``self.db.exists`` already reports
    it. Each processed item is acknowledged with ``self.queue.task_done``.
    :return:
    """
    self.log.info("RawProxyCheck - {} : start".format(self.name))
    self.db.changeTable(self.useful_proxy_queue)

    while True:
        try:
            raw_item = self.queue.get(block=False)
        except Empty:
            # Nothing left to validate: log and leave the loop.
            self.log.info("RawProxyCheck - {} : exit".format(self.name))
            break

        candidate, passed = checkProxyUseful(
            Proxy.newProxyFromJson(raw_item), self.origin_ips)

        if not passed:
            self.log.info(
                'RawProxyCheck - {} : {} validation fail'.format(
                    self.name, candidate.proxy.ljust(20)))
        elif self.db.exists(candidate.proxy):
            # Already stored: report it, do not write it again.
            self.log.info(
                'RawProxyCheck - {} : {} validation exists'.format(
                    self.name, candidate.proxy.ljust(20)))
        else:
            self.db.put(candidate)
            self.log.info(
                'RawProxyCheck - {} : {} validation pass'.format(
                    self.name, candidate.proxy.ljust(20)))

        self.queue.task_done()
def run(self):
    """
    Re-check proxies taken from ``self.queue`` against the useful-proxy table.

    Items are fetched without blocking until the queue raises ``Empty``.
    Each item is parsed with ``Proxy.newProxyFromJson`` and checked with
    ``checkProxyUseful``. The proxy is written back with ``self.db.put``
    when the check passes or its ``fail_count`` is still below
    ``FAIL_COUNT``; otherwise it is removed with ``self.db.delete``.

    NOTE(review): the statement nesting below was reconstructed from a
    flattened source line. It is assumed that the ``exists`` test guards
    only its log message and that ``put`` always runs on the keep path -
    confirm against the original file.
    :return:
    """
    self.log.info("UsefulProxyCheck - {} : start".format(self.name))
    self.db.changeTable(self.useful_proxy_queue)
    while True:
        try:
            # Non-blocking get: an empty queue ends this worker.
            proxy_str = self.queue.get(block=False)
        except Empty:
            self.log.info("UsefulProxyCheck - {} : exit".format(
                self.name))
            break
        proxy_obj = Proxy.newProxyFromJson(proxy_str)
        proxy_obj, status = checkProxyUseful(proxy_obj)
        # Keep the proxy while it passes or its fail_count is below FAIL_COUNT.
        if status or proxy_obj.fail_count < FAIL_COUNT:
            if self.db.exists(proxy_obj.proxy):
                self.log.info(
                    'UsefulProxyCheck - {} : {} validation exists'.format(
                        self.name, proxy_obj.proxy.ljust(20)))
            # Write the object returned by checkProxyUseful back to the table.
            self.db.put(proxy_obj)
            self.log.info(
                'UsefulProxyCheck - {} : {} validation pass'.format(
                    self.name, proxy_obj.proxy.ljust(20)))
        else:
            self.log.info(
                'UsefulProxyCheck - {} : {} validation fail'.format(
                    self.name, proxy_obj.proxy.ljust(20)))
            self.db.delete(proxy_obj.proxy)
        # Acknowledge the item taken from the queue.
        self.queue.task_done()
def getAll(self):
    """
    load every item of the useful proxy table and convert each one
    with Proxy.newProxyFromJson
    :return: list of converted proxies, in the order the db returned them
    """
    self.db.changeTable(self.useful_proxy_queue)
    proxies = []
    for raw_item in self.db.getAll():
        proxies.append(Proxy.newProxyFromJson(raw_item))
    return proxies
def get(self):
    """
    pick one item at random from the useful proxy table
    :return: the chosen item converted with Proxy.newProxyFromJson,
             or None when the table yields nothing
    """
    self.db.changeTable(self.useful_proxy_queue)
    candidates = self.db.getAll()
    if not candidates:
        return None
    picked = random.choice(candidates)
    return Proxy.newProxyFromJson(picked)
def testProxyClass():
    """
    Print a Proxy's info_dict, its JSON form, and the info_dict of the
    Proxy rebuilt from that JSON, for manual inspection.
    :return:
    """
    sample = Proxy("127.0.0.1:8080")
    print(sample.info_dict)

    sample.source = "test"
    serialized = json.dumps(sample.info_dict, ensure_ascii=False)
    print(serialized)

    restored = Proxy.newProxyFromJson(serialized)
    print(restored.info_dict)
def get_http(self):
    """
    return a random http proxy from the useful proxy table

    Every stored item is inspected and the random pick is made among the
    http entries only, so an http proxy is always returned when at least
    one is stored. (Sampling the whole table with replacement could miss
    it and return None by chance.)
    :return: the chosen item converted with Proxy.newProxyFromJson,
             or None when the table holds no http proxy
    """
    self.db.changeTable(self.useful_proxy_queue)
    # The scheme is whatever precedes "://" in the item's 'proxy' field.
    http_items = [
        item for item in (self.db.getAll() or ())
        if json.loads(item)['proxy'].split("://")[0] == 'http'
    ]
    if not http_items:
        return None
    return Proxy.newProxyFromJson(random.choice(http_items))
def getAllByName(self, name):
    """
    get the useful proxies that have no failure recorded for ``name``
    within the last 24 hours

    Failure records are read from the table named
    ``<useful_proxy_queue>_fail_<name>``; the db is left pointing at that
    table when this method returns.
    :param name: suffix that selects the failure table
    :return: list of proxies from self.getAll(), original order kept
    """
    all_proxies = self.getAll()
    self.db.changeTable(self.useful_proxy_queue + '_fail_' + name)

    # Index failure timestamps by proxy so each pool entry needs one dict
    # lookup instead of a scan over the whole failure list.
    fail_times = {}
    for item in self.db.getAll():
        failed = Proxy.newProxyFromJson(item)
        fail_times.setdefault(failed.proxy, []).append(failed.last_time)

    # A failure counts while last_time + 24h is still in the future, i.e.
    # while last_time is later than this cutoff.
    cutoff = datetime.now() - timedelta(hours=24)

    def failed_recently(proxy):
        # Timestamps are parsed lazily, only for proxies present in the pool.
        return any(
            datetime.strptime(stamp, "%Y-%m-%d %H:%M:%S") > cutoff
            for stamp in fail_times.get(proxy.proxy, ())
        )

    return [proxy for proxy in all_proxies if not failed_recently(proxy)]
def get_all_proxy(self):
    """
    load every item of the table named by self.asdl_proxy_queue and
    convert each one with Proxy.newProxyFromJson
    :return: list of converted proxies, in the order the db returned them
    """
    self.db.changeTable(self.asdl_proxy_queue)
    proxies = []
    for raw_item in self.db.getAll():
        proxies.append(Proxy.newProxyFromJson(raw_item))
    return proxies