class ProxyFactory: def __init__(self): self.db = RedisClient(config.NAME, config.HOST, config.PORT, config.PASSWORD) def get_proxy(self): res = self.db.get() proxies = {"http": "http://{proxy}".format(proxy=res)} return proxies def del_proxy(self, proxies): key = proxies['http'].split("//")[1] print(key) return self.db.delete(key)
class ProxyValidator: def __init__(self): self.proxy_queue = Queue() self.logger = logger self.html_request = HtmlRequest() self.db = RedisClient(config.NAME, config.HOST, config.PORT, config.PASSWORD) def start_valid(self, thread_num=10): thread_list = [] for i in range(thread_num): thread_list.append(Thread(target=self.vaild, name="check_proxy_thread-%d" % i)) for thread in thread_list: thread.daemon = True thread.start() for thread in thread_list: thread.join() def vaild(self): while not self.proxy_queue.empty(): proxy = self.proxy_queue.get() if not self.check(proxy): self.logger.info("invalid proxy %s", proxy) self.db.delete(proxy) self.proxy_queue.task_done() def run(self): self.init_queue() while True: if not self.proxy_queue.empty(): self.logger.info("start valid proxy...") self.start_valid() else: self.logger.info("valid complete! wait next valid") time.sleep(60 * 10) self.init_queue() def init_queue(self): for item in self.db.get_all(): self.proxy_queue.put(item) def check(self, proxy): proxies = {"http": "http://{proxy}".format(proxy=proxy)} try: # 超过20秒的代理就不要了 headers = { 'Host': 'kyfw.12306.cn', 'Referer': 'https://kyfw.12306.cn/otn/leftTicket/init', } r = self.html_request.get(config.CHECK_TARGET, header=headers, proxies=proxies) # r = requests.get(url=config.CHECK_TARGET, headers=headers, proxies=proxies, timeout=10, verify=False) if r.status_code == 200: logger.info('%s is ok' % proxy) return True except Exception as e: logger.error(str(e)) return False