import random

import chardet
import requests

from config import NUM_RETRIES, TIMEOUT
from util.util_sql import SQLManager


def download(self, url):
    # Fetch url directly; on failure, retry through random proxies from the pool.
    try:
        print("Downloading %s" % url)
        r = requests.get(url=url, headers=self.get_header(), timeout=TIMEOUT)
        r.encoding = chardet.detect(r.content)["encoding"]
        # Treat HTTP errors and suspiciously short bodies as failures.
        if (not r.ok) or len(r.content) < 300:
            raise ConnectionError
        return r.text
    except Exception:
        count = 0  # retry count
        proxy_list = SQLManager().select(10)
        if not proxy_list:
            return None
        while count < NUM_RETRIES:
            try:
                proxy = random.choice(proxy_list)
                ip, port = proxy[0], proxy[1]
                proxies = {
                    "http": "http://%s:%s" % (ip, port),
                    "https": "http://%s:%s" % (ip, port),
                }
                r = requests.get(url=url, headers=self.get_header(),
                                 timeout=TIMEOUT, proxies=proxies)
                r.encoding = chardet.detect(r.content)["encoding"]
                # Note: proxied retries use a stricter 500-byte floor than the direct attempt.
                if (not r.ok) or len(r.content) < 500:
                    raise ConnectionError
                return r.text
            except Exception:
                count += 1
        return None
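# For context: a minimal sketch of the class this method appears to live on,
# assuming a get_header() helper that rotates User-Agent strings. The class
# name, the helper, and the agent list are hypothetical, not project source.
import random


class Downloader:
    USER_AGENTS = [
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64)",
        "Mozilla/5.0 (X11; Linux x86_64)",
    ]

    def get_header(self):
        # A rotating User-Agent makes repeated scrapes look less uniform.
        return {"User-Agent": random.choice(self.USER_AGENTS)}

# With download() attached to such a class, usage would be roughly:
#     html = Downloader().download("http://example.com")  # text on success, None on failure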
def refresh():
    # Drop and recreate the proxy table, emptying the pool.
    SQLManager().drop()
    SQLManager().create()
    return "Refresh Success!"
def delete(ip):
    # Remove a single proxy from the pool by IP.
    if SQLManager().delete(ip):
        return "Delete Success!"
    else:
        return "Nothing Deleted!"
def get_more():
    # Return up to 100 proxies from the pool.
    return str(SQLManager().select(100))
def get():
    # Return up to 20 proxies matching the query-string filters.
    conditions = {}
    for key, value in request.args.items():
        conditions[key] = value
    return str(SQLManager().select(count=20, conditions=conditions))
def index():
    # Home page: dump up to 50 proxies.
    return str(SQLManager().select(50))
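# The view functions above presumably hang off a web framework router; the
# request.args usage in get() matches Flask's API. A minimal registration
# sketch under that assumption; the route paths, bind address, and port are
# illustrative, not taken from the project.
from flask import Flask

app = Flask(__name__)
app.add_url_rule("/", view_func=index)
app.add_url_rule("/get", view_func=get)
app.add_url_rule("/get_more", view_func=get_more)
app.add_url_rule("/delete/<ip>", view_func=delete)
app.add_url_rule("/refresh", view_func=refresh)


def web_run():
    app.run(host="0.0.0.0", port=5000)  # bind address/port are assumptions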
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# Date: 18-12-18
from multiprocessing import Process, Queue

from config import MAX_CHECK, MAX_DOWNLOAD
from spider.spider import start_spider
from util.util_sql import SQLManager
from util.validator import start_check
from web.web_server import web_run

if __name__ == "__main__":  # guard required so child processes can import this module safely
    SQLManager().create()

    # q0 hands freshly scraped proxies from the spider to the validator.
    # q1 and q2 are created with bounded sizes but are not wired to any process here.
    q0 = Queue()
    q1 = Queue(maxsize=MAX_DOWNLOAD)
    q2 = Queue(maxsize=MAX_CHECK)

    p1 = Process(target=web_run)
    p2 = Process(target=start_spider, args=(q0,))
    p3 = Process(target=start_check, args=(q0,))

    p1.start()
    p2.start()
    p3.start()

    p1.join()
    p2.join()
    p3.join()
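# The q0 handoff implies a producer/consumer contract between start_spider and
# start_check. A minimal sketch of that contract, assuming proxies travel as
# dicts (inferred from check() setting proxy["score"]); both function bodies
# below are hypothetical stand-ins, not the project implementations.
from multiprocessing import Queue


def spider_sketch(q):
    # Producer: push each scraped proxy onto the shared queue.
    for ip, port in [("1.2.3.4", "8080"), ("5.6.7.8", "3128")]:  # sample data
        q.put({"ip": ip, "port": port})
    q.put(None)  # sentinel: tells the consumer there is no more work


def check_sketch(q):
    # Consumer: pull proxies until the sentinel arrives, validating each one.
    while True:
        proxy = q.get()
        if proxy is None:
            break
        print("would validate %s:%s" % (proxy["ip"], proxy["port"]))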
def check(self, proxy):
    # Give a proxy that passes the liveness test a starting score and store it.
    if self.live(proxy):
        proxy["score"] = 10
        SQLManager().insert(proxy)
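# live() is not shown in this section. A minimal sketch of one plausible
# liveness probe, assuming the proxy dict carries "ip" and "port" keys; the
# test URL and timeout are illustrative choices, not from the project.
import requests

TEST_URL = "http://httpbin.org/ip"


def live_sketch(proxy, timeout=5):
    # A proxy counts as alive if a request routed through it returns a non-error status.
    address = "http://%s:%s" % (proxy["ip"], proxy["port"])
    try:
        r = requests.get(TEST_URL, proxies={"http": address, "https": address},
                         timeout=timeout)
        return r.ok
    except requests.RequestException:
        return False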