def __init__(self, check_type, queue, thread_name):
    Thread.__init__(self, name=thread_name)
    self.type = check_type
    self.log = LogHandler("checker")
    self.proxy_handler = ProxyHandler()
    self.queue = queue
    self.conf = ConfigHandler()
def __init__(self, work_type, target_queue, thread_name):
    Thread.__init__(self, name=thread_name)
    self.work_type = work_type
    self.log = LogHandler("checker")
    self.proxy_handler = ProxyHandler()
    self.target_queue = target_queue
    self.conf = ConfigHandler()
def __init__(self, fetch_source, proxy_dict):
    Thread.__init__(self)
    self.fetch_source = fetch_source
    self.proxy_dict = proxy_dict
    self.fetcher = getattr(ProxyFetcher, fetch_source, None)
    self.log = LogHandler("fetcher")
    self.conf = ConfigHandler()
    self.proxy_handler = ProxyHandler()
class _ThreadFetcher(Thread):

    def __init__(self, fetch_source, proxy_dict):
        Thread.__init__(self)
        self.fetch_source = fetch_source
        self.proxy_dict = proxy_dict
        self.fetcher = getattr(ProxyFetcher, fetch_source, None)
        self.log = LogHandler("fetcher")
        self.conf = ConfigHandler()
        self.proxy_handler = ProxyHandler()

    def run(self):
        self.log.info("ProxyFetch - {func}: start".format(func=self.fetch_source))
        try:
            for proxy in self.fetcher():
                self.log.info('ProxyFetch - %s: %s ok' % (self.fetch_source, proxy.ljust(23)))
                proxy = proxy.strip()
                if proxy in self.proxy_dict:
                    self.proxy_dict[proxy].add_source(self.fetch_source)
                else:
                    self.proxy_dict[proxy] = Proxy(proxy, source=self.fetch_source)
        except Exception as e:
            self.log.error("ProxyFetch - {func}: error".format(func=self.fetch_source))
            self.log.error(str(e))
def runScheduler():
    _runProxyFetch()

    timezone = ConfigHandler().timezone
    scheduler_log = LogHandler("scheduler")
    scheduler = BlockingScheduler(logger=scheduler_log, timezone=timezone)

    scheduler.add_job(_runProxyFetch, 'interval', minutes=4, id="proxy_fetch", name="proxy fetch")
    scheduler.add_job(_runProxyCheck, 'interval', minutes=2, id="proxy_check", name="proxy check")

    executors = {
        'default': {'type': 'threadpool', 'max_workers': 20},
        'processpool': ProcessPoolExecutor(max_workers=5)
    }
    job_defaults = {'coalesce': False, 'max_instances': 10}

    scheduler.configure(executors=executors, job_defaults=job_defaults, timezone=timezone)

    scheduler.start()
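# Hedged usage sketch (assumption, not from the source): BlockingScheduler.start()
# blocks the calling thread until shutdown, so a dedicated scheduler process would
# simply call runScheduler() as its entry point.
if __name__ == '__main__':
    runScheduler()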
def runDownScheduler():
    """
    Download scheduler: runs the download job at a random interval of 1-5 hours.
    :return:
    """
    timezone = ConfigHandler().timezone
    scheduler_log = LogHandler("schedule")
    scheduler = BackgroundScheduler(logger=scheduler_log, timezone=timezone)

    intlTime = random.randint(1, 5)
    # Pass the callable and its argument separately; calling execDown(...) here would
    # schedule its return value instead of the function. The interval trigger expects
    # the plural keyword "hours".
    scheduler.add_job(execDown, 'interval', hours=intlTime,
                      args=[random.randint(5, 30)],
                      id="down_url", name="url download")

    executors = {
        'default': {'type': 'threadpool', 'max_workers': 20},
        'processpool': ProcessPoolExecutor(max_workers=5)
    }
    job_defaults = {'coalesce': False, 'max_instances': 10}

    scheduler.configure(executors=executors, job_defaults=job_defaults, timezone=timezone)

    scheduler.start()
def test(self):
    log = LogHandler('ssdb_client')
    try:
        self.getCount()
    except TimeoutError as e:
        log.error('ssdb connection time out: %s' % str(e), exc_info=True)
        return e
    except ConnectionError as e:
        log.error('ssdb connection error: %s' % str(e), exc_info=True)
        return e
    except ResponseError as e:
        log.error('ssdb connection error: %s' % str(e), exc_info=True)
        return e
class Fetcher(object):
    name = "fetcher"

    def __init__(self):
        self.log = LogHandler(self.name)
        self.conf = ConfigHandler()

    def run(self):
        """
        fetch proxy with proxyFetcher
        :return:
        """
        proxy_dict = dict()
        thread_list = list()
        self.log.info("ProxyFetch : start")

        for fetch_source in self.conf.fetchers:
            self.log.info("ProxyFetch - {func}: start".format(func=fetch_source))
            fetcher = getattr(ProxyFetcher, fetch_source, None)
            if not fetcher:
                self.log.error("ProxyFetch - {func}: class method not exists!".format(func=fetch_source))
                continue
            if not callable(fetcher):
                self.log.error("ProxyFetch - {func}: must be class method".format(func=fetch_source))
                continue
            thread_list.append(_ThreadFetcher(fetch_source, proxy_dict))

        for thread in thread_list:
            thread.setDaemon(True)
            thread.start()

        for thread in thread_list:
            thread.join()

        self.log.info("ProxyFetch - all complete!")
        for _ in proxy_dict.values():
            if DoValidator.preValidator(_.proxy):
                yield _
class Fetcher(object):
    name = "fetcher"

    def __init__(self):
        self.log = LogHandler(self.name)
        self.conf = ConfigHandler()
        self.proxy_handler = ProxyHandler()

    def fetch(self):
        """
        fetch proxy with proxyFetcher
        :return:
        """
        proxy_set = set()
        self.log.info("ProxyFetch : start")

        for fetch_name in self.conf.fetchers:
            self.log.info("ProxyFetch - {func}: start".format(func=fetch_name))
            fetcher = getattr(ProxyFetcher, fetch_name, None)
            if not fetcher:
                self.log.error("ProxyFetch - {func}: class method not exists!".format(func=fetch_name))
                continue
            if not callable(fetcher):
                self.log.error("ProxyFetch - {func}: must be class method".format(func=fetch_name))
                continue
            try:
                for proxy in fetcher():
                    if proxy in proxy_set:
                        self.log.info('ProxyFetch - %s: %s exist' % (fetch_name, proxy.ljust(23)))
                        continue
                    else:
                        self.log.info('ProxyFetch - %s: %s success' % (fetch_name, proxy.ljust(23)))
                        if proxy.strip():
                            proxy_set.add(proxy)
            except Exception as e:
                self.log.error("ProxyFetch - {func}: error".format(func=fetch_name))
                self.log.error(str(e))

        self.log.info("ProxyFetch - all complete!")
        return proxy_set
class Fetcher(object):
    name = "fetcher"

    def __init__(self):
        self.log = LogHandler(self.name)
        self.conf = ConfigHandler()
        self.proxy_handler = ProxyHandler()
        self.loadIp()

    def loadIp(self):
        # Load the pure-IP (qqwry) database for proxy geolocation if it is present.
        if os.path.isfile("qqwry.dat"):
            self.ip = QQwry()
            self.ip.load_file('qqwry.dat')
        else:
            self.ip = False

    def fetch(self):
        """
        fetch proxy into db with proxyFetcher
        :return:
        """
        proxy_set = set()
        self.log.info("ProxyFetch : start")

        for fetch_name in self.conf.fetchers:
            self.log.info("ProxyFetch - {func}: start".format(func=fetch_name))
            fetcher = getattr(ProxyFetcher, fetch_name, None)
            if not fetcher:
                self.log.error("ProxyFetch - {func}: class method not exists!".format(func=fetch_name))
                continue
            if not callable(fetcher):
                self.log.error("ProxyFetch - {func}: must be class method".format(func=fetch_name))
                continue
            try:
                for proxy in fetcher():
                    if proxy in proxy_set:
                        self.log.info('ProxyFetch - %s: %s exist' % (fetch_name, proxy.ljust(23)))
                        continue
                    else:
                        self.log.info('ProxyFetch - %s: %s success' % (fetch_name, proxy.ljust(23)))
                        if proxy.strip():
                            if self.ip:
                                area = " ".join(self.ip.lookup(proxy.split(':')[0]))
                            else:
                                self.loadIp()
                                area = ''
                            proxy_set.add((proxy, fetch_name, area))
            except Exception as e:
                self.log.error("ProxyFetch - {func}: error".format(func=fetch_name))
                self.log.error(str(e))

        self.log.info("ProxyFetch - all complete!")
        return proxy_set
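# Hedged sketch (assumption, not from the source): how the qqwry lookup used above
# behaves on its own. QQwry.lookup() returns a (country, area) tuple of strings for a
# known IP, or None when the address is not found, so joining it with " " produces the
# location text stored alongside each proxy. The import path and dat-file location are
# assumptions for illustration.
from qqwry import QQwry

q = QQwry()
q.load_file('qqwry.dat')              # the dat file must exist locally
record = q.lookup('114.114.114.114')  # example IP
area = " ".join(record) if record else ''
print(area)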
class WebRequest(object):
    name = "web_request"

    def __init__(self, *args, **kwargs):
        self.log = LogHandler(self.name, file=False)
        self.response = Response()

    @property
    def user_agent(self):
        """
        return an User-Agent at random
        :return:
        """
        ua_list = [
            'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/30.0.1599.101',
            'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.122',
            'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.71',
            'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95',
            'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/21.0.1180.71',
            'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; QQDownload 732; .NET4.0C; .NET4.0E)',
            'Mozilla/5.0 (Windows NT 5.1; U; en; rv:1.8.1) Gecko/20061208 Firefox/2.0.0 Opera 9.50',
            'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:34.0) Gecko/20100101 Firefox/34.0',
        ]
        return random.choice(ua_list)

    @property
    def header(self):
        """
        basic header
        :return:
        """
        return {
            'User-Agent': self.user_agent,
            'Accept': '*/*',
            'Connection': 'keep-alive',
            'Accept-Language': 'zh-CN,zh;q=0.8'
        }

    def get(self, url, header=None, retry_time=3, retry_interval=5, timeout=5, *args, **kwargs):
        """
        get method
        :param url: target url
        :param header: headers
        :param retry_time: retry time
        :param retry_interval: retry interval
        :param timeout: network timeout
        :return:
        """
        headers = self.header
        if header and isinstance(header, dict):
            headers.update(header)
        while True:
            try:
                self.response = requests.get(url, headers=headers, timeout=timeout, *args, **kwargs)
                return self
            except Exception as e:
                self.log.error("requests: %s error: %s" % (url, str(e)))
                retry_time -= 1
                if retry_time <= 0:
                    # retries exhausted: expose an empty placeholder Response instead of raising
                    resp = Response()
                    resp.status_code = 200
                    self.response = resp
                    return self
                self.log.info("retry %s second after" % retry_interval)
                time.sleep(retry_interval)

    @property
    def tree(self):
        return etree.HTML(self.response.content)

    @property
    def text(self):
        return self.response.text

    @property
    def json(self):
        try:
            return self.response.json()
        except Exception as e:
            self.log.error(str(e))
            return {}
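# Hedged usage sketch (assumption, not from the source): the URL below is only an
# example. WebRequest.get() returns the wrapper itself, so the parsed lxml tree, raw
# text and JSON body are read through its properties after the call.
r = WebRequest().get("http://httpbin.org/get", timeout=10)
print(r.response.status_code)   # underlying requests.Response
print(r.json)                   # {} if the body is not valid JSON
# r.tree gives an lxml element for XPath queries on HTML responses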
def testLogHandler():
    log = LogHandler('test')
    log.info('this is info')
    log.error('this is error')
def __init__(self):
    self.log = LogHandler(self.name)
    self.conf = ConfigHandler()
Description : Launcher
Author : JHao
date: 2021/3/26
-------------------------------------------------
Change Activity:
    2021/3/26: Launcher
-------------------------------------------------
"""
__author__ = 'JHao'

import sys

from db.dbClient import DbClient
from handler.logHandler import LogHandler
from handler.configHandler import ConfigHandler

log = LogHandler('launcher')


def startServer():
    __beforeStart()
    from api.proxyApi import runFlask
    runFlask()


def startScheduler():
    __beforeStart()
    from helper.scheduler import runScheduler
    runScheduler()


def __beforeStart():
def setUp(self):
    self.verifier = IcmpRouteVerifier()
    self.handlers = [LogHandler()]
    self.route = Route('0.0.0.0', '192.168.1.1', '0.0.0.0', 'eth0',
                       ['8.8.8.8', '8.8.4.4'], 10,
                       self.verifier, {'timeout': 2, 'maxRetry': 3},
                       self.handlers, [{}])
def testCreateRoutesFromProperties(self):
    properties = {'route.primary.gateway': '192.168.1.1',
                  'route.primary.iface': 'eth0',
                  'route.primary.targets': '8.8.8.8,8.8.4.4',
                  'route.primary.verifierDelay': '10',
                  'route.primary.verifier': '../resources/icmpRouteVerifier.pkl',
                  'route.primary.verifierKwargs': "{'timeout':1, 'maxRetry':2}",
                  'route.primary.handlers': '["../resources/logHandler.pkl","../resources/logHandler.pkl"]',
                  'route.primary.handlerKwargs': "[{},{}]",
                  'route.fona.gateway': '0.0.0.0',
                  'route.fona.iface': 'ppp0'}

    routes = createRoutesFromProperties(properties)
    assert 2 == len(routes)

    primary = routes[0]
    assert '0.0.0.0' == primary.destination
    assert '192.168.1.1' == primary.gateway
    assert '0.0.0.0' == primary.genmask
    assert 'eth0' == primary.iface
    assert ['8.8.8.8', '8.8.4.4'] == primary.targets
    assert 10 == primary.verifierDelay
    assert IcmpRouteVerifier() == primary.verifier
    assert {'timeout': 1, 'maxRetry': 2} == primary.verifierKwargs
    assert [LogHandler(), LogHandler()] == primary.handlers
    assert [{}, {}] == primary.handlerKwargs

    primaryTargetRoutes = primary.getTargetRoutes()
    assert 2 == len(primaryTargetRoutes)

    primaryTargetRoute1 = primaryTargetRoutes[0]
    assert '8.8.8.8' == primaryTargetRoute1.destination
    assert '192.168.1.1' == primaryTargetRoute1.gateway
    assert '255.255.255.255' == primaryTargetRoute1.genmask
    assert 'eth0' == primaryTargetRoute1.iface
    assert 0 == len(primaryTargetRoute1.targets)
    assert 0 == primaryTargetRoute1.verifierDelay
    assert None == primaryTargetRoute1.verifier
    assert {} == primaryTargetRoute1.verifierKwargs
    assert [] == primaryTargetRoute1.handlers
    assert [] == primaryTargetRoute1.handlerKwargs

    primaryTargetRoute2 = primaryTargetRoutes[1]
    assert '8.8.4.4' == primaryTargetRoute2.destination
    assert '192.168.1.1' == primaryTargetRoute2.gateway
    assert '255.255.255.255' == primaryTargetRoute2.genmask
    assert 'eth0' == primaryTargetRoute2.iface
    assert 0 == len(primaryTargetRoute2.targets)
    assert 0 == primaryTargetRoute2.verifierDelay
    assert None == primaryTargetRoute2.verifier
    assert {} == primaryTargetRoute2.verifierKwargs
    assert [] == primaryTargetRoute2.handlers
    assert [] == primaryTargetRoute2.handlerKwargs

    fona = routes[1]
    assert '0.0.0.0' == fona.destination
    assert '0.0.0.0' == fona.gateway
    assert '0.0.0.0' == fona.genmask
    assert 'ppp0' == fona.iface
    assert 0 == len(fona.targets)
    assert 0 == fona.verifierDelay
    assert None == fona.verifier
    assert {} == fona.verifierKwargs
    assert [] == fona.handlers
    assert [] == fona.handlerKwargs
class WebRequest(object):
    name = "web_request"

    def __init__(self, *args, **kwargs):
        self.log = LogHandler(self.name, file=False)
        self.response = Response()

    @property
    def user_agent(self):
        """
        return an User-Agent at random
        :return:
        """
        ua_list = [
            'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/30.0.1599.101',
            'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.122',
            'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.71',
            'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95',
            'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/21.0.1180.71',
            'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; QQDownload 732; .NET4.0C; .NET4.0E)',
            'Mozilla/5.0 (Windows NT 5.1; U; en; rv:1.8.1) Gecko/20061208 Firefox/2.0.0 Opera 9.50',
            'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:34.0) Gecko/20100101 Firefox/34.0',
            "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36",
            "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2227.1 Safari/537.36",
            "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2227.0 Safari/537.36",
            "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2227.0 Safari/537.36",
            "Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2226.0 Safari/537.36",
            "Mozilla/5.0 (Windows NT 6.4; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2225.0 Safari/537.36",
            "Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2225.0 Safari/537.36",
            "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2224.3 Safari/537.36",
            "Mozilla/5.0 (Windows NT 10.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/40.0.2214.93 Safari/537.36",
            "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/37.0.2062.124 Safari/537.36",
            "Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/37.0.2049.0 Safari/537.36",
            "Mozilla/5.0 (Windows NT 4.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/37.0.2049.0 Safari/537.36",
            "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/36.0.1985.67 Safari/537.36",
            "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/36.0.1985.67 Safari/537.36",
            "Mozilla/5.0 (X11; OpenBSD i386) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/36.0.1985.125 Safari/537.36",
            "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/36.0.1944.0 Safari/537.36",
            "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.3319.102 Safari/537.36",
            "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.2309.372 Safari/537.36",
            "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.2117.157 Safari/537.36",
            "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.47 Safari/537.36",
        ]
        return random.choice(ua_list)

    @property
    def header(self):
        """
        basic header
        :return:
        """
        return {
            'User-Agent': self.user_agent,
            'Accept': '*/*',
            'Connection': 'keep-alive',
            'Accept-Language': 'zh-CN,zh;q=0.9'
        }

    def get(self, url, proxies=None, retry_time=3, retry_interval=3, timeout=5, *args, **kwargs):
        """
        get method
        :param url: target url
        :param proxies: proxies dict passed to requests
        :param retry_time: retry time
        :param retry_interval: retry interval
        :param timeout: network timeout
        :return:
        """
        # headers = self.header
        # if header and isinstance(header, dict):
        #     headers.update(header)
        # proxies = None
        # if useProxies:
        #     proxies = {'http': MAINPROXY, 'https': MAINPROXY}
        while True:
            try:
                self.response = requests.get(url, proxies=proxies, headers=self.header,
                                             timeout=timeout, *args, **kwargs)
                return self
            except Exception as e:
                self.log.error("requests: %s error: %s" % (url, str(e)))
                retry_time -= 1
                if retry_time <= 0:
                    # resp = Response()
                    # resp.status_code = 500
                    self.response.status_code = 500
                    return self
                self.log.info("retry %s second after" % retry_interval)
                time.sleep(retry_interval)

    @property
    def tree(self):
        if self.response.status_code == 200:
            return etree.HTML(self.response.content)

    @property
    def text(self):
        return self.response.text
import requests
import random
import re
from re import findall
from urllib.parse import urlparse

from handler.configHandler import ConfigHandler
from handler.logHandler import LogHandler
from helper.proxy import Proxy
from setting import VERIFY_URL
from util.webRequest import WebRequest

conf = ConfigHandler()

validators = []

logger = LogHandler("validators")


def validator(func):
    validators.append(func)
    return func


@validator
def formatValidator(proxy):
    """
    Check the proxy format
    :param proxy:
    :return:
    """
    return True
class _ThreadChecker(Thread):
    """ Multi-threaded proxy check """

    def __init__(self, work_type, target_queue, thread_name):
        Thread.__init__(self, name=thread_name)
        self.work_type = work_type
        self.log = LogHandler("checker")
        self.proxy_handler = ProxyHandler()
        self.target_queue = target_queue
        self.conf = ConfigHandler()

    def run(self):
        self.log.info("{}ProxyCheck - {}: start".format(self.work_type.title(), self.name))
        while True:
            try:
                proxy = self.target_queue.get(block=False)
            except Empty:
                self.log.info("{}ProxyCheck - {}: complete".format(self.work_type.title(), self.name))
                break
            proxy = DoValidator.validator(proxy, self.work_type)
            if self.work_type == "raw":
                self.__ifRaw(proxy)
            else:
                self.__ifUse(proxy)
            self.target_queue.task_done()

    def __ifRaw(self, proxy):
        if proxy.last_status:
            if self.proxy_handler.exists(proxy):
                self.log.info('RawProxyCheck - {}: {} exist'.format(self.name, proxy.proxy.ljust(23)))
            else:
                self.log.info('RawProxyCheck - {}: {} pass'.format(self.name, proxy.proxy.ljust(23)))
                self.proxy_handler.put(proxy)
        else:
            self.log.info('RawProxyCheck - {}: {} fail'.format(self.name, proxy.proxy.ljust(23)))

    def __ifUse(self, proxy):
        if proxy.last_status:
            self.log.info('UseProxyCheck - {}: {} pass'.format(self.name, proxy.proxy.ljust(23)))
            self.proxy_handler.put(proxy)
        else:
            if proxy.fail_count > self.conf.maxFailCount:
                self.log.info('UseProxyCheck - {}: {} fail, count {} delete'.format(
                    self.name, proxy.proxy.ljust(23), proxy.fail_count))
                self.proxy_handler.delete(proxy)
            else:
                self.log.info('UseProxyCheck - {}: {} fail, count {} keep'.format(
                    self.name, proxy.proxy.ljust(23), proxy.fail_count))
                self.proxy_handler.put(proxy)
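# Hedged usage sketch (assumption, not from the source): a runner is expected to fill a
# queue with Proxy objects and start several _ThreadChecker workers, joining the queue
# so the call returns once every proxy has been processed. Names such as runChecker and
# proxy_list are illustrative only.
from queue import Queue

def runChecker(work_type, proxy_list, thread_count=20):
    target_queue = Queue()
    for proxy in proxy_list:
        target_queue.put(proxy)
    for index in range(thread_count):
        thread = _ThreadChecker(work_type, target_queue, "thread_%02d" % index)
        thread.daemon = True
        thread.start()
    # block until every queued proxy has been handled (run() calls task_done per item)
    target_queue.join()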
class Checker(Thread):
    """ Multi-threaded check of whether proxies are usable """

    def __init__(self, check_type, queue, thread_name):
        Thread.__init__(self, name=thread_name)
        self.type = check_type
        self.log = LogHandler("checker")
        self.proxy_handler = ProxyHandler()
        self.queue = queue
        self.conf = ConfigHandler()

    def run(self):
        self.log.info("ProxyCheck - {} : start".format(self.name))
        while True:
            try:
                proxy_json = self.queue.get(block=False)
            except Empty:
                self.log.info("ProxyCheck - {} : complete".format(self.name))
                break
            proxy = Proxy.createFromJson(proxy_json)
            proxy = proxyCheck(proxy)
            if self.type == "raw":
                if proxy.last_status:
                    if self.proxy_handler.exists(proxy):
                        self.log.info('ProxyCheck - {} : {} exists'.format(self.name, proxy.proxy.ljust(23)))
                    else:
                        self.log.info('ProxyCheck - {} : {} success'.format(self.name, proxy.proxy.ljust(23)))
                        self.proxy_handler.put(proxy)
                else:
                    self.log.info('ProxyCheck - {} : {} fail'.format(self.name, proxy.proxy.ljust(23)))
            else:
                if proxy.last_status:
                    self.log.info('ProxyCheck - {} : {} pass'.format(self.name, proxy.proxy.ljust(23)))
                    self.proxy_handler.update(proxy)
                else:
                    if proxy.fail_count > self.conf.maxFailCount:
                        self.log.info('ProxyCheck - {} : {} fail, count {} delete'.format(
                            self.name, proxy.proxy.ljust(23), proxy.fail_count))
                        self.proxy_handler.delete(proxy)
                    else:
                        self.log.info('ProxyCheck - {} : {} fail, count {} keep'.format(
                            self.name, proxy.proxy.ljust(23), proxy.fail_count))
                        self.proxy_handler.update(proxy)
            self.queue.task_done()
def __init__(self):
    self.log = LogHandler(self.name)
    self.conf = ConfigHandler()
    self.proxy_handler = ProxyHandler()
import os, sys

from serializationUtil import serialize
from verifier.icmpRouteVerifier import IcmpRouteVerifier
from handler.httpHandler import HttpHandler
from handler.fonaRaspberryPiHandler import FonaRaspberryPiHandler
from handler.logHandler import LogHandler

dir = sys.argv[1]

if not os.path.exists(dir):
    os.makedirs(dir)

# remove any previously serialized objects before writing fresh ones
filelist = [f for f in os.listdir(dir) if f.endswith(".pkl")]
for f in filelist:
    os.remove(os.path.join(dir, f))

serialize(IcmpRouteVerifier(), os.path.join(dir, 'icmpRouteVerifier.pkl'))
serialize(HttpHandler(), os.path.join(dir, 'httpHandler.pkl'))
serialize(FonaRaspberryPiHandler(), os.path.join(dir, 'fonaRaspberryPiHandler.pkl'))
serialize(LogHandler(), os.path.join(dir, 'logHandler.pkl'))
class Fetcher(object):
    name = "fetcher"

    def __init__(self):
        self.log = LogHandler(self.name)
        self.conf = ConfigHandler()
        self.proxy_handler = ProxyHandler()

    def fetch(self):
        """
        fetch proxy into db with proxyFetcher
        :return:
        """
        proxy_set = set()
        url_set = set()
        self.log.info("ProxyFetch : start")

        for fetch_name in self.conf.fetchers:
            self.log.info("ProxyFetch - {func}: start".format(func=fetch_name))
            fetcher = getattr(ProxyFetcher, fetch_name, None)
            if not fetcher:
                self.log.error("ProxyFetch - {func}: class method not exists!".format(func=fetch_name))
                continue
            if not callable(fetcher):
                self.log.error("ProxyFetch - {func}: must be class method".format(func=fetch_name))
                continue
            try:
                for proxy in fetcher():
                    if proxy.url in url_set:
                        self.log.info(f'ProxyFetch - {fetch_name}: {proxy.url} exist')
                        continue
                    # remember the url so later fetchers do not queue it twice
                    url_set.add(proxy.url)
                    self.log.info(f'ProxyFetch - {fetch_name}: {proxy.url} success')
                    # queue one copy of the proxy per verification tag
                    for tag in VERIFY_URL.keys():
                        p = deepcopy(proxy)
                        p.tag = tag
                        proxy_set.add(p)
            except Exception as e:
                self.log.error("ProxyFetch - {func}: error".format(func=fetch_name))
                self.log.error(str(e))

        self.log.info("ProxyFetch - all complete!")
        return proxy_set
def __init__(self, *args, **kwargs):
    self.log = LogHandler(self.name, file=False)
    self.response = Response()
class Fetcher(object):
    name = "fetcher"

    def __init__(self):
        self.log = LogHandler(self.name)
        self.conf = ConfigHandler()
        self.proxy_handler = ProxyHandler()

    def run(self):
        """
        fetch proxy with proxyFetcher
        :return:
        """
        proxy_dict = dict()
        self.log.info("ProxyFetch : start")

        for fetch_source in self.conf.fetchers:
            self.log.info("ProxyFetch - {func}: start".format(func=fetch_source))
            fetcher = getattr(ProxyFetcher, fetch_source, None)
            if not fetcher:
                self.log.error("ProxyFetch - {func}: class method not exists!".format(func=fetch_source))
                continue
            if not callable(fetcher):
                self.log.error("ProxyFetch - {func}: must be class method".format(func=fetch_source))
                continue
            try:
                for proxy in fetcher():
                    self.log.info('ProxyFetch - %s: %s ok' % (fetch_source, proxy.ljust(23)))
                    proxy = proxy.strip()
                    if proxy in proxy_dict:
                        proxy_dict[proxy].add_source(fetch_source)
                    else:
                        proxy_dict[proxy] = Proxy(proxy, source=fetch_source)
            except Exception as e:
                self.log.error("ProxyFetch - {func}: error".format(func=fetch_source))
                self.log.error(str(e))

        self.log.info("ProxyFetch - all complete!")
        for _ in proxy_dict.values():
            if DoValidator.preValidator(_.proxy):
                yield _
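# Hedged usage sketch (assumption, not from the source): Fetcher.run() is a generator,
# so a caller iterates it and persists each pre-validated Proxy. The put() call mirrors
# the ProxyHandler usage seen in the checker threads above; whether the real runner
# stores proxies this way is an assumption.
fetcher = Fetcher()
for proxy in fetcher.run():
    # proxy is a helper.proxy.Proxy instance that already passed preValidator
    fetcher.proxy_handler.put(proxy)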