def __init__(self, check_type, queue, thread_name):
    """
    Initialise the worker and its collaborators.

    :param check_type: value stored on the instance as ``type``
    :param queue: value stored on the instance as ``queue``
    :param thread_name: name handed to the ``Thread`` base initialiser
    """
    Thread.__init__(self, name=thread_name)

    # Caller-supplied state.
    self.type, self.queue = check_type, queue

    # Helper objects; the logger is registered under the name "checker".
    self.log = LogHandler("checker")
    self.proxy_handler = ProxyHandler()
    self.conf = ConfigHandler()
def runScheduler():
    """
    Run one proxy fetch immediately, then start the periodic scheduler.

    Two interval jobs are registered on a ``BlockingScheduler``:
    ``_runProxyFetch`` every 4 minutes and ``_runProxyCheck`` every 2 minutes.
    Executors and job defaults are configured afterwards, just before the
    scheduler is started.

    :return: None
    """
    # Prime the pool once before any interval fires.
    _runProxyFetch()

    tz = ConfigHandler().timezone
    scheduler = BlockingScheduler(logger=LogHandler("scheduler"), timezone=tz)

    scheduler.add_job(
        _runProxyFetch, 'interval',
        minutes=4, id="proxy_fetch", name="proxy采集",
    )
    scheduler.add_job(
        _runProxyCheck, 'interval',
        minutes=2, id="proxy_check", name="proxy检查",
    )

    # NOTE(review): this configure() call does not pass logger= again -
    # confirm whether that resets the logger given to the constructor.
    scheduler.configure(
        executors={
            'default': {'type': 'threadpool', 'max_workers': 20},
            'processpool': ProcessPoolExecutor(max_workers=5),
        },
        job_defaults={'coalesce': False, 'max_instances': 10},
        timezone=tz,
    )

    scheduler.start()
def runDownScheduler():
    """
    Start a background scheduler that runs the URL download job periodically.

    The interval is drawn once, when this function is called, as a random
    whole number of hours in [1, 5]. The argument passed to ``execDown`` is
    likewise drawn once, as a random integer in [5, 30].

    :return: None
    """
    timezone = ConfigHandler().timezone
    scheduler_log = LogHandler("schedule")
    scheduler = BackgroundScheduler(logger=scheduler_log, timezone=timezone)

    interval_hours = random.randint(1, 5)
    # Register the callable itself and supply its argument through ``args``;
    # calling execDown(...) here would run the download immediately and hand
    # its return value to the scheduler instead of a job function.
    # The interval trigger keyword is ``hours`` (plural).
    scheduler.add_job(
        execDown, 'interval',
        args=[random.randint(5, 30)],
        hours=interval_hours,
        id="down_url", name="url下载",
    )

    executors = {
        'default': {'type': 'threadpool', 'max_workers': 20},
        'processpool': ProcessPoolExecutor(max_workers=5),
    }
    job_defaults = {'coalesce': False, 'max_instances': 10}
    scheduler.configure(executors=executors, job_defaults=job_defaults, timezone=timezone)

    scheduler.start()
def __init__(self, work_type, target_queue, thread_name):
    """
    Initialise the worker and its collaborators.

    :param work_type: value stored on the instance as ``work_type``
    :param target_queue: value stored on the instance as ``target_queue``
    :param thread_name: name handed to the ``Thread`` base initialiser
    """
    Thread.__init__(self, name=thread_name)

    # Caller-supplied state.
    self.work_type, self.target_queue = work_type, target_queue

    # Helper objects; the logger is registered under the name "checker".
    self.log = LogHandler("checker")
    self.proxy_handler = ProxyHandler()
    self.conf = ConfigHandler()
def __init__(self, fetch_source, proxy_dict):
    """
    Initialise a fetch worker for one named fetcher.

    :param fetch_source: attribute name looked up on ``ProxyFetcher``
    :param proxy_dict: value stored on the instance as ``proxy_dict``
    """
    Thread.__init__(self)

    self.fetch_source, self.proxy_dict = fetch_source, proxy_dict

    # Resolve the fetcher by name; None when ProxyFetcher has no such attribute.
    self.fetcher = getattr(ProxyFetcher, fetch_source, None)

    self.log = LogHandler("fetcher")
    self.conf = ConfigHandler()
    self.proxy_handler = ProxyHandler()
def testConfig():
    """
    Print the main configuration values and check that ``fetchers`` is a list.

    :return: None
    """
    conf = ConfigHandler()

    # Same attributes, same order: dbConn, serverPort, serverHost, tableName.
    for attr_name in ("dbConn", "serverPort", "serverHost", "tableName"):
        print(getattr(conf, attr_name))

    assert isinstance(conf.fetchers, list)
    print(conf.fetchers)
def __checkDBConfig():
    """
    Log the database connection settings and return the result of ``db.test()``.

    :return: whatever ``DbClient.test()`` returns
    """
    db = DbClient(ConfigHandler().dbConn)

    # (format string, DbClient attribute) pairs, logged in this order.
    report_lines = (
        ("DB_TYPE: %s", "db_type"),
        ("DB_HOST: %s", "db_host"),
        ("DB_PORT: %s", "db_port"),
        ("DB_NAME: %s", "db_name"),
        ("DB_USER: %s", "db_user"),
    )

    log.info("============ DATABASE CONFIGURE ================")
    for fmt, attr_name in report_lines:
        log.info(fmt % getattr(db, attr_name))
    log.info("=================================================")

    return db.test()
def testProxyFetcher():
    """
    Run every configured fetcher, print each proxy it yields, then print a
    per-fetcher total of the non-empty proxies seen.

    :return: None
    """
    fetcher_names = ConfigHandler().fetchers
    # One counter per configured name (keys are the names exactly as configured).
    tally = dict.fromkeys(fetcher_names, 0)

    for name in fetcher_names:
        # Fetchers are looked up by their whitespace-stripped name.
        fetch = getattr(ProxyFetcher, name.strip())
        for proxy in fetch():
            if not proxy:
                continue
            print('{func}: fetch proxy {proxy}'.format(func=name, proxy=proxy))
            tally[name] = tally.get(name) + 1

    for name, total in tally.items():
        print(name, total)
def testProxyFetcher():
    """
    Run every configured fetcher and print each non-empty proxy together with
    the number of non-empty proxies that fetcher had produced before it.

    :return: None
    """
    template = '{func}: fetch proxy {proxy},proxy_count:{proxy_count}'

    for name in ConfigHandler().fetchers:
        fetch = getattr(ProxyFetcher, name.strip())
        # filter(None, ...) drops falsy results; enumerate supplies the
        # zero-based running count of the ones that remain.
        for seen_so_far, proxy in enumerate(filter(None, fetch())):
            print(template.format(func=name, proxy=proxy, proxy_count=seen_so_far))
2020/06/23: 新增pop接口 ------------------------------------------------- """ __author__ = 'JHao' import platform from werkzeug.wrappers import Response from flask import Flask, jsonify, request from util.six import iteritems from helper.proxy import Proxy from handler.proxyHandler import ProxyHandler from handler.configHandler import ConfigHandler app = Flask(__name__) conf = ConfigHandler() proxy_handler = ProxyHandler() class JsonResponse(Response): @classmethod def force_type(cls, response, environ=None): if isinstance(response, (dict, list)): response = jsonify(response) return super(JsonResponse, cls).force_type(response, environ) app.response_class = JsonResponse api_list = [{
def __init__(self):
    """
    Load the configuration, open the database client with ``conf.dbConn``
    and switch it to the table named by ``conf.tableName``.
    """
    self.conf = settings = ConfigHandler()
    self.db = client = DbClient(settings.dbConn)
    client.changeTable(settings.tableName)
def __init__(self):
    """
    Create the logger (named after ``self.name``), the configuration handler
    and the proxy handler used by this object.
    """
    logger_name = self.name
    self.log = LogHandler(logger_name)
    self.conf = ConfigHandler()
    self.proxy_handler = ProxyHandler()
def __init__(self):
    """
    Create the logger (named after ``self.name``) and the configuration
    handler used by this object.
    """
    logger_name = self.name
    self.log = LogHandler(logger_name)
    self.conf = ConfigHandler()
def __showConfigure():
    """
    Log the server host, server port and configured fetchers.

    :return: None
    """
    conf = ConfigHandler()

    # (format string, ConfigHandler attribute) pairs, logged in this order.
    summary = (
        ("ProxyPool configure HOST: %s", "serverHost"),
        ("ProxyPool configure PORT: %s", "serverPort"),
        ("ProxyPool configure PROXY_FETCHER: %s", "fetchers"),
    )
    for fmt, attr_name in summary:
        log.info(fmt % getattr(conf, attr_name))
class DoValidator(object):
    """
    Run the registered validators against a proxy and update its statistics.
    """

    conf = ConfigHandler()

    @staticmethod
    def _allPass(validators, target):
        """Return True when every validator accepts ``target`` (stops at the first failure)."""
        return all(func(target) for func in validators)

    @classmethod
    def validator(cls, proxy, work_type):
        """
        Validation entry point.

        Args:
            proxy: Proxy object; its counters and status fields are updated in place
            work_type: "raw" or "use"; the region is only looked up for "raw"
        Returns:
            the same Proxy object
        """
        http_ok = cls.httpValidator(proxy)
        # HTTPS is only probed when the HTTP check has already passed.
        https_ok = cls.httpsValidator(proxy) if http_ok else False

        proxy.check_count += 1
        proxy.last_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        proxy.last_status = bool(http_ok)

        if http_ok:
            # A success pays back one earlier failure, never going below zero.
            if proxy.fail_count > 0:
                proxy.fail_count -= 1
            proxy.https = bool(https_ok)
            if work_type == "raw":
                proxy.region = cls.regionGetter(proxy) if cls.conf.proxyRegion else ""
        else:
            proxy.fail_count += 1
        return proxy

    @classmethod
    def httpValidator(cls, proxy):
        """Return True when every registered HTTP validator accepts ``proxy.proxy``."""
        return cls._allPass(ProxyValidator.http_validator, proxy.proxy)

    @classmethod
    def httpsValidator(cls, proxy):
        """Return True when every registered HTTPS validator accepts ``proxy.proxy``."""
        return cls._allPass(ProxyValidator.https_validator, proxy.proxy)

    @classmethod
    def preValidator(cls, proxy):
        """Return True when every registered pre-validator accepts ``proxy`` itself."""
        return cls._allPass(ProxyValidator.pre_validator, proxy)

    @classmethod
    def regionGetter(cls, proxy):
        """
        Look up the region for the host part of ``proxy.proxy``.

        Best effort: any ordinary failure (request, decoding, missing keys)
        yields the string 'error' instead of raising.

        Returns:
            the ``data.address`` field of the lookup response, or 'error'
        """
        try:
            host = proxy.proxy.split(':')[0]
            url = 'https://searchplugin.csdn.net/api/v1/ip/get?ip=%s' % host
            r = WebRequest().get(url=url, retry_time=1, timeout=2).json
            return r['data']['address']
        except Exception:
            # Deliberately not a bare ``except``: KeyboardInterrupt and
            # SystemExit must still propagate.
            return 'error'