コード例 #1
0
ファイル: check.py プロジェクト: bhlt/proxy_pool
 def __init__(self, check_type, queue, thread_name):
     """Initialise the checker thread and the helpers it keeps on ``self``.

     Args:
         check_type: stored as ``self.type``.
         queue: stored as ``self.queue``.
         thread_name: forwarded to ``Thread.__init__`` as the thread name.
     """
     Thread.__init__(self, name=thread_name)
     # Plain arguments first, then the helper objects in their original order.
     self.type, self.queue = check_type, queue
     self.log = LogHandler("checker")
     self.proxy_handler = ProxyHandler()
     self.conf = ConfigHandler()
コード例 #2
0
ファイル: scheduler.py プロジェクト: zishuang951/proxy_pool-1
def runScheduler():
    """Run one proxy fetch, then schedule periodic fetch and check jobs.

    ``_runProxyFetch`` is called once up front. After that a blocking
    scheduler runs ``_runProxyFetch`` every 4 minutes and ``_runProxyCheck``
    every 2 minutes. ``scheduler.start()`` is the last statement of this
    function.
    """
    # One fetch right away, before any interval has elapsed.
    _runProxyFetch()

    timezone = ConfigHandler().timezone
    scheduler_log = LogHandler("scheduler")

    executors = {
        'default': {
            'type': 'threadpool',
            'max_workers': 20
        },
        'processpool': ProcessPoolExecutor(max_workers=5)
    }
    job_defaults = {'coalesce': False, 'max_instances': 10}

    # Pass every option to the constructor in one go. Calling
    # scheduler.configure(...) afterwards without ``logger`` makes the
    # scheduler fall back to its default logger, silently dropping
    # ``scheduler_log``.
    scheduler = BlockingScheduler(logger=scheduler_log,
                                  timezone=timezone,
                                  executors=executors,
                                  job_defaults=job_defaults)

    scheduler.add_job(_runProxyFetch,
                      'interval',
                      minutes=4,
                      id="proxy_fetch",
                      name="proxy采集")
    scheduler.add_job(_runProxyCheck,
                      'interval',
                      minutes=2,
                      id="proxy_check",
                      name="proxy检查")

    scheduler.start()
コード例 #3
0
def runDownScheduler():
    """
    Start a background scheduler that runs ``execDown`` every 1-5 hours.

    The interval (whole hours in 1..5) and the single argument passed to
    ``execDown`` (an int in 5..30) are each drawn at random once, when this
    function is called.
    :return:
    """
    timezone = ConfigHandler().timezone
    scheduler_log = LogHandler("schedule")

    executors = {
        'default': {
            'type': 'threadpool',
            'max_workers': 20
        },
        'processpool': ProcessPoolExecutor(max_workers=5)
    }
    job_defaults = {'coalesce': False, 'max_instances': 10}

    # Configure in one constructor call; a later configure() without
    # ``logger`` would reset the scheduler to its default logger.
    scheduler = BackgroundScheduler(logger=scheduler_log,
                                    timezone=timezone,
                                    executors=executors,
                                    job_defaults=job_defaults)

    intlTime = random.randint(1, 5)
    # Register the callable itself and hand its argument over via ``args``;
    # writing execDown(...) here would run it immediately and schedule its
    # return value instead.
    # NOTE(review): assumes execDown is the job function, not a factory that
    # returns one - confirm against its definition.
    # The interval trigger keyword is ``hours``; ``hour`` raises TypeError.
    scheduler.add_job(execDown,
                      'interval',
                      args=[random.randint(5, 30)],
                      hours=intlTime,
                      id="down_url",
                      name="url下载")

    scheduler.start()
コード例 #4
0
ファイル: check.py プロジェクト: FairyWorld/learn_proxy_pool
 def __init__(self, work_type, target_queue, thread_name):
     """Initialise the checker thread and the helpers it keeps on ``self``.

     Args:
         work_type: stored as ``self.work_type``.
         target_queue: stored as ``self.target_queue``.
         thread_name: forwarded to ``Thread.__init__`` as the thread name.
     """
     Thread.__init__(self, name=thread_name)
     # Plain arguments first, then the helper objects in their original order.
     self.work_type, self.target_queue = work_type, target_queue
     self.log = LogHandler("checker")
     self.proxy_handler = ProxyHandler()
     self.conf = ConfigHandler()
コード例 #5
0
ファイル: fetch.py プロジェクト: FairyWorld/learn_proxy_pool
 def __init__(self, fetch_source, proxy_dict):
     """Initialise the fetcher thread for one named fetch source.

     Args:
         fetch_source: attribute name looked up on ``ProxyFetcher``; also
             stored as ``self.fetch_source``.
         proxy_dict: stored as ``self.proxy_dict``.
     """
     Thread.__init__(self)
     self.fetch_source, self.proxy_dict = fetch_source, proxy_dict
     # Resolves to None when ProxyFetcher has no attribute with that name.
     self.fetcher = getattr(ProxyFetcher, fetch_source, None)
     self.log = LogHandler("fetcher")
     self.conf = ConfigHandler()
     self.proxy_handler = ProxyHandler()
コード例 #6
0
def testConfig():
    """
    Print selected ConfigHandler settings and assert ``fetchers`` is a list.
    :return:
    """
    settings = ConfigHandler()
    # Same attributes, same order, one print per attribute.
    for attr_name in ("dbConn", "serverPort", "serverHost", "tableName"):
        print(getattr(settings, attr_name))
    assert isinstance(settings.fetchers, list)
    print(settings.fetchers)
コード例 #7
0
def __checkDBConfig():
    """Log the database client's connection fields and return ``db.test()``."""
    db = DbClient(ConfigHandler().dbConn)
    # (message template, attribute on ``db``) pairs, logged in this order.
    report_lines = (
        ("DB_TYPE: %s", "db_type"),
        ("DB_HOST: %s", "db_host"),
        ("DB_PORT: %s", "db_port"),
        ("DB_NAME: %s", "db_name"),
        ("DB_USER: %s", "db_user"),
    )
    log.info("============ DATABASE CONFIGURE ================")
    for template, attr_name in report_lines:
        log.info(template % getattr(db, attr_name))
    log.info("=================================================")
    return db.test()
def testProxyFetcher():
    """Run every configured fetcher, print each proxy, then print per-fetcher totals."""
    fetcher_names = ConfigHandler().fetchers
    tally = dict.fromkeys(fetcher_names, 0)
    for fetcher_name in fetcher_names:
        # The configured name is stripped for the lookup only; the tally and
        # the printed label use the name exactly as configured.
        fetch = getattr(ProxyFetcher, fetcher_name.strip())
        for candidate in fetch():
            if not candidate:
                continue
            print('{func}: fetch proxy {proxy}'.format(func=fetcher_name,
                                                       proxy=candidate))
            tally[fetcher_name] += 1
    for fetcher_name, total in tally.items():
        print(fetcher_name, total)
コード例 #9
0
def testProxyFetcher():
    """Run every configured fetcher and print each truthy proxy with a running count.

    The count printed with a proxy is the number of truthy proxies that
    fetcher yielded before it, so the first line for each fetcher shows 0.
    """
    for fetcher_name in ConfigHandler().fetchers:
        fetch = getattr(ProxyFetcher, fetcher_name.strip())
        # Falsy results are skipped and do not advance the count.
        usable = (candidate for candidate in fetch() if candidate)
        for seen_before, candidate in enumerate(usable):
            print('{func}: fetch proxy {proxy},proxy_count:{proxy_count}'.
                  format(func=fetcher_name,
                         proxy=candidate,
                         proxy_count=seen_before))
コード例 #10
0
ファイル: proxyApi.py プロジェクト: zzCoding1998/proxy_pool
                   2020/06/23: 新增pop接口
-------------------------------------------------
"""
__author__ = 'JHao'

import platform
from werkzeug.wrappers import Response
from flask import Flask, jsonify, request

from util.six import iteritems
from helper.proxy import Proxy
from handler.proxyHandler import ProxyHandler
from handler.configHandler import ConfigHandler

# Flask application object for this module.
app = Flask(__name__)
# Module-level configuration and proxy handler instances, created at import time.
conf = ConfigHandler()
proxy_handler = ProxyHandler()


class JsonResponse(Response):
    """Response class that runs dict/list values through ``jsonify`` first."""

    @classmethod
    def force_type(cls, response, environ=None):
        """Convert dict/list responses with ``jsonify``, then defer to the parent.

        Any other value is passed to the parent ``force_type`` unchanged.
        """
        needs_json = isinstance(response, (dict, list))
        payload = jsonify(response) if needs_json else response
        return super(JsonResponse, cls).force_type(payload, environ)


# Use JsonResponse (which passes dict/list values through jsonify) as the app's response class.
app.response_class = JsonResponse

api_list = [{
コード例 #11
0
ファイル: proxyHandler.py プロジェクト: bhlt/proxy_pool
 def __init__(self):
     """Create the config handler and a DB client switched to the configured table."""
     settings = ConfigHandler()
     client = DbClient(settings.dbConn)
     client.changeTable(settings.tableName)
     self.conf = settings
     self.db = client
コード例 #12
0
ファイル: fetch.py プロジェクト: hubaoquan/proxy_tools
 def __init__(self):
     """Create the logger (named after ``self.name``), config and proxy handler."""
     logger_name = self.name
     self.log = LogHandler(logger_name)
     self.conf = ConfigHandler()
     self.proxy_handler = ProxyHandler()
コード例 #13
0
ファイル: fetch.py プロジェクト: FairyWorld/learn_proxy_pool
 def __init__(self):
     """Create the logger (named after ``self.name``) and the config handler."""
     logger_name = self.name
     self.log = LogHandler(logger_name)
     self.conf = ConfigHandler()
コード例 #14
0
def __showConfigure():
    """Log the configured server host, server port and fetcher list."""
    settings = ConfigHandler()
    # (message template, ConfigHandler attribute) pairs, logged in this order.
    report_lines = (
        ("ProxyPool configure HOST: %s", "serverHost"),
        ("ProxyPool configure PORT: %s", "serverPort"),
        ("ProxyPool configure PROXY_FETCHER: %s", "fetchers"),
    )
    for template, attr_name in report_lines:
        log.info(template % getattr(settings, attr_name))
コード例 #15
0
ファイル: check.py プロジェクト: FairyWorld/learn_proxy_pool
class DoValidator(object):
    """Run the registered validators against a proxy and update its status fields."""

    # Shared configuration; validator() reads its ``proxyRegion`` switch.
    conf = ConfigHandler()

    @classmethod
    def validator(cls, proxy, work_type):
        """
        Validation entry point.

        Args:
            proxy: Proxy Object
            work_type: raw/use
        Returns:
            Proxy Object (the same instance, updated in place)
        """
        http_ok = cls.httpValidator(proxy)
        # The HTTPS validators only run once the HTTP validators have passed.
        https_ok = cls.httpsValidator(proxy) if http_ok else False

        proxy.check_count += 1
        proxy.last_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        proxy.last_status = bool(http_ok)

        if not http_ok:
            proxy.fail_count += 1
            return proxy

        # A successful check pays back one earlier failure, never below zero.
        if proxy.fail_count > 0:
            proxy.fail_count -= 1
        proxy.https = bool(https_ok)
        if work_type == "raw":
            # Region lookup happens only for "raw" work and only when enabled
            # in the configuration; otherwise the region is set to "".
            proxy.region = cls.regionGetter(
                proxy) if cls.conf.proxyRegion else ""
        return proxy

    @classmethod
    def httpValidator(cls, proxy):
        """Return True when every HTTP validator accepts ``proxy.proxy``."""
        return all(func(proxy.proxy) for func in ProxyValidator.http_validator)

    @classmethod
    def httpsValidator(cls, proxy):
        """Return True when every HTTPS validator accepts ``proxy.proxy``."""
        return all(func(proxy.proxy) for func in ProxyValidator.https_validator)

    @classmethod
    def preValidator(cls, proxy):
        """Return True when every pre-validator accepts ``proxy`` itself."""
        return all(func(proxy) for func in ProxyValidator.pre_validator)

    @classmethod
    def regionGetter(cls, proxy):
        """Look up the address for the proxy's host; return 'error' on any failure.

        The host is the part of ``proxy.proxy`` before the first ':'.
        """
        try:
            host = proxy.proxy.split(':')[0]
            url = 'https://searchplugin.csdn.net/api/v1/ip/get?ip=%s' % host
            r = WebRequest().get(url=url, retry_time=1, timeout=2).json
            return r['data']['address']
        except Exception:
            # Best-effort lookup: request/parse failures map to 'error', but
            # KeyboardInterrupt and SystemExit are no longer swallowed as a
            # bare ``except:`` would.
            return 'error'