예제 #1
0
파일: check.py 프로젝트: bhlt/proxy_pool
 def __init__(self, check_type, queue, thread_name):
     """Initialise the checker thread.

     :param check_type: stored as ``self.type``
     :param queue: stored as ``self.queue``
     :param thread_name: passed to ``Thread.__init__`` as the thread name
     """
     Thread.__init__(self, name=thread_name)
     # Plain attribute bindings first, then the collaborating handlers.
     self.type, self.queue = check_type, queue
     self.log = LogHandler("checker")
     self.proxy_handler = ProxyHandler()
     self.conf = ConfigHandler()
예제 #2
0
 def __init__(self, work_type, target_queue, thread_name):
     """Initialise the checker thread.

     :param work_type: stored as ``self.work_type``
     :param target_queue: stored as ``self.target_queue``
     :param thread_name: passed to ``Thread.__init__`` as the thread name
     """
     Thread.__init__(self, name=thread_name)
     # Plain attribute bindings first, then the collaborating handlers.
     self.work_type, self.target_queue = work_type, target_queue
     self.log = LogHandler("checker")
     self.proxy_handler = ProxyHandler()
     self.conf = ConfigHandler()
예제 #3
0
 def __init__(self, fetch_source, proxy_dict):
     """Initialise the fetcher thread.

     :param fetch_source: attribute name looked up on ``ProxyFetcher``
     :param proxy_dict: stored as ``self.proxy_dict``
     """
     Thread.__init__(self)
     self.fetch_source, self.proxy_dict = fetch_source, proxy_dict
     # None when ProxyFetcher has no attribute with that name.
     self.fetcher = getattr(ProxyFetcher, fetch_source, None)
     self.log = LogHandler("fetcher")
     self.conf = ConfigHandler()
     self.proxy_handler = ProxyHandler()
예제 #4
0
class _ThreadFetcher(Thread):

    def __init__(self, fetch_source, proxy_dict):
        Thread.__init__(self)
        self.fetch_source = fetch_source
        self.proxy_dict = proxy_dict
        self.fetcher = getattr(ProxyFetcher, fetch_source, None)
        self.log = LogHandler("fetcher")
        self.conf = ConfigHandler()
        self.proxy_handler = ProxyHandler()

    def run(self):
        self.log.info("ProxyFetch - {func}: start".format(func=self.fetch_source))
        try:
            for proxy in self.fetcher():
                self.log.info('ProxyFetch - %s: %s ok' % (self.fetch_source, proxy.ljust(23)))
                proxy = proxy.strip()
                if proxy in self.proxy_dict:
                    self.proxy_dict[proxy].add_source(self.fetch_source)
                else:
                    self.proxy_dict[proxy] = Proxy(
                        proxy, source=self.fetch_source)
        except Exception as e:
            self.log.error("ProxyFetch - {func}: error".format(func=self.fetch_source))
            self.log.error(str(e))
예제 #5
0
def runScheduler():
    """Run one fetch right away, then start a blocking scheduler.

    Two interval jobs are registered (fetch every 4 minutes, check every
    2 minutes) before the executors and job defaults are configured.
    """
    _runProxyFetch()

    tz = ConfigHandler().timezone
    scheduler = BlockingScheduler(logger=LogHandler("scheduler"), timezone=tz)

    # (callable, interval in minutes, job id, display name)
    job_table = (
        (_runProxyFetch, 4, "proxy_fetch", "proxy采集"),
        (_runProxyCheck, 2, "proxy_check", "proxy检查"),
    )
    for job_func, every, job_id, label in job_table:
        scheduler.add_job(job_func, 'interval', minutes=every, id=job_id, name=label)

    scheduler.configure(
        executors={
            'default': {'type': 'threadpool', 'max_workers': 20},
            'processpool': ProcessPoolExecutor(max_workers=5),
        },
        job_defaults={'coalesce': False, 'max_instances': 10},
        timezone=tz)

    scheduler.start()
예제 #6
0
def runDownScheduler():
    """
    Start a background scheduler that runs ``execDown`` on a random interval.

    The interval is drawn once as a whole number of hours in [1, 5]; the job
    argument is drawn once as an integer in [5, 30].
    :return:
    """
    timezone = ConfigHandler().timezone
    scheduler_log = LogHandler("schedule")
    scheduler = BackgroundScheduler(logger=scheduler_log, timezone=timezone)

    intlTime = random.randint(1, 5)
    # Register the callable itself and hand over its argument through ``args``:
    # writing ``execDown(...)`` here would run it immediately and register its
    # return value as the job. The interval trigger takes ``hours``, not ``hour``.
    scheduler.add_job(execDown,
                      'interval',
                      args=[random.randint(5, 30)],
                      hours=intlTime,
                      id="down_url",
                      name="url下载")

    executors = {
        'default': {
            'type': 'threadpool',
            'max_workers': 20
        },
        'processpool': ProcessPoolExecutor(max_workers=5)
    }

    job_defaults = {'coalesce': False, 'max_instances': 10}

    scheduler.configure(executors=executors,
                        job_defaults=job_defaults,
                        timezone=timezone)

    scheduler.start()
예제 #7
0
 def test(self):
     """Probe the connection by calling ``getCount``.

     :return: the caught exception if a timeout, connection or response
              error occurred (it is logged with traceback); otherwise None
     """
     log = LogHandler('ssdb_client')
     failure = None
     try:
         self.getCount()
     except TimeoutError as e:
         log.error('ssdb connection time out: %s' % str(e), exc_info=True)
         failure = e
     except (ConnectionError, ResponseError) as e:
         # Both error kinds are reported with the same message.
         log.error('ssdb connection error: %s' % str(e), exc_info=True)
         failure = e
     return failure
예제 #8
0
class Fetcher(object):
    """Runs every configured fetcher in its own thread and yields the proxies that pre-validate."""
    name = "fetcher"

    def __init__(self):
        """Create the logger (named by ``self.name``) and the config handler."""
        self.log = LogHandler(self.name)
        self.conf = ConfigHandler()

    def run(self):
        """
        fetch proxy with proxyFetcher

        One ``_ThreadFetcher`` is started per valid name in ``self.conf.fetchers``;
        all of them fill the same dict. Names that are missing on ``ProxyFetcher``
        or not callable are logged and skipped. After every thread has been
        joined, the collected values are filtered through
        ``DoValidator.preValidator``.
        :return: generator of the collected proxy objects that pass pre-validation
        """
        proxy_dict = dict()
        thread_list = list()
        self.log.info("ProxyFetch : start")

        for fetch_source in self.conf.fetchers:
            self.log.info("ProxyFetch - {func}: start".format(func=fetch_source))
            fetcher = getattr(ProxyFetcher, fetch_source, None)
            if not fetcher:
                self.log.error("ProxyFetch - {func}: class method not exists!".format(func=fetch_source))
                continue
            if not callable(fetcher):
                self.log.error("ProxyFetch - {func}: must be class method".format(func=fetch_source))
                continue
            thread_list.append(_ThreadFetcher(fetch_source, proxy_dict))

        for thread in thread_list:
            # Thread.setDaemon() is deprecated since Python 3.10; the ``daemon``
            # attribute is the supported spelling on every version.
            thread.daemon = True
            thread.start()

        for thread in thread_list:
            thread.join()

        self.log.info("ProxyFetch - all complete!")
        for collected in proxy_dict.values():
            if DoValidator.preValidator(collected.proxy):
                yield collected
예제 #9
0
class Fetcher(object):
    """Collects proxy strings from every fetcher named in the configuration."""
    name = "fetcher"

    def __init__(self):
        """Create the logger (named by ``self.name``), config handler and proxy handler."""
        self.log = LogHandler(self.name)
        self.conf = ConfigHandler()
        self.proxy_handler = ProxyHandler()

    def fetch(self):
        """
        fetch proxy with proxyFetcher

        Each name in ``self.conf.fetchers`` is looked up on ``ProxyFetcher`` and
        called. Every proxy it yields is stripped of surrounding whitespace
        before anything else, so blank entries are dropped silently and
        whitespace variants of one proxy count as duplicates. An exception
        raised by one fetcher is logged and the remaining fetchers still run.
        :return: set of unique, stripped proxy strings
        """
        proxy_set = set()
        self.log.info("ProxyFetch : start")
        for fetch_name in self.conf.fetchers:
            self.log.info("ProxyFetch - {func}: start".format(func=fetch_name))
            fetcher = getattr(ProxyFetcher, fetch_name, None)
            if not fetcher:
                self.log.error("ProxyFetch - {func}: class method not exists!".format(func=fetch_name))
                continue
            if not callable(fetcher):
                self.log.error("ProxyFetch - {func}: must be class method".format(func=fetch_name))
                continue

            try:
                for raw_proxy in fetcher():
                    # Normalise first: membership test, log line and stored
                    # value must all refer to the same string.
                    proxy = raw_proxy.strip()
                    if not proxy:
                        continue
                    if proxy in proxy_set:
                        self.log.info('ProxyFetch - %s: %s exist' % (fetch_name, proxy.ljust(23)))
                        continue
                    self.log.info('ProxyFetch - %s: %s success' % (fetch_name, proxy.ljust(23)))
                    proxy_set.add(proxy)
            except Exception as e:
                self.log.error("ProxyFetch - {func}: error".format(func=fetch_name))
                self.log.error(str(e))
        self.log.info("ProxyFetch - all complete!")
        return proxy_set
예제 #10
0
class Fetcher(object):
    """Collects proxies from the configured fetchers and tags each with source and area."""
    name = "fetcher"

    def __init__(self):
        """Create the logger, config and proxy handlers, then try to load the IP database."""
        self.log = LogHandler(self.name)
        self.conf = ConfigHandler()
        self.proxy_handler = ProxyHandler()
        self.loadIp()

    def loadIp(self):
        """Load ``qqwry.dat`` from the working directory into ``self.ip``.

        ``self.ip`` is set to ``False`` when the file does not exist.
        """
        if os.path.isfile("qqwry.dat"):
            self.ip = QQwry()
            self.ip.load_file('qqwry.dat')
        else:
            self.ip = False

    def fetch(self):
        """
        fetch proxy into db with proxyFetcher

        Each name in ``self.conf.fetchers`` is looked up on ``ProxyFetcher`` and
        called. Proxies are stripped, blank ones are dropped, and a proxy that
        was already collected (from any source) is reported and skipped. An
        exception raised by one fetcher is logged and the remaining fetchers
        still run.
        :return: set of ``(proxy, fetch_name, area)`` tuples; ``area`` is ``''``
                 while no IP database is loaded
        """
        proxy_set = set()
        # The result set holds tuples, so duplicates have to be tracked on the
        # bare proxy string separately.
        seen = set()
        self.log.info("ProxyFetch : start")
        for fetch_name in self.conf.fetchers:
            self.log.info("ProxyFetch - {func}: start".format(func=fetch_name))
            fetcher = getattr(ProxyFetcher, fetch_name, None)
            if not fetcher:
                self.log.error("ProxyFetch - {func}: class method not exists!".format(func=fetch_name))
                continue
            if not callable(fetcher):
                self.log.error("ProxyFetch - {func}: must be class method".format(func=fetch_name))
                continue

            try:
                for raw_proxy in fetcher():
                    proxy = raw_proxy.strip()
                    if not proxy:
                        continue
                    if proxy in seen:
                        self.log.info('ProxyFetch - %s: %s exist' %
                                      (fetch_name, proxy.ljust(23)))
                        continue
                    seen.add(proxy)
                    self.log.info('ProxyFetch - %s: %s success' %
                                  (fetch_name, proxy.ljust(23)))

                    if self.ip:
                        # ``or ()`` guards against a lookup that returns None.
                        area = " ".join(self.ip.lookup(
                            proxy.split(':')[0]) or ())
                    else:
                        # Database not loaded yet: retry for later proxies.
                        self.loadIp()
                        area = ''
                    proxy_set.add((proxy, fetch_name, area))

            except Exception as e:
                self.log.error(
                    "ProxyFetch - {func}: error".format(func=fetch_name))
                self.log.error(str(e))
        self.log.info("ProxyFetch - all complete!")
        return proxy_set
예제 #11
0
class WebRequest(object):
    """Small wrapper around ``requests.get`` with retries and parsing helpers."""
    name = "web_request"

    def __init__(self, *args, **kwargs):
        """Create the logger and start with a blank ``Response``."""
        self.log = LogHandler(self.name, file=False)
        self.response = Response()

    @property
    def user_agent(self):
        """
        return an User-Agent at random
        :return:
        """
        ua_list = [
            'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/30.0.1599.101',
            'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.122',
            'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.71',
            'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95',
            'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/21.0.1180.71',
            'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; QQDownload 732; .NET4.0C; .NET4.0E)',
            'Mozilla/5.0 (Windows NT 5.1; U; en; rv:1.8.1) Gecko/20061208 Firefox/2.0.0 Opera 9.50',
            'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:34.0) Gecko/20100101 Firefox/34.0',
        ]
        return random.choice(ua_list)

    @property
    def header(self):
        """
        basic header
        :return: a new dict on every access, with a randomly chosen User-Agent
        """
        return {
            'User-Agent': self.user_agent,
            'Accept': '*/*',
            'Connection': 'keep-alive',
            'Accept-Language': 'zh-CN,zh;q=0.8'
        }

    def get(self,
            url,
            header=None,
            retry_time=3,
            retry_interval=5,
            timeout=5,
            *args,
            **kwargs):
        """
        get method

        On success the response is stored in ``self.response``. When every
        attempt fails, ``self.response`` is reset to a blank ``Response`` so
        that a result kept from an earlier call is never mistaken for the
        result of this one.
        :param url: target url
        :param header: headers (a dict merged over the basic header)
        :param retry_time: retry time
        :param retry_interval: retry interval
        :param timeout: network timeout
        :return: self, so calls can be chained with ``tree`` / ``text`` / ``json``
        """
        headers = self.header
        if header and isinstance(header, dict):
            headers.update(header)
        while True:
            try:
                self.response = requests.get(url,
                                             headers=headers,
                                             timeout=timeout,
                                             *args,
                                             **kwargs)
                return self
            except Exception as e:
                self.log.error("requests: %s error: %s" % (url, str(e)))
                retry_time -= 1
                if retry_time <= 0:
                    # Out of retries: drop whatever an earlier request left
                    # behind instead of returning it as if it were fresh.
                    self.response = Response()
                    return self
                self.log.info("retry %s second after" % retry_interval)
                time.sleep(retry_interval)

    @property
    def tree(self):
        """Parse the response content with ``etree.HTML``."""
        return etree.HTML(self.response.content)

    @property
    def text(self):
        """Return the response body as text."""
        return self.response.text

    @property
    def json(self):
        """Return the decoded JSON body, or ``{}`` (after logging) if decoding fails."""
        try:
            return self.response.json()
        except Exception as e:
            self.log.error(str(e))
            return {}
예제 #12
0
def testLogHandler():
    """Emit one info record and one error record through a LogHandler named 'test'."""
    logger = LogHandler('test')
    logger.info('this is info')
    logger.error('this is error')
예제 #13
0
 def __init__(self):
     """Create the logger (named by ``self.name``) and the config handler."""
     self.log = LogHandler(self.name)
     self.conf = ConfigHandler()
예제 #14
0
   Description :   启动器
   Author :        JHao
   date:          2021/3/26
-------------------------------------------------
   Change Activity:
                   2021/3/26: 启动器
-------------------------------------------------
"""
__author__ = 'JHao'

import sys
from db.dbClient import DbClient
from handler.logHandler import LogHandler
from handler.configHandler import ConfigHandler

# Module-level logger shared by the launcher functions.
log = LogHandler('launcher')


def startServer():
    """Run the pre-start step, then call ``runFlask`` from ``api.proxyApi``."""
    __beforeStart()
    # Imported only after the pre-start step has completed.
    from api import proxyApi
    proxyApi.runFlask()


def startScheduler():
    """Run the pre-start step, then call ``runScheduler`` from ``helper.scheduler``."""
    __beforeStart()
    # Imported only after the pre-start step has completed.
    from helper import scheduler
    scheduler.runScheduler()


def __beforeStart():
예제 #15
0
 def setUp(self):
     """Build the verifier, the handler list and the Route fixture for each test."""
     self.verifier = IcmpRouteVerifier()
     self.handlers = [LogHandler()]
     targets = ['8.8.8.8', '8.8.4.4']
     verifierKwargs = {'timeout':2, 'maxRetry':3}
     self.route = Route('0.0.0.0', '192.168.1.1', '0.0.0.0', 'eth0',
                        targets, 10, self.verifier, verifierKwargs,
                        self.handlers, [{}])
예제 #16
0
 def testCreateRoutesFromProperties(self):
     """Check the routes that createRoutesFromProperties builds from a flat property dict.

     The fully specified 'primary' route, the per-target routes derived from
     it, and the minimally specified 'fona' route are compared field by field.
     """
     properties = {
         'route.primary.gateway': '192.168.1.1',
         'route.primary.iface': 'eth0',
         'route.primary.targets': '8.8.8.8,8.8.4.4',
         'route.primary.verifierDelay': '10',
         'route.primary.verifier': '../resources/icmpRouteVerifier.pkl',
         'route.primary.verifierKwargs': "{'timeout':1, 'maxRetry':2}",
         'route.primary.handlers': '["../resources/logHandler.pkl","../resources/logHandler.pkl"]',
         'route.primary.handlerKwargs': "[{},{}]",
         'route.fona.gateway': '0.0.0.0',
         'route.fona.iface': 'ppp0',
     }

     routes = createRoutesFromProperties(properties)
     assert 2 == len(routes)

     # Fully specified route.
     primary = routes[0]
     assert '0.0.0.0' == primary.destination
     assert '192.168.1.1' == primary.gateway
     assert '0.0.0.0' == primary.genmask
     assert 'eth0' == primary.iface
     assert ['8.8.8.8', '8.8.4.4'] == primary.targets
     assert 10 == primary.verifierDelay
     assert IcmpRouteVerifier() == primary.verifier
     assert {'timeout':1, 'maxRetry':2} == primary.verifierKwargs
     assert [LogHandler(), LogHandler()] == primary.handlers
     assert [{},{}] == primary.handlerKwargs

     targetRoutes = primary.getTargetRoutes()
     assert 2 == len(targetRoutes)

     # Each target route is expected to reuse the primary gateway and iface,
     # carry a 255.255.255.255 genmask, and have no verifier or handlers.
     for position, expectedDestination in enumerate(['8.8.8.8', '8.8.4.4']):
         targetRoute = targetRoutes[position]
         assert expectedDestination == targetRoute.destination
         assert '192.168.1.1' == targetRoute.gateway
         assert '255.255.255.255' == targetRoute.genmask
         assert 'eth0' == targetRoute.iface
         assert 0 == len(targetRoute.targets)
         assert 0 == targetRoute.verifierDelay
         assert None == targetRoute.verifier
         assert {} == targetRoute.verifierKwargs
         assert [] == targetRoute.handlers
         assert [] == targetRoute.handlerKwargs

     # Route given only a gateway and an iface.
     fona = routes[1]
     assert '0.0.0.0' == fona.destination
     assert '0.0.0.0' == fona.gateway
     assert '0.0.0.0' == fona.genmask
     assert 'ppp0' == fona.iface
     assert 0 == len(fona.targets)
     assert 0 == fona.verifierDelay
     assert None == fona.verifier
     assert {} == fona.verifierKwargs
     assert [] == fona.handlers
     assert [] == fona.handlerKwargs
예제 #17
0
class WebRequest(object):
    """Small wrapper around ``requests.get`` with retries, optional proxies and parsing helpers."""
    name = "web_request"

    def __init__(self, *args, **kwargs):
        """Create the logger and start with a blank ``Response``."""
        self.log = LogHandler(self.name, file=False)
        self.response = Response()

    @property
    def user_agent(self):
        """
        return an User-Agent at random
        :return:
        """
        ua_list = [
            'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/30.0.1599.101',
            'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.122',
            'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.71',
            'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95',
            'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/21.0.1180.71',
            'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; QQDownload 732; .NET4.0C; .NET4.0E)',
            'Mozilla/5.0 (Windows NT 5.1; U; en; rv:1.8.1) Gecko/20061208 Firefox/2.0.0 Opera 9.50',
            'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:34.0) Gecko/20100101 Firefox/34.0',
            "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36",
            "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2227.1 Safari/537.36",
            "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2227.0 Safari/537.36",
            "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2227.0 Safari/537.36",
            "Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2226.0 Safari/537.36",
            "Mozilla/5.0 (Windows NT 6.4; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2225.0 Safari/537.36",
            "Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2225.0 Safari/537.36",
            "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2224.3 Safari/537.36",
            "Mozilla/5.0 (Windows NT 10.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/40.0.2214.93 Safari/537.36",
            "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/37.0.2062.124 Safari/537.36",
            "Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/37.0.2049.0 Safari/537.36",
            "Mozilla/5.0 (Windows NT 4.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/37.0.2049.0 Safari/537.36",
            "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/36.0.1985.67 Safari/537.36",
            "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/36.0.1985.67 Safari/537.36",
            "Mozilla/5.0 (X11; OpenBSD i386) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/36.0.1985.125 Safari/537.36",
            "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/36.0.1944.0 Safari/537.36",
            "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.3319.102 Safari/537.36",
            "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.2309.372 Safari/537.36",
            "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.2117.157 Safari/537.36",
            "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.47 Safari/537.36",
        ]
        return random.choice(ua_list)

    @property
    def header(self):
        """
        basic header
        :return: a new dict on every access, with a randomly chosen User-Agent
        """
        return {
            'User-Agent': self.user_agent,
            'Accept': '*/*',
            'Connection': 'keep-alive',
            'Accept-Language': 'zh-CN,zh;q=0.9'
        }

    def get(self,
            url,
            proxies=None,
            retry_time=3,
            retry_interval=3,
            timeout=5,
            *args,
            **kwargs):
        """
        get method

        On success the response is stored in ``self.response``. When every
        attempt fails, ``self.response`` becomes a new blank ``Response`` with
        ``status_code`` 500, so neither the body nor the object of an earlier
        successful request is reused or modified.
        :param url: target url
        :param proxies: proxies mapping passed straight to ``requests.get``
        :param retry_time: retry time
        :param retry_interval: retry interval
        :param timeout: network timeout
        :return: self, so calls can be chained with ``tree`` / ``text``
        """
        while True:
            try:
                self.response = requests.get(url,
                                             proxies=proxies,
                                             headers=self.header,
                                             timeout=timeout,
                                             *args,
                                             **kwargs)
                return self
            except Exception as e:
                self.log.error("requests: %s error: %s" % (url, str(e)))
                retry_time -= 1
                if retry_time <= 0:
                    # Mark the failure on a fresh response; flipping the status
                    # of the previous one would leave its old body readable.
                    failed = Response()
                    failed.status_code = 500
                    self.response = failed
                    return self
                self.log.info("retry %s second after" % retry_interval)
                time.sleep(retry_interval)

    @property
    def tree(self):
        """Parse the content with ``etree.HTML``; returns None unless the status code is 200."""
        if self.response.status_code == 200:
            return etree.HTML(self.response.content)

    @property
    def text(self):
        """Return the response body as text."""
        return self.response.text
예제 #18
0
import requests
import random
import re
from re import findall
from urllib.parse import urlparse
from handler.configHandler import ConfigHandler
from handler.logHandler import LogHandler
from helper.proxy import Proxy
from setting import VERIFY_URL
from util.webRequest import WebRequest

conf = ConfigHandler()
# Registry of validator functions; filled by the ``validator`` decorator.
validators = []

logger = LogHandler("validators")


def validator(func):
    """Decorator: append ``func`` to the module-level ``validators`` list and return it unchanged."""
    validators.append(func)
    return func


@validator
def formatValidator(proxy):
    """
    Check the proxy format.

    NOTE(review): no check is performed yet; every proxy is accepted.
    :param proxy: the proxy to check (currently unused)
    :return: always True
    """
    return True
예제 #19
0
class _ThreadChecker(Thread):
    """ 多线程检测 """
    def __init__(self, work_type, target_queue, thread_name):
        Thread.__init__(self, name=thread_name)
        self.work_type = work_type
        self.log = LogHandler("checker")
        self.proxy_handler = ProxyHandler()
        self.target_queue = target_queue
        self.conf = ConfigHandler()

    def run(self):
        self.log.info("{}ProxyCheck - {}: start".format(
            self.work_type.title(), self.name))
        while True:
            try:
                proxy = self.target_queue.get(block=False)
            except Empty:
                self.log.info("{}ProxyCheck - {}: complete".format(
                    self.work_type.title(), self.name))
                break
            proxy = DoValidator.validator(proxy, self.work_type)
            if self.work_type == "raw":
                self.__ifRaw(proxy)
            else:
                self.__ifUse(proxy)
            self.target_queue.task_done()

    def __ifRaw(self, proxy):
        if proxy.last_status:
            if self.proxy_handler.exists(proxy):
                self.log.info('RawProxyCheck - {}: {} exist'.format(
                    self.name, proxy.proxy.ljust(23)))
            else:
                self.log.info('RawProxyCheck - {}: {} pass'.format(
                    self.name, proxy.proxy.ljust(23)))
                self.proxy_handler.put(proxy)
        else:
            self.log.info('RawProxyCheck - {}: {} fail'.format(
                self.name, proxy.proxy.ljust(23)))

    def __ifUse(self, proxy):
        if proxy.last_status:
            self.log.info('UseProxyCheck - {}: {} pass'.format(
                self.name, proxy.proxy.ljust(23)))
            self.proxy_handler.put(proxy)
        else:
            if proxy.fail_count > self.conf.maxFailCount:
                self.log.info(
                    'UseProxyCheck - {}: {} fail, count {} delete'.format(
                        self.name, proxy.proxy.ljust(23), proxy.fail_count))
                self.proxy_handler.delete(proxy)
            else:
                self.log.info(
                    'UseProxyCheck - {}: {} fail, count {} keep'.format(
                        self.name, proxy.proxy.ljust(23), proxy.fail_count))
                self.proxy_handler.put(proxy)
예제 #20
0
파일: check.py 프로젝트: bhlt/proxy_pool
class Checker(Thread):
    """
    Worker thread that checks whether queued proxies are usable.
    """
    def __init__(self, check_type, queue, thread_name):
        """
        :param check_type: "raw" selects the raw-proxy handling, anything else the stored-proxy handling
        :param queue: queue of serialized proxies to check
        :param thread_name: passed to ``Thread.__init__`` as the thread name
        """
        Thread.__init__(self, name=thread_name)
        self.type, self.queue = check_type, queue
        self.log = LogHandler("checker")
        self.proxy_handler = ProxyHandler()
        self.conf = ConfigHandler()

    def run(self):
        """Drain the queue without blocking; stop as soon as it is empty."""
        self.log.info("ProxyCheck - {}  : start".format(self.name))
        while True:
            try:
                proxy_json = self.queue.get(block=False)
            except Empty:
                self.log.info("ProxyCheck - {}  : complete".format(self.name))
                return

            proxy = proxyCheck(Proxy.createFromJson(proxy_json))
            if self.type == "raw":
                self._settleRaw(proxy)
            else:
                self._settleStored(proxy)
            self.queue.task_done()

    def _settleRaw(self, proxy):
        """Store a raw proxy that passed the check unless the store already has it."""
        if not proxy.last_status:
            self.log.info('ProxyCheck - {}  : {} fail'.format(
                self.name, proxy.proxy.ljust(23)))
            return
        if self.proxy_handler.exists(proxy):
            self.log.info('ProxyCheck - {}  : {} exists'.format(
                self.name, proxy.proxy.ljust(23)))
            return
        self.log.info('ProxyCheck - {}  : {} success'.format(
            self.name, proxy.proxy.ljust(23)))
        self.proxy_handler.put(proxy)

    def _settleStored(self, proxy):
        """Update a stored proxy, or delete it once its fail count exceeds ``conf.maxFailCount``."""
        if proxy.last_status:
            self.log.info('ProxyCheck - {}  : {} pass'.format(
                self.name, proxy.proxy.ljust(23)))
            self.proxy_handler.update(proxy)
            return
        if proxy.fail_count > self.conf.maxFailCount:
            self.log.info(
                'ProxyCheck - {}  : {} fail, count {} delete'.
                format(self.name, proxy.proxy.ljust(23),
                       proxy.fail_count))
            self.proxy_handler.delete(proxy)
            return
        self.log.info(
            'ProxyCheck - {}  : {} fail, count {} keep'.format(
                self.name, proxy.proxy.ljust(23),
                proxy.fail_count))
        self.proxy_handler.update(proxy)
예제 #21
0
 def __init__(self):
     """Create the logger (named by ``self.name``), the config handler and the proxy handler."""
     self.log = LogHandler(self.name)
     self.conf = ConfigHandler()
     self.proxy_handler = ProxyHandler()
예제 #22
0
import os, sys
from serializationUtil import serialize
from verifier.icmpRouteVerifier import IcmpRouteVerifier
from handler.httpHandler import HttpHandler
from handler.fonaRaspberryPiHandler import FonaRaspberryPiHandler
from handler.logHandler import LogHandler

# Output directory for the serialized objects (first command-line argument).
# Named ``target_dir`` so the builtin ``dir`` is not shadowed.
target_dir = sys.argv[1]
if not os.path.exists(target_dir):
    os.makedirs(target_dir)

# Remove .pkl files left over from a previous run before writing new ones.
for stale_name in [f for f in os.listdir(target_dir) if f.endswith(".pkl")]:
    os.remove(os.path.join(target_dir, stale_name))

# Each object is created right before it is serialized, in this order.
for factory, filename in (
        (IcmpRouteVerifier, 'icmpRouteVerifier.pkl'),
        (HttpHandler, 'httpHandler.pkl'),
        (FonaRaspberryPiHandler, 'fonaRaspberryPiHandler.pkl'),
        (LogHandler, 'logHandler.pkl'),
):
    serialize(factory(), os.path.join(target_dir, filename))
예제 #23
0
class Fetcher(object):
    """Collects proxies from the configured fetchers and fans each one out per verify tag."""
    name = "fetcher"

    def __init__(self):
        """Create the logger (named by ``self.name``), config handler and proxy handler."""
        self.log = LogHandler(self.name)
        self.conf = ConfigHandler()
        self.proxy_handler = ProxyHandler()

    def fetch(self):
        """
        fetch proxy into db with proxyFetcher

        Each name in ``self.conf.fetchers`` is looked up on ``ProxyFetcher`` and
        called. A proxy whose ``url`` was already collected is reported and
        skipped; every new proxy is deep-copied once per key of ``VERIFY_URL``
        with ``tag`` set to that key. An exception raised by one fetcher is
        logged and the remaining fetchers still run.
        :return: set of tagged proxy copies
        """
        proxy_set = set()
        url_set = set()
        self.log.info("ProxyFetch : start")
        for fetch_name in self.conf.fetchers:
            self.log.info("ProxyFetch - {func}: start".format(func=fetch_name))
            fetcher = getattr(ProxyFetcher, fetch_name, None)
            if not fetcher:
                self.log.error("ProxyFetch - {func}: class method not exists!".format(func=fetch_name))
                continue
            if not callable(fetcher):
                self.log.error("ProxyFetch - {func}: must be class method".format(func=fetch_name))
                continue

            try:
                for proxy in fetcher():
                    if proxy.url in url_set:
                        self.log.info(
                            f'ProxyFetch - {fetch_name}: {proxy.url} exist')
                        continue
                    # Remember the url so later duplicates are detected.
                    url_set.add(proxy.url)
                    self.log.info(
                        f'ProxyFetch - {fetch_name}: {proxy.url} success')
                    for tag in VERIFY_URL.keys():
                        p = deepcopy(proxy)
                        p.tag = tag
                        proxy_set.add(p)
            except Exception as e:
                self.log.error(
                    "ProxyFetch - {func}: error".format(func=fetch_name))
                self.log.error(str(e))
        self.log.info("ProxyFetch - all complete!")
        return proxy_set
예제 #24
0
 def __init__(self, *args, **kwargs):
     """Create the logger (``file=False``) and start with a blank ``Response``.

     ``*args`` and ``**kwargs`` are accepted but not used here.
     """
     self.log = LogHandler(self.name, file=False)
     self.response = Response()
예제 #25
0
파일: fetch.py 프로젝트: ysctest/proxy_pool
class Fetcher(object):
    """Runs the configured fetchers one after another and yields the proxies that pre-validate."""
    name = "fetcher"

    def __init__(self):
        """Create the logger (named by ``self.name``), config handler and proxy handler."""
        self.log = LogHandler(self.name)
        self.conf = ConfigHandler()
        self.proxy_handler = ProxyHandler()

    def run(self):
        """
        fetch proxy with proxyFetcher

        Every name in ``self.conf.fetchers`` is resolved on ``ProxyFetcher``;
        unusable names are logged and skipped. Results of all sources are
        merged by stripped proxy string before being pre-validated.
        :return: generator of proxy objects accepted by ``DoValidator.preValidator``
        """
        proxy_dict = dict()
        self.log.info("ProxyFetch : start")
        for fetch_source in self.conf.fetchers:
            self.log.info(
                "ProxyFetch - {func}: start".format(func=fetch_source))
            fetcher = getattr(ProxyFetcher, fetch_source, None)

            # Pick the complaint (if any) first, then report it in one place.
            complaint = None
            if not fetcher:
                complaint = "ProxyFetch - {func}: class method not exists!"
            elif not callable(fetcher):
                complaint = "ProxyFetch - {func}: must be class method"
            if complaint is not None:
                self.log.error(complaint.format(func=fetch_source))
                continue

            self._collect(fetch_source, fetcher, proxy_dict)

        self.log.info("ProxyFetch - all complete!")
        for candidate in proxy_dict.values():
            if DoValidator.preValidator(candidate.proxy):
                yield candidate

    def _collect(self, fetch_source, fetcher, proxy_dict):
        """Merge everything ``fetcher`` yields into ``proxy_dict``; log, never raise, on failure."""
        try:
            for raw in fetcher():
                self.log.info('ProxyFetch - %s: %s ok' %
                              (fetch_source, raw.ljust(23)))
                address = raw.strip()
                if address not in proxy_dict:
                    proxy_dict[address] = Proxy(address, source=fetch_source)
                    continue
                proxy_dict[address].add_source(fetch_source)
        except Exception as e:
            self.log.error(
                "ProxyFetch - {func}: error".format(func=fetch_source))
            self.log.error(str(e))