Esempio n. 1
0
class ProxyManager(object):
    """Facade over the two proxy pools held by ``RedisClient``.

    ``config.dbName`` supplies two pool names. The first (raw pool) is
    written with ``lput`` and drained with ``rpop``; the second (useful
    pool) is accessed through the client's ``s*`` methods.
    """

    def __init__(self):
        self.logger = LogHandler('ProxyManger')
        self.dbClient = RedisClient()
        self.config = GetConfig()
        # dbName is unpacked as (raw pool name, useful pool name).
        self.orignal_proxy_name, self.useful_proxy_name = self.config.dbName

    def refresh(self):
        """Run every configured getter and push its proxies into the raw pool.

        Each entry of ``config.proxyGetter`` names a callable on
        ``ProxyGetter``; falsy items yielded by a getter are skipped.
        """
        for proxyGetFunc in self.config.proxyGetter:
            # Config entries may carry surrounding whitespace.
            fetch = getattr(ProxyGetter, proxyGetFunc.strip())
            for proxy in fetch():
                if proxy:
                    self.logger.info("{func} fetch proxy {proxy}".format(func=proxyGetFunc, proxy=proxy))
                    self.dbClient.lput(self.orignal_proxy_name, proxy)

    def get(self):
        """Return one proxy from the useful pool without removing it.

        :return: one useful proxy, or ``None`` when the pool has nothing to
            offer (previously an empty result raised on the ``[0]`` lookup).
        """
        candidates = self.dbClient.sgetOne(self.useful_proxy_name)
        if not candidates:
            return None
        return candidates[0]

    def getAll(self):
        """Return whatever ``sgetAll`` reports for the useful pool."""
        return self.dbClient.sgetAll(self.useful_proxy_name)

    def spop(self):
        """Take a proxy out of the useful pool and return it.

        :return: the value returned by the client's ``spop``.
        """
        return self.dbClient.spop(self.useful_proxy_name)

    def pop(self):
        """Take a proxy out of the raw pool and return it.

        :return: one original (not yet validated) proxy.
        """
        return self.dbClient.rpop(self.orignal_proxy_name)

    def put(self, value):
        """Save a validated proxy into the useful pool.

        :param value: the proxy to store.
        :return: None
        """
        self.dbClient.sput(self.useful_proxy_name, value)

    def delete(self, value):
        """Remove a proxy from the useful pool.

        :param value: the proxy to remove.
        :return: None
        """
        self.dbClient.sdeleteValue(self.useful_proxy_name, value)

    def getStatus(self):
        """Return the status information reported by the client."""
        return self.dbClient.sgetStatues()
Esempio n. 2
0
class ProxyValidSchedule(ProxyManager):
    """Re-checks the proxies currently stored in the useful pool."""

    def __init__(self):
        super(ProxyValidSchedule, self).__init__()
        # Replace the parent's logger with one dedicated to this schedule.
        self.logger = LogHandler("valid_schedule")

    def valid_Proxy(self):
        """Drain the useful pool, validate each entry, then write back.

        Every proxy is popped from the useful pool and passed to
        ``validProxy``. Those that pass are appended to a list that already
        holds a fixed set of seed addresses; once the pool is drained the
        whole list is put back into the useful pool.
        """
        candidate = self.spop()
        # The seed addresses are written back without being validated here.
        survivors = [
            "115.186.179.217:53281", "125.62.193.18:53281",
            "115.220.148.137:808", "118.117.136.19:9000", "85.185.111.121:80",
            "176.237.139.8:8080", "125.89.123.224:808", "60.167.132.226:808",
            "180.118.241.200:808"
        ]
        self.logger.info("*************Start Valid proxies************* ")
        while candidate:
            # The client may hand back bytes; validation expects text.
            address = candidate.decode("utf-8") if isinstance(candidate, bytes) else candidate
            if not validProxy(address):
                self.logger.info("{0} validation failed".format(address))
            else:
                survivors.append(address)
                self.logger.info("{0} validation pass".format(address))
            candidate = self.spop()
        self.logger.info(
            "##############Valid proxies is complete ################ ")
        for address in survivors:
            self.put(address)
Esempio n. 3
0
class ProxyRefreshSchedule(ProxyManager):
    """Moves proxies from the raw pool into the useful pool after validation."""

    def __init__(self):
        ProxyManager.__init__(self)
        # Replace the parent's logger with one dedicated to this schedule.
        self.logger = LogHandler("refresh_shedule")

    def valid_Proxy(self):
        """Validate every proxy in the raw pool and keep the working ones.

        Proxies are popped from the raw pool one at a time until it yields a
        falsy value. Each one is passed to ``validProxy``; those that pass
        are put into the useful pool, the rest are dropped.

        :return: None
        """
        proxy = self.pop()
        self.logger.info("*************Start Valid proxies************* ")
        while proxy:
            # Decode only when the client returned bytes. An unconditional
            # .decode() raises AttributeError on str values and makes the
            # log lines show the bytes repr (b'...').
            if isinstance(proxy, bytes):
                proxy = proxy.decode("utf-8")
            if validProxy(proxy):
                self.put(proxy)
                self.logger.info("{0} validation pass".format(proxy))
            else:
                self.logger.info("{0} validation failed".format(proxy))
            proxy = self.pop()
        self.logger.info("##############Valid proxies is complete ################ ")
Esempio n. 4
0
from Util.logHandler import LogHandler
import requests
from selenium import webdriver
from lxml import etree
logger = LogHandler(__name__)


def validProxy(proxy):
    proxies = {
        'https': 'https://' + proxy,
        'http': 'http://' + proxy,
    }
    headers = {
        'Connection': 'keep-alive',
        'Upgrade-Insecure-Requests': '1',
        'User-Agent':
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_3) AppleWebKit/537.36 (KHTML, like Gecko)',
        'Accept':
        'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
        'Accept-Encoding': 'gzip, deflate, sdch',
        'Accept-Language': 'zh-CN,zh;q=0.8',
    }
    try:
        response = requests.get("https://www.baidu.com",
                                headers=headers,
                                proxies=proxies,
                                timeout=20,
                                verify=False)
        if response.status_code == 200:
            logger.info("{proxy} is ok!".format(proxy=proxy))
            return True
Esempio n. 5
0
 def __init__(self):
     """Run the parent initialiser, then install the "valid_schedule" logger."""
     super(ProxyValidSchedule, self).__init__()
     schedule_logger = LogHandler("valid_schedule")
     self.logger = schedule_logger
Esempio n. 6
0
 def __init__(self):
     """Run ``ProxyManager.__init__``, then install the "refresh_shedule" logger."""
     ProxyManager.__init__(self)
     schedule_logger = LogHandler("refresh_shedule")
     self.logger = schedule_logger
Esempio n. 7
0
 def __init__(self):
     """Create the logger, database client and config, then read the pool names."""
     self.logger = LogHandler('ProxyManger')
     self.dbClient = RedisClient()
     self.config = GetConfig()
     # dbName is unpacked as (raw pool name, useful pool name).
     pool_names = self.config.dbName
     self.orignal_proxy_name, self.useful_proxy_name = pool_names