예제 #1
0
파일: tester.py 프로젝트: JanMCHEN/website
class Tester(object):
    """Checks stored proxies against every URL in ``TEST_URL``."""

    def __init__(self):
        self.redis = RedisClient()

    async def test_single_proxy(self, proxy):
        """
        Request each URL in ``TEST_URL`` through a single proxy.

        A response whose status is in ``VALID_STATUS_CODES`` is reported with
        ``self.redis.max`` (tagged with the URL's scheme); any other status,
        or a failed request, is reported with ``self.redis.decrease``.

        :param proxy: proxy address without scheme, as ``str`` or UTF-8 ``bytes``
        :return: None
        """
        conn = aiohttp.TCPConnector(verify_ssl=False)
        async with aiohttp.ClientSession(connector=conn) as session:
            try:
                if isinstance(proxy, bytes):
                    proxy = proxy.decode('utf-8')
                print('正在测试', proxy)
                # The proxy URL does not depend on the target URL, so build
                # it once instead of on every iteration.
                real_proxy = 'http://' + proxy
                for url in TEST_URL:
                    async with session.get(url,
                                           proxy=real_proxy,
                                           timeout=15,
                                           allow_redirects=False) as response:
                        if response.status in VALID_STATUS_CODES:
                            # A URL that does not start with http/https makes
                            # re.match return None; the resulting
                            # AttributeError is handled below.
                            scheme = re.match(r'http[s]?', url).group()
                            self.redis.max(proxy, http=scheme)
                            print('代理可用', proxy)
                        else:
                            self.redis.decrease(proxy)
                            print('请求响应码不合法 ', response.status, 'IP', proxy)
            except (ClientError,
                    aiohttp.client_exceptions.ClientConnectorError,
                    asyncio.TimeoutError, AttributeError):
                # The first failing URL ends the check for this proxy.
                self.redis.decrease(proxy)
                print('代理请求失败', proxy)

    def run(self):
        """
        Test every stored proxy in batches of ``BATCH_TEST_SIZE``.

        Any exception raised while batching is printed, not propagated.

        :return: None
        """
        print('测试器开始运行')
        try:
            count = self.redis.count()
            print('当前剩余', count, '个代理')
            for i in range(0, count, BATCH_TEST_SIZE):
                start = i
                stop = min(i + BATCH_TEST_SIZE, count)
                print('正在测试第', start + 1, '-', stop, '个代理')
                test_proxies = self.redis.batch(start, stop)
                loop = asyncio.get_event_loop()
                # asyncio.wait() no longer accepts bare coroutines (TypeError
                # on Python 3.11+) and raises ValueError for an empty
                # collection, so wrap each check in a Task and skip empty
                # batches.
                tasks = [
                    loop.create_task(self.test_single_proxy(proxy))
                    for proxy in test_proxies
                ]
                if not tasks:
                    continue
                loop.run_until_complete(asyncio.wait(tasks))
                sys.stdout.flush()
                time.sleep(5)
        except Exception as e:
            print('测试器发生错误', e.args)
예제 #2
0
class Tester(object):
    """Checks stored proxies by fetching ``TEST_URL`` through each of them."""

    def __init__(self):
        self.redis = RedisClient()

    async def test_single_proxy(self, proxy):
        '''
        Fetch ``TEST_URL`` through a single proxy and record the outcome.

        A status in ``VALID_STATUS_CODES`` is reported with
        ``self.redis.max``; any other status or a failed request is reported
        with ``self.redis.decrease``.

        :param proxy: proxy address without scheme, as ``str`` or UTF-8 ``bytes``
        :return: None
        '''
        # One aiohttp ClientSession per check; it is closed on exit.
        async with aiohttp.ClientSession() as session:
            try:
                if isinstance(proxy, bytes):
                    proxy = proxy.decode('utf-8')
                real_proxy = 'http://' + proxy
                print('正在测试', proxy)
                async with session.get(TEST_URL,
                                       proxy=real_proxy,
                                       verify_ssl=False,
                                       timeout=60,
                                       allow_redirects=False) as response:
                    if response.status in VALID_STATUS_CODES:
                        self.redis.max(proxy)
                        print('代理可用', proxy)
                    else:
                        self.redis.decrease(proxy)
                        print('请求响应码不合法', proxy)
            except (ClientError,
                    aiohttp.client_exceptions.ClientProxyConnectionError,
                    asyncio.TimeoutError, AttributeError) as e:
                print(e.args)
                self.redis.decrease(proxy)
                print('代理请求失败', proxy)

    def run(self):
        '''
        Test every stored proxy in batches of ``BATCH_TEST_SIZE``.

        Any exception raised while batching is printed, not propagated.

        :return: None
        '''
        print('测试器开始运行')
        try:
            count = self.redis.count()
            print('当前剩余', count, '个代理')
            for i in range(0, count, BATCH_TEST_SIZE):
                start = i
                stop = min(i + BATCH_TEST_SIZE, count)
                print('正在测试第', start + 1, '-', stop, '个代理')
                test_proxies = self.redis.batch(start, stop)
                loop = asyncio.get_event_loop()
                # Test the proxies of this batch concurrently.
                # asyncio.wait() no longer accepts bare coroutines (TypeError
                # on Python 3.11+) and raises ValueError for an empty
                # collection, so wrap each check in a Task and skip empty
                # batches.
                tasks = [
                    loop.create_task(self.test_single_proxy(proxy))
                    for proxy in test_proxies
                ]
                if not tasks:
                    continue
                loop.run_until_complete(asyncio.wait(tasks))
                # Flush so progress output appears after every batch.
                sys.stdout.flush()
                time.sleep(5)
        except Exception as e:
            print('测试台发生错误', e.args)
예제 #3
0
파일: tester.py 프로젝트: shuanx/IP_TOOL
class Tester(object):
    """Checks whether the stored proxies are usable."""

    def __init__(self):
        self.redis = RedisClient()

    async def test_single_proxy(self, proxy):
        """
        Fetch ``test_url`` through a single proxy and record the outcome.

        A status in ``VALID_STATUS_CODES`` is reported with
        ``self.redis.max``; any other status or any exception is reported
        with ``self.redis.decrease``.

        :param proxy: proxy address without scheme, as ``str`` or UTF-8 ``bytes``
        :return: None
        """
        # Certificate verification is disabled to avoid SSL errors.
        conn = aiohttp.TCPConnector(verify_ssl=False)
        async with aiohttp.ClientSession(connector=conn) as session:
            try:
                if isinstance(proxy, bytes):
                    proxy = proxy.decode('utf-8')
                real_proxy = "http://" + proxy
                print("正在尝试:", real_proxy)
                async with session.get(test_url, proxy=real_proxy,
                                       timeout=15) as response:
                    if response.status in VALID_STATUS_CODES:
                        self.redis.max(proxy)
                        print("代理可用", proxy)
                    else:
                        self.redis.decrease(proxy)
                        print("请求响应不合法", proxy)
            except Exception as e:
                self.redis.decrease(proxy)
                print("代理请求失败", proxy, e.args)

    def run(self):
        """
        Test every stored proxy in batches of ``BATCH_TEST_SIZE``.

        Any exception raised while batching is printed, not propagated.

        :return: None
        """
        print('测试器开始运行')
        try:
            count = self.redis.count()
            print('当前剩余', count, '个代理')
            for i in range(0, count, BATCH_TEST_SIZE):
                start = i
                stop = min(i + BATCH_TEST_SIZE, count)
                print('正在测试第', start + 1, '-', stop, '个代理')
                test_proxies = self.redis.batch(start, stop)
                loop = asyncio.get_event_loop()
                # asyncio.wait() no longer accepts bare coroutines (TypeError
                # on Python 3.11+) and raises ValueError for an empty
                # collection, so wrap each check in a Task and skip empty
                # batches.
                tasks = [
                    loop.create_task(self.test_single_proxy(proxy))
                    for proxy in test_proxies
                ]
                if not tasks:
                    continue
                loop.run_until_complete(asyncio.wait(tasks))
                sys.stdout.flush()
                time.sleep(5)
        except Exception as e:
            print('测试器发生错误', e.args)
예제 #4
0
파일: tester.py 프로젝트: ZZShi/proxy_pool
class Tester(object):
    """Checks stored proxies by fetching ``TEST_URL`` through each of them."""

    def __init__(self):
        self.db = RedisClient()

    async def test_single_proxy(self, proxy):
        """
        Test one proxy with the asynchronous HTTP client aiohttp.

        A 200 response is reported with ``self.db.max``; any other status or
        any exception is reported with ``self.db.decrease``.

        :param proxy: proxy address without scheme, as ``str`` or UTF-8 ``bytes``
        :return: None
        """
        conn = aiohttp.TCPConnector(verify_ssl=False)
        async with aiohttp.ClientSession(connector=conn) as session:
            try:
                if isinstance(proxy, bytes):
                    proxy = proxy.decode('utf-8')
                real_proxy = 'http://' + proxy
                print('正在测试', proxy)
                async with session.get(TEST_URL,
                                       proxy=real_proxy,
                                       timeout=15,
                                       allow_redirects=False) as r:
                    if r.status == 200:
                        self.db.max(proxy)
                        print('代理可用:', proxy)
                    else:
                        # This branch used to call self.db.exists(proxy) and
                        # discard the result, so a proxy answering with a bad
                        # status was never penalised.
                        self.db.decrease(proxy)
                        print('状态码不合法:', proxy)
            except Exception:
                self.db.decrease(proxy)
                print('代理请求异常:', proxy)

    def run(self):
        """
        Test every stored proxy in batches of ``BATCH_TEST_SIZE``.

        Any exception raised while batching is printed, not propagated.

        :return: None
        """
        try:
            count = self.db.count()
            print('当前剩余{}个代理'.format(count))
            for i in range(0, count, BATCH_TEST_SIZE):
                start = i
                stop = min(i + BATCH_TEST_SIZE, count)
                test_proxies = self.db.batch(start, stop)
                loop = asyncio.get_event_loop()
                # asyncio.wait() no longer accepts bare coroutines (TypeError
                # on Python 3.11+) and raises ValueError for an empty
                # collection, so wrap each check in a Task and skip empty
                # batches.
                tasks = [
                    loop.create_task(self.test_single_proxy(proxy))
                    for proxy in test_proxies
                ]
                if not tasks:
                    continue
                loop.run_until_complete(asyncio.wait(tasks))
                sys.stdout.flush()  # flush buffered progress output
                time.sleep(5)
        except Exception as e:
            print('测试器发生错误', e.args)
예제 #5
0
class Tester(object):
    """Checks stored proxies by fetching ``TEST_URL`` through each of them."""

    def __init__(self):
        self.redis = RedisClient()

    async def single_proxy(self, proxy):
        """
        Fetch ``TEST_URL`` through a single proxy and record the outcome.

        A 200 response is reported with ``self.redis.max``; any other status
        or a failed request is reported with ``self.redis.decrease``.

        :param proxy: proxy address without scheme, as ``str`` or UTF-8 ``bytes``
        :return: None
        """
        async with aiohttp.ClientSession() as session:
            try:
                # Without decoding, a bytes value would make the
                # concatenation below raise a TypeError that the except
                # clause does not cover.
                if isinstance(proxy, bytes):
                    proxy = proxy.decode('utf-8')
                # NOTE(review): the other testers use an 'http://' proxy URL;
                # confirm the installed aiohttp accepts 'https://' proxies.
                real_proxy = 'https://' + proxy
                print('正在测试:', proxy)
                async with session.get(TEST_URL,
                                       proxy=real_proxy,
                                       timeout=10,
                                       allow_redirects=False,
                                       verify_ssl=False) as resp:
                    if 200 == resp.status:
                        self.redis.max(proxy)
                    else:
                        self.redis.decrease(proxy)
                        print('请求错误:', resp.status, 'IP', proxy)

            except (ClientError, ClientProxyConnectionError,
                    aiohttp.client_exceptions.ClientConnectorError,
                    asyncio.TimeoutError, AttributeError):
                self.redis.decrease(proxy)
                print('代理请求失败', proxy)

    def run(self):
        """
        Test every stored proxy in batches of ``BATCH_TEST_SIZE``.

        Any exception raised while batching is printed, not propagated.

        :return: None
        """
        print('开始检测ip:....')
        try:
            count = self.redis.count()
            print('当前剩余', count, '个代理')
            for i in range(0, count, BATCH_TEST_SIZE):
                start = i
                end = min(i + BATCH_TEST_SIZE, count)
                print('正在测试第', start + 1, '-', end, '个代理')
                test_proxies = self.redis.batch(start, end)
                loop = asyncio.get_event_loop()
                # asyncio.wait() no longer accepts bare coroutines (TypeError
                # on Python 3.11+) and raises ValueError for an empty
                # collection, so wrap each check in a Task and skip empty
                # batches.
                tasks = [
                    loop.create_task(self.single_proxy(proxy))
                    for proxy in test_proxies
                ]
                if not tasks:
                    continue
                loop.run_until_complete(asyncio.wait(tasks))
                sys.stdout.flush()
                time.sleep(5)
        except Exception as e:
            print('检测异常:', e.args)


# r = Tester()
# r.run()
예제 #6
0
파일: tester.py 프로젝트: andy521/ip_pool
class Tester(object):
    """Synchronously checks stored proxies with ``requests``."""

    def __init__(self):
        self.redis = RedisClient()
        user_agent = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36"
        self.headers = {"User-Agent": user_agent}
        # Overwritten by parse_url() with the URL matching the proxy scheme.
        self.test_url = ""

    def parse_url(self, proxy):
        """
        Test one proxy against a scheme-specific URL and record the outcome.

        A proxy whose text before the first ``:`` is ``http`` is tested
        against an HTTP page, anything else against an HTTPS page. A 200
        response is reported with ``self.redis.max``; any other status or
        any exception is reported with ``self.redis.decrease``.

        :param proxy: proxy URL including its scheme
        :return: None
        """
        print("即将开始测试代理: ", proxy)

        scheme = proxy.split(":")[0]
        if scheme != "http":
            proxies = {"https": proxy}
            self.test_url = "https://www.baidu.com"
        else:
            proxies = {"http": proxy}
            self.test_url = "http://www.xinhuanet.com"

        try:
            print(self.test_url)
            response = requests.get(self.test_url,
                                    proxies=proxies,
                                    headers=self.headers,
                                    timeout=3)
            status = response.status_code
            if status != 200:
                self.redis.decrease(proxy)
                print("响应状态玛不合法: ", status, " proxy: ", proxy)
            else:
                self.redis.max(proxy)
                print("发现可用代理: ", proxy)
        except Exception as err:
            self.redis.decrease(proxy)
            print("请求发生错误: ", err, " proxy: ", proxy)

    def run(self):
        """
        Test every stored proxy, one at a time, in batches of ``BATCH_TEST_SIZE``.

        :return: None
        """
        print("开始测试代理ip......")
        count = self.redis.count()
        print("当前剩余 ", count, " 个代理")
        for start in range(0, count, BATCH_TEST_SIZE):
            stop = min(start + BATCH_TEST_SIZE, count)
            print("正在测试第 ", start + 1, " - ", stop, "个代理")

            for proxy in self.redis.batch(start, stop):
                self.parse_url(proxy)
예제 #7
0
class Tester():
    """Checks stored proxies by fetching ``TEST_URL`` through each of them."""

    def __init__(self):
        self.redis = RedisClient()

    async def test_single_proxy(self, proxy):
        """
        Fetch ``TEST_URL`` through a single proxy and record the outcome.

        A status in ``VALID_STATUS_CODE`` is reported with
        ``self.redis.max``; any other status or a failed request is reported
        with ``self.redis.decrease``.

        :param proxy: proxy address without scheme, as ``str`` or UTF-8 ``bytes``
        :return: None
        """
        conn = aiohttp.TCPConnector(verify_ssl=False)
        async with aiohttp.ClientSession(connector=conn) as session:
            try:
                if isinstance(proxy, bytes):
                    proxy = proxy.decode('utf-8')
                real_proxy = 'http://' + proxy
                print('正在测试', proxy)
                async with session.get(url=TEST_URL,
                                       proxy=real_proxy,
                                       timeout=5) as response:
                    if response.status in VALID_STATUS_CODE:
                        self.redis.max(proxy)
                        print('代理可用', proxy)
                    else:
                        self.redis.decrease(proxy)
                        print('请求响应码不合法', proxy)
            # Before Python 3.11 asyncio.TimeoutError is a different class
            # from the builtin TimeoutError, so it is listed explicitly to
            # make sure request timeouts are counted as failures.
            except (ClientError, TimeoutError, asyncio.TimeoutError,
                    AttributeError) as e:
                print(e.args)
                self.redis.decrease(proxy)
                print('代理请求失败', proxy)

    def run(self):
        """
        Test every stored proxy in batches of ``BATCH_TEST_SIZE``.

        Any exception raised while batching is printed, not propagated.

        :return: None
        """
        print('测试器开始运行')
        try:
            count = self.redis.count()
            print('当前代理剩余个数', count)
            loop = asyncio.get_event_loop()
            for i in range(0, count, BATCH_TEST_SIZE):
                start = i
                stop = min(count, i + BATCH_TEST_SIZE)
                test_proxies = self.redis.batch(start, stop)
                print('正在测试', start + 1, '-', stop, '个代理')
                # asyncio.wait() no longer accepts bare coroutines (TypeError
                # on Python 3.11+) and raises ValueError for an empty
                # collection, so wrap each check in a Task and skip empty
                # batches.
                tasks = [
                    loop.create_task(self.test_single_proxy(proxy))
                    for proxy in test_proxies
                ]
                if not tasks:
                    continue
                loop.run_until_complete(asyncio.wait(tasks))
                time.sleep(2)
        except Exception as e:
            print('测试器发生错误', e.args)
예제 #8
0
class Tester:
    """Checks stored proxies by fetching ``TEST_URL`` through each of them."""

    def __init__(self):
        self.redis = RedisClient()

    async def test_single_proxy(self, proxy):
        """
        Fetch ``TEST_URL`` through a single proxy and record the outcome.

        A status in ``VALID_STATUS_CODES`` is reported with
        ``self.redis.max``; any other status or a failed request is reported
        with ``self.redis.decrease``.

        :param proxy: proxy address without scheme, as ``str`` or UTF-8 ``bytes``
        :return: None
        """
        conn = aiohttp.TCPConnector(verify_ssl=False)
        async with aiohttp.ClientSession(connector=conn) as session:
            try:
                if isinstance(proxy, bytes):
                    proxy = proxy.decode("utf8")
                real_proxy = "http://" + proxy
                print(f"testing {proxy}")
                async with session.get(TEST_URL,
                                       proxy=real_proxy,
                                       timeout=15,
                                       allow_redirects=False) as response:
                    if response.status in VALID_STATUS_CODES:
                        self.redis.max(proxy)
                        print(f"{proxy} can use.")
                    else:
                        self.redis.decrease(proxy)
                        print(f"request is invalid {response.status} by proxy ip {proxy}")
            except (ClientError,
                    aiohttp.client_exceptions.ClientConnectionError,
                    asyncio.TimeoutError, AttributeError):
                self.redis.decrease(proxy)
                print(f"request is failed by proxy ip {proxy}")

    def run(self):
        """
        Test every stored proxy in batches of ``BATCH_TEST_SIZE``.

        Any exception raised while batching is printed, not propagated.

        :return: None
        """
        print("tester is begining")
        try:
            count = self.redis.count()
            print(f"hold {count} proxies")
            for i in range(0, count, BATCH_TEST_SIZE):
                start = i
                stop = min(i + BATCH_TEST_SIZE, count)
                print(f"will test {start+1} - {stop} proxies")
                test_proxies = self.redis.batch(start, stop)
                loop = asyncio.get_event_loop()
                # asyncio.wait() no longer accepts bare coroutines (TypeError
                # on Python 3.11+) and raises ValueError for an empty
                # collection, so wrap each check in a Task and skip empty
                # batches.
                tasks = [
                    loop.create_task(self.test_single_proxy(proxy))
                    for proxy in test_proxies
                ]
                if not tasks:
                    continue
                loop.run_until_complete(asyncio.wait(tasks))
                sys.stdout.flush()
                time.sleep(5)
        except Exception as e:
            print(f"test error, {e.args}")
예제 #9
0
class Tester(object):
    """Checks stored proxies by fetching ``TEST_URL`` through each of them."""

    def __init__(self):
        self.redis = RedisClient()

    async def test_single_proxy(self, proxy):
        """
        Fetch ``TEST_URL`` (sending ``base_header``) through a single proxy.

        A status in ``VALID_STATUS_CODES`` is reported with
        ``self.redis.max``; any other status or a failed request is reported
        with ``self.redis.decrease``.

        :param proxy: proxy address without scheme, as ``str`` or UTF-8 ``bytes``
        :return: None
        """
        conn = aiohttp.TCPConnector(verify_ssl=False)
        async with aiohttp.ClientSession(connector=conn) as session:
            try:
                if isinstance(proxy, bytes):
                    proxy = proxy.decode('utf-8')
                real_proxy = 'http://' + proxy
                print('正在测试', proxy)
                async with session.get(TEST_URL,
                                       headers=base_header,
                                       proxy=real_proxy,
                                       timeout=3,
                                       allow_redirects=False) as response:
                    if response.status in VALID_STATUS_CODES:
                        self.redis.max(proxy)
                        print('代理可用', proxy)
                    else:
                        self.redis.decrease(proxy)
                        print('请求错误', response.status, proxy)
            except (asyncio.TimeoutError,
                    aiohttp.client_exceptions.ClientProxyConnectionError,
                    aiohttp.ClientError):
                self.redis.decrease(proxy)
                print('代理请求失败', proxy)

    def run(self):
        """
        Test every stored proxy in batches of ``BATCH_TEST_SIZE``.

        Any exception raised while batching is printed, not propagated.

        :return: None
        """
        print('测试器开始运行')
        try:
            count = self.redis.count()
            print('当前剩下代理数量:', count)
            for i in range(0, count, BATCH_TEST_SIZE):
                start = i
                stop = min(i + BATCH_TEST_SIZE, count)
                print('正在测试第', start + 1, '-', stop, '的代理')
                test_proxies = self.redis.batch(start, stop)
                loop = asyncio.get_event_loop()
                # asyncio.wait() no longer accepts bare coroutines (TypeError
                # on Python 3.11+) and raises ValueError for an empty
                # collection, so wrap each check in a Task and skip empty
                # batches.
                tasks = [
                    loop.create_task(self.test_single_proxy(proxy))
                    for proxy in test_proxies
                ]
                if not tasks:
                    continue
                loop.run_until_complete(asyncio.wait(tasks))
                time.sleep(5)
        except Exception as e:
            print('测试器发生错误', e.args)
예제 #10
0
class Tester(object):
    """Checks stored proxies by fetching ``TEST_URL`` through each of them."""

    def __init__(self):
        self.db = RedisClient()

    async def test_single_proxy(self, proxy):
        """
        Fetch ``TEST_URL`` through a single proxy and record the outcome.

        A status in ``VALUE_CODE`` is reported with ``self.db.max``; any
        other status or any exception is reported with ``self.db.decrease``.

        :param proxy: proxy address without scheme, as ``str`` or UTF-8 ``bytes``
        :return: None
        """
        conn = aiohttp.TCPConnector(verify_ssl=False)
        async with aiohttp.ClientSession(connector=conn) as session:
            try:
                if isinstance(proxy, bytes):
                    proxy = proxy.decode("utf-8")
                real_proxy = "http://" + proxy
                async with session.get(url=TEST_URL,
                                       proxy=real_proxy,
                                       timeout=15,
                                       allow_redirects=False) as response:
                    if response.status in VALUE_CODE:
                        print("代理", proxy, "有效,分数置为100")
                        self.db.max(proxy)
                    else:
                        print("代理", proxy, "响应码,分数减1")
                        self.db.decrease(proxy)
            except Exception:
                print("代理", proxy, "请求出错,分数减1")
                self.db.decrease(proxy)

    def run(self):
        """
        Test every stored proxy in batches of ``BATCH``.

        Exceptions raised while batching propagate to the caller.

        :return: None
        """
        count = self.db.count()
        print('当前剩余', count, '个代理')
        for i in range(0, count, BATCH):
            start = i
            end = min(i + BATCH, count)
            proxies = self.db.batch(start, end)
            print('正在测试第', start + 1, '-', end, '个代理')
            loop = asyncio.get_event_loop()
            # asyncio.wait() no longer accepts bare coroutines (TypeError on
            # Python 3.11+) and raises ValueError for an empty collection,
            # so wrap each check in a Task and skip empty batches.
            tasks = [
                loop.create_task(self.test_single_proxy(proxy))
                for proxy in proxies
            ]
            if not tasks:
                continue
            loop.run_until_complete(asyncio.wait(tasks))
            time.sleep(5)
예제 #11
0
class Tester(object):
    """Checks stored proxies by fetching ``TEST_URL`` through each of them."""

    def __init__(self):
        self.redis = RedisClient()

    async def test_single_proxy(self, proxy):
        """
        Fetch ``TEST_URL`` through a single proxy and record the outcome.

        The request carries a fixed set of browser-like headers. A status in
        ``VALID_STATUS_CODES`` is reported with ``self.redis.max``; any other
        status or a failed request is reported with ``self.redis.decrease``.

        :param proxy: proxy address without scheme, as ``str`` or UTF-8 ``bytes``
        :return: None
        """
        # NOTE(review): the Cookie below is a hard-coded session token;
        # consider moving it to configuration.
        headers = {
            'Host':
            'xueqiu.com',
            'Referer':
            'https://xueqiu.com/u/8205178197',
            'User-Agent':
            'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36',
            'X-Requested-With':
            'XMLHttpRequest',
            'Cookie':
            'device_id=5d27463e2df6a534e7ecba029eb95e29; xq_a_token=f89219d7e7ee863a5773244ad9d2db6e3dc5ea38; xq_r_token=8bdf53186f54b2c5c885621e64fd4d728f3111e0;',
        }
        conn = aiohttp.TCPConnector(verify_ssl=False)
        async with aiohttp.ClientSession(connector=conn) as session:
            try:
                if isinstance(proxy, bytes):
                    proxy = proxy.decode('utf-8')
                real_proxy = 'http://' + proxy
                print('正在测试', proxy)
                async with session.get(TEST_URL,
                                       proxy=real_proxy,
                                       timeout=15,
                                       allow_redirects=False,
                                       headers=headers) as response:
                    if response.status in VALID_STATUS_CODES:
                        self.redis.max(proxy)
                        print('代理可用', proxy)
                    else:
                        self.redis.decrease(proxy)
                        print('请求响应码不合法', response.status, 'IP', proxy)
            except (ClientError,
                    aiohttp.client_exceptions.ClientConnectorError,
                    asyncio.TimeoutError, AttributeError):
                self.redis.decrease(proxy)
                print('代理请求失败', proxy)

    def run(self):
        """
        Test every stored proxy in batches of ``BATCH_TEST_SIZE``.

        Any exception raised while batching is printed, not propagated.

        :return: None
        """
        print('测试器开始运行')
        try:
            count = self.redis.count()
            print('当前剩余', count, '个代理')
            # Batch testing
            for i in range(0, count, BATCH_TEST_SIZE):
                start = i
                stop = min(i + BATCH_TEST_SIZE, count)
                print('正在测试第', start + 1, '-', stop, '个代理')
                test_proxies = self.redis.batch(start, stop)
                loop = asyncio.get_event_loop()
                # asyncio.wait() no longer accepts bare coroutines (TypeError
                # on Python 3.11+) and raises ValueError for an empty
                # collection, so wrap each check in a Task and skip empty
                # batches.
                tasks = [
                    loop.create_task(self.test_single_proxy(proxy))
                    for proxy in test_proxies
                ]
                if not tasks:
                    continue
                loop.run_until_complete(asyncio.wait(tasks))
                sys.stdout.flush()
                time.sleep(5)
        except Exception as e:
            print('测试器发生错误', e.args)
예제 #12
0
파일: tester.py 프로젝트: sleepray/Python-
class Tester(object):
    """Checks stored proxies by fetching ``TEST_URL`` through each of them."""

    def __init__(self):
        self.redis = RedisClient()

    # The ``async`` keyword makes this method a coroutine.
    async def test_single_proxy(self, proxy):
        '''
        Fetch ``TEST_URL`` through a single proxy and record the outcome.

        A status in ``VALID_STATUS_CODES`` is reported with
        ``self.redis.max``; any other status or a failed request is reported
        with ``self.redis.decrease``.

        :param proxy: proxy address without scheme, as ``str`` or UTF-8 ``bytes``
        :return: None
        '''
        # verify_ssl=False avoids SSL certificate errors.
        conn = aiohttp.TCPConnector(verify_ssl=False)
        # The session holds the state shared by the requests made here.
        async with aiohttp.ClientSession(connector=conn) as session:
            try:
                if isinstance(proxy, bytes):
                    proxy = proxy.decode('utf-8')
                real_proxy = 'http://' + proxy
                print('正在测试', proxy)
                # allow_redirects=False: do not follow redirects.
                async with session.get(TEST_URL,
                                       proxy=real_proxy,
                                       timeout=15,
                                       allow_redirects=False) as response:
                    if response.status in VALID_STATUS_CODES:
                        # Per the original author's note, max() sets the
                        # proxy's score to 100.
                        self.redis.max(proxy)
                        print('代理可用', proxy)
                    else:
                        # Per the original author's note, decrease() lowers
                        # the proxy's score by one.
                        self.redis.decrease(proxy)
                        print('请求响应码不合法', response.status, 'IP', proxy)
            except (ClientError,
                    aiohttp.client_exceptions.ClientConnectorError,
                    asyncio.TimeoutError, AttributeError):
                self.redis.decrease(proxy)
                print('代理请求失败', proxy)

    def run(self):
        '''
        Test every stored proxy in batches of ``BATCH_TEST_SIZE``.

        Any exception raised while batching is printed, not propagated.

        :return: None
        '''
        print('测试器开始运行')
        try:
            count = self.redis.count()
            print('当前剩余', count, '个代理')
            # The original note says the step (BATCH_TEST_SIZE) is 100.
            for i in range(0, count, BATCH_TEST_SIZE):
                start = i
                stop = min(i + BATCH_TEST_SIZE, count)
                print('正在测试第', start + 1, '-', stop, '个代理')
                # Presumably returns this slice of proxies ordered from
                # highest to lowest score (per the original note).
                test_proxies = self.redis.batch(start, stop)
                loop = asyncio.get_event_loop()
                # asyncio.wait() no longer accepts bare coroutines (TypeError
                # on Python 3.11+) and raises ValueError for an empty
                # collection, so wrap each check in a Task and skip empty
                # batches.
                tasks = [
                    loop.create_task(self.test_single_proxy(proxy))
                    for proxy in test_proxies
                ]
                if not tasks:
                    continue
                # Run the checks concurrently: while one request waits for
                # its response, the loop runs the others.
                loop.run_until_complete(asyncio.wait(tasks))
                # Emit progress now instead of when the run finishes.
                sys.stdout.flush()
                time.sleep(5)
        except Exception as e:
            print('测试器发送错误', e.args)
예제 #13
0
class Tester(object):
    """Synchronously checks stored proxies with ``http.client``."""

    def __init__(self):
        self.redis = RedisClient()
        self.headers = {
            'User-agent':
            'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.87 Safari/537.36',
        }

    def test_single_proxy(self, proxy):
        """
        Send one GET request through a proxy and record the outcome.

        Obtaining any response is reported with ``self.redis.max``; any
        exception is reported with ``self.redis.decrease``. The response
        status is not inspected.

        :param proxy: proxy address as ``ip:port``, ``str`` or UTF-8 ``bytes``
        :return: None
        """
        # Splitting a bytes value on a str separator raises TypeError, so
        # decode first.
        if isinstance(proxy, bytes):
            proxy = proxy.decode('utf-8')
        print('正在测试', proxy)
        try:
            # Parsed inside the try so a malformed entry counts as a failed
            # proxy instead of aborting the whole run.
            parts = proxy.split(':')
            ip = parts[0]
            port = parts[1]
            conn = http.client.HTTPConnection(ip, port, timeout=5.0)
            try:
                # NOTE(review): TETS_URL looks like a typo of TEST_URL; if it
                # is undefined, the NameError is caught below and every proxy
                # is marked as failed. Confirm against the settings module.
                conn.request(method='GET', url=TETS_URL, headers=self.headers)
                conn.getresponse()
            finally:
                # Always release the socket, whether or not the request worked.
                conn.close()
            print("+++Success:" + proxy)
            self.redis.max(proxy)
        except Exception:
            # Unlike a bare except, KeyboardInterrupt/SystemExit still
            # propagate.
            print("---Failure:" + proxy)
            self.redis.decrease(proxy)

    def run(self):
        """
        Test every stored proxy, one at a time, in batches of ``BATCH_TEST_SIZE``.

        Any exception raised while batching is printed, not propagated.

        :return: None
        """
        print('测试器开始运行')
        try:
            count = self.redis.count()
            print('当前剩余', count, '个代理')
            for i in range(0, count, BATCH_TEST_SIZE):
                start = i
                stop = min(i + BATCH_TEST_SIZE, count)
                print('正在测试第', start + 1, '-', stop, '个代理')
                test_proxies = self.redis.batch(start, stop)
                for proxy in test_proxies:
                    self.test_single_proxy(proxy)
                    time.sleep(0.5)
        except Exception as e:
            print('测试器发生错误', e.args)
예제 #14
0
class Tester():
    """Checks stored proxies by fetching ``TEST_URL`` through each of them."""

    def __init__(self):
        self.redis = RedisClient()

    async def test_single_proxy(self, proxy):
        """
        Fetch ``TEST_URL`` through a single proxy and record the outcome.

        A status in ``VALID_STATUS_CODES`` is reported with
        ``self.redis.max``; any other status or any exception is reported
        with ``self.redis.decrease``.

        :param proxy: proxy address without scheme, as ``str`` or UTF-8 ``bytes``
        :return: None
        """
        # Decode bytes values as UTF-8.
        if isinstance(proxy, bytes):
            proxy = proxy.decode('utf-8')
        real_proxy = 'http://' + proxy
        # Do not verify SSL certificates.
        conn = aiohttp.TCPConnector(verify_ssl=False)
        async with aiohttp.ClientSession(connector=conn) as session:
            try:
                # A stricter check that compared the "origin" field of the
                # JSON response with the proxy's IP was disabled by the
                # original author; only the status code is checked.
                async with session.get(TEST_URL,
                                       proxy=real_proxy,
                                       timeout=7,
                                       allow_redirects=False) as req:
                    if req.status in VALID_STATUS_CODES:
                        self.redis.max(proxy)
                    else:
                        self.redis.decrease(proxy)

            except Exception:
                # Any exception counts against the proxy.
                self.redis.decrease(proxy)

    def run(self):
        """
        Test every stored proxy in batches of ``BATCH_SIZE``.

        Any exception raised while batching is printed, not propagated.

        :return: None
        """
        try:
            # Number of proxies currently stored.
            count = self.redis.count()
            print('当前共有', count, '个代理!')
            for i in range(0, count, BATCH_SIZE):
                start = i
                # NOTE(review): the upper bound is count - 1 here; whether
                # that skips the last proxy depends on whether batch() treats
                # stop as inclusive. Confirm against RedisClient.batch.
                stop = min(i + BATCH_SIZE, count - 1)
                print('正在测试第', start + 1, '-', stop, '个代理!')
                proxies_list = self.redis.batch(start, stop)
                loop = asyncio.get_event_loop()
                # asyncio.wait() no longer accepts bare coroutines (TypeError
                # on Python 3.11+) and raises ValueError for an empty
                # collection, so wrap each check in a Task and skip empty
                # batches.
                task = [
                    loop.create_task(self.test_single_proxy(proxy))
                    for proxy in proxies_list
                ]
                if not task:
                    continue
                loop.run_until_complete(asyncio.wait(task))
                sys.stdout.flush()
                time.sleep(5)
        except Exception as e:
            print('测试器发生错误', e.args)
예제 #15
0
class Tester(object):
    """Checks stored proxies by fetching ``TEST_URL`` through each of them."""

    def __init__(self):
        self.redis = RedisClient()

    async def test_single_proxy(self, proxy):
        """
        Fetch ``TEST_URL`` through a single proxy and record the outcome.

        A status in ``VALID_STATUS_CODES`` is reported with
        ``self.redis.max``; any other status or a failed request is reported
        with ``self.redis.decrease``.

        :param proxy: proxy address without scheme, as ``str`` or UTF-8 ``bytes``
        :return: None
        """
        conn = aiohttp.TCPConnector(ssl=False)
        # One session per check; it is closed when the block exits.
        async with aiohttp.ClientSession(connector=conn) as session:
            try:
                # Decode bytes values before building the proxy URL.
                if isinstance(proxy, bytes):
                    proxy = proxy.decode('utf-8')
                real_proxy = 'http://' + proxy
                print('正在测试', proxy)

                async with session.get(TEST_URL,
                                       proxy=real_proxy,
                                       timeout=15,
                                       allow_redirects=False) as response:
                    if response.status in VALID_STATUS_CODES:
                        self.redis.max(proxy)
                        print('代理可用', proxy)
                    else:
                        self.redis.decrease(proxy)
                        print('请求响应码不合法 ', response.status, 'IP', proxy)
            except (ClientError,
                    aiohttp.client_exceptions.ClientConnectorError,
                    asyncio.TimeoutError, AttributeError):
                self.redis.decrease(proxy)
                print('代理请求失败', proxy)

    def run(self):
        """
        Test every stored proxy in batches of ``BATCH_TEST_SIZE``.

        Any exception raised while batching is printed, not propagated.

        :return: None
        """
        print('测试器开始运行')
        try:
            count = self.redis.count()
            print('当前剩余', count, '个代理')

            for i in range(0, count, BATCH_TEST_SIZE):
                start = i
                # Capping at count keeps the final, shorter batch in range.
                stop = min(i + BATCH_TEST_SIZE, count)
                print('正在测试第', start + 1, '-', stop, '个代理')
                test_proxies = self.redis.batch(start, stop)

                # Per the original author's note, this avoids the
                # "already running" error.
                nest_asyncio.apply()

                loop = asyncio.get_event_loop()

                # asyncio.wait() no longer accepts bare coroutines (TypeError
                # on Python 3.11+) and raises ValueError for an empty
                # collection, so wrap each check in a Task and skip empty
                # batches.
                tasks = [
                    loop.create_task(self.test_single_proxy(proxy))
                    for proxy in test_proxies
                ]
                if not tasks:
                    continue
                loop.run_until_complete(asyncio.wait(tasks))
                sys.stdout.flush()
                time.sleep(5)
        except Exception as e:
            print('测试器发生错误', e.args)
예제 #16
0
class Verify:
    """Re-validate pooled proxies and adjust their scores in Redis.

    Every proxy is used for a single GET request against
    ``setting.TEST_URL``. Depending on the outcome its score is raised via
    ``self.db.max`` or lowered via ``self.db.decrease``.
    """

    def __init__(self):
        # Client for the Redis-backed proxy pool; this class calls its
        # count(), batch(), max() and decrease() methods.
        self.db = RedisClient()

    async def verify_proxy(self, redis_key, proxy):
        """Check a single proxy and update its score.

        A response status of 200 or 302 counts as success and calls
        ``self.db.max``; any other status, a client error or a timeout calls
        ``self.db.decrease``. One status line is printed per outcome.

        :param redis_key: name of the pool the proxy belongs to; passed
            through to ``self.db.max`` / ``self.db.decrease``.
        :param proxy: proxy address without scheme, as ``str`` or as UTF-8
            encoded ``bytes``.
        :return: None
        """
        if isinstance(proxy, bytes):
            proxy = proxy.decode('utf-8')
        re_proxy = 'http://' + proxy

        conn = aiohttp.TCPConnector(ssl=False)
        async with aiohttp.ClientSession(connector=conn) as session:
            try:
                # Redirects are not followed, so a 302 from the test URL is
                # seen here as-is and accepted below.
                async with session.get(setting.TEST_URL,
                                       proxy=re_proxy,
                                       timeout=6,
                                       allow_redirects=False) as resp:
                    if resp.status in (200, 302):
                        print("{}||{}池:{}: ok 100点".format(
                            time.ctime(), redis_key, proxy))
                        self.db.max(redis_key, proxy)
                    else:
                        print("{}||{}池:{}: fail -1点".format(
                            time.ctime(), redis_key, proxy))
                        self.db.decrease(redis_key, proxy)
            # aiohttp.ClientConnectorError is a subclass of ClientError, so
            # listing ClientError alone covers it.
            except (aiohttp.ClientError, asyncio.TimeoutError):
                print("{}||{}池:{}: error -1点".format(time.ctime(), redis_key,
                                                     proxy))
                self.db.decrease(redis_key, proxy)

    @staticmethod
    def _partition_bounds(count, part, parts=4):
        """Return the ``(start, stop)`` index range of slice ``part``.

        ``count`` items are divided into ``parts`` slices of
        ``count // parts`` items each; the last slice (``part == parts - 1``)
        additionally takes the remainder so that it ends at ``count``.
        """
        size = count // parts
        start = part * size
        stop = count if part == parts - 1 else start + size
        return start, stop

    def _verify_range(self, redis_key, start, stop):
        """Verify the proxies of ``redis_key`` between ``start`` and ``stop``.

        The range is processed in batches of ``setting.HTTP_VERIFY_SIZE``;
        the proxies of one batch are verified concurrently.
        """
        batch_size = setting.HTTP_VERIFY_SIZE
        for i in range(start, stop, batch_size):
            # Clamp the batch end to `stop` so the last batch of a slice does
            # not reach into the range of the following slice.
            proxies = self.db.batch(redis_key, i, min(i + batch_size, stop))
            loop = asyncio.get_event_loop()
            # asyncio.wait() rejects bare coroutine objects on Python 3.11+,
            # so each coroutine is wrapped in a Task first.
            tasks = [
                loop.create_task(self.verify_proxy(redis_key, proxy))
                for proxy in proxies
            ]
            # asyncio.wait() raises ValueError for an empty collection, which
            # would abort the remaining batches; skip empty batches instead.
            if tasks:
                loop.run_until_complete(asyncio.wait(tasks))

    def run_verify_http(self, part, parts=4):
        """Verify one slice of the pool stored under ``setting.REDIS_KEY_HTTP``.

        The pool is divided into ``parts`` index ranges so that several
        callers can each verify a different slice. Start, completion
        (with elapsed seconds) and failures are written to ``logger``;
        exceptions raised during verification are logged, not propagated.

        :param part: zero-based index of the slice to verify.
        :param parts: total number of slices, a positive integer. Defaults
            to 4, the value that used to be hard-coded.
        :return: None
        """
        stime = time.time()

        count = self.db.count(setting.REDIS_KEY_HTTP)
        start, stop = self._partition_bounds(count, part, parts)
        try:
            logger.info("{}开始验证{}-{}".format(setting.REDIS_KEY_HTTP, start,
                                             stop))

            self._verify_range(setting.REDIS_KEY_HTTP, start, stop)

            logger.info("{}验证完成{}-{}耗时:{}".format(setting.REDIS_KEY_HTTP,
                                                  start, stop,
                                                  time.time() - stime))
        except Exception as e:
            # Logged at warning level, the same level run_verify_https uses
            # for its failures.
            logger.warning('{}验证报错{}-{}:{}'.format(setting.REDIS_KEY_HTTP,
                                                   start, stop, e))

    def run_verify_https(self):
        """Verify the whole pool stored under ``setting.REDIS_KEY_HTTPS``.

        Start, completion (with elapsed seconds) and failures are written to
        ``logger``; exceptions raised during verification are logged, not
        propagated.

        :return: None
        """
        stime = time.time()
        try:
            logger.info("{}开始验证".format(setting.REDIS_KEY_HTTPS))

            count = self.db.count(setting.REDIS_KEY_HTTPS)
            self._verify_range(setting.REDIS_KEY_HTTPS, 0, count)

            logger.info("{}验证完成,耗时:{}".format(setting.REDIS_KEY_HTTPS,
                                              time.time() - stime))
        except Exception as e:
            logger.warning('{}验证报错:{}'.format(setting.REDIS_KEY_HTTPS, e))