Esempio n. 1
0
class Test_ip(object):
    """Re-check every proxy held in the pool and update its score.

    A proxy that fetches the test URL with HTTP 200 is promoted through
    ``db.max``; any other outcome demotes it through ``db.decrease``.
    """

    def __init__(self, timeout=10):
        """Set up the storage client and the request settings.

        :param timeout: seconds to wait for the test request; without a
            limit a dead proxy could block its worker thread forever.
        """
        self.db = RedisClient()
        self.headers = headers
        self.url = test_url
        self.timeout = timeout

    def get_url(self, proxy):
        """Fetch the test URL through ``proxy``.

        :param proxy: ``proxies`` mapping in the form requests expects.
        :return: True if the response status is 200, otherwise False
            (including when the request raises).
        """
        try:
            con = requests.get(self.url, headers=self.headers,
                               proxies=proxy, timeout=self.timeout)
        except Exception:
            # Any failure simply means "proxy unusable".  Unlike a bare
            # ``except`` this lets KeyboardInterrupt/SystemExit propagate.
            return False
        return con.status_code == 200

    def test(self, ip):
        """Test one proxy and record the outcome in the pool.

        :param ip: ``host:port`` of the proxy, as ``bytes`` or ``str``.
        """
        # The Redis client may hand back bytes or already-decoded strings.
        if isinstance(ip, bytes):
            ip = ip.decode('utf-8')
        proxy_url = 'http://' + ip
        # Register the proxy for both schemes; with only an "http" entry an
        # https test URL would be fetched directly, bypassing the proxy.
        proxy = {'http': proxy_url, 'https': proxy_url}
        if self.get_url(proxy):
            self.db.max(ip)
        else:
            self.db.decrease(ip)

    def run(self):
        """Start one daemon thread per stored proxy, throttling the launch rate."""
        for index, ip in enumerate(self.db.all(), start=1):
            worker = threading.Thread(target=self.test, args=(ip,),
                                      daemon=True)
            worker.start()
            random_time()
            # Pause after every 100 launched workers (not after the first one).
            if index % 100 == 0:
                time.sleep(5)
Esempio n. 2
0
class PoolTester(object):
    """Validate every proxy in the Redis pool using a thread pool."""

    def __init__(self):
        self.redis = RedisClient()

    def test_single_proxy(self, proxy):
        """
        Check one proxy and update its standing in the pool.

        :param proxy: proxy entry as returned by the pool
        :return: None
        """
        usable = test_proxy_vaild(proxy)
        if not usable:
            self.redis.drop(proxy)
            print("[-] 代理不可用", proxy)
            return
        self.redis.max(proxy)
        print("[+] 代理可用", proxy)

    def run(self):
        """
        Entry point: report the pool size, then test all proxies in parallel.

        Errors raised while setting up the run are printed, not propagated.

        :return: None
        """
        print("测试器开始运行.......")
        try:
            remaining = self.redis.count()
            print("当前剩余%d个代理" % remaining)
            # Fan the checks out over a thread pool; the ``with`` block waits
            # for all submitted checks before returning.
            with ThreadPoolExecutor(FilterTreadCount) as executor:
                executor.map(self.test_single_proxy, self.redis.all())
        except Exception as err:
            print("测试器发生错误", err)
class PoolTester(object):
    """Thread-pool based checker for the proxies stored in Redis (coloured output)."""

    def __init__(self):
        self.redis = RedisClient()

    def testSingleProxy(self, proxy):
        """
        Check a single proxy and record the outcome in Redis.

        :param proxy: proxy entry taken from the pool
        :return: None
        """
        if not testProxyVaild(proxy):
            self.redis.drop(proxy)
            print(Fore.RED + "[-] 代理不可用", proxy)
            return
        self.redis.max(proxy)
        print(Fore.GREEN + "[+] 代理可用", proxy)

    def run(self):
        """
        Main routine: print the pool size and test every proxy concurrently.

        :return: None
        """
        print(Fore.GREEN + "测试器开始运行.......")
        try:
            total = self.redis.count()
            print(Fore.GREEN + "当前剩余%d个代理" % total)
            # A thread pool keeps the availability checks fast.
            with ThreadPoolExecutor(FILTER_THREAD_COUNT) as workers:
                workers.map(self.testSingleProxy, self.redis.all())
        except Exception as exc:
            print(Fore.RED + "测试器发生错误", exc)
Esempio n. 4
0
class Tester():
    """
    Validate the proxies stored in Redis, one blocking request at a time.
    """
    def __init__(self):
        self.redis = RedisClient()

    def test_single_proxy(self, proxy):
        """
        Test a single proxy and update its score.

        The request is routed through ``proxy``; previously the proxy was never
        handed to requests, so the check only proved TEST_URL was reachable
        directly.

        :param proxy: proxy address (``bytes`` or ``str``); a scheme is
            prepended when the value does not carry one.
        :return: None
        """
        if isinstance(proxy, bytes):
            proxy = proxy.decode('utf-8')
        proxy_url = proxy if '://' in proxy else 'http://' + proxy
        proxies = {'http': proxy_url, 'https': proxy_url}
        try:
            response = requests.get(url=TEST_URL, proxies=proxies, timeout=5)
        except RequestException:
            print('代理测试请求异常', proxy)
            self.redis.decrease(proxy)
            return
        if response.status_code in VALID_STATUS_CODES:
            # Test succeeded: raise the proxy to the top score.
            self.redis.max(proxy)
            print('测试成功', proxy, time.strftime('%Y-%m-%d %H:%M'))
        else:
            self.redis.decrease(proxy)

    def run(self):
        """Fetch every proxy from the store and test them sequentially."""
        print('测试器开始 测试代理%d个' % self.redis.count(),
              time.strftime('%Y-%m-%d %H-%M'))
        for proxy in self.redis.all():
            self.test_single_proxy(proxy)
Esempio n. 5
0
class Tester(object):
    """Asynchronously checks the proxies stored in Redis and rescores them."""

    def __init__(self):
        self.redis = RedisClient()

    async def test_single_proxy(self, proxy):
        """
        Request TEST_URL through one proxy and record the outcome.

        :param proxy: ``host:port`` of the proxy, as ``bytes`` or ``str``
        :return: None
        """
        conn = aiohttp.TCPConnector(verify_ssl=False)
        async with aiohttp.ClientSession(connector=conn) as session:
            try:
                if isinstance(proxy, bytes):
                    proxy = proxy.decode('utf-8')
                real_proxy = 'http://' + proxy
                print('正在测试', proxy)
                async with session.get(TEST_URL, proxy=real_proxy,
                                       timeout=15) as response:
                    if response.status in VALID_STATUS_CODES:
                        self.redis.max(proxy)
                        print("代理可用", proxy)
                    else:
                        self.redis.decrease(proxy)
                        print("请求响应码不合法", proxy)
            except Exception:
                # Any failure while talking through the proxy counts against it.
                self.redis.decrease(proxy)
                print("代理请求失败", proxy)

    def run(self):
        """
        Test all stored proxies in batches of BATCH_TEST_SIZE.

        Errors are printed instead of being raised.

        :return: None
        """
        print("测试器开始运行")
        try:
            proxies = self.redis.all()
            loop = asyncio.get_event_loop()
            for i in range(0, len(proxies), BATCH_TEST_SIZE):
                batch = proxies[i:i + BATCH_TEST_SIZE]
                # asyncio.wait() rejects bare coroutines since Python 3.11,
                # so every check is wrapped in a Task first.
                tasks = [
                    loop.create_task(self.test_single_proxy(proxy))
                    for proxy in batch
                ]
                loop.run_until_complete(asyncio.wait(tasks))
                time.sleep(5)
        except Exception as e:
            print("测试器发生错误", e.args)
Esempio n. 6
0
class Tester(object):
    """Checks pooled proxies against every URL in TEST_URL using aiohttp."""

    def __init__(self):
        self.redis = RedisClient()

    async def test_single_proxy(self, proxy):
        """
        Test a single proxy against each configured URL.

        :param proxy: ``host:port`` of the proxy, as ``bytes`` or ``str``
        :return: None
        """
        conn = aiohttp.TCPConnector(verify_ssl=False)
        async with aiohttp.ClientSession(connector=conn) as session:
            try:
                if isinstance(proxy, bytes):
                    proxy = proxy.decode('utf-8')
                print('正在测试', proxy)
                # The proxy URL does not depend on the target, build it once.
                real_proxy = 'http://' + proxy
                for url in TEST_URL:
                    async with session.get(url,
                                           proxy=real_proxy,
                                           timeout=15,
                                           allow_redirects=False) as response:
                        if response.status in VALID_STATUS_CODES:
                            # Record which scheme (http/https) the proxy
                            # worked for, taken from the tested URL.
                            scheme = re.match(r'http[s]?', url).group()
                            self.redis.max(proxy, http=scheme)
                            print('代理可用', proxy)
                        else:
                            self.redis.decrease(proxy)
                            print('请求响应码不合法 ', response.status, 'IP', proxy)
            except (ClientError,
                    aiohttp.client_exceptions.ClientConnectorError,
                    asyncio.TimeoutError, AttributeError):
                self.redis.decrease(proxy)
                print('代理请求失败', proxy)

    def run(self):
        """
        Main test routine: walk the pool in batches of BATCH_TEST_SIZE.

        Errors are printed instead of being raised.

        :return: None
        """
        print('测试器开始运行')
        try:
            count = self.redis.count()
            print('当前剩余', count, '个代理')
            loop = asyncio.get_event_loop()
            for start in range(0, count, BATCH_TEST_SIZE):
                stop = min(start + BATCH_TEST_SIZE, count)
                print('正在测试第', start + 1, '-', stop, '个代理')
                test_proxies = self.redis.batch(start, stop)
                # asyncio.wait() rejects bare coroutines since Python 3.11,
                # so every check is wrapped in a Task first.
                tasks = [
                    loop.create_task(self.test_single_proxy(proxy))
                    for proxy in test_proxies
                ]
                if not tasks:
                    # The pool may have shrunk since count() was read;
                    # asyncio.wait() raises ValueError on an empty set.
                    continue
                loop.run_until_complete(asyncio.wait(tasks))
                sys.stdout.flush()
                time.sleep(5)
        except Exception as e:
            print('测试器发生错误', e.args)
Esempio n. 7
0
class Tester(object):
    """Checks pooled proxies concurrently and updates their scores in Redis."""

    def __init__(self):
        self.redis = RedisClient()

    async def test_single_proxy(self, proxy):
        '''
        Test a single proxy.

        :param proxy: a single proxy (``bytes`` or ``str``)
        :return: None
        '''
        conn = aiohttp.TCPConnector(verify_ssl=False)
        async with aiohttp.ClientSession(connector=conn) as session:
            try:
                # Decode when the value arrives as bytes.
                if isinstance(proxy, bytes):
                    proxy = proxy.decode('utf-8')
                real_proxy = 'http://' + proxy
                print('正在测试...')
                async with session.get(test_url, proxy=real_proxy,
                                       timeout=15) as response:
                    if response.status in valid_status_codes:
                        # Proxy works: raise it to the maximum score.
                        self.redis.max(proxy)
                        print('代理可用')
                    else:
                        self.redis.decrease(proxy)
                        print('请求响应码不合法,代理检测失败')
            except Exception:
                self.redis.decrease(proxy)
                print('代理请求失败', proxy)

    def run(self):
        '''
        Main test routine.

        Errors are printed instead of being raised.

        :return: None
        '''
        print('测试器开始运行>>>>>>')
        try:
            proxie = self.redis.all()  # fetch every stored proxy
            loop = asyncio.get_event_loop()
            # Concurrency with asyncio: a list of tasks is registered with the
            # event loop; whenever one awaits on I/O the others keep running.

            # Test in batches.
            for i in range(0, len(proxie), batch_test_size):
                test_proxies = proxie[i:i + batch_test_size]
                # asyncio.wait() rejects bare coroutines since Python 3.11,
                # so every check is wrapped in a Task first.
                tasks = [
                    loop.create_task(self.test_single_proxy(proxy))
                    for proxy in test_proxies
                ]
                loop.run_until_complete(asyncio.wait(tasks))
                time.sleep(5)
            print('检测完成')

        except Exception as e:
            print('测试发生错误!!', e)
Esempio n. 8
0
class Tester(object):
    """Checks whether the pooled proxies actually work."""
    def __init__(self):
        self.redis = RedisClient()

    async def test_single_proxy(self, proxy):
        """
        Test a single proxy.

        :param proxy: the proxy (``bytes`` or ``str``)
        :return: None
        """
        # Certificate verification is switched off to avoid SSL errors.
        conn = aiohttp.TCPConnector(verify_ssl=False)
        async with aiohttp.ClientSession(connector=conn) as session:
            try:
                if isinstance(proxy, bytes):
                    proxy = proxy.decode('utf-8')
                real_proxy = "http://" + proxy
                print("正在尝试:", real_proxy)
                async with session.get(test_url, proxy=real_proxy,
                                       timeout=15) as response:
                    if response.status in VALID_STATUS_CODES:
                        self.redis.max(proxy)
                        print("代理可用", proxy)
                    else:
                        self.redis.decrease(proxy)
                        print("请求响应不合法", proxy)
            except Exception as e:
                self.redis.decrease(proxy)
                print("代理请求失败", proxy, e.args)

    def run(self):
        """
        Main test routine: walk the pool in batches of BATCH_TEST_SIZE.

        Errors are printed instead of being raised.

        :return: None
        """
        print('测试器开始运行')
        try:
            count = self.redis.count()
            print('当前剩余', count, '个代理')
            loop = asyncio.get_event_loop()
            for start in range(0, count, BATCH_TEST_SIZE):
                stop = min(start + BATCH_TEST_SIZE, count)
                print('正在测试第', start + 1, '-', stop, '个代理')
                test_proxies = self.redis.batch(start, stop)
                # asyncio.wait() rejects bare coroutines since Python 3.11,
                # so every check is wrapped in a Task first.
                tasks = [
                    loop.create_task(self.test_single_proxy(proxy))
                    for proxy in test_proxies
                ]
                if not tasks:
                    # asyncio.wait() raises ValueError on an empty set.
                    continue
                loop.run_until_complete(asyncio.wait(tasks))
                sys.stdout.flush()
                time.sleep(5)
        except Exception as e:
            print('测试器发生错误', e.args)
Esempio n. 9
0
class Tester(object):
    """Checks pooled proxies with aiohttp and updates their scores in Redis."""

    def __init__(self):
        self.redis = RedisClient()

    async def test_single_proxy(self, proxy):
        '''
        Test a single proxy.

        :param proxy: ``host:port`` of the proxy, as ``bytes`` or ``str``
        :return: None
        '''
        # aiohttp's ClientSession plays the role of a requests session.
        async with aiohttp.ClientSession() as session:
            try:
                if isinstance(proxy, bytes):
                    proxy = proxy.decode('utf-8')
                real_proxy = 'http://' + proxy
                print('正在测试', proxy)
                async with session.get(TEST_URL, proxy=real_proxy,
                                       verify_ssl=False, timeout=60,
                                       allow_redirects=False) as response:
                    if response.status in VALID_STATUS_CODES:
                        self.redis.max(proxy)
                        print('代理可用', proxy)
                    else:
                        self.redis.decrease(proxy)
                        print('请求响应码不合法', proxy)
            except (ClientError,
                    aiohttp.client_exceptions.ClientProxyConnectionError,
                    asyncio.TimeoutError, AttributeError) as e:
                print(e.args)
                self.redis.decrease(proxy)
                print('代理请求失败', proxy)

    def run(self):
        '''
        Main test routine: walk the pool in batches of BATCH_TEST_SIZE.

        Errors are printed instead of being raised.

        :return: None
        '''
        print('测试器开始运行')
        try:
            count = self.redis.count()
            print('当前剩余', count, '个代理')
            loop = asyncio.get_event_loop()
            for start in range(0, count, BATCH_TEST_SIZE):
                stop = min(start + BATCH_TEST_SIZE, count)
                print('正在测试第', start + 1, '-', stop, '个代理')
                test_proxies = self.redis.batch(start, stop)
                # Test the proxies of this batch. asyncio.wait() rejects bare
                # coroutines since Python 3.11, so each one becomes a Task.
                tasks = [
                    loop.create_task(self.test_single_proxy(proxy))
                    for proxy in test_proxies
                ]
                if not tasks:
                    # asyncio.wait() raises ValueError on an empty set.
                    continue
                loop.run_until_complete(asyncio.wait(tasks))
                # Flush so progress shows up after every batch.
                sys.stdout.flush()
                time.sleep(5)
        except Exception as e:
            print('测试台发生错误', e.args)
Esempio n. 10
0
class Tester(object):
    """Checks pooled proxies concurrently and updates their scores in Redis."""

    def __init__(self):
        self.redis = RedisClient()

    async def test_single_proxy(self, proxy):
        """
        Test a single proxy.

        :param proxy:       a single proxy (``bytes`` or ``str``)
        :return:            None
        """
        conn = aiohttp.TCPConnector(verify_ssl=False)
        async with aiohttp.ClientSession(connector=conn) as session:
            try:
                if isinstance(proxy, bytes):
                    proxy = proxy.decode('utf-8')
                real_proxy = 'http://' + proxy
                print('正在测试', proxy)
                async with session.get(TEST_URL, proxy=real_proxy,
                                       timeout=15) as response:
                    if response.status in VALID_STATUS_CODES:
                        self.redis.max(proxy)
                    else:
                        self.redis.decrease(proxy)
                        print('请求响应码不合法', proxy)
            except (ClientError,
                    aiohttp.client_exceptions.ClientConnectorError,
                    asyncio.TimeoutError, AttributeError):
                self.redis.decrease(proxy)
                print('代理请求失败', proxy)

    def run(self):
        """
        Main test routine.

        Errors are printed instead of being raised.

        :return:    None
        """
        print('测试器开始运行')
        try:
            proxies = self.redis.all()
            loop = asyncio.get_event_loop()
            count = self.redis.count()
            print(count)
            # Test in batches. Iterate over the snapshot actually fetched:
            # count() is a separate query and may disagree with it, which
            # would produce empty batches that asyncio.wait() rejects.
            for i in range(0, len(proxies), BATCH_TEST_SIZE):
                test_proxies = proxies[i:i + BATCH_TEST_SIZE]
                # asyncio.wait() rejects bare coroutines since Python 3.11,
                # so every check is wrapped in a Task first.
                tasks = [
                    loop.create_task(self.test_single_proxy(proxy))
                    for proxy in test_proxies
                ]
                loop.run_until_complete(asyncio.wait(tasks))
                time.sleep(5)
        except Exception as e:
            print('测试器发生错误', e.args)
Esempio n. 11
0
class Tester:
    """Checks pooled proxies against ``settings.target_url`` and rescores them."""

    def __init__(self):
        self.redis = RedisClient()

    async def test_single_proxy(self, proxy):
        """
        Test a single proxy.

        A non-200 answer is penalised exactly once. The earlier version
        penalised it, then raised into its own ``except`` block and penalised
        it a second time.

        :param proxy: a single proxy (``bytes`` or ``str``)
        :return: None
        """
        if isinstance(proxy, bytes):
            proxy = proxy.decode('utf-8')
        real_proxy = 'http://' + proxy
        conn = aiohttp.TCPConnector(verify_ssl=False)
        try:
            async with aiohttp.ClientSession(headers=settings.headers,
                                             connector=conn) as session:
                print('正在测试', proxy)
                # ``async with`` releases the response once it is inspected.
                async with session.get(settings.target_url,
                                       proxy=real_proxy,
                                       timeout=5) as rsp:
                    if rsp.status == 200:
                        self.redis.max(proxy)
                        print('代理可用', proxy)
                    else:
                        self.redis.decrease(proxy)
                        print('代理请求失败', proxy)
        except Exception as e:
            self.redis.decrease(proxy)
            print('代理请求失败', proxy)
            print(e.__cause__)

    def run(self):
        """
        Main test routine: test the pool in batches of
        ``settings.test_request_count``.

        Errors are printed instead of being raised.

        :return: None
        """
        print('开始测试...')
        try:
            proxies = self.redis.all()
            loop = asyncio.get_event_loop()
            for i in range(0, len(proxies), settings.test_request_count):
                test_proxies = proxies[i:i + settings.test_request_count]
                # asyncio.wait() rejects bare coroutines since Python 3.11,
                # so every check is wrapped in a Task first.
                task = [
                    loop.create_task(self.test_single_proxy(proxy))
                    for proxy in test_proxies
                ]
                loop.run_until_complete(asyncio.wait(task))
                time.sleep(5)
        except Exception as e:
            print('测试出现异常', e.args)
Esempio n. 12
0
class Tester(object):
    """Proxy checker offering an aiohttp-based and a requests-based test path."""

    def __init__(self):
        self.redis = RedisClient()

    async def test_single_proxy(self, proxy):
        """
        Asynchronously test one proxy against TEST_URL.

        :param proxy: ``host:port`` of the proxy, as ``bytes`` or ``str``
        :return: None
        """
        conn = aiohttp.TCPConnector(verify_ssl=False)
        async with aiohttp.ClientSession(connector=conn) as session:
            try:
                if isinstance(proxy, bytes):
                    proxy = proxy.decode('utf-8')
                real_proxy = 'http://' + proxy
                print('正在测试', proxy)
                async with session.get(TEST_URL, proxy=real_proxy,
                                       timeout=10) as response:
                    if response.status in VALID_STATUS_CODES:
                        self.redis.max(proxy)
                    else:
                        self.redis.decrease(proxy)
                        print('请求响应不合法', proxy)
            # asyncio.TimeoutError is listed explicitly so that a proxy which
            # times out is penalised instead of leaving an unhandled task
            # exception behind.
            except (ClientConnectionError, ClientError, ConnectTimeout,
                    asyncio.TimeoutError):
                self.redis.decrease(proxy)
                print('代理请求失败', proxy)

    def run(self):
        """
        Test all stored proxies asynchronously in batches of BATCH_TEST_SIZE.

        Errors are printed instead of being raised.
        """
        print('测试器开始运行')
        try:
            proxies = self.redis.all()
            loop = asyncio.get_event_loop()
            for i in range(0, len(proxies), BATCH_TEST_SIZE):
                test_proxies = proxies[i:i + BATCH_TEST_SIZE]
                # asyncio.wait() rejects bare coroutines since Python 3.11,
                # so every check is wrapped in a Task first.
                tasks = [
                    loop.create_task(self.test_single_proxy(proxy))
                    for proxy in test_proxies
                ]
                loop.run_until_complete(asyncio.wait(tasks))
                time.sleep(5)
        except Exception as e:
            print('测试器发生错误', e.args)

    def test_single_tread(self, proxy):
        """
        Synchronously test one proxy against TEST_URL using requests.

        :param proxy: ``host:port`` of the proxy, as ``bytes`` or ``str``
        :return: None
        """
        # Values coming from Redis may be bytes; str + bytes would raise.
        if isinstance(proxy, bytes):
            proxy = proxy.decode('utf-8')
        # NOTE(review): an "https://" proxy URL means TLS to the proxy itself;
        # confirm the pooled proxies really expect that.
        real_proxy = {'https': 'https://' + proxy}
        print('测试', real_proxy)
        try:
            res = requests.get(TEST_URL, proxies=real_proxy, timeout=10)
        # RequestException additionally covers read timeouts, SSL errors and
        # similar failures that would otherwise abort new_run().
        except (ConnectionError, ConnectTimeout,
                requests.exceptions.RequestException):
            self.redis.decrease(proxy)
            print('代理请求失败', proxy)
            return
        if res.status_code in VALID_STATUS_CODES:
            self.redis.max(proxy)
        else:
            self.redis.decrease(proxy)

    def new_run(self):
        """Test every stored proxy sequentially with the requests-based check."""
        for ip in self.redis.all():
            self.test_single_tread(ip)
Esempio n. 13
0
class Tester(object):
    """Checks pooled proxies concurrently and updates their scores in Redis."""

    def __init__(self):
        self.redis = RedisClient()

    async def test_single_proxy(self, proxy):
        """
        Test one proxy against TEST_URL.

        :param proxy: ``host:port`` of the proxy, as ``bytes`` or ``str``
        :return: None
        """
        conn = aiohttp.TCPConnector(verify_ssl=False)
        async with aiohttp.ClientSession(connector=conn) as session:
            try:
                if isinstance(proxy, bytes):
                    proxy = proxy.decode('utf-8')
                real_proxy = 'http://' + proxy
                print('正在测试', proxy)
                async with session.get(TEST_URL, proxy=real_proxy,
                                       timeout=15) as response:
                    if response.status in VALID_STATUS_CODES:
                        self.redis.max(proxy)
                        print('代理可用', proxy)
                    else:
                        self.redis.decrease(proxy)
                        print( 'IP', proxy,'请求响应码不合法 ')
            except (ClientError,
                    aiohttp.client_exceptions.ClientConnectorError,
                    asyncio.TimeoutError, AttributeError):
                self.redis.decrease(proxy)
                print('代理请求失败', proxy)

    def run(self):
        """
        Test all stored proxies in batches of BATCH_TEST_SIZE.

        Errors are printed instead of being raised.
        """
        print('测试器运行')
        try:
            proxies = self.redis.all()
            loop = asyncio.get_event_loop()
            for i in range(0, len(proxies), BATCH_TEST_SIZE):
                test_proxies = proxies[i:i + BATCH_TEST_SIZE]
                # asyncio.wait() rejects bare coroutines since Python 3.11,
                # so every check is wrapped in a Task first.
                tasks = [
                    loop.create_task(self.test_single_proxy(proxy))
                    for proxy in test_proxies
                ]
                loop.run_until_complete(asyncio.wait(tasks))
                time.sleep(5)
        except Exception as e:
            print(e.args)
Esempio n. 14
0
class Tester(object):
    """
    Tester: decides whether a proxy is usable by requesting TEST_URL through it.
    """

    def __init__(self):
        self.redis = RedisClient()

    async def test_single_proxy(self, proxy):
        """
        Test a single proxy.

        @param proxy: a single proxy (``bytes`` or ``str``)
        return None
        """
        conn = aiohttp.TCPConnector(verify_ssl=False)
        async with aiohttp.ClientSession(connector=conn) as session:
            try:
                # Formatting bytes with %s would yield "http://b'...'".
                if isinstance(proxy, bytes):
                    proxy = proxy.decode('utf-8')
                real_proxy = 'http://%s' % proxy
                async with session.get(TEST_URL, proxy=real_proxy,
                                       timeout=2) as response:
                    if response.status in VALID_STATUS_CODES:
                        self.redis.max(proxy)
                    else:
                        self.redis.decrease(proxy)
            # asyncio.TimeoutError is listed explicitly: before Python 3.11 it
            # is not the same class as the builtin TimeoutError.
            except (ClientError, ClientConnectorError, TimeoutError,
                    asyncio.TimeoutError, AttributeError):
                self.redis.decrease(proxy)

    def run(self):
        """
        Main test routine.

        Errors are printed instead of being raised.
        """
        print('Starting test')
        try:
            # Fetch every stored proxy.
            proxies = self.redis.all()
            loop = asyncio.get_event_loop()
            # Test in batches.
            for i in range(0, len(proxies), BATCH_TEST_SIZE):
                test_proxies = proxies[i:i + BATCH_TEST_SIZE]

                # asyncio.wait() rejects bare coroutines since Python 3.11,
                # so every check is wrapped in a Task first.
                tasks = [
                    loop.create_task(self.test_single_proxy(proxy))
                    for proxy in test_proxies
                ]

                loop.run_until_complete(asyncio.wait(tasks))
                time.sleep(5)
        except Exception as e:
            print('Tester errors', e.args)
Esempio n. 15
0
class Tester(object):
    """Checks pooled proxies with aiohttp and updates their scores."""

    def __init__(self):
        self.db = RedisClient()

    async def test_single_proxy(self, proxy):
        """
        Test a proxy with the asynchronous HTTP library aiohttp.

        :param proxy: ``host:port`` of the proxy, as ``bytes`` or ``str``
        :return: None
        """
        conn = aiohttp.TCPConnector(verify_ssl=False)
        async with aiohttp.ClientSession(connector=conn) as session:
            try:
                if isinstance(proxy, bytes):
                    proxy = proxy.decode('utf-8')
                real_proxy = 'http://' + proxy
                print('正在测试', proxy)
                async with session.get(TEST_URL,
                                       proxy=real_proxy,
                                       timeout=15,
                                       allow_redirects=False) as r:
                    if r.status == 200:
                        self.db.max(proxy)
                        print('代理可用:', proxy)
                    else:
                        # An invalid status must cost the proxy score; the
                        # previous ``exists`` call only queried the store.
                        self.db.decrease(proxy)
                        print('状态码不合法:', proxy)
            except Exception:
                self.db.decrease(proxy)
                print('代理请求异常:', proxy)

    def run(self):
        """
        Walk the pool in batches of BATCH_TEST_SIZE and test each batch.

        Errors are printed instead of being raised.
        """
        try:
            count = self.db.count()
            print('当前剩余{}个代理'.format(count))
            loop = asyncio.get_event_loop()
            for start in range(0, count, BATCH_TEST_SIZE):
                stop = min(start + BATCH_TEST_SIZE, count)
                test_proxies = self.db.batch(start, stop)
                # asyncio.wait() rejects bare coroutines since Python 3.11,
                # so every check is wrapped in a Task first.
                tasks = [
                    loop.create_task(self.test_single_proxy(proxy))
                    for proxy in test_proxies
                ]
                if not tasks:
                    # asyncio.wait() raises ValueError on an empty set.
                    continue
                loop.run_until_complete(asyncio.wait(tasks))
                sys.stdout.flush()  # flush the output buffer
                time.sleep(5)
        except Exception as e:
            print('测试器发生错误', e.args)
Esempio n. 16
0
class Tester(object):
    """Checks pooled proxies with aiohttp and updates their scores in Redis."""

    def __init__(self):
        self.redis = RedisClient()

    async def single_proxy(self, proxy):
        """
        Test one proxy against TEST_URL.

        :param proxy: ``host:port`` of the proxy, as ``bytes`` or ``str``
        :return: None
        """
        async with aiohttp.ClientSession() as session:
            try:
                # str + bytes raises a TypeError that the handler below does
                # not catch, so decode values coming from Redis first.
                if isinstance(proxy, bytes):
                    proxy = proxy.decode('utf-8')
                # NOTE(review): an "https://" proxy URL is kept as in the
                # original; confirm the installed aiohttp accepts HTTPS proxies.
                real_proxy = 'https://' + proxy
                print('正在测试:', proxy)
                async with session.get(TEST_URL,
                                       proxy=real_proxy,
                                       timeout=10,
                                       allow_redirects=False,
                                       verify_ssl=False) as resp:
                    if 200 == resp.status:
                        self.redis.max(proxy)
                    else:
                        self.redis.decrease(proxy)
                        print('请求错误:', resp.status, 'IP', proxy)

            except (ClientError, ClientProxyConnectionError,
                    aiohttp.client_exceptions.ClientConnectorError,
                    asyncio.TimeoutError, AttributeError):
                self.redis.decrease(proxy)
                print('代理请求失败', proxy)

    def run(self):
        """
        Walk the pool in batches of BATCH_TEST_SIZE and test each batch.

        Errors are printed instead of being raised.
        """
        print('开始检测ip:....')
        try:
            count = self.redis.count()
            print('当前剩余', count, '个代理')
            loop = asyncio.get_event_loop()
            for start in range(0, count, BATCH_TEST_SIZE):
                end = min(start + BATCH_TEST_SIZE, count)
                print('正在测试第', start + 1, '-', end, '个代理')
                test_proxies = self.redis.batch(start, end)
                # asyncio.wait() rejects bare coroutines since Python 3.11,
                # so every check is wrapped in a Task first.
                tasks = [
                    loop.create_task(self.single_proxy(proxy))
                    for proxy in test_proxies
                ]
                if not tasks:
                    # asyncio.wait() raises ValueError on an empty set.
                    continue
                loop.run_until_complete(asyncio.wait(tasks))
                sys.stdout.flush()
                time.sleep(5)
        except Exception as e:
            print('检测异常:', e.args)


# r = Tester()
# r.run()
Esempio n. 17
0
class Tester(object):
    """Sequential proxy checker that picks the test site from the proxy's scheme."""

    def __init__(self):
        self.redis = RedisClient()
        self.headers = {
            "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36"
        }
        self.test_url = ""

    def parse_url(self, proxy):
        """
        Test one proxy URL and update its score.

        The part before the first ``:`` selects the target: ``http`` proxies
        are tested against an http site, everything else against an https
        site. The chosen target is stored in ``self.test_url``.

        :param proxy: proxy URL including its scheme
        :return: None
        """
        print("即将开始测试代理: ", proxy)

        scheme = proxy.split(":")[0]
        if scheme != "http":
            proxies = {"https": proxy}
            self.test_url = "https://www.baidu.com"
        else:
            proxies = {"http": proxy}
            self.test_url = "http://www.xinhuanet.com"

        try:
            print(self.test_url)
            response = requests.get(self.test_url, proxies=proxies, headers=self.headers, timeout=3)

            status = response.status_code
            if status != 200:
                self.redis.decrease(proxy)
                print("响应状态玛不合法: ", status, " proxy: ", proxy)
            else:
                self.redis.max(proxy)
                print("发现可用代理: ", proxy)
        except Exception as err:
            # Any failure (request or bookkeeping) counts against the proxy.
            self.redis.decrease(proxy)
            print("请求发生错误: ", err, " proxy: ", proxy)

    def run(self):
        """Walk the pool in batches of BATCH_TEST_SIZE, testing proxies one by one."""
        print("开始测试代理ip......")
        count = self.redis.count()
        print("当前剩余 ", count, " 个代理")
        for start in range(0, count, BATCH_TEST_SIZE):
            stop = min(start + BATCH_TEST_SIZE, count)
            print("正在测试第 ", start + 1, " - ", stop, "个代理")

            for proxy in self.redis.batch(start, stop):
                self.parse_url(proxy)
class Tester(object):
    """Checks pooled proxies concurrently and updates their scores in Redis."""

    def __init__(self):
        # One RedisClient shared by all methods of this object.
        self.redis = RedisClient()

    async def test_single_proxy(self, proxy):
        """
        Test a single proxy.

        ``proxy`` is now a real parameter: ``run()`` passes it and the body
        uses it, but the signature used to omit it, so every call failed.

        :param proxy: a single proxy (``bytes`` or ``str``)
        :return:  None
        """
        conn = aiohttp.TCPConnector(verify_ssl=False)
        async with aiohttp.ClientSession(connector=conn) as session:
            try:
                if isinstance(proxy, bytes):
                    proxy = proxy.decode('utf-8')
                real_proxy = 'http://' + proxy
                print('正在测试', proxy)
                # TEST_URL is a constant; point it at the site to be crawled
                # when the proxies are needed for one specific target.
                async with session.get(TEST_URL, proxy=real_proxy,
                                       timeout=15) as response:
                    # VALID_STATUS_CODES lists the acceptable status codes.
                    if response.status in VALID_STATUS_CODES:
                        # Per the original author: sets the score to 100.
                        self.redis.max(proxy)
                        print('代理可用', proxy)
                    else:
                        # Per the original author: lowers the score by 1.
                        self.redis.decrease(proxy)
                        print('请求相应码不合法', proxy)
            # asyncio.TimeoutError is listed explicitly: before Python 3.11 it
            # is not the same class as the builtin TimeoutError.
            except (ClientError, ClientConnectorError, TimeoutError,
                    asyncio.TimeoutError, AttributeError):
                # A failed request lowers the score as well.
                self.redis.decrease(proxy)
                print('代理请求失败', proxy)

    def run(self):
        """
        Main test routine.

        Errors are printed instead of being raised.

        :return: None
        """
        print('测试器开始运行')
        try:
            proxies = self.redis.all()
            loop = asyncio.get_event_loop()
            # Test in batches.
            for i in range(0, len(proxies), BATCH_TEST_SIZE):
                test_proxies = proxies[i:i + BATCH_TEST_SIZE]
                # asyncio.wait() rejects bare coroutines since Python 3.11,
                # so every check is wrapped in a Task first.
                tasks = [
                    loop.create_task(self.test_single_proxy(proxy))
                    for proxy in test_proxies
                ]
                loop.run_until_complete(asyncio.wait(tasks))
                time.sleep(5)
        except Exception as e:
            print('测试器发生错误', e.args)
Esempio n. 19
0
class Tester(object):
    """Checks pooled proxies concurrently and updates their scores in Redis."""

    def __init__(self):
        self.redis = RedisClient()

    async def test_single_proxy(self, proxy):
        """
        Test one proxy against TEST_URL.

        :param proxy: ``host:port`` of the proxy, as ``bytes`` or ``str``
        :return: None
        """
        conn = aiohttp.TCPConnector(ssl=False)

        async with aiohttp.ClientSession(connector=conn) as session:
            try:
                if isinstance(proxy, bytes):
                    proxy = proxy.decode("utf-8")

                real_proxy = "http://" + proxy
                print("正在测试", proxy)

                async with session.get(TEST_URL, proxy=real_proxy,
                                       timeout=15) as response:
                    if response.status in VALID_STATUS_CODES:
                        self.redis.max(proxy)
                        print("代理可用", proxy)
                    else:
                        self.redis.decrease(proxy)
                        print("请求响应码不合法", proxy)
            except (aiohttp.ClientError, aiohttp.ClientConnectorError,
                    asyncio.TimeoutError, AttributeError):
                self.redis.decrease(proxy)
                print("代理请求失败", proxy)

    def run(self):
        """
        Test all stored proxies in batches of BATCH_TEST_SIZE.

        Errors are printed instead of being raised.
        """
        print("测试器开始执行")
        try:
            proxies = self.redis.all()
            loop = asyncio.get_event_loop()

            # Iterate over the snapshot actually fetched: a separate count()
            # query could disagree with it and produce empty batches, which
            # asyncio.wait() rejects.
            for i in range(0, len(proxies), BATCH_TEST_SIZE):
                test_proxies = proxies[i:i + BATCH_TEST_SIZE]
                # asyncio.wait() rejects bare coroutines since Python 3.11,
                # so every check is wrapped in a Task first.
                tasks = [
                    loop.create_task(self.test_single_proxy(proxy))
                    for proxy in test_proxies
                ]
                loop.run_until_complete(asyncio.wait(tasks))
                sys.stdout.flush()
                time.sleep(5)
        except Exception as e:
            print("测试器发生错误", e.args)
Esempio n. 20
0
class My_Tester():
    """Tests freshly crawled proxies against TEST_URL and records working ones."""

    def __init__(self):
        self.redis = RedisClient()

    async def test_single_proxy(self, proxy):
        """Request TEST_URL through one proxy; call ``self.redis.max`` on HTTP 200.

        :param proxy: proxy address without a scheme, as ``str`` or UTF-8
            encoded ``bytes``; ``'http://'`` is prepended to build the URL.
        """
        async with aiohttp.ClientSession() as session:
            try:
                if isinstance(proxy, bytes):
                    proxy = proxy.decode('utf-8')
                real_proxy = 'http://' + proxy
                async with session.get(url=TEST_URL,
                                       proxy=real_proxy,
                                       timeout=5) as response:
                    if response.status == 200:
                        print('代理有效', proxy)
                        print('当前代理数', self.redis.count())
                        self.redis.max(proxy)
                    else:
                        # NOTE(review): the original had the score decrease
                        # for this branch commented out; confirm whether a
                        # bad status should penalise the proxy.
                        print('代理无效', proxy, '状态码', response.status)
            except Exception as e:
                # NOTE(review): failures are only printed; the original had
                # the score decrease commented out here as well.
                print('Error', e.args)

    def run(self):
        """Crawl proxies from both sources and test them ten at a time."""
        print('测试器开始运行')
        crawler = Crawler()
        proxies = list(crawler.crawl_ip3366())
        proxies.extend(crawler.crawl_xicidaili())
        loop = asyncio.get_event_loop()
        try:
            for i in range(0, len(proxies), 10):
                # asyncio.wait() no longer accepts bare coroutine objects
                # (TypeError on Python 3.11+), so wrap each one in a Task.
                tasks = [
                    loop.create_task(self.test_single_proxy(proxy))
                    for proxy in proxies[i:i + 10]
                ]
                loop.run_until_complete(asyncio.wait(tasks))
                time.sleep(2)
        except Exception as e:
            print('测试错误', e.args)
        finally:
            # Close the loop on the error path too, not only after success.
            loop.close()
Esempio n. 21
0
class Tester:
    """Check whether the proxies in the pool are usable.

    Per the original description: a usable proxy has its score set to 100,
    otherwise its score is decreased by one.
    """

    def __init__(self):
        self.redis = RedisClient()

    async def single_test(self, proxy):
        """Test a single proxy against TEST_URL and update its score.

        :param proxy: proxy address without a scheme, as ``str`` or UTF-8
            encoded ``bytes``; ``"http://"`` is prepended to build the URL.
        """
        conn = aiohttp.TCPConnector(verify_ssl=False)
        async with aiohttp.ClientSession(connector=conn) as session:
            try:
                if isinstance(proxy, bytes):
                    proxy = proxy.decode('utf-8')
                real_proxy = "http://" + proxy
                print("正在测试:", proxy)
                async with session.get(TEST_URL, proxy=real_proxy,
                                       timeout=15) as response:
                    # aiohttp responses expose the code as ``status``. The
                    # former ``status_code`` lookup raised AttributeError,
                    # which was caught below and penalised working proxies.
                    if response.status in VALID_STATUS_CODES:
                        self.redis.max(proxy)
                        print(proxy, '代理可用')
                    else:
                        self.redis.decrease(proxy)
                        print(proxy, 'IP 请求响应码不合法')
            except (ClientError, asyncio.TimeoutError, AttributeError):
                # Timeouts are not ClientError subclasses, so they are
                # listed explicitly to count as a failed proxy.
                self.redis.decrease(proxy)
                print('代理请求失败', proxy)

    def run(self):
        """Test all stored proxies asynchronously, BATCH_TEST_SIZE at a time."""
        print('开始测试代理')
        try:
            proxies = self.redis.all()
            # Derive the count from the fetched snapshot so batch slices
            # can never be empty (asyncio.wait() rejects an empty set).
            count = len(proxies)
            print('当前剩余', count, '个代理')

            loop = asyncio.get_event_loop()
            for start in range(0, count, BATCH_TEST_SIZE):
                test_proxies = proxies[start:start + BATCH_TEST_SIZE]
                # asyncio.wait() no longer accepts bare coroutine objects
                # (TypeError on Python 3.11+), so wrap each one in a Task.
                tasks = [
                    loop.create_task(self.single_test(proxy))
                    for proxy in test_proxies
                ]
                loop.run_until_complete(asyncio.wait(tasks))
                time.sleep(randint(1, 5))
        except Exception as e:
            print('测试器发生错误', e.args)
Esempio n. 22
0
class Tester():
    """Tests stored proxies against TEST_URL, fetching them from Redis in batches."""

    def __init__(self):
        self.redis = RedisClient()

    async def test_single_proxy(self, proxy):
        """Request TEST_URL through one proxy and update its score in Redis.

        :param proxy: proxy address without a scheme, as ``str`` or UTF-8
            encoded ``bytes``; ``'http://'`` is prepended to build the URL.
        """
        conn = aiohttp.TCPConnector(verify_ssl=False)
        async with aiohttp.ClientSession(connector=conn) as session:
            try:
                if isinstance(proxy, bytes):
                    proxy = proxy.decode('utf-8')
                real_proxy = 'http://' + proxy
                print('正在测试', proxy)
                async with session.get(url=TEST_URL,
                                       proxy=real_proxy,
                                       timeout=5) as response:
                    if response.status in VALID_STATUS_CODE:
                        self.redis.max(proxy)
                        print('代理可用', proxy)
                    else:
                        self.redis.decrease(proxy)
                        print('请求响应码不合法', proxy)
            except (ClientError, TimeoutError, asyncio.TimeoutError,
                    AttributeError) as e:
                # asyncio.TimeoutError is listed explicitly: before Python
                # 3.11 it is a different class from the builtin TimeoutError.
                print(e.args)
                self.redis.decrease(proxy)
                print('代理请求失败', proxy)

    def run(self):
        """Walk the pool in BATCH_TEST_SIZE windows and test each window."""
        print('测试器开始运行')
        try:
            count = self.redis.count()
            print('当前代理剩余个数', count)
            loop = asyncio.get_event_loop()
            for start in range(0, count, BATCH_TEST_SIZE):
                stop = min(count, start + BATCH_TEST_SIZE)
                test_proxies = self.redis.batch(start, stop)
                print('正在测试', start + 1, '-', stop, '个代理')
                # asyncio.wait() no longer accepts bare coroutine objects
                # (TypeError on Python 3.11+), so wrap each one in a Task.
                tasks = [
                    loop.create_task(self.test_single_proxy(proxy))
                    for proxy in test_proxies
                ]
                if not tasks:
                    # The pool may have shrunk since count() was read;
                    # asyncio.wait() raises ValueError on an empty set.
                    continue
                loop.run_until_complete(asyncio.wait(tasks))
                time.sleep(2)
        except Exception as e:
            print('测试器发生错误', e.args)
Esempio n. 23
0
class Tester:
    """Tests stored proxies against the target site using asyncio."""

    def __init__(self):
        """Create the database client used to read and rescore proxies."""
        self.redis = RedisClient()

    async def test_one_proxy(self, proxy):
        """Test whether one proxy can reach the target site.

        :param proxy: proxy address without a scheme, as ``str`` or UTF-8
            encoded ``bytes``; ``'http://'`` is prepended to build the URL.
        """
        conn = aiohttp.TCPConnector(ssl=False)
        async with aiohttp.ClientSession(connector=conn) as session:
            try:
                if isinstance(proxy, bytes):
                    # Decode to a string
                    proxy = proxy.decode('utf-8')
                real_proxy = 'http://' + proxy
                async with session.get(TEST_URL, proxy=real_proxy,
                                       timeout=30) as response:
                    if response.status in TRUE_STATUS_CODE:
                        # Proxy is usable
                        self.redis.max(proxy)
                        print(proxy, 100, '可用')
                    else:
                        # Proxy is not usable
                        self.redis.decrease(proxy)
                        print(proxy, -1, "状态码错误")
            except Exception as e:
                self.redis.decrease(proxy)
                print(proxy, -1, e.args)

    async def start(self):
        """Run the coroutines that test all proxies, one batch at a time."""
        try:
            proxies = self.redis.all()
            for i in range(0, len(proxies), BATCH_TEST_SIZE):
                test_proxies = proxies[i:i + BATCH_TEST_SIZE]
                tasks = [self.test_one_proxy(proxy) for proxy in test_proxies]
                await asyncio.gather(*tasks)
                # Pause between batches without blocking the event loop;
                # time.sleep() here would stall the whole loop thread.
                await asyncio.sleep(5)
        except Exception as e:
            print('测试器发生错误', e.args)

    def run(self):
        """Synchronous entry point: drive ``start()`` to completion."""
        asyncio.run(self.start())
Esempio n. 24
0
class Tester(object):
    """Tests the proxies stored in Redis against TEST_URL in batches."""

    def __init__(self):
        self.redis = RedisClient()

    async def test_single_proxy(self, proxy):
        """Request TEST_URL through one proxy (no redirects) and rescore it.

        :param proxy: proxy address without a scheme, as ``str`` or UTF-8
            encoded ``bytes``; ``'http://'`` is prepended to build the URL.
        """
        conn = aiohttp.TCPConnector(ssl=False)
        async with aiohttp.ClientSession(connector=conn) as session:
            try:
                if isinstance(proxy, bytes):
                    proxy = proxy.decode('utf-8')
                real_proxy = 'http://' + proxy
                async with session.get(TEST_URL,
                                       proxy=real_proxy,
                                       timeout=15,
                                       allow_redirects=False) as response:
                    if response.status in VALID_STATUS_CODES:
                        self.redis.max(proxy)
                    else:
                        self.redis.decrease(proxy)
            except (aiohttp.ClientError,
                    aiohttp.client_exceptions.ClientConnectorError,
                    asyncio.TimeoutError, AttributeError):
                self.redis.decrease(proxy)

    def run(self):
        """Test every proxy from ``self.redis.all()``, BATCH_TEST_SIZE at a time."""
        print('测试器开始运行!')
        try:
            proxies = self.redis.all()
            count = len(proxies)
            print('当前共有{0}个代理'.format(count))
            # The loop lookup does not change between batches; do it once.
            loop = asyncio.get_event_loop()
            for start in range(0, count, BATCH_TEST_SIZE):
                test_proxies = proxies[start:start + BATCH_TEST_SIZE]
                # asyncio.wait() no longer accepts bare coroutine objects
                # (TypeError on Python 3.11+), so wrap each one in a Task.
                tasks = [
                    loop.create_task(self.test_single_proxy(proxy))
                    for proxy in test_proxies
                ]
                loop.run_until_complete(asyncio.wait(tasks))
                time.sleep(5)
        except Exception as e:
            print('测试器发生错误', e.args)
Esempio n. 25
0
class Tester(object):
    """Tests the proxies stored in Redis against TEST_URL in batches."""

    def __init__(self):
        self.redis = RedisClient()

    async def test_single_proxy(self, proxy):
        """Test a single proxy and update its score in Redis.

        :param proxy: proxy address without a scheme, as ``str`` or UTF-8
            encoded ``bytes``; ``'http://'`` is prepended to build the URL.
        """
        # Decode first so the except branch below always reports and
        # penalises the same (string) form of the proxy.
        if isinstance(proxy, bytes):
            proxy = proxy.decode('utf-8')
        try:
            conn = TCPConnector(verify_ssl=False)
            async with ClientSession(connector=conn) as session:
                real_proxy = 'http://' + proxy
                print('正在测试: ', proxy)
                async with session.get(TEST_URL, proxy=real_proxy,
                                       timeout=10) as response:
                    if response.status in VALID_STATUS_CODES:
                        self.redis.max(proxy)
                        print('代理可用: ', proxy)
                    else:
                        # Was misspelled ``derease``; the resulting
                        # AttributeError was caught below and reported an
                        # invalid status as a failed request.
                        self.redis.decrease(proxy)
                        print('请求响应不合法: ', proxy)
        except (ClientError, ClientConnectorError, TimeoutError,
                asyncio.TimeoutError, AttributeError):
            # asyncio.TimeoutError is listed explicitly: before Python 3.11
            # it is a different class from the builtin TimeoutError.
            self.redis.decrease(proxy)
            print('代理请求失败: ', proxy)

    def run(self):
        """Test every proxy from ``self.redis.all()``, BATCH_TEST_SIZE at a time."""
        print('开始测试...')
        try:
            proxies = self.redis.all()
            loop = asyncio.get_event_loop()
            for index in range(0, len(proxies), BATCH_TEST_SIZE):
                test_proxies = proxies[index:index + BATCH_TEST_SIZE]
                # asyncio.wait() no longer accepts bare coroutine objects
                # (TypeError on Python 3.11+), so wrap each one in a Task.
                tasks = [
                    loop.create_task(self.test_single_proxy(proxy))
                    for proxy in test_proxies
                ]
                loop.run_until_complete(asyncio.wait(tasks))
                time.sleep(5)
        except Exception as e:
            print('测试发生错误', e.args)
Esempio n. 26
0
class Tester_2:
    """Sequential proxy tester built on ``requests``."""

    # Seconds to wait for the test request. Without a timeout a dead proxy
    # can keep requests.get() blocked indefinitely.
    REQUEST_TIMEOUT = 15
    # Upper bound on how many proxies a single run() call tests.
    MAX_PROXIES_PER_RUN = 15

    def __init__(self):
        self.redis = RedisClient()
        self.headers = {
            'user-agent':
            'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) '
            'Chrome/73.0.3683.103 Safari/537.36'
        }

    def single_test(self, proxy):
        """Request TEST_URL through one proxy and update its score in Redis.

        :param proxy: proxy address without a scheme, as ``str`` or UTF-8
            encoded ``bytes``.
        """
        # A bytes value would make the string concatenation below raise
        # TypeError and abort the whole run.
        if isinstance(proxy, bytes):
            proxy = proxy.decode('utf-8')
        # NOTE(review): the 'https' entry uses an https:// proxy URL; confirm
        # the proxies in the pool actually accept TLS connections.
        proxies = {'http': 'http://' + proxy, 'https': 'https://' + proxy}
        try:
            resp = requests.get(TEST_URL,
                                proxies=proxies,
                                headers=self.headers,
                                timeout=self.REQUEST_TIMEOUT)
            if resp.status_code == 200:
                print(proxy, '代理可用')
                self.redis.max(proxy)
            else:
                self.redis.decrease(proxy)
                print(proxy, 'IP 请求响应码不合法')
        except (ConnectionError, requests.exceptions.RequestException):
            # RequestException covers requests' own connection, proxy and
            # timeout errors, whichever ConnectionError name is in scope.
            print('代理请求失败', proxy)
            self.redis.decrease(proxy)

    def run(self):
        """Test up to MAX_PROXIES_PER_RUN proxies, pausing 1-5 s after each."""
        count = self.redis.count()
        print('共有', count, '个代理')
        print('开始检测代理')
        proxies = self.redis.all()
        try:
            for tested, proxy in enumerate(proxies, start=1):
                print(proxy)
                self.single_test(proxy)
                time.sleep(randint(1, 5))
                if tested == self.MAX_PROXIES_PER_RUN:
                    break
        except Exception as e:
            print('测试器发生错误', e)
Esempio n. 27
0
class Tester:
    """Tests stored proxies against TEST_URL, fetching them from Redis in batches."""

    def __init__(self):
        self.redis = RedisClient()

    async def test_single_proxy(self, proxy):
        """Request TEST_URL through one proxy (no redirects) and rescore it.

        :param proxy: proxy address without a scheme, as ``str`` or UTF-8
            encoded ``bytes``; ``"http://"`` is prepended to build the URL.
        """
        conn = aiohttp.TCPConnector(verify_ssl=False)
        async with aiohttp.ClientSession(connector=conn) as session:
            try:
                if isinstance(proxy, bytes):
                    proxy = proxy.decode("utf8")
                real_proxy = "http://"+proxy
                print(f"testing {proxy}")
                async with session.get(TEST_URL, proxy=real_proxy, timeout=15, allow_redirects=False) as response:
                    if response.status in VALID_STATUS_CODES:
                        self.redis.max(proxy)
                        print(f"{proxy} can use.")
                    else:
                        self.redis.decrease(proxy)
                        print(f"request is invalid {response.status} by proxy ip {proxy}")
            except (ClientError, aiohttp.client_exceptions.ClientConnectionError, asyncio.TimeoutError, AttributeError):
                self.redis.decrease(proxy)
                print(f"request is failed by proxy ip {proxy}")

    def run(self):
        """Walk the pool in BATCH_TEST_SIZE windows and test each window."""
        print("tester is begining")
        try:
            count = self.redis.count()
            print(f"hold {count} proxies")
            # The loop lookup does not change between batches; do it once.
            loop = asyncio.get_event_loop()
            for start in range(0, count, BATCH_TEST_SIZE):
                stop = min(start + BATCH_TEST_SIZE, count)
                print(f"will test {start+1} - {stop} proxies")
                test_proxies = self.redis.batch(start, stop)
                # asyncio.wait() no longer accepts bare coroutine objects
                # (TypeError on Python 3.11+), so wrap each one in a Task.
                tasks = [
                    loop.create_task(self.test_single_proxy(proxy))
                    for proxy in test_proxies
                ]
                if not tasks:
                    # The pool may have shrunk since count() was read;
                    # asyncio.wait() raises ValueError on an empty set.
                    continue
                loop.run_until_complete(asyncio.wait(tasks))
                sys.stdout.flush()
                time.sleep(5)
        except Exception as e:
            print(f"test error, {e.args}")
Esempio n. 28
0
class Tester():
    """Tests the entries returned by RedisClient against TEST_URL in batches."""

    def __init__(self):
        self.redis = RedisClient()

    async def single_test(self, proxy):
        """Try connecting through a single proxy and update its score.

        :param proxy: an entry from ``self.redis.all()``.
        """
        conn = aiohttp.TCPConnector(ssl=False)
        async with aiohttp.ClientSession(connector=conn) as session:
            try:
                # NOTE(review): relies on the entry exposing ``.string()``;
                # plain str/bytes do not, and the resulting AttributeError is
                # caught below and counted as a failure. Confirm the type
                # returned by RedisClient.all().
                # NOTE(review): the proxy URL uses the https:// scheme;
                # confirm the installed aiohttp accepts it.
                real_proxy = "https://" + proxy.string()
                print("testing", proxy)
                async with session.get(TEST_URL,
                                       allow_redirects=False,
                                       proxy=real_proxy,
                                       timeout=15) as response:
                    if response.status in VALID_STATUS_CODE:
                        self.redis.max(proxy)
                        print("Available proxy:", proxy)
                    else:
                        self.redis.decrease(proxy)
                        print("Not Available Status:", proxy, " Score -1")
            except (aiohttp.ClientError, aiohttp.ClientConnectorError,
                    TimeoutError, asyncio.TimeoutError, AttributeError,
                    aiohttp.ClientOSError, aiohttp.ClientHttpProxyError):
                # asyncio.TimeoutError is listed explicitly: before Python
                # 3.11 it is a different class from the builtin TimeoutError.
                self.redis.decrease(proxy)
                print("Error detected!", proxy)

    def run(self):
        """Test every entry from ``self.redis.all()``, BATCH_TEST_SIZE at a time."""
        print("Starts running tester")
        try:
            entries = self.redis.all()
            loop = asyncio.get_event_loop()
            # Step by the same size used for slicing. The former hard-coded
            # step of 200 skipped or retested entries whenever
            # BATCH_TEST_SIZE differed from 200.
            for i in range(0, len(entries), BATCH_TEST_SIZE):
                test_proxies = entries[i:i + BATCH_TEST_SIZE]
                # asyncio.wait() no longer accepts bare coroutine objects
                # (TypeError on Python 3.11+), so wrap each one in a Task.
                tasks = [
                    loop.create_task(self.single_test(proxy))
                    for proxy in test_proxies
                ]
                loop.run_until_complete(asyncio.wait(tasks))
                time.sleep(5)
        except Exception as e:
            print(' Error with tester ', e.args)
Esempio n. 29
0
class Tester(object):
    """Tests stored proxies against TEST_URL, fetching them from Redis in batches."""

    def __init__(self):
        self.redis = RedisClient()

    async def test_single_proxy(self, proxy):
        """Request TEST_URL through one proxy (no redirects) and rescore it.

        :param proxy: proxy address without a scheme, as ``str`` or UTF-8
            encoded ``bytes``; ``'http://'`` is prepended to build the URL.
        """
        conn = aiohttp.TCPConnector(verify_ssl=False)
        async with aiohttp.ClientSession(connector=conn) as session:
            try:
                if isinstance(proxy, bytes):
                    proxy = proxy.decode('utf-8')
                real_proxy = 'http://' + proxy
                print('正在测试', proxy)
                async with session.get(TEST_URL,
                                       headers=base_header,
                                       proxy=real_proxy,
                                       timeout=3,
                                       allow_redirects=False) as response:
                    if response.status in VALID_STATUS_CODES:
                        self.redis.max(proxy)
                        print('代理可用', proxy)
                    else:
                        self.redis.decrease(proxy)
                        print('请求错误', response.status, proxy)
            except (asyncio.TimeoutError,
                    aiohttp.client_exceptions.ClientProxyConnectionError,
                    aiohttp.ClientError):
                self.redis.decrease(proxy)
                print('代理请求失败', proxy)

    def run(self):
        """Walk the pool in BATCH_TEST_SIZE windows and test each window."""
        print('测试器开始运行')
        try:
            count = self.redis.count()
            print('当前剩下代理数量:',count)
            # The loop lookup does not change between batches; do it once.
            loop = asyncio.get_event_loop()
            for start in range(0, count, BATCH_TEST_SIZE):
                stop = min(start + BATCH_TEST_SIZE, count)
                print('正在测试第', start + 1, '-', stop, '的代理')
                test_proxies = self.redis.batch(start, stop)
                # asyncio.wait() no longer accepts bare coroutine objects
                # (TypeError on Python 3.11+), so wrap each one in a Task.
                tasks = [
                    loop.create_task(self.test_single_proxy(proxy))
                    for proxy in test_proxies
                ]
                if not tasks:
                    # The pool may have shrunk since count() was read;
                    # asyncio.wait() raises ValueError on an empty set.
                    continue
                loop.run_until_complete(asyncio.wait(tasks))
                time.sleep(5)
        except Exception as e:
            print('测试器发生错误', e.args)
Esempio n. 30
0
class Tester(object):
    """Tests the proxies stored in Redis against TEST_URL in batches."""

    def __init__(self):
        self.redis = RedisClient()

    async def test_single_proxy(self, proxy):
        """Test a single proxy and update its score in Redis.

        :param proxy: proxy address without a scheme, as ``str`` or UTF-8
            encoded ``bytes``; ``'http://'`` is prepended to build the URL.
        """
        conn = aiohttp.TCPConnector(verify_ssl=False)
        async with aiohttp.ClientSession(connector=conn) as session:
            try:
                if isinstance(proxy, bytes):
                    proxy = proxy.decode('utf-8')
                real_proxy = 'http://' + proxy
                print('正在测试', proxy)
                async with session.get(TEST_URL, proxy=real_proxy, timeout=15) as response:
                    if response.status in VALID_STATUS_CODES:
                        self.redis.max(proxy)
                        print('代理可用', proxy)
                    else:
                        self.redis.decrease(proxy)
                        print('请求响应码不合法', proxy)

            except (ClientProxyConnectionError, ServerDisconnectedError, ClientOSError, ClientHttpProxyError,
                    TimeoutError, asyncio.TimeoutError, AttributeError):
                # asyncio.TimeoutError is listed explicitly: before Python
                # 3.11 it is a different class from the builtin TimeoutError.
                self.redis.decrease(proxy)
                print('代理请求失败', proxy)

    def run(self):
        """Main test routine: test every stored proxy, BATCH_TEST_SIZE at a time."""
        print('测试器开始运行')
        try:
            proxies = self.redis.all()
            loop = asyncio.get_event_loop()
            # Start at index 0; starting at 1 left the first proxy untested.
            for i in range(0, len(proxies), BATCH_TEST_SIZE):
                test_proxies = proxies[i:i + BATCH_TEST_SIZE]
                # asyncio.wait() no longer accepts bare coroutine objects
                # (TypeError on Python 3.11+), so wrap each one in a Task.
                tasks = [
                    loop.create_task(self.test_single_proxy(proxy))
                    for proxy in test_proxies
                ]
                loop.run_until_complete(asyncio.wait(tasks))
                time.sleep(5)
        except Exception as e:
            print('测试器发生错误', e.args)