Example #1
0
class Getter():
    """Collects proxies through ``Crawler`` and stores them via ``MysqlClient``."""

    def __init__(self):
        self.Mysql = MysqlClient()
        self.crawler = Crawler()

    def is_over_threshold(self):
        """
        Report whether the stored proxy count has reached POOL_UPPER_THRESHOLD.

        :return: True when ``self.Mysql.count()`` is at or above the limit
        """
        return self.Mysql.count() >= POOL_UPPER_THRESHOLD

    def run(self):
        """
        Fetch proxies from every entry of ``crawler.__CrawlFunc__`` and store
        the ones the database does not already hold.

        Only the start-up message is printed when the pool is already full.
        """
        print('Get the execution')
        if self.is_over_threshold():
            return
        for index in range(self.crawler.__CrawlFuncCount__):
            crawl_func = self.crawler.__CrawlFunc__[index]
            # Ask the crawler for the proxies produced by this crawl function.
            fetched = self.crawler.get_proxies(crawl_func)
            sys.stdout.flush()
            for proxy in fetched:
                if not self.Mysql.exists(proxy):
                    print(proxy)
                    self.Mysql.add(proxy)
Example #2
0
class Tester(object):
    """Requests TEST_URL through each stored proxy and records the outcome."""

    def __init__(self):
        self.Mysql = MysqlClient()

    @staticmethod
    def _get_event_loop():
        """Return the current event loop, creating and installing one if none exists."""
        try:
            return asyncio.get_event_loop()
        except RuntimeError:
            # get_event_loop() raises RuntimeError when the current thread has
            # no event loop set; fall back to a fresh one.
            loop = asyncio.new_event_loop()
            asyncio.set_event_loop(loop)
            return loop

    async def test_single_proxy(self, proxy):
        """
        Test a single proxy by requesting TEST_URL through it.

        A response whose status is in VALID_STATUS_CODES is reported with
        ``self.Mysql.max_(proxy)``; any other status, or a failed request,
        is reported with ``self.Mysql.decrease(proxy)``.

        :param proxy: indexable record whose items 0 and 1 are used as the
            host and port of the proxy URL; ``bytes`` is decoded as UTF-8 first
        :return: None
        """
        # NOTE(review): aiohttp 3.x documents verify_ssl as deprecated in
        # favour of ssl=False - confirm the installed version before switching.
        conn = aiohttp.TCPConnector(verify_ssl=False)
        async with aiohttp.ClientSession(connector=conn) as session:
            try:
                if isinstance(proxy, bytes):
                    proxy = proxy.decode('utf-8')

                real_proxy = "http://{0}:{1}".format(proxy[0], proxy[1])
                print(real_proxy)
                print('正在测试', proxy)
                async with session.get(TEST_URL, proxy=real_proxy, timeout=15, allow_redirects=False) as response:
                    if response.status in VALID_STATUS_CODES:
                        self.Mysql.max_(proxy)
                        print('代理可用', proxy)
                    else:
                        self.Mysql.decrease(proxy)
                        print('请求响应码不合法 ', response.status, 'IP', proxy)
            except (ClientError, aiohttp.client_exceptions.ClientConnectorError, asyncio.TimeoutError, AttributeError):
                self.Mysql.decrease(proxy)
                print('代理请求失败', proxy)

    def run(self):
        """
        Test every stored proxy, BATCH_TEST_SIZE at a time.

        Each non-empty batch is tested concurrently on the event loop and is
        followed by a 5 second pause. Any exception is caught and printed
        rather than propagated.

        :return: None
        """
        print('测试器开始运行')
        try:
            count = self.Mysql.count()
            print('当前剩余', count, '个代理')
            for start in range(0, count, BATCH_TEST_SIZE):
                stop = min(start + BATCH_TEST_SIZE, count)
                print('正在测试第', start + 1, '-', stop, '个代理')
                test_proxies = list(self.Mysql.batch(start, stop))
                if not test_proxies:
                    # asyncio.wait() raises ValueError on an empty collection,
                    # which would abort the remaining batches; nothing was
                    # requested, so the pause is skipped as well.
                    continue
                loop = self._get_event_loop()
                # asyncio.wait() no longer accepts bare coroutine objects
                # (TypeError on Python 3.11+), so wrap each one in a Task.
                tasks = [loop.create_task(self.test_single_proxy(proxy)) for proxy in test_proxies]
                loop.run_until_complete(asyncio.wait(tasks))
                sys.stdout.flush()
                time.sleep(5)
        except Exception as e:
            print('测试器发生错误', e.args)
Example #3
0
class Getter():
    """Collects proxies through ``Spider`` and stores them via ``MysqlClient``."""

    def __init__(self):
        self.mysql = MysqlClient()
        self.spider = Spider()

    def is_over_max(self):
        """
        Report whether the stored proxy count has reached MAX_POOL_COUNT.

        :return: True when ``self.mysql.count()`` is at or above the limit
        """
        return self.mysql.count() >= MAX_POOL_COUNT

    def run(self):
        """
        Fetch proxies from every entry of ``spider.__SpiderFunc__`` and add
        them to the database, then close the database client.

        Crawling is skipped when the pool is already full. The client is
        closed even when crawling or inserting raises; the exception still
        propagates to the caller.
        """
        print('爬虫程序开始执行')
        try:
            if not self.is_over_max():
                for callback_label in range(self.spider.__SpiderFuncCount__):
                    callback = self.spider.__SpiderFunc__[callback_label]
                    proxies = self.spider.get_proxies(callback)
                    for proxy in proxies:
                        self.mysql.add(proxy)
        finally:
            # Always release the connection, including on a failed crawl.
            self.mysql.close()
Example #4
0
class Tester(object):
    """Requests TEST_URL through each stored proxy and records the outcome."""

    def __init__(self):
        self.Mysql = MysqlClient()

    @staticmethod
    def _get_event_loop():
        """Return the current event loop, creating and installing one if none exists."""
        try:
            return asyncio.get_event_loop()
        except RuntimeError:
            # get_event_loop() raises RuntimeError when the current thread has
            # no event loop set; fall back to a fresh one.
            loop = asyncio.new_event_loop()
            asyncio.set_event_loop(loop)
            return loop

    async def test_single_proxy(self, proxy):
        """
        Test a single proxy by requesting TEST_URL through it.

        A response whose status is in VALID_STATUS_CODES is reported with
        ``self.Mysql.max_(proxy)``; any other status, or a failed request,
        is reported with ``self.Mysql.decrease(proxy)``.

        :param proxy: indexable record whose items 0 and 1 are used as the
            host and port of the proxy URL; ``bytes`` is decoded as UTF-8 first
        :return: None
        """
        # NOTE(review): aiohttp 3.x documents verify_ssl as deprecated in
        # favour of ssl=False - confirm the installed version before switching.
        conn = aiohttp.TCPConnector(verify_ssl=False)
        async with aiohttp.ClientSession(connector=conn) as session:
            try:
                if isinstance(proxy, bytes):
                    proxy = proxy.decode('utf-8')
                # real proxy = ip : port
                real_proxy = "http://{0}:{1}".format(proxy[0], proxy[1])
                print(real_proxy)
                async with session.get(TEST_URL,
                                       proxy=real_proxy,
                                       timeout=15,
                                       allow_redirects=False) as response:
                    if response.status in VALID_STATUS_CODES:
                        self.Mysql.max_(proxy)
                        print('Proxy is available', proxy)
                    else:
                        self.Mysql.decrease(proxy)
                        print('Request response code is invalid',
                              response.status, 'IP', proxy)
            except (ClientError,
                    aiohttp.client_exceptions.ClientConnectorError,
                    asyncio.TimeoutError, AttributeError):
                self.Mysql.decrease(proxy)
                print('Proxy request failed', proxy)

    def run(self):
        """
        Test every stored proxy, BATCH_TEST_SIZE at a time.

        Each non-empty batch is tested concurrently on the event loop and is
        followed by a 5 second pause. Any exception is caught and printed
        rather than propagated.

        :return: None
        """
        print('Tester starts running')
        try:
            count = self.Mysql.count()
            print('Currently remaining', count, 'Agent')
            logger.log('DEBUG', f' Currently remaining : {count}  Agent')
            for start in range(0, count, BATCH_TEST_SIZE):
                stop = min(start + BATCH_TEST_SIZE, count)
                print('Testing the first', start + 1, '-', stop, 'agent')
                logger.log('DEBUG',
                           f' Testing the first : {start+1} - {stop} Agent ')
                # Fetch the proxies in [start, stop) from the DB.
                test_proxies = list(self.Mysql.batch(start, stop))
                if not test_proxies:
                    # asyncio.wait() raises ValueError on an empty collection,
                    # which would abort the remaining batches; nothing was
                    # requested, so the pause is skipped as well.
                    continue
                loop = self._get_event_loop()
                # asyncio.wait() no longer accepts bare coroutine objects
                # (TypeError on Python 3.11+), so wrap each one in a Task.
                tasks = [
                    loop.create_task(self.test_single_proxy(proxy))
                    for proxy in test_proxies
                ]
                loop.run_until_complete(asyncio.wait(tasks))
                sys.stdout.flush()
                time.sleep(5)
        except Exception as e:
            print('An error occurred in the tester', e.args)