예제 #1
0
class Detection(object):
    """Check stored proxies against ``TEST_URL`` and drop the failing ones.

    Proxies are read from the ``redis_key`` collection of ``RedisDB`` and
    tested in batches of ``BATCH_SIZE``. A proxy is removed from the store
    when the request through it fails, returns a non-200 status, or returns
    a body containing the ``检测到有异常请求`` marker.
    """

    def __init__(self):
        self.redis = RedisDB()

    async def detection_proxy(self, proxy, semaphore):
        """Test a single proxy and delete it from the store if unusable.

        Args:
            proxy: Proxy address without scheme; ``"http://"`` is prepended
                (presumably ``host:port`` -- confirm against the producer).
            semaphore: Async context manager that bounds how many checks
                run at the same time.
        """
        async with semaphore:
            # Certificate verification off, IPv4 only, at most 60 connections.
            con = aiohttp.TCPConnector(verify_ssl=False,
                                       family=socket.AF_INET,
                                       limit=60)
            async with aiohttp.ClientSession(connector=con) as session:
                test_proxy = "http://" + proxy
                try:
                    log.debug("正在测试代理:" + test_proxy)
                    async with session.get(TEST_URL,
                                           proxy=test_proxy,
                                           timeout=7) as response:
                        html = await response.text()
                        usable = (response.status == 200
                                  and '检测到有异常请求' not in html)
                except Exception:
                    # Any failure while requesting through the proxy
                    # (connect error, timeout, bad response) marks it dead.
                    log.debug("\n" + proxy + ' 代理请求失败')
                    usable = False

                if usable:
                    log.debug("\n" + proxy + " 代理可用")
                else:
                    self.redis.delete_value(redis_key, proxy)
                    log.debug("已清除失效的代理:" + proxy)

    def run(self):
        """Load every stored proxy and check them batch by batch.

        Errors are logged rather than raised, so a failure stops the
        remaining batches without propagating to the caller.
        """
        try:
            proxies = self.redis.get_all(redis_key)
            for start in range(0, len(proxies), BATCH_SIZE):
                self.main(proxies[start:start + BATCH_SIZE])
        except Exception as e:
            # Build one string: passing the exception as an extra positional
            # argument without a placeholder breaks %-style log formatting.
            log.debug("测试发生错误: " + repr(e))

    def main(self, test_proxies):
        """Check one batch of proxies concurrently; block until all finish.

        Args:
            test_proxies: Sequence of proxy addresses for
                :meth:`detection_proxy`. An empty batch is a no-op.
        """
        if not test_proxies:
            return

        try:
            loop = asyncio.get_event_loop()
        except RuntimeError:
            # No current loop (newer Python versions, or a non-main thread).
            loop = None
        if loop is None or loop.is_closed():
            loop = asyncio.new_event_loop()
            asyncio.set_event_loop(loop)

        loop.run_until_complete(self._check_batch(test_proxies))

    async def _check_batch(self, test_proxies):
        """Run ``detection_proxy`` for each proxy, at most 5 at a time."""
        # Created inside the running loop so it is bound to the right loop
        # on every Python version.
        semaphore = asyncio.Semaphore(5)
        # gather() accepts bare coroutines (asyncio.wait() no longer does);
        # return_exceptions=True keeps one failing check from aborting the
        # rest of the batch.
        await asyncio.gather(
            *(self.detection_proxy(proxy, semaphore)
              for proxy in test_proxies),
            return_exceptions=True)
예제 #2
0
class Detection(object):
    """Check stored proxies against a test URL and drop the failing ones.

    Proxies are read from the ``redis_key`` collection of ``RedisDB``. A
    proxy is removed from the store when the request through it fails or
    times out, returns a non-200 status, or returns a body containing the
    ``检测到有异常请求`` marker.
    """

    def __init__(self):
        self.redis = RedisDB()
        self.test_url = 'https://m.weibo.cn/'

    @tools.debug
    async def get_html(self, root_url, proxy, semaphore):
        """Fetch ``root_url`` through ``proxy``.

        Args:
            root_url: URL to request.
            proxy: Proxy address without scheme; ``"http://"`` is prepended.
            semaphore: Async context manager held for the duration of the
                request.

        Returns:
            ``(response, html)`` on success, or ``([], [])`` when the
            request times out. Other exceptions propagate to the caller.
        """
        try:
            test_proxy = "http://" + proxy
            log.debug("正在测试代理:" + test_proxy)
            async with semaphore:
                response = await requests.get(root_url,
                                              proxy=test_proxy,
                                              timeout=5)
                html = await response.text()
                return response, html
        except asyncio.TimeoutError:
            return [], []

    @tools.debug
    async def run(self, content_info):
        """Test one proxy and delete it from the store if it is unusable.

        Args:
            content_info: Proxy address as stored under ``redis_key``.
        """
        # NOTE(review): a new semaphore is created on every call, so it does
        # not bound concurrency across the tasks started by doing_main().
        # Left as-is because sharing it would interact with the 6 s overall
        # timeout there -- confirm the intended limit before changing.
        semaphore = asyncio.Semaphore(10)
        try:
            response, html = await self.get_html(self.test_url, content_info,
                                                 semaphore)
            # Empty results mean get_html() timed out.
            if (html and response and response.status == 200
                    and '检测到有异常请求' not in html):
                log.debug("\n" + content_info + " 代理可用")
            else:
                self.redis.delete_value(redis_key, content_info)
                log.debug("已清除失效的代理:" + content_info)
        except Exception as e:
            print(e)
            self.redis.delete_value(redis_key, content_info)
            log.debug("\n" + content_info + ' 代理请求失败')
            log.debug("已清除失效的代理:" + content_info)

    def doing_main(self):
        """Test every stored proxy concurrently, waiting at most 6 seconds."""
        task_list = self.redis.get_all(redis_key)
        log.debug('数据库中IP总数{}'.format(len(task_list)))
        if not task_list:
            # asyncio.wait() raises ValueError on an empty set of tasks.
            return

        try:
            loop = asyncio.get_event_loop()
        except RuntimeError:
            # No current loop (newer Python versions, or a non-main thread).
            loop = None
        if loop is None or loop.is_closed():
            loop = asyncio.new_event_loop()
            asyncio.set_event_loop(loop)

        loop.run_until_complete(self._check_all(task_list))

    async def _check_all(self, task_list):
        """Run ``run`` for each proxy and cancel whatever misses the deadline."""
        # Tasks are created while the loop is running, so they attach to it.
        tasks = [asyncio.ensure_future(self.run(data)) for data in task_list]
        _, pending = await asyncio.wait(tasks, timeout=6)
        # asyncio.wait() does not cancel on timeout; without this the
        # unfinished checks would linger on the loop after we return.
        for task in pending:
            task.cancel()
        if pending:
            # Let the cancellations complete before handing the loop back.
            await asyncio.gather(*pending, return_exceptions=True)