Example #1
0
    def extend_proxy_pool():
        """Top up the proxy pool whenever its size is not above LOWER_LIMIT.

        Runs forever.  While ``conn.count`` exceeds ``LOWER_LIMIT`` it only
        sleeps ``CHECK_CYCLE_TIME`` between checks.  Otherwise it drives
        ``ProxyPool.crawler_start`` and ``ProxyPool.crawler_stop`` together
        on the event loop until both finish, logs any ``Exception`` they
        raise, sleeps ``CHECK_INTERVAL_TIME`` and resets the crawler.
        """

        event_loop = asyncio.get_event_loop()
        proxy_queue = asyncio.Queue()
        crawler = ProxyCrawler(proxy_queue)
        validator = ProxyValidator()

        while True:
            # Idle until the pool has drained to the lower limit.
            while conn.count > LOWER_LIMIT:
                time.sleep(CHECK_CYCLE_TIME)

            logger.debug('extend proxy pool started')

            # A fresh event per round, shared by the start and stop coroutines.
            stop_flag = asyncio.Event()
            try:
                starter = ProxyPool.crawler_start(
                    crawler, validator, proxy_queue, stop_flag)
                stopper = ProxyPool.crawler_stop(crawler, stop_flag)
                event_loop.run_until_complete(asyncio.gather(starter, stopper))
            except Exception:
                # Best effort: a failed round is logged and the loop goes on.
                logger.error(traceback.format_exc())

            logger.debug('extend proxy pool finished')
            time.sleep(CHECK_INTERVAL_TIME)
            # Original note says reset() creates a new flag inside the
            # crawler -- confirm against ProxyCrawler.reset.
            crawler.reset()
Example #2
0
    def extend_proxy_pool():
        """Top up the proxy pool whenever its size is not above lower_limit.

        Runs forever.  While ``count`` on the connection returned by
        ``rc()`` exceeds ``lower_limit`` it only sleeps ``check_cycle_time``
        between checks.  Otherwise it drives ``ProxyPool.crawler_start`` and
        ``ProxyPool.crawler_stop`` together on the event loop until both
        finish, logs any ``Exception`` they raise, sleeps
        ``check_interval_time``, then clears the shared event and resets
        the crawler for the next round.
        """
        store = rc()
        event_loop = asyncio.get_event_loop()
        stop_flag = asyncio.Event()
        proxy_queue = asyncio.Queue()
        crawler = ProxyCrawler(proxy_queue)
        validator = ProxyValidator(store)

        while True:
            # Idle until the pool has drained to the lower limit.
            while store.count > lower_limit:
                time.sleep(check_cycle_time)

            logger.debug('extend proxy pool started')

            try:
                starter = ProxyPool.crawler_start(
                    crawler, validator, proxy_queue, stop_flag)
                stopper = ProxyPool.crawler_stop(crawler, store, stop_flag)
                event_loop.run_until_complete(asyncio.gather(starter, stopper))
            except Exception as exc:
                # Best effort: a failed round is logged and the loop goes on.
                logger.error(exc, exc_info=True)

            logger.debug('extend proxy pool finished')
            time.sleep(check_interval_time)
            # The same event object is reused every round, so un-set it here.
            stop_flag.clear()
            crawler.reset()  # clear flags
Example #3
0
def proxy_crawler_run(proxies, rules=None):
    """Run a ProxyCrawler to completion on the current event loop.

    Args:
        proxies: forwarded to ``ProxyCrawler`` unchanged (presumably the
            queue that crawled proxies are pushed onto -- confirm against
            ProxyCrawler).
        rules: optional; forwarded to ``ProxyCrawler`` unchanged.

    Any ``Exception`` raised while the crawler runs is logged with its
    traceback and suppressed.  The event loop obtained from
    ``asyncio.get_event_loop()`` is closed before returning, whether or
    not the crawl succeeded.
    """
    pc = ProxyCrawler(proxies, rules)
    loop = asyncio.get_event_loop()
    try:
        loop.run_until_complete(pc.start())
    except Exception:
        # Not a bare ``except``: KeyboardInterrupt and SystemExit must
        # propagate so the process can still be interrupted or shut down.
        logger.error(traceback.format_exc())
    finally:
        loop.close()
Example #4
0
def proxy_validator_run():
    """Run ``ProxyValidator.start`` over and over on a private event loop.

    Creates a new event loop and installs it as the current one, then loops
    forever: run one validation pass to completion, log any ``Exception``
    it raised (with traceback), and sleep ``validate_cycle_time`` before
    the next pass.
    """
    store = rc()
    event_loop = asyncio.new_event_loop()
    asyncio.set_event_loop(event_loop)
    checker = ProxyValidator(store)

    while True:
        try:
            event_loop.run_until_complete(checker.start())
        except Exception as exc:
            # A failed pass must not stop the periodic validation.
            logger.error(exc, exc_info=True)
        time.sleep(validate_cycle_time)
Example #5
0
def proxy_validator_run():
    """Run ``ProxyValidator.start`` over and over on a private event loop.

    Creates a new event loop and installs it as the current one, then loops
    forever: run one validation pass to completion, log the traceback of
    any ``Exception`` it raised, and sleep ``VALIDATE_CYCLE_TIME`` before
    the next pass.  Debug messages bracket each pass.
    """
    event_loop = asyncio.new_event_loop()
    asyncio.set_event_loop(event_loop)
    checker = ProxyValidator()

    while True:
        logger.debug('regular validator started')
        try:
            event_loop.run_until_complete(checker.start())
        except Exception:
            # A failed pass must not stop the periodic validation.
            logger.error(traceback.format_exc())
        logger.debug('regular validator finished')
        time.sleep(VALIDATE_CYCLE_TIME)
Example #6
0
    async def validate(self, proxies):
        """Validate proxies taken from *proxies* forever, keeping those that respond.

        Each item pulled from the queue is used as the HTTP proxy for a GET
        request to ``self.validate_url``.  If the request completes without
        raising, the proxy is stored with ``self._conn.put``; otherwise the
        error is logged and the proxy is dropped.  ``task_done()`` is called
        once for every item, successful or not.  The HTTP status of the
        response is not inspected.

        Args:
            proxies: awaitable queue of proxy addresses.  Items are appended
                to ``'http://'``, so they are presumably ``host:port``
                strings -- confirm against the producer.
        """
        logger.debug('validator started')
        # One session for the whole run instead of building and tearing one
        # down per proxy; the session is closed when this coroutine exits.
        async with aiohttp.ClientSession() as session:
            while True:
                proxy = await proxies.get()
                try:
                    real_proxy = 'http://' + proxy
                    async with session.get(self.validate_url, proxy=real_proxy, timeout=validate_timeout):
                        self._conn.put(proxy)
                except Exception as e:
                    logger.error(e)

                proxies.task_done()
Example #7
0
 async def _validator(self, proxy):
     """Probe ``self.validate_url`` through *proxy* and keep it on HTTP 200.

     A random entry of ``USER_AGENT`` is written into the shared ``HEADERS``
     mapping (defined outside this method, so the change is visible to
     other users of it) before the request is sent.  The proxy is stored
     with ``conn.put`` only when the response status is 200.  Timeouts are
     ignored silently; any other ``Exception`` is logged.
     """
     HEADERS['User-Agent'] = choice(USER_AGENT)
     async with aiohttp.ClientSession() as http:
         try:
             via = 'http://' + proxy
             request = http.get(
                 self.validate_url,
                 headers=HEADERS,
                 proxy=via,
                 timeout=VALIDATE_TIMEOUT,
             )
             async with request as reply:
                 if reply.status == 200:
                     conn.put(proxy)
         except asyncio.TimeoutError:
             # A proxy that times out is simply not kept.
             pass
         except Exception as exc:
             logger.error(exc)
Example #8
0
    async def regular_validate(self):
        """Re-check a sample of stored proxies and put back the ones that still work.

        The sample size is ``ceil(self._conn.count * validate_ratio)``,
        capped at ``validate_upper_limit``.  Each sampled proxy is used,
        one after another, as the HTTP proxy for a GET request to
        ``self.validate_url``; those whose request completes without
        raising are collected and handed to ``self._conn.put_list`` at the
        end.  Timeouts are skipped silently, other errors are logged.  The
        HTTP status of the response is not inspected.
        """
        sample_size = min(ceil(self._conn.count * validate_ratio), validate_upper_limit)
        # Presumably get_list removes the entries it returns, so that only
        # the survivors go back in -- confirm against the connection class.
        candidates = self._conn.get_list(sample_size)
        logger.debug('regular validator started, {0} to validate'.format(len(candidates)))

        survivors = []
        async with aiohttp.ClientSession() as http:
            for raw in candidates:
                try:
                    # Stored entries are bytes (per the original note, they
                    # come from redis), so decode before building the URL.
                    target = 'http://' + raw.decode('utf-8')
                    async with http.get(self.validate_url, proxy=target, timeout=validate_timeout):
                        survivors.append(raw)
                except asyncio.TimeoutError:
                    pass
                except Exception as exc:
                    logger.error(exc)

        logger.debug('regular validator finished, {0} passed'.format(len(survivors)))
        self._conn.put_list(survivors)