Ejemplo n.º 1
0
def vaildate_pub():
    """
    Push every known proxy onto the validation queue and notify validators.

    The publish lock is taken and given its expiry in a single atomic
    ``SET ... NX EX`` call, so it can never be left behind without a TTL.
    Nothing is enqueued when the lock is already held or when
    ``VALIDATE_QUEUE_KEY`` still has entries waiting to be consumed.

    :return: None
    """
    lock_acquired = client.set(PUBLISH_LOCK,
                               "vaildate_publish",
                               nx=True,
                               ex=PROXY_VALIDATE_TIME * 2)
    if not lock_acquired or client.llen(VALIDATE_QUEUE_KEY):
        # Either a publisher already holds the lock, or the queue still
        # contains proxies that have not been consumed yet.
        proxy_validator.info("proxy key already vildate.")
        return
    try:
        pipe = client.pipeline()
        # Union of every set named in WEB_AVAILABLE_PROXIES.
        proxy_keys = client.sunion(*WEB_AVAILABLE_PROXIES.values())
        for proxy in proxy_keys:
            if proxy is not None:
                pipe.lpush(VALIDATE_QUEUE_KEY, proxy)
                proxy_validator.info(
                    "This proxy '{}' has enter queue".format(proxy))
        pipe.execute()
        client.publish(VALIDATE_CHANNEL, "validator")
    except Exception as e:
        # Publishing failed: record why instead of failing silently, then
        # release the lock so the next run can try again.
        proxy_validator.error(
            "Publishing proxies for validation failed: {} type {}".format(
                e, type(e)))
        client.delete(PUBLISH_LOCK)
Ejemplo n.º 2
0
    async def check_anonymous(proxy: str) -> bool:
        """
        Check how anonymous a proxy is.

        Requests ``ANONYMOUS_CHECK_API`` through the proxy, decodes the body
        as JSON and lets ``ProxyValidator.is_anonymous`` judge it.

        :param proxy: URL of the proxy to check
        :return: the verdict returned by ``ProxyValidator.is_anonymous``
        :raises ClientError: when any step of the check raises
        """
        verdict = True
        try:
            proxy_connector = ProxyConnector.from_url(proxy)
            requests.urllib3.disable_warnings()
            agent = UserAgent()
            async with ClientSession(connector=proxy_connector,
                                     timeout=5) as session:
                request_kwargs = {
                    "ssl": False,
                    "headers": {"User-Agent": agent.random()},
                    "timeout": 5,
                }
                # Asynchronous HTTP request through the proxy
                async with session.get(ANONYMOUS_CHECK_API,
                                       **request_kwargs) as response:
                    body = await response.text()
                    payload = json.loads(body)
                    verdict = ProxyValidator.is_anonymous(payload)
                    if verdict:
                        proxy_validator.info(
                            "The proxy {} is anonymous".format(proxy))
                await session.close()
                return verdict
        except Exception as exc:
            # Log the underlying failure, then report it as a ClientError.
            proxy_validator.error("Checking proxy {} anonymous "
                                  "has an error:{} type {}".format(
                                      proxy, str(exc), type(exc)))
            raise ClientError("check anonymous")
Ejemplo n.º 3
0
    def is_anonymous(response: dict) -> bool:
        """
        Judge from the check API's response whether the proxy is anonymous.

        :param response: decoded response of the anonymity check API; it
            must contain an ``origin`` entry
        :return: True when ``origin`` differs from ``PUBLIC_IP`` and the
            response carries no truthy ``Proxy-Connection`` entry
        """
        reported_origin = response["origin"]
        connection_header = response.get("Proxy-Connection", "")
        proxy_validator.info(
            "Checking anonymous proxy response is {}".format(response))
        hides_origin = reported_origin != PUBLIC_IP
        return bool(hides_origin and not connection_header)
Ejemplo n.º 4
0
async def validate_task():
    """
    Scheduled proxy validation task.

    Drains ``VALIDATE_QUEUE_KEY``, collects the validation tasks built by
    ``get_vaildator_task`` for every popped proxy, runs them concurrently
    and applies each result: proxies reported unavailable are deleted, the
    others get their delay updated. ``PUBLISH_LOCK`` is released once all
    results have been applied.

    :return: the list produced by ``asyncio.gather`` for the validation
        tasks, or ``None`` when the queue was empty
    """
    tasks = []
    result = None
    proxy = client.rpop(VALIDATE_QUEUE_KEY)
    while proxy:
        proxy_validator.info(
            "This proxy {} has joined the validation task.".format(proxy))
        tasks.extend(get_vaildator_task(proxy))
        proxy = client.rpop(VALIDATE_QUEUE_KEY)
        # Pause between pops without blocking the event loop; a plain
        # time.sleep() here would stall every other coroutine.
        await asyncio.sleep(0.5)
    if tasks:
        result = await asyncio.gather(*tasks)
        try:
            for res in result:
                s = "available" if res.available else "unavailable"
                proxy_validator.info(
                    "{} validation for {} tasks result is {} .".format(
                        res.proxy, res.web_key, s))
                try:
                    if not res.available:
                        # Unavailable proxies are removed
                        await AvailableProxy.delete_proxy(
                            res.proxy, res.web_key)
                        proxy_validator.info("delete proxy {} with {}".format(
                            res.proxy, res.web_key))
                    else:
                        proxy_validator.info(
                            "update proxy {}".format(res.proxy))
                        await AvailableProxy.update_proxy_delay(
                            res.proxy, res.dst, res.delay)
                except Exception as e:
                    # One failing result must not stop the remaining ones.
                    proxy_validator.error(
                        "An exception {} occurred while checking proxy {} availability."
                        .format(e, res.proxy))
        finally:
            # Release the lock once, after every result has been handled,
            # rather than after the first one while others are in flight.
            client.delete(PUBLISH_LOCK)
    return result
Ejemplo n.º 5
0
    async def check_proxy(self, proxy: str, dst: str,
                          web_key: str) -> ValidateResult:
        """
        Check whether a proxy can reach a target site.

        Issues a GET request for ``dst`` through ``proxy`` and measures how
        long it takes. A failed attempt is retried (by calling this method
        again) while ``self._retry`` has not exceeded ``VAILDATORS_RETRY``.

        Only ``Exception`` subclasses count as a failed check. Signals such
        as ``KeyboardInterrupt``, ``SystemExit``, ``GeneratorExit`` and, on
        Python 3.8+, ``asyncio.CancelledError`` propagate to the caller
        instead of being swallowed and retried.

        :param proxy: URL of the proxy to check
        :param dst: address of the target site
        :param web_key: key identifying the target site
        :return: the validation result; on success ``delay`` is the elapsed
            time in seconds and ``available`` is 1, or 0 when the request
            took longer than ``PROXY_REQUEST_DELAY``
        """
        result = ValidateResult(proxy=proxy,
                                delay=-1,
                                web_key=web_key,
                                dst=dst,
                                useful=1)
        time_start = time.time()
        try:
            # Route the request through the proxy.
            # NOTE(review): from_url is called on a freshly built instance;
            # if it is a classmethod the verify_ssl=False given to the
            # constructor is presumably discarded - confirm against the
            # ProxyConnector implementation in use.
            connector = ProxyConnector(verify_ssl=False).from_url(proxy)
            requests.urllib3.disable_warnings()
            async with ClientSession(connector=connector,
                                     timeout=self._timeout) as session:
                # Certificate verification is disabled for every request,
                # whatever the proxy scheme is.
                params = {
                    "url": dst,
                    "verify_ssl": False,
                    "timeout": self._timeout,
                    "headers": {
                        "User-Agent": self._ua.random()
                    }
                }
                # Asynchronous HTTP request
                async with session.get(**params) as response:
                    proxy_validator.info(
                        "wait proxy {} for {} response".format(proxy, dst))
                    await response.text()
                await session.close()
            time_end = time.time()
            delay = time_end - time_start
            proxy_validator.info(
                "check proxy {} for {} success cost {} s".format(
                    proxy, dst, delay))
            result.delay = delay
            result.available = 1
            # A response slower than the threshold counts as unavailable
            if delay > PROXY_REQUEST_DELAY:
                result.available = 0
            return result
        except Exception as e:
            err_msg = e
            if isinstance(e, (asyncio.TimeoutError, ClientHttpProxyError)):
                err_msg = "Http request timeout"
            # NOTE(review): this condition is true unless the error is an
            # instance of BOTH classes; confirm whether "neither class"
            # (a single isinstance with a tuple) was intended.
            if not isinstance(e, ClientSSLError) or not isinstance(
                    e, ssl.SSLError):
                result.available = 0
            # Retry while the counter has not exceeded the threshold.
            # NOTE(review): the counter lives on the instance and is only
            # reset after the final failure below - verify that instances
            # are not shared between concurrent checks.
            if self._retry <= VAILDATORS_RETRY:
                self._retry = self._retry + 1
                result = await self.check_proxy(proxy, dst, web_key)
                return result
            time_end = time.time()
            proxy_validator.error("check proxy {} {} times fail for {} "
                                  "and cost {} s".format(
                                      proxy, self._retry, dst,
                                      time_end - time_start))
            proxy_validator.error("check proxy {} for {} "
                                  "error:{} type {}".format(
                                      proxy, dst, err_msg, type(e)))
            self._retry = 0
            result.delay = time_end - time_start
            return result