Ejemplo n.º 1
0
def __check_http_proxy(proxies, is_http=True):
    """Probe a proxy against the configured HTTP/HTTPS test endpoint.

    :param proxies: requests-style proxies mapping to route the probe through
    :param is_http: pick config.TEST_HTTP_HEADER when True, else
                    config.TEST_HTTPS_HEADER
    :return: (ok, types, speed); types and speed stay -1 when the probe fails.
             types is 2 when the echoed 'origin' holds several comma-separated
             addresses, 1 when a Proxy-Connection header is echoed back,
             0 otherwise.
    """
    types, speed = -1, -1
    test_url = config.TEST_HTTP_HEADER if is_http else config.TEST_HTTPS_HEADER
    try:
        begin = time.time()
        resp = requests.get(url=test_url, headers=config.get_header(),
                            timeout=config.TIMEOUT, proxies=proxies)
        if not resp.ok:
            return False, types, speed
        speed = round(time.time() - begin, 2)
        payload = json.loads(resp.text)
        echoed_headers = payload['headers']
        origin = payload['origin']
        if ',' in origin:
            # more than one address visible to the endpoint
            types = 2
        elif echoed_headers.get('Proxy-Connection', None):
            types = 1
        else:
            types = 0
        return True, types, speed
    except Exception as e:
        log.warning(e.args)
        return False, types, speed
Ejemplo n.º 2
0
def get_target_ip():
    """Fetch config.TEST_IP directly (no proxy) and return the 'origin'
    field of its JSON body — i.e. the address the test site sees for us.

    :return: the 'origin' string from the endpoint's JSON response
    :raises TestUrlFail: when the request, JSON parsing, or key lookup fails
    """
    try:
        resp = requests.get(url=config.TEST_IP,
                            headers=config.get_header(),
                            timeout=config.TIMEOUT)
        return json.loads(resp.text)['origin']
    except Exception:
        raise TestUrlFail
Ejemplo n.º 3
0
    def download(url):
        """Fetch *url* and return its decoded body text.

        A direct request is attempted first; on any failure the function
        retries up to config.RETRY_TIME times through random proxies pulled
        from the database. Returns None when every attempt fails or no
        proxies are available.

        :param url: page URL to download
        :return: response text, or None on failure
        """

        def _fetch(target, proxies=None):
            # Treat a non-OK status or a suspiciously short body
            # (< 500 bytes) as a failed fetch.
            resp = requests.get(url=target,
                                headers=config.get_header(),
                                timeout=config.TIMEOUT,
                                proxies=proxies)
            resp.encoding = chardet.detect(resp.content)['encoding']
            if (not resp.ok) or len(resp.content) < 500:
                raise ConnectionError
            return resp.text

        try:
            return _fetch(url)
        except Exception as e:
            print(e.args)

        proxy_list = sql_helper.select(10)
        if not proxy_list:
            return None

        attempts = 0  # retry counter
        while attempts < config.RETRY_TIME:
            try:
                chosen = random.choice(proxy_list)
                endpoint = "http://%s:%s" % (chosen[0], chosen[1])
                # note: the https key also routes through the http:// endpoint
                return _fetch(url, proxies={"http": endpoint,
                                            "https": endpoint})
            except Exception:
                attempts += 1

        return None
Ejemplo n.º 4
0
def get_page(url, options=None):
    """
    Crawl *url* and return the page body on HTTP 200.

    :param url: page URL to fetch
    :param options: extra header fields merged over get_header(); a fresh
                    dict is used when omitted (no mutable default)
    :return: response text on status 200, otherwise None
    """
    if options is None:
        options = {}
    print('Crawling', url)
    headers = dict(get_header(), **options)
    print("Print headers:", headers)
    try:
        response = requests.get(url, timeout=5, headers=headers)
        print("Crawl success", url, response.status_code)
        if response.status_code == 200:
            return response.text
        # make the non-200 outcome explicit instead of an implicit fall-through
        return None
    # requests failures (timeouts, DNS errors, connection resets) raise
    # requests.RequestException subclasses, NOT the builtin ConnectionError
    # that was previously the only thing caught — without this, a timeout
    # would propagate out of a function documented to return None on failure.
    except (ConnectionError, requests.RequestException):
        print('Crawl failed', url)
        return None
Ejemplo n.º 5
0
def __check_proxy_a(proxy):
    """
    Probe *proxy* against config.TEST_URL.

    :param proxy: requests-style proxy mapping
    :return: tuple(protocol, speed, types); protocol and types are both 0 on
             success. Falls back to __check_proxy_b when the response is not
             OK or the request fails with a proxy/timeout/encoding error.
    """
    begin = time.time()
    try:
        resp = requests.get(url=config.TEST_URL, headers=config.get_header(),
                            timeout=config.TIMEOUT, proxies=proxy)
    except (requests.exceptions.ProxyError,
            requests.exceptions.ConnectTimeout,
            requests.exceptions.ReadTimeout,
            requests.exceptions.ChunkedEncodingError):
        return __check_proxy_b(proxy)
    resp.encoding = chardet.detect(resp.content)['encoding']
    if not resp.ok:
        return __check_proxy_b(proxy)
    return 0, round(time.time() - begin, 2), 0