Code Example #1
def aliveproxy() -> Set[str]:
    # Collect proxies from several aliveproxy.com list pages (fastest, anonymity levels, countries).
    urls = [
        "http://aliveproxy.com/fastest-proxies",
        "http://aliveproxy.com/high-anonymity-proxy-list",
        "http://aliveproxy.com/anonymous-proxy-list",
        "http://aliveproxy.com/transparent-proxy-list",
        "http://aliveproxy.com/us-proxy-list",
        "http://aliveproxy.com/gb-proxy-list",
        "http://aliveproxy.com/de-proxy-list",
        "http://aliveproxy.com/jp-proxy-list",
        "http://aliveproxy.com/ca-proxy-list",
    ]
    proxy_set7 = set()
    logger.info(
        f"Parsing proxies from {short_url(urls[0])}..."
    )  # aliveproxy.com
    for url in urls:
        r = requests.get(url, headers=standard_headers)
        soup = BeautifulSoup(r.content, "lxml")
        plp_s7 = len(proxy_set7)  # previous len proxy_set7
        # The proxy table has CSS class "cm or"; skip its header row,
        # then pull the ip:port pairs out of the first cell of each row.
        for proxy in soup.find("table", {"class": "cm or"}).find_all("tr")[1:]:
            proxies = parse_proxies(str(proxy.find("td")))
            proxy_set7.update(proxies)
        link = r.url.split('/')[-2]
        logger.info(
            f"From {link} section were parsed {len(proxy_set7) - plp_s7} proxies"
        )
        time.sleep(1.3)  # crawling-delay
    logger.info(
        f"From {short_url(urls[0])} were parsed {len(proxy_set7)} proxies"
    )
    return proxy_set7
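
Code Examples #1 to #7 all call a handful of module-level names that are not shown in the snippets themselves (requests, BeautifulSoup, logger, standard_headers, parse_proxies, short_url, and the dt/time/Set imports). The following is only a sketch of what that shared setup might look like, with the helper signatures inferred from how they are used in the examples; the real project may define them differently.

import logging
import re
import time
from datetime import datetime as dt
from typing import Set

import requests
from bs4 import BeautifulSoup

logger = logging.getLogger(__name__)

# A browser-like User-Agent so the proxy sites do not reject plain script requests.
standard_headers = {
    "User-Agent": "Mozilla/5.0 (X11; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/115.0"
}

# Matches ip:port pairs anywhere inside text, HTML, or JavaScript.
PROXY_REGEX = re.compile(r"\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}:\d{1,5}")


def parse_proxies(text: str) -> Set[str]:
    # Extract every ip:port string from an arbitrary blob of text.
    return set(PROXY_REGEX.findall(text))


def short_url(url: str) -> str:
    # Reduce a full URL to its host name for compact log messages.
    return url.split("/")[2]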
Code Example #2
def openproxy() -> Set[str]:
    # Fetch the list index from the openproxy.space API, then scrape each section page.
    # Build a millisecond Unix timestamp (current time, truncated to whole seconds)
    # for the API's "ts" query parameter.
    date = dt.now().strftime("%d.%m.%Y %H:%M:%S")
    strp_date = dt.strptime(date, "%d.%m.%Y %H:%M:%S")
    stamp_date = int(time.mktime(strp_date.timetuple()) * 1000)
    proxy_set5 = set()
    links = set()

    url = f"https://api.openproxy.space/list?skip=0&ts={stamp_date}"
    r = requests.get(url, headers=standard_headers)
    data = r.json()
    for _dict in data:
        # Keep only the lists that advertise exactly two protocols.
        if len(_dict.get("protocols")) == 2:
            links.add(f"https://openproxy.space/list/{_dict.get('code')}")

    logger.info(f"Parsing proxies from {short_url(r.url)}...")
    for link in links:
        r = requests.get(link, headers=standard_headers)
        try:
            soup = BeautifulSoup(r.content, "lxml")
            # The proxy list is embedded in one of the page's <script> tags
            # (the sixth from the end), so pull ip:port pairs out of its text.
            proxies = parse_proxies(str(soup.find_all("script")[-6]))
            proxy_set5.update(proxies)
            logger.info(
                f"From {r.url.split('/')[-1]} section were parsed {len(proxies)} proxies"
            )
        except Exception:
            logger.exception(
                f"Proxies from {link.split('/')[-1]} were not loaded :("
            )
        time.sleep(1.3)  # crawling-delay
    logger.info(
        f"From {short_url(r.url)} were parsed {len(proxy_set5)} proxies"
    )
    return proxy_set5
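
The three-line timestamp construction at the top of Code Example #2 formats the current time to a string, parses it back, and converts the result to milliseconds. Since the round trip only preserves whole-second precision anyway, the same value can be produced directly; this one-liner is a suggested simplification, not part of the original code.

# Millisecond Unix timestamp, truncated to whole seconds, for the "ts" query parameter.
stamp_date = int(time.time()) * 1000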
Code Example #3
def awmproxy() -> Set[str]:
    url = "http://awmproxy.net"
    r = requests.get(url, headers=standard_headers)
    proxy_set4 = parse_proxies(r.text)
    logger.info(
        f"From {short_url(r.url)} were parsed {len(proxy_set4)} proxies"
    )
    return proxy_set4
Code Example #4
def proxy_ip_list() -> Set[str]:
    url = "http://proxy-ip-list.com/"
    r = requests.get(url, headers=standard_headers)
    proxies_set2 = parse_proxies(r.text)
    logger.info(
        f"From {short_url(r.url)} were parsed {len(proxies_set2)} proxies"
    )
    return proxies_set2
Code Example #5
def proxy50_50() -> Set[str]:
    url = "https://proxy50-50.blogspot.com/"
    r = requests.get(url, headers=standard_headers)
    proxies_set = parse_proxies(r.text)
    logger.info(
        f"From {short_url(r.url)} were parsed {len(proxies_set)} proxies"
    )
    return proxies_set
Code Example #6
def httptunnel() -> Set[str]:
    url = "http://www.httptunnel.ge/ProxyListForFree.aspx"
    r = requests.get(url, headers=standard_headers)
    proxies_set11 = parse_proxies(r.text)
    logger.info(
        f"From {short_url(r.url)} were parsed {len(proxies_set11)} proxies"
    )
    return proxies_set11
Code Example #7
def community_aliveproxy() -> Set[str]:
    url = "http://community.aliveproxy.com/proxy_list_http_fastest"
    proxy_set8 = set()
    r = requests.get(url, headers=standard_headers)
    soup = BeautifulSoup(r.content, "lxml")
    try:
        # Skip the table's header row; each remaining row holds one proxy entry.
        for proxy in soup.find("table").find_all("tr")[1:]:
            proxies = parse_proxies(proxy.text)
            proxy_set8.update(proxies)
        logger.info(
            f"From {short_url(r.url)} were parsed {len(proxy_set8)} proxies"
        )
    except Exception:
        logger.exception(f"Proxies from {short_url(r.url)} were not loaded :(")
    return proxy_set8
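
A natural way to use these scrapers together is to run each one and merge everything into a single deduplicated pool. The scrape_all helper below is an assumed composition of the functions defined in Code Examples #1 to #7, not something taken from the original source.

def scrape_all() -> Set[str]:
    # Run every scraper in turn and merge the results into one deduplicated set.
    scrapers = (
        aliveproxy,
        openproxy,
        awmproxy,
        proxy_ip_list,
        proxy50_50,
        httptunnel,
        community_aliveproxy,
    )
    all_proxies: Set[str] = set()
    for scraper in scrapers:
        try:
            all_proxies.update(scraper())
        except Exception:
            # A single unreachable source should not abort the whole run.
            logger.exception(f"{scraper.__name__} failed")
    logger.info(f"Collected {len(all_proxies)} unique proxies in total")
    return all_proxies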