Ejemplo n.º 1
0
def strip_url_params3(url, strip=None):
    """Return *url* with the query parameters named in *strip* removed.

    Args:
        url: The URL to clean.
        strip: Parameter names to drop from the query string. May be an
            iterable of names or a single name given as a string. ``None``
            or an empty value removes nothing.

    Returns:
        The URL rebuilt with the remaining query parameters, re-encoded with
        ``urllib.parse.urlencode``. Parameters that are kept retain their
        original order, all of their repeated values and any blank values.
    """
    if not strip:
        strip = ()
    elif isinstance(strip, str):
        # A bare string would otherwise be matched by substring
        # ("a" in "ab"), stripping keys the caller never named.
        strip = (strip,)
    strip = frozenset(strip)

    parse = urllib.parse.urlparse(url)
    # parse_qsl keeps one (key, value) pair per occurrence, so repeated
    # parameters and blank values of kept keys survive the round trip.
    pairs = [
        (key, value)
        for key, value in urllib.parse.parse_qsl(
            parse.query, keep_blank_values=True
        )
        if key not in strip
    ]
    query = urllib.parse.urlencode(pairs)

    return parse._replace(query=query).geturl()
Ejemplo n.º 2
0
def strip_url_params3(url, strip=None):
    """Return *url* with the query parameters named in *strip* removed.

    Args:
        url: The URL to clean.
        strip: Parameter names to drop from the query string. May be an
            iterable of names or a single name given as a string. ``None``
            or an empty value removes nothing.

    Returns:
        The URL rebuilt with the remaining query parameters, re-encoded with
        ``urllib.parse.urlencode``. Parameters that are kept retain their
        original order, all of their repeated values and any blank values.
    """
    if not strip:
        strip = ()
    elif isinstance(strip, str):
        # A bare string would otherwise be matched by substring
        # ("a" in "ab"), stripping keys the caller never named.
        strip = (strip,)
    strip = frozenset(strip)

    parse = urllib.parse.urlparse(url)
    # parse_qsl keeps one (key, value) pair per occurrence, so repeated
    # parameters and blank values of kept keys survive the round trip.
    pairs = [
        (key, value)
        for key, value in urllib.parse.parse_qsl(
            parse.query, keep_blank_values=True
        )
        if key not in strip
    ]
    query = urllib.parse.urlencode(pairs)

    return parse._replace(query=query).geturl()
Ejemplo n.º 3
0
def _is_valid_proxy(proxy_source):
    """Check whether a proxy serves the expected page for a specific source.

    Args:
        proxy_source: A ``(proxy, source)`` pair. ``proxy`` is registered
            with ``urllib.request.ProxyHandler`` for the ``http`` scheme and
            ``source`` is the key used to look up the test definition in
            ``PROXY_MAP`` (and, optionally, a replacement host in ``VHOST``).

    Returns:
        ``(proxy, source)`` when the body fetched through the proxy contains
        every ``VALID`` marker and none of the ``INVALID`` markers. ``None``
        when the request or decoding fails, when the markers do not match,
        or when the source defines neither kind of marker.
    """
    proxy, source = proxy_source

    # NOTE(review): only the 'http' scheme is mapped to the proxy here; an
    # https test URL would not go through it — confirm test URLs are http.
    handler = urllib.request.ProxyHandler({'http': proxy})
    opener = urllib.request.build_opener(handler)

    test = PROXY_MAP[source]
    url, valid, invalid = test[URL], test[VALID], test[INVALID]

    if source in VHOST:
        # Connect to the VHOST address but keep the original host name in
        # the Host header of the request.
        parse = urllib.parse.urlparse(url)
        url = parse._replace(netloc=VHOST[source]).geturl()
        req = urllib.request.Request(url)
        req.add_unredirected_header('Host', parse.netloc)
    else:
        req = urllib.request.Request(url)

    try:
        # Context managers make sure the connection and the gzip reader are
        # released even when reading or decoding fails.
        with opener.open(req, timeout=TIMEOUT) as response:
            encoding = response.info().get('Content-Encoding') or ''
            raw = response.read()
        # Content-coding names are case-insensitive.
        if encoding.strip().lower() == 'gzip':
            with gzip.open(io.BytesIO(raw)) as unzipped:
                raw = unzipped.read()
        body = raw.decode('utf-8')
    except Exception:
        # Best effort: any network, decompression or decoding failure just
        # means this proxy is not usable for the source.
        return None

    if not valid and not invalid:
        # With nothing to check against, the proxy cannot be confirmed.
        return None
    if valid and not all(marker in body for marker in valid):
        return None
    if invalid and any(marker in body for marker in invalid):
        return None
    return (proxy, source)