Ejemplo n.º 1
0
        def redirect_request(self, req, fp, code, msg, hdrs, newurl):
            handler = maybe_add_ssl_handler(newurl, validate_certs)
            if handler:
                urllib_request._opener.add_handler(handler)

            if follow_redirects == 'urllib2':
                return urllib_request.HTTPRedirectHandler.redirect_request(
                    self, req, fp, code, msg, hdrs, newurl)
            elif follow_redirects in ['no', 'none', False]:
                raise urllib_error.HTTPError(newurl, code, msg, hdrs, fp)

            do_redirect = False
            if follow_redirects in ['all', 'yes', True]:
                do_redirect = (code >= 300 and code < 400)

            elif follow_redirects == 'safe':
                m = req.get_method()
                do_redirect = (code >= 300 and code < 400
                               and m in ('GET', 'HEAD'))

            if do_redirect:
                # be conciliant with URIs containing a space
                newurl = newurl.replace(' ', '%20')
                newheaders = dict(
                    (k, v) for k, v in req.headers.items()
                    if k.lower() not in ("content-length", "content-type"))
                return urllib_request.Request(
                    newurl,
                    headers=newheaders,
                    origin_req_host=req.get_origin_req_host(),
                    unverifiable=True)
            else:
                raise urllib_error.HTTPError(req.get_full_url(), code, msg,
                                             hdrs, fp)
Ejemplo n.º 2
0
def open_url(url,
             data=None,
             headers=None,
             method=None,
             use_proxy=True,
             force=False,
             last_mod_time=None,
             timeout=10,
             validate_certs=True,
             url_username=None,
             url_password=None,
             http_agent=None,
             force_basic_auth=False,
             follow_redirects='urllib2',
             client_cert=None,
             client_key=None,
             cookies=None):
    '''
    Sends a request via HTTP(S) or FTP using urllib2 (Python2) or urllib (Python3)

    Does not require the module environment
    '''
    handlers = []
    ssl_handler = maybe_add_ssl_handler(url, validate_certs)
    if ssl_handler:
        handlers.append(ssl_handler)

    # FIXME: change the following to use the generic_urlparse function
    #        to remove the indexed references for 'parsed'
    parsed = urlparse(url)
    if parsed[0] != 'ftp':
        username = url_username

        if headers is None:
            headers = {}

        if username:
            password = url_password
            netloc = parsed[1]
        elif '@' in parsed[1]:
            credentials, netloc = parsed[1].split('@', 1)
            if ':' in credentials:
                username, password = credentials.split(':', 1)
            else:
                username = credentials
                password = ''

            parsed = list(parsed)
            parsed[1] = netloc

            # reconstruct url without credentials
            url = urlunparse(parsed)

        if username and not force_basic_auth:
            passman = urllib_request.HTTPPasswordMgrWithDefaultRealm()

            # this creates a password manager
            passman.add_password(None, netloc, username, password)

            # because we have put None at the start it will always
            # use this username/password combination for  urls
            # for which `theurl` is a super-url
            authhandler = urllib_request.HTTPBasicAuthHandler(passman)
            digest_authhandler = urllib_request.HTTPDigestAuthHandler(passman)

            # create the AuthHandler
            handlers.append(authhandler)
            handlers.append(digest_authhandler)

        elif username and force_basic_auth:
            headers["Authorization"] = basic_auth_header(username, password)

        else:
            try:
                rc = netrc.netrc(os.environ.get('NETRC'))
                login = rc.authenticators(parsed[1])
            except IOError:
                login = None

            if login:
                username, _, password = login
                if username and password:
                    headers["Authorization"] = basic_auth_header(
                        username, password)

    if not use_proxy:
        proxyhandler = urllib_request.ProxyHandler({})
        handlers.append(proxyhandler)

    if HAS_SSLCONTEXT and not validate_certs:
        # In 2.7.9, the default context validates certificates
        context = SSLContext(ssl.PROTOCOL_SSLv23)
        context.options |= ssl.OP_NO_SSLv2
        context.options |= ssl.OP_NO_SSLv3
        context.verify_mode = ssl.CERT_NONE
        context.check_hostname = False
        handlers.append(
            HTTPSClientAuthHandler(client_cert=client_cert,
                                   client_key=client_key,
                                   context=context))
    elif client_cert:
        handlers.append(
            HTTPSClientAuthHandler(client_cert=client_cert,
                                   client_key=client_key))

    # pre-2.6 versions of python cannot use the custom https
    # handler, since the socket class is lacking create_connection.
    # Some python builds lack HTTPS support.
    if hasattr(socket, 'create_connection') and CustomHTTPSHandler:
        handlers.append(CustomHTTPSHandler)

    handlers.append(RedirectHandlerFactory(follow_redirects, validate_certs))

    # add some nicer cookie handling
    if cookies is not None:
        handlers.append(urllib_request.HTTPCookieProcessor(cookies))

    opener = urllib_request.build_opener(*handlers)
    urllib_request.install_opener(opener)

    data = to_bytes(data, nonstring='passthru')
    if method:
        if method.upper() not in ('OPTIONS', 'GET', 'HEAD', 'POST', 'PUT',
                                  'DELETE', 'TRACE', 'CONNECT', 'PATCH'):
            raise ConnectionError('invalid HTTP request method; %s' %
                                  method.upper())
        request = RequestWithMethod(url, method.upper(), data)
    else:
        request = urllib_request.Request(url, data)

    # add the custom agent header, to help prevent issues
    # with sites that block the default urllib agent string
    if http_agent:
        request.add_header('User-agent', http_agent)

    # Cache control
    # Either we directly force a cache refresh
    if force:
        request.add_header('cache-control', 'no-cache')
    # or we do it if the original is more recent than our copy
    elif last_mod_time:
        tstamp = last_mod_time.strftime('%a, %d %b %Y %H:%M:%S +0000')
        request.add_header('If-Modified-Since', tstamp)

    # user defined headers now, which may override things we've set above
    if headers:
        if not isinstance(headers, dict):
            raise ValueError("headers provided to fetch_url() must be a dict")
        for header in headers:
            request.add_header(header, headers[header])

    urlopen_args = [request, None]
    if sys.version_info >= (2, 6, 0):
        # urlopen in python prior to 2.6.0 did not
        # have a timeout parameter
        urlopen_args.append(timeout)

    r = urllib_request.urlopen(*urlopen_args)
    return r