def get_connection(self, url, proxies=None):
    """Return the urllib3 connection to use for ``url``.

    Not meant to be called from user code; it is exposed so that subclasses
    of :class:`HTTPAdapter <requests.adapters.HTTPAdapter>` can override it.

    :param url: The URL to connect to.
    :param proxies: (optional) A Requests-style dictionary of proxies used
        on this request, keyed by URL scheme.
    :returns: A connection from the matching proxy manager when a proxy is
        configured for the URL's scheme, otherwise one from the pool manager.
    """
    proxy_map = proxies if proxies else {}
    scheme = urlparse(url.lower()).scheme
    proxy = proxy_map.get(scheme)

    if not proxy:
        # Direct connection. Round-trip the URL through urlparse so that
        # only the scheme ends up lower case.
        normalized_url = urlparse(url).geturl()
        return self.poolmanager.connection_from_url(normalized_url)

    except_on_missing_scheme(proxy)
    proxy_headers = self.proxy_headers(proxy)

    # Proxy managers are created lazily and cached per proxy URL.
    if proxy not in self.proxy_manager:
        self.proxy_manager[proxy] = proxy_from_url(
            proxy, proxy_headers=proxy_headers)

    return self.proxy_manager[proxy].connection_from_url(url)
def except_on_missing_scheme(url):
    """Raise ``MissingSchema`` when ``url`` has no scheme component.

    :param url: The (proxy) URL to validate.
    :raises MissingSchema: if parsing ``url`` yields an empty scheme.
    """
    if urlparse(url).scheme:
        return
    raise MissingSchema('Proxy URLs must have explicit schemes.')
def get_auth_from_url(url):
    """Extract the authentication components of ``url``.

    The URL is parsed *before* percent-decoding. Decoding the whole URL
    first would turn escaped delimiters inside the credentials (``%2F``,
    ``%3A``, ``%23``, ``%3F`` ...) into real delimiters and make the parser
    split the URL in the wrong place.

    :param url: A URL that may embed ``user:password@`` credentials.
    :returns: A ``(username, password)`` tuple with each component
        percent-decoded. A component that is absent from the URL is
        ``None``; a falsy ``url`` yields ``('', '')``.
    """
    if not url:
        return ('', '')

    parsed = urlparse(url)
    username = parsed.username
    password = parsed.password

    return (
        unquote(username) if username is not None else None,
        unquote(password) if password is not None else None,
    )
def get_full_url(self):
    """Return the wrapped request's URL, honouring an explicit Host header.

    If the request carries no (or an empty) ``Host`` header, the request
    URL is returned untouched. Otherwise the URL is rebuilt with the
    header's value substituted for the network location.
    """
    request = self._r
    host = request.headers.get('Host')
    if not host:
        return request.url

    # The user set Host explicitly: swap it in for the URL's netloc and
    # keep every other component as parsed.
    parts = urlparse(request.url)
    rebuilt = [
        parts.scheme,
        host,
        parts.path,
        parts.params,
        parts.query,
        parts.fragment,
    ]
    return urlunparse(rebuilt)
def get_environ_proxies(url):
    """Return a dict of environment proxies that apply to ``url``.

    The ``no_proxy`` environment variable (lower-case name first, then
    upper-case) is consulted first; if the URL's host matches one of its
    comma-separated entries, no proxies are returned. The system-level
    bypass check ``proxy_bypass`` is consulted next.

    :param url: The URL about to be requested.
    :returns: ``{}`` when the URL must bypass the proxy, otherwise the
        result of ``getproxies()``.
    """
    def get_proxy(key):
        return os.environ.get(key) or os.environ.get(key.upper())

    netloc = urlparse(url).netloc
    no_proxy = get_proxy('no_proxy')

    if no_proxy:
        # Ignore empty entries (trailing or doubled commas). An empty string
        # is a suffix of every host and would otherwise disable proxying
        # for all URLs.
        entries = [
            entry for entry in no_proxy.replace(' ', '').split(',') if entry
        ]
        # Host with any ":port" suffix removed.
        host = netloc.split(':')[0]

        if is_ipv4_address(host):
            for entry in entries:
                if is_valid_cidr(entry):
                    if address_in_network(host, entry):
                        return {}
                elif host == entry:
                    # A plain IP listed in no_proxy must match exactly.
                    return {}
        else:
            for entry in entries:
                # Match the end of the netloc, both with and without port.
                if netloc.endswith(entry) or host.endswith(entry):
                    return {}

    # If the system proxy settings say this URL should be bypassed,
    # don't proxy.
    if proxy_bypass(netloc):
        return {}

    # Either no_proxy is unset or it does not cover this URL, and the system
    # settings do not require a bypass.
    return getproxies()
def request_url(self, request, proxies):
    """Work out which URL to put on the request line.

    A request sent through an HTTP proxy has to use the full URL; anything
    else uses only the path portion of the URL.

    Not meant to be called from user code; it is exposed so that subclasses
    of :class:`HTTPAdapter <requests.adapters.HTTPAdapter>` can override it.

    :param request: The :class:`PreparedRequest <PreparedRequest>` being sent.
    :param proxies: A dictionary of schemes to proxy URLs (may be ``None``).
    :returns: The full URL with its fragment removed when a proxy applies to
        a non-``https`` request, otherwise ``request.path_url``.
    """
    scheme = urlparse(request.url).scheme
    proxy = (proxies or {}).get(scheme)

    uses_plain_proxy = bool(proxy) and scheme != 'https'
    if not uses_plain_proxy:
        return request.path_url

    # urldefrag returns (url_without_fragment, fragment).
    return urldefrag(request.url)[0]
def get_netrc_auth(url):
    """Return the Requests tuple auth for ``url`` from the user's netrc file.

    The first existing file among ``NETRC_FILES`` (looked up in the user's
    home directory) is used.

    The lookup key is the URL's parsed hostname, not the first
    ``:``-separated piece of the netloc. For a URL such as
    ``http://trusted.example:@evil.example/`` that piece is the userinfo
    (``trusted.example``), so credentials stored for a different machine
    would be returned. Note that ``hostname`` is lower-cased by the parser.

    :param url: The URL being requested.
    :returns: A ``(login, password)`` tuple, or ``None`` when there is no
        netrc file, no matching entry, or the file cannot be read or parsed.
    """
    try:
        from netrc import netrc, NetrcParseError

        netrc_path = None
        for name in NETRC_FILES:
            candidate = os.path.expanduser('~/{0}'.format(name))
            if os.path.exists(candidate):
                netrc_path = candidate
                break

        # Abort early if there isn't one.
        if netrc_path is None:
            return None

        # hostname excludes userinfo and port; fall back to '' for URLs
        # without a network location.
        host = urlparse(url).hostname or ''

        try:
            entry = netrc(netrc_path).authenticators(host)
        except (NetrcParseError, IOError):
            # A parsing error or a permissions problem reading the file:
            # just skip netrc auth.
            return None

        if entry:
            # entry is (login, account, password); use the account field
            # when the login field is empty.
            login_i = 0 if entry[0] else 1
            return (entry[login_i], entry[2])

    # AppEngine hackiness.
    except (ImportError, AttributeError):
        pass

    return None
def resolve_redirects(self, resp, req, stream=False, timeout=None, verify=True, cert=None, proxies=None):
    """Receives a Response. Returns a generator of Responses.

    While ``resp`` has a ``location`` header and a status code in
    ``REDIRECT_STATI``, a copy of ``req`` is pointed at the redirect target,
    sent with ``self.send(..., allow_redirects=False)`` and the resulting
    response is yielded; that response then becomes ``resp`` for the next
    iteration.

    :param resp: The response to start from.
    :param req: The request that is copied for every hop.
    :param stream: Forwarded unchanged to ``self.send``.
    :param timeout: Forwarded unchanged to ``self.send``.
    :param verify: Forwarded unchanged to ``self.send``.
    :param cert: Forwarded unchanged to ``self.send``.
    :param proxies: Forwarded unchanged to ``self.send``.
    :raises TooManyRedirects: once ``self.max_redirects`` hops have already
        been followed and the current response is still a redirect.
    """
    # Number of redirects followed so far.
    i = 0

    while ('location' in resp.headers and resp.status_code in REDIRECT_STATI):
        prepared_request = req.copy()

        resp.content  # Consume socket so it can be released

        if i >= self.max_redirects:
            raise TooManyRedirects('Exceeded %s redirects.' % self.max_redirects)

        # Release the connection back into the pool.
        resp.close()

        url = resp.headers['location']
        # NOTE(review): the method is re-read from the original `req` on
        # every hop (`req` is never reassigned), so a rewrite to GET made on
        # an earlier hop is not carried forward -- confirm this is intended.
        method = req.method

        # Handle redirection without scheme (see: RFC 1808 Section 4):
        # borrow the scheme of the URL that produced this response.
        if url.startswith('//'):
            parsed_rurl = urlparse(resp.url)
            url = '%s:%s' % (parsed_rurl.scheme, url)

        # The scheme should be lower case...
        parsed = urlparse(url)
        url = parsed.geturl()

        # Facilitate non-RFC2616-compliant 'location' headers
        # (e.g. '/path/to/resource' instead of 'http://domain.tld/path/to/resource')
        # Compliant with RFC3986, we percent encode the url.
        if not urlparse(url).netloc:
            # No host in the target: resolve it against the current URL.
            url = urljoin(resp.url, requote_uri(url))
        else:
            url = requote_uri(url)

        prepared_request.url = url

        # http://www.w3.org/Protocols/rfc2616/rfc2616-sec10.html#sec10.3.4
        if (resp.status_code == codes.see_other and method != 'HEAD'):
            method = 'GET'

        # Do what the browsers do, despite standards...
        # First, turn 302s into GETs.
        if resp.status_code == codes.found and method != 'HEAD':
            method = 'GET'

        # Second, if a POST is responded to with a 301, turn it into a GET.
        # This bizarre behaviour is explained in Issue 1704.
        if resp.status_code == codes.moved and method == 'POST':
            method = 'GET'

        prepared_request.method = method

        # https://github.com/kennethreitz/requests/issues/1084
        # Unless the status is `codes.temporary` or `codes.resume`, the body
        # (and its Content-Length header) is dropped from the new request.
        if resp.status_code not in (codes.temporary, codes.resume):
            if 'Content-Length' in prepared_request.headers:
                del prepared_request.headers['Content-Length']

            prepared_request.body = None

        # Remove any Cookie header copied from `req`; presumably so that
        # prepare_cookies() below can rebuild it from the jar -- TODO confirm.
        headers = prepared_request.headers
        try:
            del headers['Cookie']
        except KeyError:
            pass

        # Presumably records cookies set by the redirect response in the
        # request's own jar before they are re-applied -- verify against
        # extract_cookies_to_jar.
        extract_cookies_to_jar(prepared_request._cookies, prepared_request, resp.raw)
        prepared_request.prepare_cookies(prepared_request._cookies)

        # Redirects are followed by this loop, so the nested send must not
        # follow them itself.
        resp = self.send(
            prepared_request,
            stream=stream,
            timeout=timeout,
            verify=verify,
            cert=cert,
            proxies=proxies,
            allow_redirects=False,
        )

        extract_cookies_to_jar(self.cookies, prepared_request, resp.raw)

        i += 1
        yield resp
def build_digest_header(self, method, url):
    """Build the ``Authorization`` header value for HTTP Digest auth.

    Uses the challenge stored in ``self.chal`` (``realm`` and ``nonce`` are
    required; ``qop``, ``algorithm`` and ``opaque`` are optional) together
    with ``self.username`` and ``self.password``. Updates
    ``self.nonce_count`` and ``self.last_nonce`` as a side effect.

    :param method: The HTTP method of the request being authenticated.
    :param url: The request URL; only its path and query are digested.
    :returns: The header value (``'Digest ...'``), or ``None`` when the
        challenge asks for an unsupported algorithm or an unsupported
        ``qop`` (e.g. ``auth-int`` only).
    """
    realm = self.chal['realm']
    nonce = self.chal['nonce']
    qop = self.chal.get('qop')
    algorithm = self.chal.get('algorithm')
    opaque = self.chal.get('opaque')

    if algorithm is None:
        _algorithm = 'MD5'
    else:
        _algorithm = algorithm.upper()

    # Must be bound even when no branch below matches, so that an
    # unsupported algorithm yields None instead of an UnboundLocalError.
    hash_utf8 = None

    # The helpers assume digest modules are imported at the top level.
    if _algorithm == 'MD5' or _algorithm == 'MD5-SESS':
        def md5_utf8(x):
            if isinstance(x, str):
                x = x.encode('utf-8')
            return hashlib.md5(x).hexdigest()
        hash_utf8 = md5_utf8
    elif _algorithm == 'SHA':
        def sha_utf8(x):
            if isinstance(x, str):
                x = x.encode('utf-8')
            return hashlib.sha1(x).hexdigest()
        hash_utf8 = sha_utf8

    if hash_utf8 is None:
        return None

    def KD(secret, data):
        return hash_utf8("%s:%s" % (secret, data))

    # XXX not implemented yet
    entdig = None

    # The digest covers the request-URI: path plus query string.
    p_parsed = urlparse(url)
    path = p_parsed.path
    if p_parsed.query:
        path += '?' + p_parsed.query

    A1 = '%s:%s:%s' % (self.username, realm, self.password)
    A2 = '%s:%s' % (method, path)

    HA1 = hash_utf8(A1)
    HA2 = hash_utf8(A2)

    # The nonce count increases while the server keeps reusing the nonce.
    if nonce == self.last_nonce:
        self.nonce_count += 1
    else:
        self.nonce_count = 1
    ncvalue = '%08x' % self.nonce_count

    # Client nonce: hash of the counter, server nonce, current time and
    # random bytes, truncated to 16 hex digits.
    s = str(self.nonce_count).encode('utf-8')
    s += nonce.encode('utf-8')
    s += time.ctime().encode('utf-8')
    s += os.urandom(8)
    cnonce = (hashlib.sha1(s).hexdigest()[:16])

    noncebit = "%s:%s:%s:%s:%s" % (nonce, ncvalue, cnonce, qop, HA2)
    if _algorithm == 'MD5-SESS':
        HA1 = hash_utf8('%s:%s:%s' % (HA1, nonce, cnonce))

    if qop is None:
        respdig = KD(HA1, "%s:%s" % (nonce, HA2))
    elif qop == 'auth' or 'auth' in qop.split(','):
        respdig = KD(HA1, noncebit)
    else:
        # XXX handle auth-int.
        return None

    self.last_nonce = nonce

    # XXX should the partial digests be encoded too?
    # One placeholder per argument: username, realm, nonce, uri, response.
    base = 'username="%s", realm="%s", nonce="%s", uri="%s", ' \
           'response="%s"' % (self.username, realm, nonce, path, respdig)
    if opaque:
        base += ', opaque="%s"' % opaque
    if algorithm:
        base += ', algorithm="%s"' % algorithm
    if entdig:
        base += ', digest="%s"' % entdig
    if qop:
        base += ', qop="auth", nc=%s, cnonce="%s"' % (ncvalue, cnonce)

    return 'Digest %s' % (base)
def get_host(self):
    """Return the netloc component of the wrapped request's URL."""
    parts = urlparse(self._r.url)
    return parts.netloc
def __init__(self, request):
    """Wrap ``request``.

    Stores the request as ``_r``, starts with an empty ``_new_headers``
    dict, and exposes the scheme of the request's URL as ``type``.

    :param request: The request object to wrap; must expose ``url``.
    """
    self._r = request
    self._new_headers = {}

    parsed_url = urlparse(request.url)
    self.type = parsed_url.scheme