def request_url(self, request, proxies): """Obtain the url to use when making the final request. If the message is being sent through a HTTP proxy, the full URL has to be used. Otherwise, we should only use the path portion of the URL. This should not be called from user code, and is only exposed for use when subclassing the :class:`HTTPAdapter <requests.adapters.HTTPAdapter>`. :param request: The :class:`PreparedRequest <PreparedRequest>` being sent. :param proxies: A dictionary of schemes or schemes and hosts to proxy URLs. :rtype: str """ proxy = select_proxy(request.url, proxies) scheme = urlparse(request.url).scheme is_proxied_http_request = (proxy and scheme != 'https') using_socks_proxy = False if proxy: proxy_scheme = urlparse(proxy).scheme.lower() using_socks_proxy = proxy_scheme.startswith('socks') url = request.path_url if is_proxied_http_request and not using_socks_proxy: url = urldefragauth(request.url) return url
def should_bypass_proxies(url, no_proxy): """ Returns whether we should bypass proxies or not. :rtype: bool """ get_proxy = lambda k: os.environ.get(k) or os.environ.get(k.upper()) # First check whether no_proxy is defined. If it is, check that the URL # we're getting isn't in the no_proxy list. no_proxy_arg = no_proxy if no_proxy is None: no_proxy = get_proxy('no_proxy') netloc = urlparse(url).netloc if no_proxy: # We need to check whether we match here. We need to see if we match # the end of the netloc, both with and without the port. no_proxy = (host for host in no_proxy.replace(' ', '').split(',') if host) ip = netloc.split(':')[0] if is_ipv4_address(ip): for proxy_ip in no_proxy: if is_valid_cidr(proxy_ip): if address_in_network(ip, proxy_ip): return True elif ip == proxy_ip: # If no_proxy ip was defined in plain IP notation instead of cidr notation & # matches the IP of the index return True else: for host in no_proxy: if netloc.endswith(host) or netloc.split(':')[0].endswith( host): # The URL does match something in no_proxy, so we don't want # to apply the proxies on this URL. return True # If the system proxy settings indicate that this URL should be bypassed, # don't proxy. # The proxy_bypass function is incredibly buggy on OS X in early versions # of Python 2.6, so allow this call to fail. Only catch the specific # exceptions we've seen, though: this call failing in other ways can reveal # legitimate problems. with set_environ('no_proxy', no_proxy_arg): try: bypass = proxy_bypass(netloc) except (TypeError, socket.gaierror): bypass = False if bypass: return True return False
def get_full_url(self): # Only return the response's URL if the user hadn't set the Host # header if not self._r.headers.get('Host'): return self._r.url # If they did set it, retrieve it and reconstruct the expected domain host = to_native_string(self._r.headers['Host'], encoding='utf-8') parsed = urlparse(self._r.url) # Reconstruct the URL as we expect it return urlunparse([ parsed.scheme, host, parsed.path, parsed.params, parsed.query, parsed.fragment ])
def rebuild_auth(self, prepared_request, response): """When being redirected we may want to strip authentication from the request to avoid leaking credentials. This method intelligently removes and reapplies authentication where possible to avoid credential loss. """ headers = prepared_request.headers url = prepared_request.url if 'Authorization' in headers: # If we get redirected to a new host, we should strip out any # authentication headers. original_parsed = urlparse(response.request.url) redirect_parsed = urlparse(url) if (original_parsed.hostname != redirect_parsed.hostname): del headers['Authorization'] # .netrc might have more auth for us on our new host. new_auth = get_netrc_auth(url) if self.trust_env else None if new_auth is not None: prepared_request.prepare_auth(new_auth) return
def get_auth_from_url(url): """Given a url with authentication components, extract them into a tuple of username,password. :rtype: (str,str) """ parsed = urlparse(url) try: auth = (unquote(parsed.username), unquote(parsed.password)) except (AttributeError, TypeError): auth = ('', '') return auth
def get_netrc_auth(url, raise_errors=False): """Returns the Requests tuple auth for a given url from netrc.""" try: from netrc import netrc, NetrcParseError netrc_path = None for f in NETRC_FILES: try: loc = os.path.expanduser('~/{0}'.format(f)) except KeyError: # os.path.expanduser can fail when $HOME is undefined and # getpwuid fails. See http://bugs.python.org/issue20164 & # https://github.com/requests/requests/issues/1846 return if os.path.exists(loc): netrc_path = loc break # Abort early if there isn't one. if netrc_path is None: return ri = urlparse(url) # Strip port numbers from netloc. This weird `if...encode`` dance is # used for Python 3.2, which doesn't support unicode literals. splitstr = b':' if isinstance(url, str): splitstr = splitstr.decode('ascii') host = ri.netloc.split(splitstr)[0] try: _netrc = netrc(netrc_path).authenticators(host) if _netrc: # Return with login / password login_i = (0 if _netrc[0] else 1) return (_netrc[login_i], _netrc[2]) except (NetrcParseError, IOError): # If there was a parsing error or a permissions issue reading the file, # we'll just skip netrc auth unless explicitly asked to raise errors. if raise_errors: raise # AppEngine hackiness. except (ImportError, AttributeError): pass
def urldefragauth(url): """ Given a url remove the fragment and the authentication part. :rtype: str """ scheme, netloc, path, params, query, fragment = urlparse(url) # see func:`prepend_scheme_if_needed` if not netloc: netloc, path = path, netloc netloc = netloc.rsplit('@', 1)[-1] return urlunparse((scheme, netloc, path, params, query, ''))
def prepend_scheme_if_needed(url, new_scheme): """Given a URL that may or may not have a scheme, prepend the given scheme. Does not replace a present scheme with the one provided as an argument. :rtype: str """ scheme, netloc, path, params, query, fragment = urlparse(url, new_scheme) # urlparse is a finicky beast, and sometimes decides that there isn't a # netloc present. Assume that it's being over-cautious, and switch netloc # and path if urlparse decided there was no netloc. if not netloc: netloc, path = path, netloc return urlunparse((scheme, netloc, path, params, query, fragment))
def rebuild_proxies(self, prepared_request, proxies): """This method re-evaluates the proxy configuration by considering the environment variables. If we are redirected to a URL covered by NO_PROXY, we strip the proxy configuration. Otherwise, we set missing proxy keys for this URL (in case they were stripped by a previous redirect). This method also replaces the Proxy-Authorization header where necessary. :rtype: dict """ proxies = proxies if proxies is not None else {} headers = prepared_request.headers url = prepared_request.url scheme = urlparse(url).scheme new_proxies = proxies.copy() no_proxy = proxies.get('no_proxy') bypass_proxy = should_bypass_proxies(url, no_proxy=no_proxy) if self.trust_env and not bypass_proxy: environ_proxies = get_environ_proxies(url, no_proxy=no_proxy) proxy = environ_proxies.get(scheme, environ_proxies.get('all')) if proxy: new_proxies.setdefault(scheme, proxy) if 'Proxy-Authorization' in headers: del headers['Proxy-Authorization'] try: username, password = get_auth_from_url(new_proxies[scheme]) except KeyError: username, password = None, None if username and password: headers['Proxy-Authorization'] = _basic_auth_str(username, password) return new_proxies
def get_connection(self, url, proxies=None): """Returns a urllib3 connection for the given URL. This should not be called from user code, and is only exposed for use when subclassing the :class:`HTTPAdapter <requests.adapters.HTTPAdapter>`. :param url: The URL to connect to. :param proxies: (optional) A Requests-style dictionary of proxies used on this request. :rtype: urllib3.ConnectionPool """ proxy = select_proxy(url, proxies) if proxy: proxy = prepend_scheme_if_needed(proxy, 'http') proxy_manager = self.proxy_manager_for(proxy) conn = proxy_manager.connection_from_url(url) else: # Only scheme should be lower case parsed = urlparse(url) url = parsed.geturl() conn = self.poolmanager.connection_from_url(url) return conn
def select_proxy(url, proxies): """Select a proxy for the url, if applicable. :param url: The url being for the request :param proxies: A dictionary of schemes or schemes and hosts to proxy URLs """ proxies = proxies or {} urlparts = urlparse(url) if urlparts.hostname is None: return proxies.get(urlparts.scheme, proxies.get('all')) proxy_keys = [ urlparts.scheme + '://' + urlparts.hostname, urlparts.scheme, 'all://' + urlparts.hostname, 'all', ] proxy = None for proxy_key in proxy_keys: if proxy_key in proxies: proxy = proxies[proxy_key] break return proxy
def build_digest_header(self, method, url): """ :rtype: str """ realm = self._thread_local.chal['realm'] nonce = self._thread_local.chal['nonce'] qop = self._thread_local.chal.get('qop') algorithm = self._thread_local.chal.get('algorithm') opaque = self._thread_local.chal.get('opaque') hash_utf8 = None if algorithm is None: _algorithm = 'MD5' else: _algorithm = algorithm.upper() # lambdas assume digest modules are imported at the top level if _algorithm == 'MD5' or _algorithm == 'MD5-SESS': def md5_utf8(x): if isinstance(x, str): x = x.encode('utf-8') return hashlib.md5(x).hexdigest() hash_utf8 = md5_utf8 elif _algorithm == 'SHA': def sha_utf8(x): if isinstance(x, str): x = x.encode('utf-8') return hashlib.sha1(x).hexdigest() hash_utf8 = sha_utf8 KD = lambda s, d: hash_utf8("%s:%s" % (s, d)) if hash_utf8 is None: return None # XXX not implemented yet entdig = None p_parsed = urlparse(url) #: path is request-uri defined in RFC 2616 which should not be empty path = p_parsed.path or "/" if p_parsed.query: path += '?' + p_parsed.query A1 = '%s:%s:%s' % (self.username, realm, self.password) A2 = '%s:%s' % (method, path) HA1 = hash_utf8(A1) HA2 = hash_utf8(A2) if nonce == self._thread_local.last_nonce: self._thread_local.nonce_count += 1 else: self._thread_local.nonce_count = 1 ncvalue = '%08x' % self._thread_local.nonce_count s = str(self._thread_local.nonce_count).encode('utf-8') s += nonce.encode('utf-8') s += time.ctime().encode('utf-8') s += os.urandom(8) cnonce = (hashlib.sha1(s).hexdigest()[:16]) if _algorithm == 'MD5-SESS': HA1 = hash_utf8('%s:%s:%s' % (HA1, nonce, cnonce)) if not qop: respdig = KD(HA1, "%s:%s" % (nonce, HA2)) elif qop == 'auth' or 'auth' in qop.split(','): noncebit = "%s:%s:%s:%s:%s" % (nonce, ncvalue, cnonce, 'auth', HA2) respdig = KD(HA1, noncebit) else: # XXX handle auth-int. return None self._thread_local.last_nonce = nonce # XXX should the partial digests be encoded too? base = 'username="******", realm="%s", nonce="%s", uri="%s", ' \ 'response="%s"' % (self.username, realm, nonce, path, respdig) if opaque: base += ', opaque="%s"' % opaque if algorithm: base += ', algorithm="%s"' % algorithm if entdig: base += ', digest="%s"' % entdig if qop: base += ', qop="auth", nc=%s, cnonce="%s"' % (ncvalue, cnonce) return 'Digest %s' % (base)
def resolve_redirects(self, resp, req, stream=False, timeout=None, verify=True, cert=None, proxies=None, yield_requests=False, **adapter_kwargs): """Receives a Response. Returns a generator of Responses or Requests.""" hist = [] # keep track of history url = self.get_redirect_target(resp) while url: prepared_request = req.copy() # Update history and keep track of redirects. # resp.history must ignore the original request in this loop hist.append(resp) resp.history = hist[1:] try: resp.content # Consume socket so it can be released except (ChunkedEncodingError, ContentDecodingError, RuntimeError): resp.raw.read(decode_content=False) if len(resp.history) >= self.max_redirects: raise TooManyRedirects('Exceeded %s redirects.' % self.max_redirects, response=resp) # Release the connection back into the pool. resp.close() # Handle redirection without scheme (see: RFC 1808 Section 4) if url.startswith('//'): parsed_rurl = urlparse(resp.url) url = '%s:%s' % (to_native_string(parsed_rurl.scheme), url) # The scheme should be lower case... parsed = urlparse(url) url = parsed.geturl() # Facilitate relative 'location' headers, as allowed by RFC 7231. # (e.g. '/path/to/resource' instead of 'http://domain.tld/path/to/resource') # Compliant with RFC3986, we percent encode the url. if not parsed.netloc: url = urljoin(resp.url, requote_uri(url)) else: url = requote_uri(url) prepared_request.url = to_native_string(url) self.rebuild_method(prepared_request, resp) # https://github.com/requests/requests/issues/1084 if resp.status_code not in (codes.temporary_redirect, codes.permanent_redirect): # https://github.com/requests/requests/issues/3490 purged_headers = ('Content-Length', 'Content-Type', 'Transfer-Encoding') for header in purged_headers: prepared_request.headers.pop(header, None) prepared_request.body = None headers = prepared_request.headers try: del headers['Cookie'] except KeyError: pass # Extract any cookies sent on the response to the cookiejar # in the new request. Because we've mutated our copied prepared # request, use the old one that we haven't yet touched. extract_cookies_to_jar(prepared_request._cookies, req, resp.raw) merge_cookies(prepared_request._cookies, self.cookies) prepared_request.prepare_cookies(prepared_request._cookies) # Rebuild auth and proxy information. proxies = self.rebuild_proxies(prepared_request, proxies) self.rebuild_auth(prepared_request, resp) # A failed tell() sets `_body_position` to `object()`. This non-None # value ensures `rewindable` will be True, allowing us to raise an # UnrewindableBodyError, instead of hanging the connection. rewindable = ( prepared_request._body_position is not None and ('Content-Length' in headers or 'Transfer-Encoding' in headers) ) # Attempt to rewind consumed file-like object. if rewindable: rewind_body(prepared_request) # Override the original request. req = prepared_request if yield_requests: yield req else: resp = self.send( req, stream=stream, timeout=timeout, verify=verify, cert=cert, proxies=proxies, allow_redirects=False, **adapter_kwargs ) extract_cookies_to_jar(self.cookies, prepared_request, resp.raw) # extract redirect url, if any, for the next loop url = self.get_redirect_target(resp) yield resp
def get_host(self): return urlparse(self._r.url).netloc
def __init__(self, request): self._r = request self._new_headers = {} self.type = urlparse(self._r.url).scheme