def get_connection(self, url, proxies=None): """Returns a urllib3 connection for the given URL. This should not be called from user code, and is only exposed for use when subclassing the :class:`HTTPAdapter <requests.adapters.HTTPAdapter>`. :param url: The URL to connect to. :param proxies: (optional) A Requests-style dictionary of proxies used on this request. :rtype: urllib3.ConnectionPool """ proxy = select_proxy(url, proxies) if proxy: proxy = prepend_scheme_if_needed(proxy, 'http') proxy_url = parse_url(proxy) if not proxy_url.host: raise InvalidProxyURL("Please check proxy URL. It is malformed" " and could be missing the host.") proxy_manager = self.proxy_manager_for(proxy) conn = proxy_manager.connection_from_url(url) else: # Only scheme should be lower case parsed = urlparse(url) url = parsed.geturl() conn = self.poolmanager.connection_from_url(url) return conn
def prepend_scheme_if_needed(url, new_scheme): """Given a URL that may or may not have a scheme, prepend the given scheme. Does not replace a present scheme with the one provided as an argument. :rtype: str """ parsed = parse_url(url) scheme, auth, host, port, path, query, fragment = parsed # A defect in urlparse determines that there isn't a netloc present in some # urls. We previously assumed parsing was overly cautious, and swapped the # netloc and path. Due to a lack of tests on the original defect, this is # maintained with parse_url for backwards compatibility. netloc = parsed.netloc if not netloc: netloc, path = path, netloc if auth: # parse_url doesn't provide the netloc with auth # so we'll add it ourselves. netloc = '@'.join([auth, netloc]) if scheme is None: scheme = new_scheme if path is None: path = '' return urlunparse((scheme, netloc, path, '', query, fragment))
def prepare_url(self, url, params): """Prepares the given HTTP URL.""" #: Accept objects that have string representations. #: We're unable to blindly call unicode/str functions #: as this will include the bytestring indicator (b'') #: on python 3.x. #: https://github.com/requests/requests/pull/2238 if isinstance(url, bytes): url = url.decode('utf8') else: url = unicode(url) if is_py2 else str(url) # Remove leading whitespaces from url url = url.lstrip() # Don't do any URL preparation for non-HTTP schemes like `mailto`, # `data` etc to work around exceptions from `url_parse`, which # handles RFC 3986 only. if ':' in url and not url.lower().startswith('http'): self.url = url return # Support for unicode domain names and paths. try: scheme, auth, host, port, path, query, fragment = parse_url(url) except LocationParseError as e: raise InvalidURL(*e.args) if not scheme: error = ( "Invalid URL {0!r}: No schema supplied. Perhaps you meant http://{0}?" ) error = error.format(to_native_string(url, 'utf8')) raise MissingSchema(error) if not host: raise InvalidURL("Invalid URL %r: No host supplied" % url) # In general, we want to try IDNA encoding the hostname if the string contains # non-ASCII characters. This allows users to automatically get the correct IDNA # behaviour. For strings containing only ASCII characters, we need to also verify # it doesn't start with a wildcard (*), before allowing the unencoded hostname. if not unicode_is_ascii(host): try: host = self._get_idna_encoded_host(host) except UnicodeError: raise InvalidURL('URL has an invalid label.') elif host.startswith(u'*'): raise InvalidURL('URL has an invalid label.') # Carefully reconstruct the network location netloc = auth or '' if netloc: netloc += '@' netloc += host if port: netloc += ':' + str(port) # Bare domains aren't valid URLs. if not path: path = '/' if is_py2: if isinstance(scheme, str): scheme = scheme.encode('utf-8') if isinstance(netloc, str): netloc = netloc.encode('utf-8') if isinstance(path, str): path = path.encode('utf-8') if isinstance(query, str): query = query.encode('utf-8') if isinstance(fragment, str): fragment = fragment.encode('utf-8') if isinstance(params, (str, bytes)): params = to_native_string(params) enc_params = self._encode_params(params) if enc_params: if query: query = '%s&%s' % (query, enc_params) else: query = enc_params url = requote_uri( urlunparse([scheme, netloc, path, None, query, fragment])) self.url = url
def prepare_url(self, url, params): """Prepares the given HTTP URL.""" #: Accept objects that have string representations. #: We're unable to blindly call unicode/str functions #: as this will include the bytestring indicator (b'') #: on python 3.x. #: https://github.com/requests/requests/pull/2238 if isinstance(url, bytes): url = url.decode('utf8') else: url = unicode(url) if is_py2 else str(url) # Remove leading whitespaces from url url = url.lstrip() # Don't do any URL preparation for non-HTTP schemes like `mailto`, # `data` etc to work around exceptions from `url_parse`, which # handles RFC 3986 only. if ':' in url and not url.lower().startswith('http'): self.url = url return # Support for unicode domain names and paths. try: scheme, auth, host, port, path, query, fragment = parse_url(url) except LocationParseError as e: raise InvalidURL(*e.args) if not scheme: error = ("Invalid URL {0!r}: No schema supplied. Perhaps you meant http://{0}?") error = error.format(to_native_string(url, 'utf8')) raise MissingSchema(error) if not host: raise InvalidURL("Invalid URL %r: No host supplied" % url) # In general, we want to try IDNA encoding the hostname if the string contains # non-ASCII characters. This allows users to automatically get the correct IDNA # behaviour. For strings containing only ASCII characters, we need to also verify # it doesn't start with a wildcard (*), before allowing the unencoded hostname. if not unicode_is_ascii(host): try: host = self._get_idna_encoded_host(host) except UnicodeError: raise InvalidURL('URL has an invalid label.') elif host.startswith(u'*'): raise InvalidURL('URL has an invalid label.') # Carefully reconstruct the network location netloc = auth or '' if netloc: netloc += '@' netloc += host if port: netloc += ':' + str(port) # Bare domains aren't valid URLs. if not path: path = '/' if is_py2: if isinstance(scheme, str): scheme = scheme.encode('utf-8') if isinstance(netloc, str): netloc = netloc.encode('utf-8') if isinstance(path, str): path = path.encode('utf-8') if isinstance(query, str): query = query.encode('utf-8') if isinstance(fragment, str): fragment = fragment.encode('utf-8') if isinstance(params, (str, bytes)): params = to_native_string(params) enc_params = self._encode_params(params) if enc_params: if query: query = '%s&%s' % (query, enc_params) else: query = enc_params url = requote_uri(urlunparse([scheme, netloc, path, None, query, fragment])) self.url = url
def prepare_url(self, url, params): """Prepares the given HTTP URL.""" #: Accept objects that have string representations. #: We're unable to blindly call unicode/str functions #: as this will include the bytestring indicator (b'') #: on python 3.x. #: https://github.com/psf/requests/pull/2238 if isinstance(url, bytes): url = url.decode("utf8") else: url = str(url) # Remove leading whitespaces from url url = url.lstrip() # Don't do any URL preparation for non-HTTP schemes like `mailto`, # `data` etc to work around exceptions from `url_parse`, which # handles RFC 3986 only. if ":" in url and not url.lower().startswith("http"): self.url = url return # Support for unicode domain names and paths. try: scheme, auth, host, port, path, query, fragment = parse_url(url) except LocationParseError as e: raise InvalidURL(*e.args) if not scheme: raise MissingSchema( f"Invalid URL {url!r}: No scheme supplied. " f"Perhaps you meant http://{url}?" ) if not host: raise InvalidURL(f"Invalid URL {url!r}: No host supplied") # In general, we want to try IDNA encoding the hostname if the string contains # non-ASCII characters. This allows users to automatically get the correct IDNA # behaviour. For strings containing only ASCII characters, we need to also verify # it doesn't start with a wildcard (*), before allowing the unencoded hostname. if not unicode_is_ascii(host): try: host = self._get_idna_encoded_host(host) except UnicodeError: raise InvalidURL("URL has an invalid label.") elif host.startswith(("*", ".")): raise InvalidURL("URL has an invalid label.") # Carefully reconstruct the network location netloc = auth or "" if netloc: netloc += "@" netloc += host if port: netloc += f":{port}" # Bare domains aren't valid URLs. if not path: path = "/" if isinstance(params, (str, bytes)): params = to_native_string(params) enc_params = self._encode_params(params) if enc_params: if query: query = f"{query}&{enc_params}" else: query = enc_params url = requote_uri(urlunparse([scheme, netloc, path, None, query, fragment])) self.url = url