def test_iri_equality_special_cases():
    """Check tuple equality and the unsupported-type path of IRI comparison.

    The parsed reference is compared against a 5-tuple of
    (scheme, authority, path, query, fragment), then against an ``int``,
    which is expected to raise ``TypeError``.
    """
    expected_parts = (
        u"http",
        u"Bü:ẞ@βόλος.com",
        u"/%CE%B2/%CF%8C",
        u"%CE%BB",
        u"%CE%BF%CF%82",
    )
    parsed = rfc3986.iri_reference(u"http://Bü:ẞ@βόλος.com/β/ό?λ#ος")
    assert parsed == expected_parts

    # The comparison itself (not just the parse) sits inside the context
    # manager, so a TypeError from either step satisfies the expectation.
    with pytest.raises(TypeError):
        rfc3986.iri_reference(u"http://ẞ.com") == 1
def __init__(self, url: URLTypes = "", params: QueryParamTypes = None) -> None:
    """Initialise the URL from a string or another ``URL``.

    Args:
        url: A URL string, or an existing ``URL`` whose parsed reference
            is reused as-is.
        params: Optional query parameters. When truthy they are combined
            with any query string already present on the URL.

    Raises:
        InvalidURL: If rfc3986 reports an invalid authority while encoding.
        TypeError: If ``url`` is neither a str nor a ``URL``.
    """
    # Reject unsupported inputs up front.
    if not isinstance(url, (str, URL)):
        raise TypeError(
            f"Invalid type for url. Expected str or httpx.URL, got {type(url)}"
        )

    if isinstance(url, str):
        try:
            self._uri_reference = rfc3986.iri_reference(url).encode()
        except rfc3986.exceptions.InvalidAuthority as exc:
            raise InvalidURL(message=str(exc)) from None

        # We don't want to normalize relative URLs, since doing so
        # removes any leading `../` portion.
        if self.is_absolute_url:
            self._uri_reference = self._uri_reference.normalize()
    else:
        self._uri_reference = url._uri_reference

    # Nothing more to do unless query parameters were supplied.
    if not params:
        return

    # Add the query parameters, merging with any in the URL if needed.
    existing_query = self._uri_reference.query
    if existing_query:
        merged = QueryParams(existing_query)
        merged.update(params)
        query_string = str(merged)
    else:
        query_string = str(QueryParams(params))
    self._uri_reference = self._uri_reference.copy_with(query=query_string)
def __init__(self, url: typing.Union["URL", str, RawURL] = "", **kwargs: typing.Any) -> None:
    """Initialise the URL from a string, a raw 4-tuple, or another ``URL``.

    Args:
        url: A URL string, an existing ``URL`` (its parsed reference is
            reused as-is), or a ``(scheme, host, port, path)`` tuple whose
            scheme, host and path are ASCII-encoded bytes and whose port
            may be ``None``.
        **kwargs: When non-empty, forwarded to ``self.copy_with`` and the
            resulting reference replaces the one built from ``url``.

    Raises:
        InvalidURL: If rfc3986 reports an invalid authority while encoding.
        TypeError: If ``url`` is not a str, tuple, or ``URL``.
    """
    if isinstance(url, (str, tuple)):
        if isinstance(url, tuple):
            # Rebuild a textual URL from the raw components.
            scheme_bytes, host_bytes, port, path_bytes = url
            scheme = scheme_bytes.decode("ascii")
            host = host_bytes.decode("ascii")
            if ":" in host and not host.startswith("["):
                # it's an IPv6 address, so it should be enclosed in "[" and "]"
                # ref: https://tools.ietf.org/html/rfc2732#section-2
                # ref: https://tools.ietf.org/html/rfc3986#section-3.2.2
                host = f"[{host}]"
            if port is None:
                port_str = ""
            else:
                port_str = f":{port}"
            path = path_bytes.decode("ascii")
            url = f"{scheme}://{host}{port_str}{path}"

        try:
            self._uri_reference = rfc3986.iri_reference(url).encode()
        except rfc3986.exceptions.InvalidAuthority as exc:
            raise InvalidURL(message=str(exc)) from None

        # We don't want to normalize relative URLs, since doing so
        # removes any leading `../` portion.
        if self.is_absolute_url:
            self._uri_reference = self._uri_reference.normalize()
    elif isinstance(url, URL):
        self._uri_reference = url._uri_reference
    else:
        raise TypeError(
            f"Invalid type for url. Expected str or httpx.URL, got {type(url)}: {url!r}"
        )

    # Perform port normalization, following the WHATWG spec for default ports.
    #
    # See:
    # * https://tools.ietf.org/html/rfc3986#section-3.2.3
    # * https://url.spec.whatwg.org/#url-miscellaneous
    # * https://url.spec.whatwg.org/#scheme-state
    default_port_suffixes = {
        "ftp": ":21",
        "http": ":80",
        "https": ":443",
        "ws": ":80",
        "wss": ":443",
    }
    default_port = default_port_suffixes.get(self._uri_reference.scheme, "")
    authority = self._uri_reference.authority or ""
    if default_port and authority.endswith(default_port):
        # Drop the redundant ":<port>" suffix from the authority.
        trimmed_authority = authority[: -len(default_port)]
        self._uri_reference = self._uri_reference.copy_with(
            authority=trimmed_authority
        )

    if kwargs:
        self._uri_reference = self.copy_with(**kwargs)._uri_reference
def url_validator(_, attribute: attr.Attribute, urls: URLS):
    """Validate that ``urls`` is a collection of acceptable URL strings.

    The signature matches the attrs validator convention
    (instance, attribute, value); the first argument is ignored.

    Each URL is parsed with ``iri_reference(...).encode()`` and checked
    with an rfc3986 ``Validator`` restricted to the ``https``, ``http``
    and ``file`` schemes. ``http``/``https`` URLs must have a host and
    ``file`` URLs must have a path.

    Args:
        _: Unused.
        attribute: The attribute being validated; only its ``name`` is
            used, in the error message.
        urls: The value to validate.

    Raises:
        TypeError: If ``urls`` is not a set, list or tuple, or if any item
            is not a string.
        ValueError: If a URL has a scheme outside the allowed list, has
            invalid components, or lacks the required host/path.
    """
    if not isinstance(urls, (set, list, tuple)):
        message = f'{attribute.name} is not a set, list or tuple instance: {urls}'
        # No exception is being handled here, so use error() rather than
        # exception(), which would append a meaningless "NoneType: None".
        logger.error(message)
        raise TypeError(message)

    if not all(isinstance(url, str) for url in urls):
        message = f'not all items in urls are a string: {urls}'
        logger.error(message)
        raise TypeError(message)

    validator = validators.Validator()
    allowed_schemes = ['https', 'http', 'file']
    validator.allow_schemes(*allowed_schemes)
    validator.check_validity_of('scheme', 'host', 'path', 'query', 'fragment')

    for url in urls:
        # NOTE(review): encode() sits outside the try block, so any error it
        # raises propagates as-is instead of becoming ValueError - confirm
        # this is intended.
        uri = iri_reference(url).encode()
        try:
            validator.validate(uri)
        except exceptions.UnpermittedComponentError as exc:
            message = f'{url} does not have a scheme in {allowed_schemes}'
            logger.exception(message)
            raise ValueError(message) from exc
        except exceptions.InvalidComponentsError as exc:
            # not sure this error can happen if we use uri_reference with a
            # string, but let's be careful
            message = f'{url} is not a valid url'
            logger.exception(message)
            raise ValueError(message) from exc

        if uri.scheme in ('http', 'https') and uri.host is None:
            message = f'url {url} must provide a host part'
            logger.error(message)
            raise ValueError(message)

        if uri.scheme == 'file' and uri.path is None:
            message = f'url {url} must provide a path to a local file'
            logger.error(message)
            raise ValueError(message)
def __init__(
    self, url: typing.Union["URL", str, RawURL] = "", params: QueryParamTypes = None
) -> None:
    """Initialise the URL from a string, a raw 4-tuple, or another ``URL``.

    Args:
        url: A URL string, an existing ``URL`` (its parsed reference is
            reused as-is), or a ``(scheme, host, port, path)`` tuple whose
            scheme, host and path are ASCII-encoded bytes and whose port
            may be ``None``.
        params: Optional query parameters. When truthy they are combined
            with any query string already present on the URL.

    Raises:
        InvalidURL: If rfc3986 reports an invalid authority while encoding.
        TypeError: If ``url`` is not a str, tuple, or ``URL``.
    """
    if isinstance(url, (str, tuple)):
        if isinstance(url, tuple):
            raw_scheme, raw_host, port, raw_path = url
            scheme = raw_scheme.decode("ascii")
            host = raw_host.decode("ascii")
            if ":" in host and not host.startswith("["):
                # A bare IPv6 address must be enclosed in "[" and "]",
                # otherwise its colons are ambiguous with the port separator.
                # ref: https://tools.ietf.org/html/rfc3986#section-3.2.2
                host = f"[{host}]"
            port_str = "" if port is None else f":{port}"
            path = raw_path.decode("ascii")
            url = f"{scheme}://{host}{port_str}{path}"

        try:
            self._uri_reference = rfc3986.iri_reference(url).encode()
        except rfc3986.exceptions.InvalidAuthority as exc:
            raise InvalidURL(message=str(exc)) from None

        if self.is_absolute_url:
            # We don't want to normalize relative URLs, since doing so
            # removes any leading `../` portion.
            self._uri_reference = self._uri_reference.normalize()
    elif isinstance(url, URL):
        self._uri_reference = url._uri_reference
    else:
        raise TypeError(
            f"Invalid type for url. Expected str or httpx.URL, got {type(url)}"
        )

    # Add any query parameters, merging with any in the URL if needed.
    if params:
        if self._uri_reference.query:
            url_params = QueryParams(self._uri_reference.query)
            url_params.update(params)
            query_string = str(url_params)
        else:
            query_string = str(QueryParams(params))
        self._uri_reference = self._uri_reference.copy_with(query=query_string)
def test_encode_invalid_iri(iri):
    """Encoding the given IRI is expected to raise ``InvalidAuthority``.

    Parsing happens outside the ``pytest.raises`` block, so only the
    ``encode()`` call is allowed to raise.
    """
    reference = rfc3986.iri_reference(iri)

    with pytest.raises(InvalidAuthority):
        reference.encode()
def test_iri_equality(iri, uri):
    """A parsed IRI reference compares equal to the string it was parsed from.

    ``uri`` is accepted but not used in the body.
    """
    parsed = rfc3986.iri_reference(iri)
    assert parsed == iri
def test_encode_iri(iri, uri):
    """Parsing ``iri``, encoding it and unsplitting it yields ``uri``."""
    encoded_reference = rfc3986.iri_reference(iri).encode()
    round_tripped = encoded_reference.unsplit()
    assert round_tripped == uri
def test_iri_equality_special_cases():
    """Compare an IRI reference against a component tuple and against an int.

    The tuple comparison must hold; comparing with an ``int`` is expected
    to raise ``TypeError``.
    """
    iri_ref = rfc3986.iri_reference(u"http://Bü:ẞ@βόλος.com/β/ό?λ#ος")
    # (scheme, authority, path, query, fragment)
    components = (
        u"http",
        u"Bü:ẞ@βόλος.com",
        u"/%CE%B2/%CF%8C",
        u"%CE%BB",
        u"%CE%BF%CF%82",
    )
    assert iri_ref == components

    with pytest.raises(TypeError):
        # The bare comparison is the statement under test.
        rfc3986.iri_reference(u"http://ẞ.com") == 1