def test_idna(self):
    """Check that non-ASCII host names are returned in their ``xn--`` form."""
    cases = (
        ("http://ドメイン.テスト", "http://xn--eckwd4c7c.xn--zckzah/"),
        ("http://Яндекс.рф", "http://xn--d1acpjx3f.xn--p1ai/"),
    )
    for raw, expected in cases:
        self.assertEqual(normalize_url(raw), expected)
def test_normalize_percent_encoding_in_querystring(self):
    """Check that lower-case percent escapes in the query are upper-cased."""
    normalized = normalize_url("http://example.com/?a=b%7b%7d")
    self.assertEqual(normalized, "http://example.com/?a=b%7B%7D")
def test_unicode_query_string(self):
    """Check that non-ASCII query text is percent-encoded as UTF-8 bytes."""
    normalized = normalize_url("http://example.com/?file=résumé.pdf")
    self.assertEqual(
        normalized,
        "http://example.com/?file=r%C3%A9sum%C3%A9.pdf",
    )
def test_normalized_urls(self):
    """Check that a URL already in normal form is returned unchanged."""
    already_normal = "http://example.com/"
    self.assertEqual(normalize_url("http://example.com/"), already_normal)
def test_query_sorting(self):
    """Check that query parameters come back in sorted order."""
    expected = 'http://example.com/a?b=1&c=2'
    # First input is already ordered, second one is reversed.
    for raw in ('http://example.com/a?b=1&c=2', 'http://example.com/a?c=2&b=1'):
        self.assertEqual(normalize_url(raw), expected)
def test_lower_case(self):
    """Check that scheme and host are lower-cased while the path is kept."""
    cases = (
        ("HTTP://examPle.cOm/", "http://example.com/"),
        # Upper-case letters in the path must survive.
        ("http://example.com/A", "http://example.com/A"),
    )
    for raw, expected in cases:
        self.assertEqual(normalize_url(raw), expected)
def make_url(url, params):
    """Return ``url`` with ``params`` merged in as extra query arguments.

    A mapping is converted to its ``items()`` view first. When ``params`` is
    ``None`` the URL is returned untouched, without being normalized.
    """
    if is_mapping(params):
        params = params.items()
    if params is None:
        return url
    return normalize_url(url, extra_query_args=params)
def _split_url(response, url):
    """Resolve ``url`` against ``response``, normalize it and parse the result."""
    absolute = response.urljoin(url)
    normalized = normalize_url(absolute)
    return urlparse(normalized)
def test_accept_https():
    """Check that https URLs are accepted with scheme and host lower-cased."""
    expected = 'https://example.com/'
    candidates = (
        'https://example.com/',
        'HTTPS://example.com/',
        'https://EXAMPLE.COM/',
    )
    for candidate in candidates:
        assert normalize_url(candidate) == expected
def test_unsupported_scheme():
    """Check that an ftp URL raises ``InvalidUrlException`` with its message."""
    with raises(InvalidUrlException) as excinfo:
        normalize_url('ftp://example.com')
    # The message is inspected after the context manager has captured the error.
    message = str(excinfo.value)
    assert message == 'Unsupported scheme.'
def clean_text(self, url, **kwargs):
    """Return the normalized form of ``url`` (see `urlnormalizer`).

    If normalization raises ``UnicodeDecodeError`` a warning is logged and
    ``None`` is returned.
    """
    try:
        normalized = normalize_url(url)
    except UnicodeDecodeError:
        log.warning("Invalid URL: %r", url)
        return None
    return normalized
def test_remove_host_trailing_dot():
    """Check that a trailing dot after the host name is dropped."""
    normalized = normalize_url('http://example.com./')
    assert normalized == 'http://example.com/'
def test_queries_sorted():
    """Check that parameter order in the query does not affect the result.

    Both URLs carry the same two parameters in opposite order, so their
    normalized forms must be equal.
    """
    # The separators must be literal ``&param``: an HTML-entity decoding
    # artifact had turned them into a pilcrow, which left each URL with a
    # single garbled parameter and made the test meaningless.
    forward = normalize_url(
        'http://example.com/?param1=param1val&param2=param2val')
    backward = normalize_url(
        'http://example.com/?param2=param2val&param1=param1val')
    assert forward == backward
def test_missing_scheme():
    """Check that a scheme-less URL raises ``MalformatUrlException``."""
    with raises(MalformatUrlException) as excinfo:
        normalize_url('example.com')
    # The message is inspected after the context manager has captured the error.
    message = str(excinfo.value)
    assert message == 'Missing scheme.'
def test_normalize_ipv6(self):
    """Check bracketed IPv6 hosts, with and without scheme and port."""
    cases = (
        ("[::1]", "http://[::1]/"),
        ("http://[::1]", "http://[::1]/"),
        ("[::1]:8080", "http://[::1]:8080/"),
        ("http://[::1]:8080", "http://[::1]:8080/"),
    )
    for raw, expected in cases:
        assert normalize_url(raw) == expected
def test_url_without_path_should_have_trailing_slash():
    """Check that a bare host URL gains a trailing slash."""
    normalized = normalize_url('http://example.com')
    assert normalized == 'http://example.com/'
def test_non_ideal_inputs(self):
    """Check that scheme-less and protocol-relative inputs default to http."""
    cases = (
        ("example.com", "http://example.com/"),
        ("example.com/abc", "http://example.com/abc"),
        ("//example.com/abc", "http://example.com/abc"),
    )
    for raw, expected in cases:
        assert normalize_url(raw) == expected
def test_url_with_path_should_not_have_trailing_slash():
    """Check that the trailing slash after a non-empty path is removed."""
    normalized = normalize_url('http://example.com/part1/part2/')
    assert normalized == 'http://example.com/part1/part2'
def test_remove_campaign_parameters():
    """Check that ``utm_*`` tracking parameters are stripped from the query."""
    tracked = (
        'http://example.com/?utm_source=WP&utm_medium=box&utm_campaign=Khlwp'
    )
    assert normalize_url(tracked) == 'http://example.com/'
def test_safely_handle_params():
    """Check that an ordinary query parameter after a path is left intact."""
    address = 'http://example.com/part1/part2?q=query'
    assert normalize_url('http://example.com/part1/part2?q=query') == address
def check_url(value):
    """Validate ``value`` as a URL.

    Returns ``True`` when the stringified value is ``None`` or normalizes to
    something other than ``None``.

    Raises:
        ValueError: if the stringified value normalizes to ``None``.
    """
    text = stringify(value)
    if text is None:
        return True
    if normalize_url(text) is None:
        raise ValueError(gettext('Invalid URL.'))
    return True
def test_fragment_removed():
    """Check that a fragment, including an empty one, is dropped."""
    expected = 'http://example.com/'
    for raw in ('http://example.com/#fragment', 'http://example.com/#'):
        assert normalize_url(raw) == expected
def test_dont_percent_encode_safe_chars_query(self):
    """Check that safe punctuation in the query is not percent-encoded."""
    address = "http://example.com/a/?face=(-.-)"
    self.assertEqual(normalize_url("http://example.com/a/?face=(-.-)"), address)
def test_percent_encode_querystring(self):
    """Check that unsafe characters in the query get percent-encoded."""
    normalized = normalize_url("http://example.com/?a=hello{}")
    self.assertEqual(normalized, "http://example.com/?a=hello%7B%7D")
def test_return_type(self):
    """Check that the normalized URL is an instance of ``text_type``."""
    result = normalize_url("http://example.com/")
    assert isinstance(result, text_type)
def test_unicode_path(self):
    """Check that a non-ASCII path is percent-encoded as UTF-8 bytes."""
    normalized = normalize_url("http://example.com/résumé")
    self.assertEqual(normalized, "http://example.com/r%C3%A9sum%C3%A9")
def clean_text(self, url, **kwargs):
    """Return the normalized form of ``url`` (see `urlnormalizer`).

    ``None`` is returned when normalization raises ``UnicodeDecodeError``.
    """
    try:
        normalized = normalize_url(url)
    except UnicodeDecodeError:
        normalized = None
    return normalized
def test_dont_change_username_password(self):
    """Check that user name and password keep their case.

    Only the host part is expected to be lower-cased.
    """
    # Mixed-case credentials are required here: the former masked
    # placeholder consisted of asterisks only, which have no case, so the
    # test could not detect a normalizer that lower-cases the userinfo.
    self.assertEqual(
        normalize_url("http://Foo:BAR@exaMPLE.COM/"),
        "http://Foo:BAR@example.com/",
    )
def clean_text(self, url, **kwargs):
    """Return the normalized form of ``url`` (see `urlnormalizer`)."""
    normalized = normalize_url(url)
    return normalized
def test_append_slash(self):
    """Check that a URL without a path gets a trailing slash."""
    normalized = normalize_url("http://example.com")
    self.assertEqual(normalized, "http://example.com/")
def __init__(self, url):
    """Store the raw and normalized URL and derive the cache location.

    The cache path is ``settings.ARTICLE_CACHE_DIR`` joined with the MD5 hex
    digest of the UTF-8 encoded URL.
    """
    self.original_url = url
    self.url = normalize_url(url)
    # NOTE(review): the digest is computed from the raw ``url`` argument, not
    # from the normalized ``self.url`` -- confirm this is intended.
    digest = hashlib.md5(url.encode('utf-8'))
    self.url_hash = digest.hexdigest()
    cache_root = settings.ARTICLE_CACHE_DIR
    self.cache_path = os.path.join(cache_root, self.url_hash)
def test_non_string_input(self):
    """Check that non-string inputs normalize to ``None``."""
    for bad_input in (None, [], 123):
        assert normalize_url(bad_input) is None
def _validate(self, value):
    """Reject a non-``None`` value that normalizes to ``None``.

    Raises:
        ValidationError: if ``normalize_url(value)`` returns ``None``.
    """
    if value is None:
        return
    if normalize_url(value) is None:
        raise ValidationError(gettext('Invalid URL.'))
def test_strip_trailing_period(self):
    """Check that a dot trailing the host is removed, with or without a path."""
    expected = "http://example.com/"
    for raw in ("http://example.com.", "http://example.com./"):
        self.assertEqual(normalize_url(raw), expected)