def test_urlsplit_scoped_IPv6(self):
    p = scurl.urlsplit('http://[FE80::822a:a8ff:fe49:470c%tESt]:1234')
    self.assertEqual(p.hostname, "fe80::822a:a8ff:fe49:470c%tESt")
    self.assertEqual(p.netloc, '[FE80::822a:a8ff:fe49:470c%tESt]:1234')

    p = scurl.urlsplit(b'http://[FE80::822a:a8ff:fe49:470c%tESt]:1234')
    self.assertEqual(p.hostname, b"fe80::822a:a8ff:fe49:470c%tESt")
    self.assertEqual(p.netloc, b'[FE80::822a:a8ff:fe49:470c%tESt]:1234')
def test_urlsplit_attributes(self):
    url = "HTTP://WWW.PYTHON.ORG/doc/#frag"
    p = scurl.urlsplit(url)
    self.assertEqual(p.scheme, "http")
    self.assertEqual(p.netloc, "WWW.PYTHON.ORG")
    self.assertEqual(p.path, "/doc/")
    self.assertEqual(p.query, "")
    self.assertEqual(p.fragment, "frag")
    self.assertEqual(p.username, None)
    self.assertEqual(p.password, None)
    self.assertEqual(p.hostname, "www.python.org")
    self.assertEqual(p.port, None)
    # geturl() won't return exactly the original URL in this case
    # since the scheme is always case-normalized
    #self.assertEqual(p.geturl(), url)

    url = "http://User:Pass@www.python.org:080/doc/?query=yes#frag"
    p = scurl.urlsplit(url)
    self.assertEqual(p.scheme, "http")
    self.assertEqual(p.netloc, "User:Pass@www.python.org:080")
    self.assertEqual(p.path, "/doc/")
    self.assertEqual(p.query, "query=yes")
    self.assertEqual(p.fragment, "frag")
    self.assertEqual(p.username, "User")
    self.assertEqual(p.password, "Pass")
    self.assertEqual(p.hostname, "www.python.org")
    self.assertEqual(p.port, 80)
    self.assertEqual(p.geturl(), url)

    # Addressing issue1698, which suggests Username can contain
    # "@" characters.  Though not RFC compliant, many ftp sites allow
    # and request email addresses as usernames.
    url = "http://User@example.com:Pass@www.python.org:080/doc/?query=yes#frag"
    p = scurl.urlsplit(url)
    self.assertEqual(p.scheme, "http")
    self.assertEqual(p.netloc, "User@example.com:Pass@www.python.org:080")
    self.assertEqual(p.path, "/doc/")
    self.assertEqual(p.query, "query=yes")
    self.assertEqual(p.fragment, "frag")
    self.assertEqual(p.username, "User@example.com")
    self.assertEqual(p.password, "Pass")
    self.assertEqual(p.hostname, "www.python.org")
    self.assertEqual(p.port, 80)
    self.assertEqual(p.geturl(), url)

    # Verify an illegal port of value greater than 65535.
    # NOTE: this should raise ValueError since the port is out of range
    # (just like in py3); SCURL has changed this test case.
    url = "http://www.python.org:65536"
    p = scurl.urlsplit(url)
    with self.assertRaises(ValueError):
        p.port
def checkRoundtrips(self, url, parsed, split):
    result = scurl.urlparse(url)
    self.assertEqual(result, parsed)
    t = (result.scheme, result.netloc, result.path,
         result.params, result.query, result.fragment)
    self.assertEqual(t, parsed)

    # put it back together and it should be the same
    result2 = scurl.urlunparse(result)
    self.assertEqual(result2, url)
    self.assertEqual(result2, result.geturl())

    # the result of geturl() is a fixpoint; we can always parse it
    # again to get the same result:
    result3 = scurl.urlparse(result.geturl())
    self.assertEqual(result3.geturl(), result.geturl())
    self.assertEqual(result3, result)
    self.assertEqual(result3.scheme, result.scheme)
    self.assertEqual(result3.netloc, result.netloc)
    self.assertEqual(result3.path, result.path)
    self.assertEqual(result3.params, result.params)
    self.assertEqual(result3.query, result.query)
    self.assertEqual(result3.fragment, result.fragment)
    self.assertEqual(result3.username, result.username)
    self.assertEqual(result3.password, result.password)
    self.assertEqual(result3.hostname, result.hostname)
    self.assertEqual(result3.port, result.port)

    # check the roundtrip using urlsplit() as well
    result = scurl.urlsplit(url)
    self.assertEqual(result, split)
    t = (result.scheme, result.netloc, result.path,
         result.query, result.fragment)
    self.assertEqual(t, split)
    result2 = scurl.urlunsplit(result)
    self.assertEqual(result2, url)
    self.assertEqual(result2, result.geturl())

    # check the fixpoint property of re-parsing the result of geturl()
    result3 = scurl.urlsplit(result.geturl())
    self.assertEqual(result3.geturl(), result.geturl())
    self.assertEqual(result3, result)
    self.assertEqual(result3.scheme, result.scheme)
    self.assertEqual(result3.netloc, result.netloc)
    self.assertEqual(result3.path, result.path)
    self.assertEqual(result3.query, result.query)
    self.assertEqual(result3.fragment, result.fragment)
    self.assertEqual(result3.username, result.username)
    self.assertEqual(result3.password, result.password)
    self.assertEqual(result3.hostname, result.hostname)
    self.assertEqual(result3.port, result.port)
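# Hypothetical usage sketch (not part of the original test file) showing how the
# helper above is expected to be called: `parsed` is the 6-tuple form returned by
# urlparse() (scheme, netloc, path, params, query, fragment) and `split` is the
# 5-tuple form returned by urlsplit().  The sample URL is an assumption, chosen
# because it round-trips cleanly through urlunparse()/urlunsplit().
def test_roundtrips_example(self):
    self.checkRoundtrips(
        'http://www.python.org/doc/#frag',
        ('http', 'www.python.org', '/doc/', '', '', 'frag'),
        ('http', 'www.python.org', '/doc/', '', 'frag'))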
def test_issue14072(self):
    p1 = scurl.urlsplit('tel:+31-641044153')
    self.assertEqual(p1.scheme, 'tel')
    self.assertEqual(p1.path, '+31-641044153')
    p2 = scurl.urlsplit('tel:+31641044153')
    self.assertEqual(p2.scheme, 'tel')
    self.assertEqual(p2.path, '+31641044153')

    # assert the behavior for urlparse
    p1 = scurl.urlparse('tel:+31-641044153')
    self.assertEqual(p1.scheme, 'tel')
    self.assertEqual(p1.path, '+31-641044153')
    p2 = scurl.urlparse('tel:+31641044153')
    self.assertEqual(p2.scheme, 'tel')
    self.assertEqual(p2.path, '+31641044153')
def main():
    parser = argparse.ArgumentParser(
        description='Measure the time of urlsplit and urljoin')
    parser.add_argument('--encode', action='store_true',
                        help='encode the urls (default: False)')
    args = parser.parse_args()
    encode = args.encode

    urlparse_time = 0
    for i in range(5):
        with open('benchmarks/urls/chromiumUrls.txt') as f:
            for url in f:
                if encode:
                    url = url.encode()
                start = timer()
                a = urlparse(url)
                end = timer()
                urlparse_time += end - start
    print("the urlparse time is", urlparse_time / 5, "seconds")

    urlsplit_time = 0
    for i in range(5):
        with open('benchmarks/urls/chromiumUrls.txt') as f:
            for url in f:
                if encode:
                    url = url.encode()
                start = timer()
                a = urlsplit(url)
                end = timer()
                urlsplit_time += end - start
    print("the urlsplit time is", urlsplit_time / 5, "seconds")

    urljoin_time = 0
    for i in range(5):
        with open('benchmarks/urls/chromiumUrls.txt') as f:
            for url in f:
                partial_url = "/asd"
                if encode:
                    url = url.encode()
                    partial_url = partial_url.encode()
                start = timer()
                a = urljoin(url, partial_url)
                end = timer()
                urljoin_time += end - start
    print("the urljoin time is", urljoin_time / 5, "seconds")
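# A minimal sketch of the module-level setup this benchmark presumably relies on
# (not shown in the snippet above); in the real script these lines would normally
# sit at the top of the file.  Whether the benchmarked urlparse/urlsplit/urljoin
# come from scurl or from urllib.parse is an assumption here; scurl exposes all three.
import argparse
from timeit import default_timer as timer

from scurl import urljoin, urlparse, urlsplit  # assumption: benchmarking scurl's parsers

if __name__ == '__main__':
    main()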
def test_attributes_bad_port(self):
    """Check handling of non-integer ports."""
    p = scurl.urlsplit("http://www.example.net:foo")
    self.assertEqual(p.netloc, "www.example.net:foo")
    self.assertRaises(ValueError, lambda: p.port)

    p = scurl.urlparse("http://www.example.net:foo")
    self.assertEqual(p.netloc, "www.example.net:foo")
    self.assertRaises(ValueError, lambda: p.port)
def test_port_casting_failure_message(self):
    message = "Port could not be"
    p1 = scurl.urlparse('http://Server=sde; Service=sde:oracle')
    with self.assertRaisesRegex(ValueError, message):
        p1.port

    p2 = scurl.urlsplit('http://Server=sde; Service=sde:oracle')
    with self.assertRaisesRegex(ValueError, message):
        p2.port
def test_unparse_parse(self):
    for u in ['Python', './Python', 'x-newscheme://foo.com/stuff',
              'x://y', 'x:/y', 'x:/', '/', ]:
        self.assertEqual(scurl.urlunsplit(scurl.urlsplit(u)), u)
        self.assertEqual(scurl.urlunparse(scurl.urlparse(u)), u)
def test_attributes_without_netloc(self):
    # This example is straight from RFC 3261.  It looks like it
    # should allow the username, hostname, and port to be filled
    # in, but doesn't.  Since it's a URI and doesn't use the
    # scheme://netloc syntax, the netloc and related attributes
    # should be left empty.
    uri = "sip:alice@atlanta.com;maddr=239.255.255.1;ttl=15"
    p = scurl.urlsplit(uri)
    self.assertEqual(p.netloc, "")
    self.assertEqual(p.username, None)
    self.assertEqual(p.password, None)
    self.assertEqual(p.hostname, None)
    self.assertEqual(p.port, None)
    self.assertEqual(p.geturl(), uri)

    p = scurl.urlparse(uri)
    self.assertEqual(p.netloc, "")
    self.assertEqual(p.username, None)
    self.assertEqual(p.password, None)
    self.assertEqual(p.hostname, None)
    self.assertEqual(p.port, None)
    self.assertEqual(p.geturl(), uri)

    # You guessed it, repeating the test with bytes input
    uri = b"sip:alice@atlanta.com;maddr=239.255.255.1;ttl=15"
    p = scurl.urlsplit(uri)
    self.assertEqual(p.netloc, b"")
    self.assertEqual(p.username, None)
    self.assertEqual(p.password, None)
    self.assertEqual(p.hostname, None)
    self.assertEqual(p.port, None)
    self.assertEqual(p.geturl(), uri)

    p = scurl.urlparse(uri)
    self.assertEqual(p.netloc, b"")
    self.assertEqual(p.username, None)
    self.assertEqual(p.password, None)
    self.assertEqual(p.hostname, None)
    self.assertEqual(p.port, None)
    self.assertEqual(p.geturl(), uri)
def test_mixed_types_rejected(self):
    # Several functions that process either strings or ASCII encoded bytes
    # accept multiple arguments. Check they reject mixed type input
    with self.assertRaisesRegex(TypeError, "Cannot mix str"):
        scurl.urlparse("www.python.org", b"http")
    with self.assertRaisesRegex(TypeError, "Cannot mix str"):
        scurl.urlparse(b"www.python.org", "http")
    with self.assertRaisesRegex(TypeError, "Cannot mix str"):
        scurl.urlsplit("www.python.org", b"http")
    with self.assertRaisesRegex(TypeError, "Cannot mix str"):
        scurl.urlsplit(b"www.python.org", "http")
    with self.assertRaisesRegex(TypeError, "Cannot mix str"):
        scurl.urlunparse((b"http", "www.python.org", "", "", "", ""))
    with self.assertRaisesRegex(TypeError, "Cannot mix str"):
        scurl.urlunparse(("http", b"www.python.org", "", "", "", ""))
    with self.assertRaisesRegex(TypeError, "Cannot mix str"):
        scurl.urlunsplit((b"http", "www.python.org", "", "", ""))
    with self.assertRaisesRegex(TypeError, "Cannot mix str"):
        scurl.urlunsplit(("http", b"www.python.org", "", "", ""))
    with self.assertRaisesRegex(TypeError, "Cannot mix str"):
        scurl.urljoin("http://python.org", b"http://python.org")
    with self.assertRaisesRegex(TypeError, "Cannot mix str"):
        scurl.urljoin(b"http://python.org", "http://python.org")
def run_urlsplit(urls):
    for url in urls:
        a = scurl.urlsplit(url)
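# Hypothetical timing sketch (not part of the original benchmark): driving
# run_urlsplit with timeit over a made-up URL sample.  The sample URLs and the
# repeat count are illustrative assumptions only.
import timeit

sample_urls = ['http://www.python.org/doc/#frag', 'https://example.com/a?q=1#x']
elapsed = timeit.timeit(lambda: run_urlsplit(sample_urls), number=10000)
print('run_urlsplit x10000:', elapsed, 'seconds')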
def test_urlsplit_attributes(self):
    url = "HTTP://WWW.PYTHON.ORG/doc/#frag"
    p = scurl.urlsplit(url)
    self.assertEqual(p.scheme, "http")
    self.assertEqual(p.netloc, "WWW.PYTHON.ORG")
    self.assertEqual(p.path, "/doc/")
    self.assertEqual(p.query, "")
    self.assertEqual(p.fragment, "frag")
    self.assertEqual(p.username, None)
    self.assertEqual(p.password, None)
    self.assertEqual(p.hostname, "www.python.org")
    self.assertEqual(p.port, None)
    # geturl() won't return exactly the original URL in this case
    # since the scheme is always case-normalized
    # We handle this by ignoring the first 4 characters of the URL
    self.assertEqual(p.geturl()[4:], url[4:])

    url = "http://User:Pass@www.python.org:080/doc/?query=yes#frag"
    p = scurl.urlsplit(url)
    self.assertEqual(p.scheme, "http")
    self.assertEqual(p.netloc, "User:Pass@www.python.org:080")
    self.assertEqual(p.path, "/doc/")
    self.assertEqual(p.query, "query=yes")
    self.assertEqual(p.fragment, "frag")
    self.assertEqual(p.username, "User")
    self.assertEqual(p.password, "Pass")
    self.assertEqual(p.hostname, "www.python.org")
    self.assertEqual(p.port, 80)
    self.assertEqual(p.geturl(), url)

    # Addressing issue1698, which suggests Username can contain
    # "@" characters.  Though not RFC compliant, many ftp sites allow
    # and request email addresses as usernames.
    url = "http://User@example.com:Pass@www.python.org:080/doc/?query=yes#frag"
    p = scurl.urlsplit(url)
    self.assertEqual(p.scheme, "http")
    self.assertEqual(p.netloc, "User@example.com:Pass@www.python.org:080")
    self.assertEqual(p.path, "/doc/")
    self.assertEqual(p.query, "query=yes")
    self.assertEqual(p.fragment, "frag")
    self.assertEqual(p.username, "User@example.com")
    self.assertEqual(p.password, "Pass")
    self.assertEqual(p.hostname, "www.python.org")
    self.assertEqual(p.port, 80)
    self.assertEqual(p.geturl(), url)

    # And check them all again, only with bytes this time
    url = b"HTTP://WWW.PYTHON.ORG/doc/#frag"
    p = scurl.urlsplit(url)
    self.assertEqual(p.scheme, b"http")
    self.assertEqual(p.netloc, b"WWW.PYTHON.ORG")
    self.assertEqual(p.path, b"/doc/")
    self.assertEqual(p.query, b"")
    self.assertEqual(p.fragment, b"frag")
    self.assertEqual(p.username, None)
    self.assertEqual(p.password, None)
    self.assertEqual(p.hostname, b"www.python.org")
    self.assertEqual(p.port, None)
    self.assertEqual(p.geturl()[4:], url[4:])

    url = b"http://User:Pass@www.python.org:080/doc/?query=yes#frag"
    p = scurl.urlsplit(url)
    self.assertEqual(p.scheme, b"http")
    self.assertEqual(p.netloc, b"User:Pass@www.python.org:080")
    self.assertEqual(p.path, b"/doc/")
    self.assertEqual(p.query, b"query=yes")
    self.assertEqual(p.fragment, b"frag")
    self.assertEqual(p.username, b"User")
    self.assertEqual(p.password, b"Pass")
    self.assertEqual(p.hostname, b"www.python.org")
    self.assertEqual(p.port, 80)
    self.assertEqual(p.geturl(), url)

    url = b"http://User@example.com:Pass@www.python.org:080/doc/?query=yes#frag"
    p = scurl.urlsplit(url)
    self.assertEqual(p.scheme, b"http")
    self.assertEqual(p.netloc, b"User@example.com:Pass@www.python.org:080")
    self.assertEqual(p.path, b"/doc/")
    self.assertEqual(p.query, b"query=yes")
    self.assertEqual(p.fragment, b"frag")
    self.assertEqual(p.username, b"User@example.com")
    self.assertEqual(p.password, b"Pass")
    self.assertEqual(p.hostname, b"www.python.org")
    self.assertEqual(p.port, 80)
    self.assertEqual(p.geturl(), url)

    # Verify an illegal port raises ValueError
    url = b"HTTP://WWW.PYTHON.ORG:65536/doc/#frag"
    p = scurl.urlsplit(url)
    with self.assertRaisesRegex(ValueError, "out of range"):
        p.port
def test_unparse_parse(self):
    str_cases = ['Python', './Python', 'x-newscheme://foo.com/stuff',
                 'x://y', 'x:/y', 'x:/', '/', ]
    bytes_cases = [x.encode('ascii') for x in str_cases]
    for u in str_cases + bytes_cases:
        self.assertEqual(scurl.urlunsplit(scurl.urlsplit(u)), u)
        self.assertEqual(scurl.urlunparse(scurl.urlparse(u)), u)