def test_urljoin_no_canonicalize(self):
    # urljoin must return the joined URL verbatim (e.g. spaces stay
    # unencoded), matching the standard library's behavior exactly.
    bases = [
        'http://example.com',
        'http://example.com/',
        'http://example.com/white space',
        'http://example.com/white space/foo bar/',
        'file://example.com/white space',
        'file://example.com/'
    ]
    urls = [
        '',
        'foo/bar',
        'white space',
        'white space/foo bar',
        'http://example2.com/',
        'http://example2.com',
        'http://example2.com/white space',
        'file://a/b/c/d'
    ]
    # Compare scurl against the stdlib with the arguments in both orders.
    for base in bases:
        for url in urls:
            self.assertEqual(scurl.urljoin(base, url),
                             stdlib.urljoin(base, url))
            self.assertEqual(scurl.urljoin(url, base),
                             stdlib.urljoin(url, base))

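# What "no canonicalize" means in one concrete case: urllib.parse joins these
# URLs without percent-encoding the spaces, and scurl is held to byte-for-byte
# the same output. A quick standalone sketch of the expected stdlib result:
from urllib import parse as stdlib

joined = stdlib.urljoin('http://example.com/white space/foo bar/',
                        'white space')
assert joined == 'http://example.com/white space/foo bar/white space'
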
# Standalone benchmark script; the functions under test come from scurl
# (swap in urllib.parse imports to time the stdlib instead).
import argparse
from timeit import default_timer as timer

from scurl import urljoin, urlparse, urlsplit


def main():
    parser = argparse.ArgumentParser(
        description='Measure the time of urlsplit and urljoin')
    parser.add_argument('--encode', action='store_true',
                        help='encode the urls (default: False)')
    args = parser.parse_args()
    encode = args.encode

    # Each timing is accumulated over 5 passes through the URL corpus,
    # then reported as the per-pass average.
    urlparse_time = 0
    for _ in range(5):
        with open('benchmarks/urls/chromiumUrls.txt') as f:
            for url in f:
                if encode:
                    url = url.encode()
                start = timer()
                a = urlparse(url)
                end = timer()
                urlparse_time += end - start
    print("the urlparse time is", urlparse_time / 5, "seconds")

    urlsplit_time = 0
    for _ in range(5):
        with open('benchmarks/urls/chromiumUrls.txt') as f:
            for url in f:
                if encode:
                    url = url.encode()
                start = timer()
                a = urlsplit(url)
                end = timer()
                urlsplit_time += end - start
    print("the urlsplit time is", urlsplit_time / 5, "seconds")

    urljoin_time = 0
    partial_url = "/asd"
    if encode:
        partial_url = partial_url.encode()
    for _ in range(5):
        with open('benchmarks/urls/chromiumUrls.txt') as f:
            for url in f:
                if encode:
                    url = url.encode()
                start = timer()
                a = urljoin(url, partial_url)
                end = timer()
                urljoin_time += end - start
    print("the urljoin time is", urljoin_time / 5, "seconds")

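# A minimal entry point for the script (a sketch; it assumes the script is
# launched from the repository root so the relative path
# benchmarks/urls/chromiumUrls.txt resolves, e.g. `python benchmark.py
# --encode`, where benchmark.py is a hypothetical file name):
if __name__ == '__main__':
    main()
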
def test_check_invalid_urls(self):
    # Scheme-less and relative inputs must degrade the same way in both
    # implementations rather than raising or canonicalizing differently.
    invalid_urls = [
        'foo//example.com/',
        'bar//example.com/',
        'foobar//example.com/',
        'foobar',
        '#'
    ]
    invalid_urls_2 = ['foobar', 'foo/bar', 'foo/bar/../2']
    for invalid_url in invalid_urls:
        for invalid_url_2 in invalid_urls_2:
            self.assertEqual(scurl.urljoin(invalid_url, invalid_url_2),
                             stdlib.urljoin(invalid_url, invalid_url_2))

def test_mixed_types_rejected(self):
    # Several functions that process either strings or ASCII-encoded bytes
    # accept multiple arguments. Check that they reject mixed-type input.
    with self.assertRaisesRegex(TypeError, "Cannot mix str"):
        scurl.urlparse("www.python.org", b"http")
    with self.assertRaisesRegex(TypeError, "Cannot mix str"):
        scurl.urlparse(b"www.python.org", "http")
    with self.assertRaisesRegex(TypeError, "Cannot mix str"):
        scurl.urlsplit("www.python.org", b"http")
    with self.assertRaisesRegex(TypeError, "Cannot mix str"):
        scurl.urlsplit(b"www.python.org", "http")
    with self.assertRaisesRegex(TypeError, "Cannot mix str"):
        scurl.urlunparse((b"http", "www.python.org", "", "", "", ""))
    with self.assertRaisesRegex(TypeError, "Cannot mix str"):
        scurl.urlunparse(("http", b"www.python.org", "", "", "", ""))
    with self.assertRaisesRegex(TypeError, "Cannot mix str"):
        scurl.urlunsplit((b"http", "www.python.org", "", "", ""))
    with self.assertRaisesRegex(TypeError, "Cannot mix str"):
        scurl.urlunsplit(("http", b"www.python.org", "", "", ""))
    with self.assertRaisesRegex(TypeError, "Cannot mix str"):
        scurl.urljoin("http://python.org", b"http://python.org")
    with self.assertRaisesRegex(TypeError, "Cannot mix str"):
        scurl.urljoin(b"http://python.org", "http://python.org")

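# The "Cannot mix str" pattern is the start of the stdlib's own error message:
# urllib.parse raises TypeError("Cannot mix str and non-str arguments") from
# its internal argument-coercion step, so scurl mirrors that behavior. For
# comparison, a standalone sketch of the stdlib side:
from urllib.parse import urljoin as stdlib_urljoin

try:
    stdlib_urljoin("http://python.org", b"http://python.org")
except TypeError as exc:
    print(exc)  # Cannot mix str and non-str arguments
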
def checkJoin(self, base, relurl, expected):
    self.assertEqual(scurl.urljoin(base, relurl), expected,
                     (base, relurl, expected))

def checkJoin(self, base, relurl, expected):
    # Check the str form first, then repeat the same join with every
    # component encoded as ASCII bytes.
    str_components = (base, relurl, expected)
    self.assertEqual(scurl.urljoin(base, relurl), expected)
    bytes_components = baseb, relurlb, expectedb = [
        x.encode('ascii') for x in str_components]
    self.assertEqual(scurl.urljoin(baseb, relurlb), expectedb)

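# The two checkJoin variants above are helpers on unittest.TestCase
# subclasses (the second also round-trips every component through bytes).
# A minimal sketch of the scaffolding the tests assume; the class name is
# hypothetical, and `stdlib` is urllib.parse, matching how it is referenced
# throughout:
import unittest

import scurl
from urllib import parse as stdlib


class UrlJoinTests(unittest.TestCase):  # hypothetical name
    def checkJoin(self, base, relurl, expected):
        self.assertEqual(scurl.urljoin(base, relurl), expected,
                         (base, relurl, expected))

    def test_simple_join(self):
        self.checkJoin('http://a/b/c', 'd', 'http://a/b/d')


if __name__ == '__main__':
    unittest.main()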