def parse_url(self):
    """Split ``self.url`` into its pieces and store them on the instance.

    The URL is matched against ``URL_PARSE``.  On success the following
    attributes are assigned:

    * ``protocol`` -- the matched protocol, or ``"http://"`` when absent
    * ``username`` / ``password`` -- only when present; each is passed
      through ``unquote`` and then ``ensure_str``
    * ``host`` -- required
    * ``port`` -- only when present

    Finally ``self.parse_components()`` is called.

    :raises ProxyStringError: if the URL does not match ``URL_PARSE`` or
                              the match has no host component
    """
    # NOTE: If this changes, update tests/regex/proxy.py
    #
    # proxy=[protocol://][username[:password]@]host[:port][path][?query][#fragment]
    # groups (both named and numbered)
    # 1 = protocol
    # 2 = username
    # 3 = password
    # 4 = host
    # 5 = port
    # 6 = path
    # 7 = query
    # 8 = fragment
    parsed = URL_PARSE.match(self.url)
    if not parsed:
        raise ProxyStringError(_("malformed URL, cannot parse it."))

    # Fall back to http when the URL carries no protocol.
    scheme = parsed.group("protocol")
    self.protocol = scheme if scheme else "http://"

    # Credentials are optional; they are stored unquoted.
    user = parsed.group("username")
    if user:
        self.username = ensure_str(unquote(user))

    secret = parsed.group("password")
    if secret:
        self.password = ensure_str(unquote(secret))

    # The host is mandatory.  This check comes after the assignments above
    # so that protocol/username/password are already set when it raises.
    hostname = parsed.group("host")
    if not hostname:
        raise ProxyStringError(_("URL has no host component"))
    self.host = hostname

    port_number = parsed.group("port")
    if port_number:
        self.port = port_number

    self.parse_components()
def url_regex_test(self):
    """Run a list of possible URL values through the regex and check results.

    Each entry of ``tests`` has the form::

        (URL string, expected match.groups() tuple)

    The expected value is ``None`` when ``URL_PARSE`` must not match.
    The groups are, in order: protocol, username, password, host, port,
    path, query, fragment.

    Every entry is checked; all mismatches are collected and reported
    together in a single failure at the end.
    """
    tests = [
        ("proxy.host",
         (None, None, None, 'proxy.host', None, None, None, None)),
        ("proxy.host:3128",
         (None, None, None, 'proxy.host', '3128', None, None, None)),
        ("user:password@proxy.host",
         (None, 'user', 'password', 'proxy.host', None, None, None, None)),
        ("user@proxy.host",
         (None, 'user', None, 'proxy.host', None, None, None, None)),
        ("user:password@proxy.host:3128",
         (None, 'user', 'password', 'proxy.host', '3128', None, None, None)),
        ("user@proxy.host:3128",
         (None, 'user', None, 'proxy.host', '3128', None, None, None)),
        ("proxy.host/blah/blah",
         (None, None, None, 'proxy.host', None, '/blah/blah', None, None)),
        ("proxy.host:3128/blah/blah",
         (None, None, None, 'proxy.host', '3128', '/blah/blah', None, None)),
        ("user:password@proxy.host/blah/blah",
         (None, 'user', 'password', 'proxy.host', None, '/blah/blah', None, None)),
        ("user@proxy.host/blah/blah",
         (None, 'user', None, 'proxy.host', None, '/blah/blah', None, None)),
        ("user:password@proxy.host:3128/blah/blah",
         (None, 'user', 'password', 'proxy.host', '3128', "/blah/blah", None, None)),
        ("user@proxy.host:3128/blah/blah",
         (None, 'user', None, 'proxy.host', '3128', "/blah/blah", None, None)),
        ("http://proxy.host",
         ('http://', None, None, 'proxy.host', None, None, None, None)),
        ("http://proxy.host:3128",
         ('http://', None, None, 'proxy.host', '3128', None, None, None)),
        ("http://user:password@proxy.host",
         ('http://', 'user', 'password', 'proxy.host', None, None, None, None)),
        ("http://user@proxy.host",
         ('http://', 'user', None, 'proxy.host', None, None, None, None)),
        ("http://user:password@proxy.host:3128",
         ('http://', 'user', 'password', 'proxy.host', '3128', None, None, None)),
        ("http://user@proxy.host:3128",
         ('http://', 'user', None, 'proxy.host', '3128', None, None, None)),
        ("http://proxy.host/blah/blah",
         ('http://', None, None, 'proxy.host', None, '/blah/blah', None, None)),
        ("http://proxy.host:3128/blah/blah",
         ('http://', None, None, 'proxy.host', '3128', '/blah/blah', None, None)),
        ("http://user:password@proxy.host/blah/blah",
         ("http://", 'user', 'password', 'proxy.host', None, '/blah/blah', None, None)),
        ("http://%75ser:password@proxy.host/blah/blah",
         ("http://", '%75ser', 'password', 'proxy.host', None, '/blah/blah', None, None)),
        ("http://user:%70assword@proxy.host/blah/blah",
         ("http://", 'user', '%70assword', 'proxy.host', None, '/blah/blah', None, None)),
        ("http://user@proxy.host/blah/blah",
         ("http://", 'user', None, 'proxy.host', None, '/blah/blah', None, None)),
        ("http://user@proxy.host/blah/bla%68",
         ("http://", 'user', None, 'proxy.host', None, '/blah/bla%68', None, None)),
        ("http://user:password@proxy.host:3128/blah/blah",
         ("http://", 'user', 'password', 'proxy.host', '3128', '/blah/blah', None, None)),
        ("http://user@proxy.host:3128/blah/blah",
         ("http://", 'user', None, 'proxy.host', '3128', '/blah/blah', None, None)),
        ("http://user@proxy.host:3128/blah/blah?query",
         ("http://", 'user', None, 'proxy.host', '3128', '/blah/blah', "query", None)),
        ("http://user@proxy.host:3128/blah/blah?query?",
         ("http://", 'user', None, 'proxy.host', '3128', '/blah/blah', "query?", None)),
        ("http://user@proxy.host:3128/blah/blah?query=whatever",
         ("http://", 'user', None, 'proxy.host', '3128', '/blah/blah', "query=whatever", None)),
        ("http://user@proxy.host:3128/blah/blah?query=whate%76er",
         ("http://", 'user', None, 'proxy.host', '3128', '/blah/blah', "query=whate%76er", None)),
        ("http://user@proxy.host:3128/blah/blah?",
         ("http://", 'user', None, 'proxy.host', '3128', '/blah/blah', "", None)),
        ("http://user@proxy.host:3128/blah/blah#fragment",
         ("http://", 'user', None, 'proxy.host', '3128', '/blah/blah', None, "fragment")),
        ("http://user@proxy.host:3128/blah/blah#",
         ("http://", 'user', None, 'proxy.host', '3128', '/blah/blah', None, "")),
        ("http://user@proxy.host:3128/blah/blah#fragm%65nt",
         ("http://", 'user', None, 'proxy.host', '3128', '/blah/blah', None, "fragm%65nt")),
        ("http://user@proxy.host:3128/blah/blah?query=whatever#fragment",
         ("http://", 'user', None, 'proxy.host', '3128', '/blah/blah', "query=whatever", "fragment")),

        # Same, but with IPv4 literals
        ("1.2.3.4",
         (None, None, None, '1.2.3.4', None, None, None, None)),
        ("1.2.3.4:3128",
         (None, None, None, '1.2.3.4', '3128', None, None, None)),
        ("user:password@1.2.3.4",
         (None, 'user', 'password', '1.2.3.4', None, None, None, None)),
        ("user@1.2.3.4",
         (None, 'user', None, '1.2.3.4', None, None, None, None)),
        ("user:password@1.2.3.4:3128",
         (None, 'user', 'password', '1.2.3.4', '3128', None, None, None)),
        ("user@1.2.3.4:3128",
         (None, 'user', None, '1.2.3.4', '3128', None, None, None)),
        ("1.2.3.4/blah/blah",
         (None, None, None, '1.2.3.4', None, '/blah/blah', None, None)),
        ("1.2.3.4:3128/blah/blah",
         (None, None, None, '1.2.3.4', '3128', '/blah/blah', None, None)),
        ("user:password@1.2.3.4/blah/blah",
         (None, 'user', 'password', '1.2.3.4', None, '/blah/blah', None, None)),
        ("user@1.2.3.4/blah/blah",
         (None, 'user', None, '1.2.3.4', None, '/blah/blah', None, None)),
        ("user:password@1.2.3.4:3128/blah/blah",
         (None, 'user', 'password', '1.2.3.4', '3128', "/blah/blah", None, None)),
        ("user@1.2.3.4:3128/blah/blah",
         (None, 'user', None, '1.2.3.4', '3128', "/blah/blah", None, None)),
        ("http://1.2.3.4",
         ('http://', None, None, '1.2.3.4', None, None, None, None)),
        ("http://1.2.3.4:3128",
         ('http://', None, None, '1.2.3.4', '3128', None, None, None)),
        ("http://user:password@1.2.3.4",
         ('http://', 'user', 'password', '1.2.3.4', None, None, None, None)),
        ("http://user@1.2.3.4",
         ('http://', 'user', None, '1.2.3.4', None, None, None, None)),
        ("http://user:password@1.2.3.4:3128",
         ('http://', 'user', 'password', '1.2.3.4', '3128', None, None, None)),
        ("http://user@1.2.3.4:3128",
         ('http://', 'user', None, '1.2.3.4', '3128', None, None, None)),
        ("http://1.2.3.4/blah/blah",
         ('http://', None, None, '1.2.3.4', None, '/blah/blah', None, None)),
        ("http://1.2.3.4:3128/blah/blah",
         ('http://', None, None, '1.2.3.4', '3128', '/blah/blah', None, None)),
        ("http://user:password@1.2.3.4/blah/blah",
         ("http://", 'user', 'password', '1.2.3.4', None, '/blah/blah', None, None)),
        ("http://%75ser:password@1.2.3.4/blah/blah",
         ("http://", '%75ser', 'password', '1.2.3.4', None, '/blah/blah', None, None)),
        ("http://user:%70assword@1.2.3.4/blah/blah",
         ("http://", 'user', '%70assword', '1.2.3.4', None, '/blah/blah', None, None)),
        ("http://user@1.2.3.4/blah/blah",
         ("http://", 'user', None, '1.2.3.4', None, '/blah/blah', None, None)),
        ("http://user@1.2.3.4/blah/bla%68",
         ("http://", 'user', None, '1.2.3.4', None, '/blah/bla%68', None, None)),
        ("http://user:password@1.2.3.4:3128/blah/blah",
         ("http://", 'user', 'password', '1.2.3.4', '3128', '/blah/blah', None, None)),
        ("http://user@1.2.3.4:3128/blah/blah",
         ("http://", 'user', None, '1.2.3.4', '3128', '/blah/blah', None, None)),
        ("http://user@1.2.3.4:3128/blah/blah?query",
         ("http://", 'user', None, '1.2.3.4', '3128', '/blah/blah', "query", None)),
        ("http://user@1.2.3.4:3128/blah/blah?query?",
         ("http://", 'user', None, '1.2.3.4', '3128', '/blah/blah', "query?", None)),
        ("http://user@1.2.3.4:3128/blah/blah?query=whatever",
         ("http://", 'user', None, '1.2.3.4', '3128', '/blah/blah', "query=whatever", None)),
        ("http://user@1.2.3.4:3128/blah/blah?query=whate%76er",
         ("http://", 'user', None, '1.2.3.4', '3128', '/blah/blah', "query=whate%76er", None)),
        ("http://user@1.2.3.4:3128/blah/blah?",
         ("http://", 'user', None, '1.2.3.4', '3128', '/blah/blah', "", None)),
        ("http://user@1.2.3.4:3128/blah/blah#fragment",
         ("http://", 'user', None, '1.2.3.4', '3128', '/blah/blah', None, "fragment")),
        ("http://user@1.2.3.4:3128/blah/blah#",
         ("http://", 'user', None, '1.2.3.4', '3128', '/blah/blah', None, "")),
        ("http://user@1.2.3.4:3128/blah/blah#fragm%65nt",
         ("http://", 'user', None, '1.2.3.4', '3128', '/blah/blah', None, "fragm%65nt")),
        ("http://user@1.2.3.4:3128/blah/blah?query=whatever#fragment",
         ("http://", 'user', None, '1.2.3.4', '3128', '/blah/blah', "query=whatever", "fragment")),

        # And again, but with IPv6 literals
        ("[dead::beef]",
         (None, None, None, '[dead::beef]', None, None, None, None)),
        ("[dead::beef]:3128",
         (None, None, None, '[dead::beef]', '3128', None, None, None)),
        ("user:password@[dead::beef]",
         (None, 'user', 'password', '[dead::beef]', None, None, None, None)),
        ("user@[dead::beef]",
         (None, 'user', None, '[dead::beef]', None, None, None, None)),
        ("user:password@[dead::beef]:3128",
         (None, 'user', 'password', '[dead::beef]', '3128', None, None, None)),
        ("user@[dead::beef]:3128",
         (None, 'user', None, '[dead::beef]', '3128', None, None, None)),
        ("[dead::beef]/blah/blah",
         (None, None, None, '[dead::beef]', None, '/blah/blah', None, None)),
        ("[dead::beef]:3128/blah/blah",
         (None, None, None, '[dead::beef]', '3128', '/blah/blah', None, None)),
        ("user:password@[dead::beef]/blah/blah",
         (None, 'user', 'password', '[dead::beef]', None, '/blah/blah', None, None)),
        ("user@[dead::beef]/blah/blah",
         (None, 'user', None, '[dead::beef]', None, '/blah/blah', None, None)),
        ("user:password@[dead::beef]:3128/blah/blah",
         (None, 'user', 'password', '[dead::beef]', '3128', "/blah/blah", None, None)),
        ("user@[dead::beef]:3128/blah/blah",
         (None, 'user', None, '[dead::beef]', '3128', "/blah/blah", None, None)),
        ("http://[dead::beef]",
         ('http://', None, None, '[dead::beef]', None, None, None, None)),
        ("http://[dead::beef]:3128",
         ('http://', None, None, '[dead::beef]', '3128', None, None, None)),
        ("http://user:password@[dead::beef]",
         ('http://', 'user', 'password', '[dead::beef]', None, None, None, None)),
        ("http://user@[dead::beef]",
         ('http://', 'user', None, '[dead::beef]', None, None, None, None)),
        ("http://user:password@[dead::beef]:3128",
         ('http://', 'user', 'password', '[dead::beef]', '3128', None, None, None)),
        ("http://user@[dead::beef]:3128",
         ('http://', 'user', None, '[dead::beef]', '3128', None, None, None)),
        ("http://[dead::beef]/blah/blah",
         ('http://', None, None, '[dead::beef]', None, '/blah/blah', None, None)),
        ("http://[dead::beef]:3128/blah/blah",
         ('http://', None, None, '[dead::beef]', '3128', '/blah/blah', None, None)),
        ("http://user:password@[dead::beef]/blah/blah",
         ("http://", 'user', 'password', '[dead::beef]', None, '/blah/blah', None, None)),
        ("http://%75ser:password@[dead::beef]/blah/blah",
         ("http://", '%75ser', 'password', '[dead::beef]', None, '/blah/blah', None, None)),
        ("http://user:%70assword@[dead::beef]/blah/blah",
         ("http://", 'user', '%70assword', '[dead::beef]', None, '/blah/blah', None, None)),
        ("http://user@[dead::beef]/blah/blah",
         ("http://", 'user', None, '[dead::beef]', None, '/blah/blah', None, None)),
        ("http://user@[dead::beef]/blah/bla%68",
         ("http://", 'user', None, '[dead::beef]', None, '/blah/bla%68', None, None)),
        ("http://user:password@[dead::beef]:3128/blah/blah",
         ("http://", 'user', 'password', '[dead::beef]', '3128', '/blah/blah', None, None)),
        ("http://user@[dead::beef]:3128/blah/blah",
         ("http://", 'user', None, '[dead::beef]', '3128', '/blah/blah', None, None)),
        ("http://user@[dead::beef]:3128/blah/blah?query",
         ("http://", 'user', None, '[dead::beef]', '3128', '/blah/blah', "query", None)),
        ("http://user@[dead::beef]:3128/blah/blah?query?",
         ("http://", 'user', None, '[dead::beef]', '3128', '/blah/blah', "query?", None)),
        ("http://user@[dead::beef]:3128/blah/blah?query=whatever",
         ("http://", 'user', None, '[dead::beef]', '3128', '/blah/blah', "query=whatever", None)),
        ("http://user@[dead::beef]:3128/blah/blah?query=whate%76er",
         ("http://", 'user', None, '[dead::beef]', '3128', '/blah/blah', "query=whate%76er", None)),
        ("http://user@[dead::beef]:3128/blah/blah?",
         ("http://", 'user', None, '[dead::beef]', '3128', '/blah/blah', "", None)),
        ("http://user@[dead::beef]:3128/blah/blah#fragment",
         ("http://", 'user', None, '[dead::beef]', '3128', '/blah/blah', None, "fragment")),
        ("http://user@[dead::beef]:3128/blah/blah#",
         ("http://", 'user', None, '[dead::beef]', '3128', '/blah/blah', None, "")),
        ("http://user@[dead::beef]:3128/blah/blah#fragm%65nt",
         ("http://", 'user', None, '[dead::beef]', '3128', '/blah/blah', None, "fragm%65nt")),
        ("http://user@[dead::beef]:3128/blah/blah?query=whatever#fragment",
         ("http://", 'user', None, '[dead::beef]', '3128', '/blah/blah', "query=whatever", "fragment")),

        # Invalid schemes
        ("0http://proxy.host/", None),
        ("h~ttp://proxy.host/", None),

        # NOTE(review): several of the negative inputs below still contain
        # what look like e-mail-obfuscation / masking artifacts
        # ("[email protected]", "*****:*****@").  Because the expected result
        # is None, the intended literals cannot be derived from the table
        # itself, so they are left untouched -- restore them from the
        # upstream test data and confirm each one really exercises the
        # category it is listed under.

        # Invalid usernames and passwords
        ("http://%[email protected]/", None),
        ("http://*[email protected]/", None),
        ("http://*****:*****@proxy.host/", None),
        ("http://*****:*****@[email protected]/", None),

        # Invalid paths
        ("http://*****:*****@proxy.host/%xxlah/blah", None),
        ("http://*****:*****@proxy.host/[]lah/blah", None),

        # Invalid queries
        ("http://proxy.host/blah/blah?quer%xx", None),
        ("http://proxy.host/blah/blah?que[]y", None),

        # Invalid fragments
        ("http://proxy.host/blah/blah#fragment#", None),
        ("http://proxy.host/blah/blah#%xxragment", None),

        # Unbracketed IPv6
        ("fe80::1234:56:78", None),
        ("fe80::1234:56:78/blah/blah", None),
        ("http://fe80::1234:56:78/blah/blah", None),
    ]

    # Check every entry rather than stopping at the first mismatch, so one
    # run reports all broken cases.
    failures = []
    for proxy, expected in tests:
        match = URL_PARSE.match(proxy)
        actual = match.groups() if match else None
        if actual != expected:
            failures.append(
                "Proxy parse error: `%s' did not parse as `%s': %s"
                % (proxy, expected, actual)
            )

    if failures:
        self.fail("\n".join(failures))