def canonicalize(self):
    """Returns a canonical form of this URI

    The canonical form is built from the parsed components as follows:

    *   the scheme and the host are converted to lower case

    *   userinfo, when present, is copied unchanged

    *   the port is dropped when it is empty or equal to
        :attr:`DEFAULT_PORT`; otherwise it is written as a plain
        decimal integer

    *   an empty abs_path is replaced by "/"; a non-empty abs_path,
        the rel_path and the query are each passed through
        :func:`uri.canonicalize_data`

    *   the fragment, when present, is copied unchanged

    The assembled string is passed to :meth:`uri.URI.from_octets` and
    the result of that call is returned.

    A non-empty port that cannot be parsed by :func:`int` raises
    ValueError."""
    new_uri = []
    if self.scheme is not None:
        new_uri.append(self.scheme.lower())
        new_uri.append(':')
    if self.authority is not None:
        new_uri.append('//')
        if self.userinfo is not None:
            new_uri.append(self.userinfo)
            new_uri.append('@')
        new_uri.append(self.host.lower())
        if self.port:  # port could be an empty string
            port = int(self.port)
            if port != self.DEFAULT_PORT:
                new_uri.append(':')
                # reuse the parsed value rather than converting twice
                new_uri.append("%i" % port)
    if self.abs_path is not None:
        if not self.abs_path:
            # an empty absolute path is equivalent to the root path
            new_uri.append("/")
        else:
            new_uri.append(uri.canonicalize_data(self.abs_path))
    elif self.rel_path is not None:
        new_uri.append(uri.canonicalize_data(self.rel_path))
    if self.query is not None:
        new_uri.append('?')
        new_uri.append(uri.canonicalize_data(self.query))
    if self.fragment is not None:
        new_uri.append('#')
        new_uri.append(self.fragment)
    # str.join works on both Python 2 and 3; string.join() was removed
    # from the string module in Python 3
    return uri.URI.from_octets(''.join(new_uri))
def test_canonicalize_data(self):
    # Checks uri.canonicalize_data with its default settings and with
    # alternative 'allowed' tests passed by keyword and by position.
    escaped = "%2D%5F%2e%21%7e%2A%27%28%29%41%5a%61%7A%30%39"
    messy = '"<[one #word\x09or two\r\n]>"'

    # non-ASCII input must be rejected with UnicodeEncodeError
    try:
        uri.canonicalize_data(ul('Caf\xe9'))
    except UnicodeEncodeError:
        pass
    else:
        self.fail("non-ASCII character for canonicalisation")

    # default behaviour: escapes of unreserved characters are removed
    result = uri.canonicalize_data(escaped)
    self.assertTrue(
        result == "-_.!~*'()AZaz09",
        "unreserved characters are unescaped")

    # with the RFC 2396 test the square brackets are escaped too
    result = uri.canonicalize_data(
        messy, allowed_test=uri.is_allowed_2396)
    self.assertTrue(
        result == '%22%3C%5Bone%20%23word%09or%20two%0D%0A%5D%3E%22',
        "escape chars neither unreserved nor reserved (rfc2396)")

    # with the default test the square brackets are left alone
    result = uri.canonicalize_data(messy)
    self.assertTrue(
        result == '%22%3C[one%20%23word%09or%20two%0D%0A]%3E%22',
        "escape chars neither unreserved nor reserved")

    # passing is_alphanum effectively causes 'marks' to stay escaped;
    # only the hex digits of their escapes are upper-cased
    result = uri.canonicalize_data(escaped, uri.is_alphanum)
    self.assertTrue(
        result == "%2D%5F%2E%21%7E%2A%27%28%29AZaz09",
        "(only) unreserved characters are unescaped")

    # passing lambda x: False effectively causes every escape to stay
    # as-is, again with upper-cased hex digits
    result = uri.canonicalize_data(escaped, lambda x: False)
    self.assertTrue(
        result == "%2D%5F%2E%21%7E%2A%27%28%29%41%5A%61%7A%30%39",
        "no characters are unescaped")
def canonicalize(self):
    """Returns a canonical form of this URI

    The scheme and nid are converted to lower case.  The nss is passed
    through :func:`uri.canonicalize_data` with a test function that
    always returns False.  The three parts are joined with ':' and the
    result of :meth:`uri.URI.from_octets` on that string is
    returned."""
    def never_allowed(x):
        # rejects every character offered to it
        return False

    scheme = self.scheme.lower()
    nid = self.nid.lower()
    nss = uri.canonicalize_data(self.nss, never_allowed)
    return uri.URI.from_octets("%s:%s:%s" % (scheme, nid, nss))