def test_iri_support(self):
    # URI -> IRI: the punycode host is turned into unicode and the
    # percent-encoded UTF-8 in userinfo and path is decoded.
    self.assert_strict_equal(urls.uri_to_iri('http://xn--n3h.net/'),
                             u'http://\u2603.net/')
    self.assert_strict_equal(
        urls.uri_to_iri(
            b'http://%C3%BCser:p%C3%A4ssword@xn--n3h.net/p%C3%A5th'),
        u'http://\xfcser:p\xe4ssword@\u2603.net/p\xe5th')

    # IRI -> URI: the inverse direction of the two cases above.
    self.assert_strict_equal(urls.iri_to_uri(u'http://☃.net/'),
                             'http://xn--n3h.net/')
    self.assert_strict_equal(
        urls.iri_to_uri(u'http://üser:pässword@☃.net/påth'),
        'http://%C3%BCser:p%C3%A4ssword@xn--n3h.net/p%C3%A5th')

    # Percent-encoded reserved characters must be left encoded.
    self.assert_strict_equal(
        urls.uri_to_iri('http://test.com/%3Fmeh?foo=%26%2F'),
        u'http://test.com/%3Fmeh?foo=%26%2F')

    # this should work as well, might break on 2.4 because of a broken
    # idna codec
    self.assert_strict_equal(urls.uri_to_iri(b'/foo'), u'/foo')
    self.assert_strict_equal(urls.iri_to_uri(u'/foo'), '/foo')

    # A port in the netloc is expected to survive the IDNA encoding.
    self.assert_strict_equal(
        urls.iri_to_uri(u'http://föö.com:8080/bam/baz'),
        'http://xn--f-1gaa.com:8080/bam/baz')
def test_iri_support():
    """Check URI <-> IRI conversion for hosts, userinfo, paths and ports."""
    # URI -> IRI: the punycode host is turned into unicode and the
    # percent-encoded UTF-8 in userinfo and path is decoded.
    strict_eq(urls.uri_to_iri("http://xn--n3h.net/"), u"http://\u2603.net/")
    strict_eq(
        urls.uri_to_iri(b"http://%C3%BCser:p%C3%A4ssword@xn--n3h.net/p%C3%A5th"),
        u"http://\xfcser:p\xe4ssword@\u2603.net/p\xe5th",
    )

    # IRI -> URI: the inverse direction of the two cases above.
    strict_eq(urls.iri_to_uri(u"http://☃.net/"), "http://xn--n3h.net/")
    strict_eq(
        urls.iri_to_uri(u"http://üser:pässword@☃.net/påth"),
        "http://%C3%BCser:p%C3%A4ssword@xn--n3h.net/p%C3%A5th",
    )

    # Percent-encoded reserved characters must be left encoded.
    strict_eq(
        urls.uri_to_iri("http://test.com/%3Fmeh?foo=%26%2F"),
        u"http://test.com/%3Fmeh?foo=%26%2F",
    )

    # this should work as well, might break on 2.4 because of a broken
    # idna codec
    strict_eq(urls.uri_to_iri(b"/foo"), u"/foo")
    strict_eq(urls.iri_to_uri(u"/foo"), "/foo")

    # A port in the netloc is expected to survive the IDNA encoding.
    strict_eq(
        urls.iri_to_uri(u"http://föö.com:8080/bam/baz"),
        "http://xn--f-1gaa.com:8080/bam/baz",
    )
def test_iri_support():
    """Check URI <-> IRI conversion for hosts, userinfo, paths and ports."""
    # URI -> IRI: the punycode host is turned into unicode and the
    # percent-encoded UTF-8 in userinfo and path is decoded.
    assert urls.uri_to_iri("http://xn--n3h.net/") == "http://\u2603.net/"
    assert (
        urls.uri_to_iri(b"http://%C3%BCser:p%C3%A4ssword@xn--n3h.net/p%C3%A5th")
        == "http://\xfcser:p\xe4ssword@\u2603.net/p\xe5th"
    )

    # IRI -> URI: the inverse direction of the two cases above.
    assert urls.iri_to_uri("http://☃.net/") == "http://xn--n3h.net/"
    assert (
        urls.iri_to_uri("http://üser:pässword@☃.net/påth")
        == "http://%C3%BCser:p%C3%A4ssword@xn--n3h.net/p%C3%A5th"
    )

    # Percent-encoded reserved characters must be left encoded.
    assert (
        urls.uri_to_iri("http://test.com/%3Fmeh?foo=%26%2F")
        == "http://test.com/%3Fmeh?foo=%26%2F"
    )

    # this should work as well, might break on 2.4 because of a broken
    # idna codec
    assert urls.uri_to_iri(b"/foo") == "/foo"
    assert urls.iri_to_uri("/foo") == "/foo"

    # A port in the netloc is expected to survive the IDNA encoding.
    assert (
        urls.iri_to_uri("http://föö.com:8080/bam/baz")
        == "http://xn--f-1gaa.com:8080/bam/baz"
    )
def get_current_url(
    environ,
    root_only=False,
    strip_querystring=False,
    host_only=False,
    trusted_hosts=None,
):
    """Rebuild the URL of the current request (or a prefix of it) as an IRI.

    Example:

    >>> from werkzeug.test import create_environ
    >>> env = create_environ("/?param=foo", "http://localhost/script")
    >>> get_current_url(env)
    'http://localhost/script/?param=foo'
    >>> get_current_url(env, root_only=True)
    'http://localhost/script/'
    >>> get_current_url(env, host_only=True)
    'http://localhost/'
    >>> get_current_url(env, strip_querystring=True)
    'http://localhost/script/'

    The host can optionally be checked against a list of trusted hosts;
    an untrusted host raises
    :exc:`~werkzeug.exceptions.SecurityError`.

    The result is an IRI, not a URI, so it may contain unicode
    characters. Use :func:`~werkzeug.urls.iri_to_uri` when an ASCII-only
    representation is required:

    >>> from werkzeug.urls import iri_to_uri
    >>> iri_to_uri(get_current_url(env))
    'http://localhost/script/?param=foo'

    :param environ: the WSGI environment to get the current URL from.
    :param root_only: set to `True` to return only the root URL.
    :param strip_querystring: set to `True` to leave out the query string.
    :param host_only: set to `True` to return only the host URL.
    :param trusted_hosts: a list of trusted hosts, see
                          :func:`host_is_trusted` for more information.
    """
    pieces = [environ["wsgi.url_scheme"], "://", get_host(environ, trusted_hosts)]
    if host_only:
        return uri_to_iri("".join(pieces) + "/")

    # Script root, always terminated by exactly one slash.
    script_name = wsgi_get_bytes(environ.get("SCRIPT_NAME", ""))
    pieces.append(url_quote(script_name).rstrip("/"))
    pieces.append("/")

    if not root_only:
        path_info = wsgi_get_bytes(environ.get("PATH_INFO", ""))
        pieces.append(url_quote(path_info.lstrip(b"/")))
        if not strip_querystring:
            query = get_query_string(environ)
            if query:
                pieces.append("?" + query)

    return uri_to_iri("".join(pieces))
def get_current_url(environ, root_only=False, strip_querystring=False,
                    host_only=False, trusted_hosts=None):
    """Rebuild the URL of the current request, or a prefix of it.

    Example:

    >>> from werkzeug.test import create_environ
    >>> env = create_environ("/?param=foo", "http://localhost/script")
    >>> get_current_url(env)
    'http://localhost/script/?param=foo'
    >>> get_current_url(env, root_only=True)
    'http://localhost/script/'
    >>> get_current_url(env, host_only=True)
    'http://localhost/'
    >>> get_current_url(env, strip_querystring=True)
    'http://localhost/script/'

    The host can optionally be checked against a list of trusted hosts;
    an untrusted host raises
    :exc:`~werkzeug.exceptions.SecurityError`.

    :param environ: the WSGI environment to get the current URL from.
    :param root_only: set to `True` to return only the root URL.
    :param strip_querystring: set to `True` to leave out the query string.
    :param host_only: set to `True` to return only the host URL.
    :param trusted_hosts: a list of trusted hosts, see
                          :func:`host_is_trusted` for more information.
    """
    from werkzeug.urls import uri_to_iri

    pieces = [environ['wsgi.url_scheme'], '://',
              get_host(environ, trusted_hosts)]
    if host_only:
        return uri_to_iri(''.join(pieces) + '/')

    pieces.append(urls.url_quote(environ.get('SCRIPT_NAME', '').rstrip('/')))
    if root_only:
        pieces.append('/')
        return uri_to_iri(''.join(pieces))

    pieces.append(
        urls.url_quote('/' + environ.get('PATH_INFO', '').lstrip('/')))
    if not strip_querystring:
        qs = environ.get('QUERY_STRING')
        if qs:
            # QUERY_STRING really should be ascii safe but some browsers
            # will send us some unicode stuff (I am looking at you IE).
            # In that case we want to urllib quote it badly.
            try:
                if hasattr(qs, 'decode'):
                    qs.decode('ascii')
                else:
                    qs.encode('ascii')
            except UnicodeError:
                # Percent-escape every character above 127 and keep the
                # rest untouched.
                qs = ''.join(
                    '%%%02X' % code_point if code_point > 127 else char
                    for code_point, char in ((ord(ch), ch) for ch in qs))
            pieces.append('?' + qs)
    return uri_to_iri(''.join(pieces))
def get_current_url(environ, root_only=False, strip_querystring=False,
                    host_only=False, trusted_hosts=None):
    """Rebuild the URL of the current request (or a prefix of it) as an IRI.

    Example:

    >>> from werkzeug.test import create_environ
    >>> env = create_environ("/?param=foo", "http://localhost/script")
    >>> get_current_url(env)
    'http://localhost/script/?param=foo'
    >>> get_current_url(env, root_only=True)
    'http://localhost/script/'
    >>> get_current_url(env, host_only=True)
    'http://localhost/'
    >>> get_current_url(env, strip_querystring=True)
    'http://localhost/script/'

    The host can optionally be checked against a list of trusted hosts;
    an untrusted host raises
    :exc:`~werkzeug.exceptions.SecurityError`.

    The result is an IRI, not a URI, so it may contain unicode
    characters. Use :func:`~werkzeug.urls.iri_to_uri` when an ASCII-only
    representation is required:

    >>> from werkzeug.urls import iri_to_uri
    >>> iri_to_uri(get_current_url(env))
    'http://localhost/script/?param=foo'

    :param environ: the WSGI environment to get the current URL from.
    :param root_only: set to `True` to return only the root URL.
    :param strip_querystring: set to `True` to leave out the query string.
    :param host_only: set to `True` to return only the host URL.
    :param trusted_hosts: a list of trusted hosts, see
                          :func:`host_is_trusted` for more information.
    """
    pieces = [environ['wsgi.url_scheme'], '://',
              get_host(environ, trusted_hosts)]
    if host_only:
        return uri_to_iri(''.join(pieces) + '/')

    # Script root, always terminated by exactly one slash.
    script_name = wsgi_get_bytes(environ.get('SCRIPT_NAME', ''))
    pieces.append(url_quote(script_name).rstrip('/'))
    pieces.append('/')

    if not root_only:
        path_info = wsgi_get_bytes(environ.get('PATH_INFO', ''))
        pieces.append(url_quote(path_info.lstrip(b'/')))
        if not strip_querystring:
            query = get_query_string(environ)
            if query:
                pieces.append('?' + query)

    return uri_to_iri(''.join(pieces))
def log_request(self, code='-', size='-'):
    """Log one handled request, colorized by status when termcolor is set.

    :param code: the response status code; it is converted with ``str``.
    :param size: the response size, passed through to the log line.
    """
    try:
        request_line = "%s %s %s" % (
            self.command, uri_to_iri(self.path), self.request_version)
    except AttributeError:
        # path isn't set if the requestline was bad
        request_line = self.requestline

    code = str(code)

    if termcolor:
        colorize = termcolor.colored
        status_class = code[0]
        # The exact-code checks (304, 404) must come before the checks
        # on their status class.
        if status_class == '1':    # 1xx - Informational
            style = {'attrs': ['bold']}
        elif status_class == '2':  # 2xx - Success
            style = {'color': 'white'}
        elif code == '304':        # 304 - Resource Not Modified
            style = {'color': 'cyan'}
        elif status_class == '3':  # 3xx - Redirection
            style = {'color': 'green'}
        elif code == '404':        # 404 - Resource Not Found
            style = {'color': 'yellow'}
        elif status_class == '4':  # 4xx - Client Error
            style = {'color': 'red', 'attrs': ['bold']}
        else:                      # 5xx, or any other response
            style = {'color': 'magenta', 'attrs': ['bold']}
        request_line = colorize(request_line, **style)

    self.log('info', '"%s" %s %s', request_line, code, size)
def log_request(self, code="-", size="-"):
    """Log one handled request, colorized by status when click is set.

    :param code: the response status code; it is converted with ``str``.
    :param size: the response size, passed through to the log line.
    """
    try:
        request_line = "%s %s %s" % (
            self.command,
            uri_to_iri(self.path),
            self.request_version,
        )
    except AttributeError:
        # path isn't set if the requestline was bad
        request_line = self.requestline

    code = str(code)

    if click:
        stylize = click.style
        status_class = code[0]
        # The exact-code checks (304, 404) must come before the checks
        # on their status class.
        if status_class == "1":  # 1xx - Informational
            style = {"bold": True}
        elif status_class == "2":  # 2xx - Success
            style = {"fg": "white"}
        elif code == "304":  # 304 - Resource Not Modified
            style = {"fg": "cyan"}
        elif status_class == "3":  # 3xx - Redirection
            style = {"fg": "green"}
        elif code == "404":  # 404 - Resource Not Found
            style = {"fg": "yellow"}
        elif status_class == "4":  # 4xx - Client Error
            style = {"fg": "red", "bold": True}
        else:  # 5xx, or any other response
            style = {"fg": "magenta", "bold": True}
        request_line = stylize(request_line, **style)

    self.log("access", '"%s" %s %s', request_line, code, size)
def test_iri_support(self):
    # A unicode IRI passed to uri_to_iri, and an un-prefixed literal with
    # non-ASCII characters passed to iri_to_uri, must raise UnicodeError.
    self.assert_raises(UnicodeError, urls.uri_to_iri, u'http://föö.com/')
    self.assert_raises(UnicodeError, urls.iri_to_uri, 'http://föö.com/')

    # URI -> IRI: the punycode host is turned into unicode and the
    # percent-encoded UTF-8 in userinfo and path is decoded.
    assert urls.uri_to_iri('http://xn--n3h.net/') == u'http://\u2603.net/'
    assert urls.uri_to_iri('http://%C3%BCser:p%C3%A4ssword@xn--n3h.net/p%C3%A5th') == \
        u'http://\xfcser:p\xe4ssword@\u2603.net/p\xe5th'

    # IRI -> URI: the inverse direction of the two cases above.
    assert urls.iri_to_uri(u'http://☃.net/') == 'http://xn--n3h.net/'
    assert urls.iri_to_uri(u'http://üser:pässword@☃.net/påth') == \
        'http://%C3%BCser:p%C3%A4ssword@xn--n3h.net/p%C3%A5th'

    # Percent-encoded reserved characters must be left encoded.
    assert urls.uri_to_iri('http://test.com/%3Fmeh?foo=%26%2F') == \
        u'http://test.com/%3Fmeh?foo=%26%2F'

    # this should work as well, might break on 2.4 because of a broken
    # idna codec
    assert urls.uri_to_iri('/foo') == u'/foo'
    assert urls.iri_to_uri(u'/foo') == '/foo'
def test_iri_support(self):
    # A unicode IRI passed to uri_to_iri, and an un-prefixed literal with
    # non-ASCII characters passed to iri_to_uri, must raise UnicodeError.
    self.assert_raises(UnicodeError, urls.uri_to_iri, u'http://föö.com/')
    self.assert_raises(UnicodeError, urls.iri_to_uri, 'http://föö.com/')

    # URI -> IRI: the punycode host is turned into unicode and the
    # percent-encoded UTF-8 in userinfo, path and fragment is decoded.
    assert urls.uri_to_iri('http://xn--n3h.net/') == u'http://\u2603.net/'
    assert urls.uri_to_iri('http://%C3%BCser:p%C3%A4ssword@xn--n3h.net/p%C3%A5th#%C3%A5nchor') == \
        u'http://\xfcser:p\xe4ssword@\u2603.net/p\xe5th#\xe5nchor'

    # IRI -> URI: the inverse direction of the two cases above.
    assert urls.iri_to_uri(u'http://☃.net/') == 'http://xn--n3h.net/'
    assert urls.iri_to_uri(u'http://üser:pässword@☃.net/påth#ånchor') == \
        'http://%C3%BCser:p%C3%A4ssword@xn--n3h.net/p%C3%A5th#%C3%A5nchor'

    # Percent-encoded reserved characters must be left encoded.
    assert urls.uri_to_iri('http://test.com/%3Fmeh?foo=%26%2F') == \
        u'http://test.com/%3Fmeh?foo=%26%2F'

    # this should work as well, might break on 2.4 because of a broken
    # idna codec
    assert urls.uri_to_iri('/foo') == u'/foo'
    assert urls.iri_to_uri(u'/foo') == '/foo'
def get_current_url(environ, root_only=False, strip_querystring=False,
                    host_only=False, trusted_hosts=None):
    """Rebuild the URL of the current request, or a prefix of it.

    Example:

    >>> from werkzeug.test import create_environ
    >>> env = create_environ("/?param=foo", "http://localhost/script")
    >>> get_current_url(env)
    'http://localhost/script/?param=foo'
    >>> get_current_url(env, root_only=True)
    'http://localhost/script/'
    >>> get_current_url(env, host_only=True)
    'http://localhost/'
    >>> get_current_url(env, strip_querystring=True)
    'http://localhost/script/'

    The host can optionally be checked against a list of trusted hosts;
    an untrusted host raises
    :exc:`~werkzeug.exceptions.SecurityError`.

    :param environ: the WSGI environment to get the current URL from.
    :param root_only: set to `True` to return only the root URL.
    :param strip_querystring: set to `True` to leave out the query string.
    :param host_only: set to `True` to return only the host URL.
    :param trusted_hosts: a list of trusted hosts, see
                          :func:`host_is_trusted` for more information.
    """
    pieces = [environ['wsgi.url_scheme'], '://',
              get_host(environ, trusted_hosts)]
    if host_only:
        return uri_to_iri(''.join(pieces) + '/')

    # Script root, always terminated by exactly one slash.
    script_name = wsgi_get_bytes(environ.get('SCRIPT_NAME', ''))
    pieces.append(url_quote(script_name).rstrip('/'))
    pieces.append('/')

    if not root_only:
        path_info = wsgi_get_bytes(environ.get('PATH_INFO', ''))
        pieces.append(url_quote(path_info.lstrip(b'/')))
        if not strip_querystring:
            query = get_query_string(environ)
            if query:
                pieces.append('?' + query)

    return uri_to_iri(''.join(pieces))
def test_uri_iri_normalization():
    """Every spelling of one URL must normalize to the same URI and IRI."""
    expected_uri = "http://xn--f-rgao.com/%E2%98%90/fred?utf8=%E2%9C%93"
    expected_iri = u"http://föñ.com/\N{BALLOT BOX}/fred?utf8=\u2713"

    # The same URL written as text and bytes, with punycode and unicode
    # hosts, and with percent-encoded and raw path/query characters.
    variants = (
        u"http://föñ.com/\N{BALLOT BOX}/fred?utf8=\u2713",
        u"http://xn--f-rgao.com/\u2610/fred?utf8=\N{CHECK MARK}",
        b"http://xn--f-rgao.com/%E2%98%90/fred?utf8=%E2%9C%93",
        u"http://xn--f-rgao.com/%E2%98%90/fred?utf8=%E2%9C%93",
        u"http://föñ.com/\u2610/fred?utf8=%E2%9C%93",
        b"http://xn--f-rgao.com/\xe2\x98\x90/fred?utf8=\xe2\x9c\x93",
    )

    for variant in variants:
        # Single conversions.
        assert urls.uri_to_iri(variant) == expected_iri
        assert urls.iri_to_uri(variant) == expected_uri
        # Round trips in both orders.
        assert urls.uri_to_iri(urls.iri_to_uri(variant)) == expected_iri
        assert urls.iri_to_uri(urls.uri_to_iri(variant)) == expected_uri
        # Applying the same conversion twice changes nothing.
        assert urls.uri_to_iri(urls.uri_to_iri(variant)) == expected_iri
        assert urls.iri_to_uri(urls.iri_to_uri(variant)) == expected_uri
def test_uri_iri_normalization():
    """Every spelling of one URL must normalize to the same URI and IRI."""
    expected_uri = 'http://xn--f-rgao.com/%E2%98%90/fred?utf8=%E2%9C%93'
    expected_iri = u'http://föñ.com/\N{BALLOT BOX}/fred?utf8=\u2713'

    # The same URL written as text and bytes, with punycode and unicode
    # hosts, and with percent-encoded and raw path/query characters.
    variants = (
        u'http://föñ.com/\N{BALLOT BOX}/fred?utf8=\u2713',
        u'http://xn--f-rgao.com/\u2610/fred?utf8=\N{CHECK MARK}',
        b'http://xn--f-rgao.com/%E2%98%90/fred?utf8=%E2%9C%93',
        u'http://xn--f-rgao.com/%E2%98%90/fred?utf8=%E2%9C%93',
        u'http://föñ.com/\u2610/fred?utf8=%E2%9C%93',
        b'http://xn--f-rgao.com/\xe2\x98\x90/fred?utf8=\xe2\x9c\x93',
    )

    for variant in variants:
        # Single conversions.
        assert urls.uri_to_iri(variant) == expected_iri
        assert urls.iri_to_uri(variant) == expected_uri
        # Round trips in both orders.
        assert urls.uri_to_iri(urls.iri_to_uri(variant)) == expected_iri
        assert urls.iri_to_uri(urls.uri_to_iri(variant)) == expected_uri
        # Applying the same conversion twice changes nothing.
        assert urls.uri_to_iri(urls.uri_to_iri(variant)) == expected_iri
        assert urls.iri_to_uri(urls.iri_to_uri(variant)) == expected_uri
def test_uri_iri_normalization(self):
    # Every spelling of one URL must normalize to the same URI and IRI.
    expected_uri = 'http://xn--f-rgao.com/%E2%98%90/fred?utf8=%E2%9C%93'
    expected_iri = u'http://föñ.com/\N{BALLOT BOX}/fred?utf8=\u2713'

    # The same URL written as text and bytes, with punycode and unicode
    # hosts, and with percent-encoded and raw path/query characters.
    variants = (
        u'http://föñ.com/\N{BALLOT BOX}/fred?utf8=\u2713',
        u'http://xn--f-rgao.com/\u2610/fred?utf8=\N{CHECK MARK}',
        b'http://xn--f-rgao.com/%E2%98%90/fred?utf8=%E2%9C%93',
        u'http://xn--f-rgao.com/%E2%98%90/fred?utf8=%E2%9C%93',
        u'http://föñ.com/\u2610/fred?utf8=%E2%9C%93',
        b'http://xn--f-rgao.com/\xe2\x98\x90/fred?utf8=\xe2\x9c\x93',
    )

    check = self.assert_equal
    for variant in variants:
        # Single conversions.
        check(urls.uri_to_iri(variant), expected_iri)
        check(urls.iri_to_uri(variant), expected_uri)
        # Round trips in both orders.
        check(urls.uri_to_iri(urls.iri_to_uri(variant)), expected_iri)
        check(urls.iri_to_uri(urls.uri_to_iri(variant)), expected_uri)
        # Applying the same conversion twice changes nothing.
        check(urls.uri_to_iri(urls.uri_to_iri(variant)), expected_iri)
        check(urls.iri_to_uri(urls.iri_to_uri(variant)), expected_uri)
def test_iri_support(self):
    # A unicode IRI passed to uri_to_iri, and an un-prefixed literal with
    # non-ASCII characters passed to iri_to_uri, must raise UnicodeError.
    self.assert_raises(UnicodeError, urls.uri_to_iri, u"http://föö.com/")
    self.assert_raises(UnicodeError, urls.iri_to_uri, "http://föö.com/")

    # URI -> IRI: the punycode host is turned into unicode and the
    # percent-encoded UTF-8 in userinfo and path is decoded.
    assert urls.uri_to_iri("http://xn--n3h.net/") == u"http://\u2603.net/"
    assert (
        urls.uri_to_iri("http://%C3%BCser:p%C3%A4ssword@xn--n3h.net/p%C3%A5th")
        == u"http://\xfcser:p\xe4ssword@\u2603.net/p\xe5th"
    )

    # IRI -> URI: the inverse direction of the two cases above.
    assert urls.iri_to_uri(u"http://☃.net/") == "http://xn--n3h.net/"
    assert (
        urls.iri_to_uri(u"http://üser:pässword@☃.net/påth")
        == "http://%C3%BCser:p%C3%A4ssword@xn--n3h.net/p%C3%A5th"
    )

    # Percent-encoded reserved characters must be left encoded.
    assert urls.uri_to_iri("http://test.com/%3Fmeh?foo=%26%2F") == u"http://test.com/%3Fmeh?foo=%26%2F"

    # this should work as well, might break on 2.4 because of a broken
    # idna codec
    assert urls.uri_to_iri("/foo") == u"/foo"
    assert urls.iri_to_uri(u"/foo") == "/foo"
def test_iri_support(self):
    # URI -> IRI: the punycode host is turned into unicode and the
    # percent-encoded UTF-8 in userinfo and path is decoded.
    self.assert_strict_equal(urls.uri_to_iri('http://xn--n3h.net/'),
                             u'http://\u2603.net/')
    self.assert_strict_equal(
        urls.uri_to_iri(b'http://%C3%BCser:p%C3%A4ssword@xn--n3h.net/p%C3%A5th'),
        u'http://\xfcser:p\xe4ssword@\u2603.net/p\xe5th')

    # IRI -> URI: the inverse direction of the two cases above.
    self.assert_strict_equal(urls.iri_to_uri(u'http://☃.net/'),
                             'http://xn--n3h.net/')
    self.assert_strict_equal(
        urls.iri_to_uri(u'http://üser:pässword@☃.net/påth'),
        'http://%C3%BCser:p%C3%A4ssword@xn--n3h.net/p%C3%A5th')

    # Percent-encoded reserved characters must be left encoded.
    self.assert_strict_equal(urls.uri_to_iri('http://test.com/%3Fmeh?foo=%26%2F'),
                             u'http://test.com/%3Fmeh?foo=%26%2F')

    # this should work as well, might break on 2.4 because of a broken
    # idna codec
    self.assert_strict_equal(urls.uri_to_iri(b'/foo'), u'/foo')
    self.assert_strict_equal(urls.iri_to_uri(u'/foo'), '/foo')

    # A port in the netloc is expected to survive the IDNA encoding.
    self.assert_strict_equal(urls.iri_to_uri(u'http://föö.com:8080/bam/baz'),
                             'http://xn--f-1gaa.com:8080/bam/baz')
def get_current_url(environ, root_only=False, strip_querystring=False,
                    host_only=False, trusted_hosts=None):
    """Rebuild the URL of the current request, or a prefix of it.

    Example:

    >>> from werkzeug.test import create_environ
    >>> env = create_environ("/?param=foo", "http://localhost/script")
    >>> get_current_url(env)
    'http://localhost/script/?param=foo'
    >>> get_current_url(env, root_only=True)
    'http://localhost/script/'
    >>> get_current_url(env, host_only=True)
    'http://localhost/'
    >>> get_current_url(env, strip_querystring=True)
    'http://localhost/script/'

    The host can optionally be checked against a list of trusted hosts;
    an untrusted host raises
    :exc:`~werkzeug.exceptions.SecurityError`.

    :param environ: the WSGI environment to get the current URL from.
    :param root_only: set to `True` to return only the root URL.
    :param strip_querystring: set to `True` to leave out the query string.
    :param host_only: set to `True` to return only the host URL.
    :param trusted_hosts: a list of trusted hosts, see
                          :func:`host_is_trusted` for more information.
    """
    pieces = [environ["wsgi.url_scheme"], "://", get_host(environ, trusted_hosts)]
    if host_only:
        return uri_to_iri("".join(pieces) + "/")

    # Script root, always terminated by exactly one slash.
    script_name = wsgi_get_bytes(environ.get("SCRIPT_NAME", ""))
    pieces.append(url_quote(script_name).rstrip("/"))
    pieces.append("/")

    if not root_only:
        path_info = wsgi_get_bytes(environ.get("PATH_INFO", ""))
        pieces.append(url_quote(path_info.lstrip(b"/")))
        if not strip_querystring:
            query = get_query_string(environ)
            if query:
                pieces.append("?" + query)

    return uri_to_iri("".join(pieces))
def log_request(self, code="-", size="-"):
    """Emit one structured log record describing the handled request.

    The request details are attached to the record through ``extra``.

    :param code: the response status code; it is converted with ``str``.
    :param size: the response size, stored unchanged in the record.
    """
    try:
        request_path = uri_to_iri(self.path)
    except AttributeError:
        # path isn't set if the requestline was bad
        request_path = self.requestline

    # Prefer the ``x-real-ip`` header over the address string.
    # NOTE(review): presumably set by a fronting proxy - confirm.
    client_addr = self.headers.get('x-real-ip', self.address_string())

    record = dict(
        method=self.command,
        path=request_path,
        http_ver=self.request_version,
        http_code=str(code),
        size=size,
        client_addr=client_addr,
    )
    http_logger.info('request%s', '', extra=record)
def _as_iri(obj):
    """Return ``obj`` as an IRI, converting it unless it is already text."""
    if isinstance(obj, text_type):
        return obj
    return uri_to_iri(obj, charset, errors)
def test_uri_to_iri_to_uri(self):
    # URI -> IRI -> URI must give back the original URI.
    original = 'http://xn--f-rgao.com/%C3%9E'
    round_tripped = urls.iri_to_uri(urls.uri_to_iri(original))
    self.assert_equal(round_tripped, original)
def test_uri_to_iri_idempotence_non_ascii():
    """Converting an already converted non-ASCII IRI must not change it."""
    converted_once = urls.uri_to_iri('http://xn--n3h/%E2%98%83')
    converted_twice = urls.uri_to_iri(converted_once)
    assert converted_twice == converted_once
def test_iri_to_uri_to_iri():
    """IRI -> URI -> IRI must give back the original IRI."""
    original = u'http://föö.com/'
    round_tripped = urls.uri_to_iri(urls.iri_to_uri(original))
    assert round_tripped == original
def test_uri_to_iri_idempotence_ascii_only():
    """Converting an already converted ASCII-only URI must not change it."""
    converted_once = urls.uri_to_iri('http://www.idempoten.ce')
    converted_twice = urls.uri_to_iri(converted_once)
    assert converted_twice == converted_once
def extract_path_info(environ_or_baseurl, path_or_url, charset='utf-8',
                      errors='replace', collapse_http_schemes=True):
    """Extracts the path info from the given URL (or WSGI environment) and
    path.  The path info returned is a unicode string, not a bytestring
    suitable for a WSGI environment.  The URLs might also be IRIs.

    If the path info could not be determined, `None` is returned.  This is
    the case when the scheme is not http/https, when the network
    locations differ, or when the path is not located below the
    application root.  The application root only matches on whole path
    segments: with a root of ``/app`` the path ``/application`` is not
    considered to be below it.

    Some examples:

    >>> extract_path_info('http://example.com/app', '/app/hello')
    u'/hello'
    >>> extract_path_info('http://example.com/app',
    ...                   'https://example.com/app/hello')
    u'/hello'
    >>> extract_path_info('http://example.com/app',
    ...                   'https://example.com/app/hello',
    ...                   collapse_http_schemes=False) is None
    True

    Instead of providing a base URL you can also pass a WSGI environment.

    .. versionadded:: 0.6

    :param environ_or_baseurl: a WSGI environment dict, a base URL or
                               base IRI.  This is the root of the
                               application.
    :param path_or_url: an absolute path from the server root, a
                        relative path (in which case it's the path info)
                        or a full URL.  Also accepts IRIs and unicode
                        parameters.
    :param charset: the charset for byte data in URLs
    :param errors: the error handling on decode
    :param collapse_http_schemes: if set to `False` the algorithm does
                                  not assume that http and https on the
                                  same server point to the same
                                  resource.
    """
    def _normalize_netloc(scheme, netloc):
        # Drop the userinfo and the scheme's default port so that
        # equivalent network locations compare equal.
        parts = netloc.split(u'@', 1)[-1].split(u':', 1)
        if len(parts) == 2:
            netloc, port = parts
            if (scheme == u'http' and port == u'80') or \
               (scheme == u'https' and port == u'443'):
                port = None
        else:
            netloc = parts[0]
            port = None
        if port is not None:
            netloc += u':' + port
        return netloc

    # make sure whatever we are working on is a IRI and parse it
    path = uri_to_iri(path_or_url, charset, errors)
    if isinstance(environ_or_baseurl, dict):
        environ_or_baseurl = get_current_url(environ_or_baseurl,
                                             root_only=True)
    base_iri = uri_to_iri(environ_or_baseurl, charset, errors)
    base_scheme, base_netloc, base_path = url_parse(base_iri)[:3]
    cur_scheme, cur_netloc, cur_path, = \
        url_parse(url_join(base_iri, path))[:3]

    # normalize the network location
    base_netloc = _normalize_netloc(base_scheme, base_netloc)
    cur_netloc = _normalize_netloc(cur_scheme, cur_netloc)

    # is that IRI even on a known HTTP scheme?
    if collapse_http_schemes:
        for scheme in base_scheme, cur_scheme:
            if scheme not in (u'http', u'https'):
                return None
    else:
        if not (base_scheme in (u'http', u'https') and
                base_scheme == cur_scheme):
            return None

    # are the netlocs compatible?
    if base_netloc != cur_netloc:
        return None

    # are we below the application path?
    base_path = base_path.rstrip(u'/')
    if not cur_path.startswith(base_path):
        return None

    # A plain prefix match is not enough: a sibling such as
    # "/application" must not be treated as living below "/app".  The
    # part after a non-empty base path has to be empty or start a new
    # path segment.
    remainder = cur_path[len(base_path):]
    if base_path and remainder and not remainder.startswith(u'/'):
        return None

    return u'/' + remainder.lstrip(u'/')
def test_uri_to_iri_dont_unquote_space():
    """A percent-encoded space must stay encoded in the IRI."""
    encoded = "abc%20def"
    assert urls.uri_to_iri(encoded) == "abc%20def"
def test_iri_safe_quoting():
    """Reserved characters stay percent-encoded and the URI round-trips."""
    source_uri = "http://xn--f-1gaa.com/%2F%25?q=%C3%B6&x=%3D%25#%25"
    expected_iri = u"http://föö.com/%2F%25?q=ö&x=%3D%25#%25"

    # Only the non-ASCII escape is decoded; %2F, %25 and %3D are kept.
    strict_eq(urls.uri_to_iri(source_uri), expected_iri)
    # Converting back must reproduce the source URI exactly.
    strict_eq(urls.iri_to_uri(urls.uri_to_iri(source_uri)), source_uri)
def test_iri_safe_quoting(self):
    # Reserved characters stay percent-encoded and the URI round-trips.
    source_uri = b'http://xn--f-1gaa.com/%2F%25?q=%C3%B6&x=%3D%25#%25'
    expected_iri = u'http://föö.com/%2F%25?q=ö&x=%3D%25#%25'

    # Only the non-ASCII escape is decoded; %2F, %25 and %3D are kept.
    self.assert_strict_equal(urls.uri_to_iri(source_uri), expected_iri)
    # Converting back must reproduce the source URI exactly.
    self.assert_strict_equal(
        urls.iri_to_uri(urls.uri_to_iri(source_uri)), source_uri)
def test_uri_to_iri_to_uri():
    """URI -> IRI -> URI must give back the original URI."""
    original = 'http://xn--f-rgao.com/%C3%9E'
    round_tripped = urls.iri_to_uri(urls.uri_to_iri(original))
    assert round_tripped == original
def test_iri_safe_quoting(self):
    # Reserved characters stay percent-encoded and the URI round-trips.
    source_uri = 'http://xn--f-1gaa.com/%2F%25?q=%C3%B6&x=%3D%25#%25'
    expected_iri = u'http://föö.com/%2F%25?q=ö&x=%3D%25#%25'

    # Only the non-ASCII escape is decoded; %2F, %25 and %3D are kept.
    self.assert_strict_equal(urls.uri_to_iri(source_uri), expected_iri)
    # Converting back must reproduce the source URI exactly.
    self.assert_strict_equal(
        urls.iri_to_uri(urls.uri_to_iri(source_uri)), source_uri)
def _as_iri(obj):
    """Return ``obj`` as an IRI, converting it unless it is already unicode."""
    if isinstance(obj, unicode):
        return obj
    return uri_to_iri(obj, charset, errors)
def test_uri_to_iri_idempotence_ascii_only(self):
    # Converting an already converted ASCII-only URI must not change it.
    converted_once = urls.uri_to_iri('http://www.idempoten.ce')
    converted_twice = urls.uri_to_iri(converted_once)
    self.assert_equal(converted_twice, converted_once)
def test_iri_safe_quoting():
    """Reserved characters stay percent-encoded and the URI round-trips."""
    source_uri = 'http://xn--f-1gaa.com/%2F%25?q=%C3%B6&x=%3D%25#%25'
    expected_iri = u'http://föö.com/%2F%25?q=ö&x=%3D%25#%25'

    # Only the non-ASCII escape is decoded; %2F, %25 and %3D are kept.
    strict_eq(urls.uri_to_iri(source_uri), expected_iri)
    # Converting back must reproduce the source URI exactly.
    strict_eq(urls.iri_to_uri(urls.uri_to_iri(source_uri)), source_uri)
def test_uri_to_iri_idempotence_non_ascii(self):
    # Converting an already converted non-ASCII IRI must not change it.
    converted_once = urls.uri_to_iri('http://xn--n3h/%E2%98%83')
    converted_twice = urls.uri_to_iri(converted_once)
    self.assert_equal(converted_twice, converted_once)
def test_iri_to_uri_to_iri(self):
    # IRI -> URI -> IRI must give back the original IRI.
    original = u'http://föö.com/'
    round_tripped = urls.uri_to_iri(urls.iri_to_uri(original))
    self.assert_equal(round_tripped, original)
def extract_path_info(environ_or_baseurl, path_or_url, charset='utf-8',
                      errors='replace', collapse_http_schemes=True):
    """Extracts the path info from the given URL (or WSGI environment) and
    path.  The path info returned is a unicode string, not a bytestring
    suitable for a WSGI environment.  The URLs might also be IRIs.

    If the path info could not be determined, `None` is returned.  This is
    the case when the scheme is not http/https, when the network
    locations differ, or when the path is not located below the
    application root.  The application root only matches on whole path
    segments: with a root of ``/app`` the path ``/application`` is not
    considered to be below it.

    Some examples:

    >>> extract_path_info('http://example.com/app', '/app/hello')
    u'/hello'
    >>> extract_path_info('http://example.com/app',
    ...                   'https://example.com/app/hello')
    u'/hello'
    >>> extract_path_info('http://example.com/app',
    ...                   'https://example.com/app/hello',
    ...                   collapse_http_schemes=False) is None
    True

    Instead of providing a base URL you can also pass a WSGI environment.

    .. versionadded:: 0.6

    :param environ_or_baseurl: a WSGI environment dict, a base URL or
                               base IRI.  This is the root of the
                               application.
    :param path_or_url: an absolute path from the server root, a
                        relative path (in which case it's the path info)
                        or a full URL.  Also accepts IRIs and unicode
                        parameters.
    :param charset: the charset for byte data in URLs
    :param errors: the error handling on decode
    :param collapse_http_schemes: if set to `False` the algorithm does
                                  not assume that http and https on the
                                  same server point to the same
                                  resource.
    """
    def _normalize_netloc(scheme, netloc):
        # Drop the userinfo and the scheme's default port so that
        # equivalent network locations compare equal.
        parts = netloc.split(u'@', 1)[-1].split(u':', 1)
        if len(parts) == 2:
            netloc, port = parts
            if (scheme == u'http' and port == u'80') or \
               (scheme == u'https' and port == u'443'):
                port = None
        else:
            netloc = parts[0]
            port = None
        if port is not None:
            netloc += u':' + port
        return netloc

    # make sure whatever we are working on is a IRI and parse it
    path = uri_to_iri(path_or_url, charset, errors)
    if isinstance(environ_or_baseurl, dict):
        environ_or_baseurl = get_current_url(environ_or_baseurl,
                                             root_only=True)
    base_iri = uri_to_iri(environ_or_baseurl, charset, errors)
    base_scheme, base_netloc, base_path = url_parse(base_iri)[:3]
    cur_scheme, cur_netloc, cur_path, = \
        url_parse(url_join(base_iri, path))[:3]

    # normalize the network location
    base_netloc = _normalize_netloc(base_scheme, base_netloc)
    cur_netloc = _normalize_netloc(cur_scheme, cur_netloc)

    # is that IRI even on a known HTTP scheme?
    if collapse_http_schemes:
        for scheme in base_scheme, cur_scheme:
            if scheme not in (u'http', u'https'):
                return None
    else:
        if not (base_scheme in (u'http', u'https') and
                base_scheme == cur_scheme):
            return None

    # are the netlocs compatible?
    if base_netloc != cur_netloc:
        return None

    # are we below the application path?
    base_path = base_path.rstrip(u'/')
    if not cur_path.startswith(base_path):
        return None

    # A plain prefix match is not enough: a sibling such as
    # "/application" must not be treated as living below "/app".  The
    # part after a non-empty base path has to be empty or start a new
    # path segment.
    remainder = cur_path[len(base_path):]
    if base_path and remainder and not remainder.startswith(u'/'):
        return None

    return u'/' + remainder.lstrip(u'/')
def iri_for(endpoint, **values):
    """Build a URL with ``url_for`` and return it converted to an IRI.

    All arguments are forwarded to ``url_for`` unchanged.
    """
    uri = url_for(endpoint, **values)
    return uri_to_iri(uri)
def test_iri_safe_quoting():
    """Reserved characters stay percent-encoded and the URI round-trips."""
    source_uri = "http://xn--f-1gaa.com/%2F%25?q=%C3%B6&x=%3D%25#%25"
    expected_iri = "http://föö.com/%2F%25?q=ö&x=%3D%25#%25"

    # Only the non-ASCII escape is decoded; %2F, %25 and %3D are kept.
    converted = urls.uri_to_iri(source_uri)
    assert converted == expected_iri
    # Converting back must reproduce the source URI exactly.
    assert urls.iri_to_uri(urls.uri_to_iri(source_uri)) == source_uri