Example #1
0
    def test_iri_support(self):
        """Check URI <-> IRI conversion of hosts, userinfo, paths and queries."""
        # Punycode host labels are decoded when converting to an IRI.
        self.assertEqual(
            urls.uri_to_iri('http://xn--n3h.net/'),
            'http://\u2603.net/'
        )
        # Percent-encoded UTF-8 in the userinfo and path is decoded as well.
        self.assertEqual(
            urls.uri_to_iri(
                'http://%C3%BCser:p%C3%A4ssword@xn--n3h.net/p%C3%A5th'
            ),
            'http://\xfcser:p\xe4ssword@\u2603.net/p\xe5th'
        )
        # The reverse direction punycodes the host ...
        self.assertEqual(
            urls.iri_to_uri('http://☃.net/'),
            'http://xn--n3h.net/'
        )
        # ... and percent-encodes non-ASCII userinfo and path characters.
        self.assertEqual(
            urls.iri_to_uri('http://üser:pässword@☃.net/påth'),
            'http://%C3%BCser:p%C3%A4ssword@xn--n3h.net/p%C3%A5th'
        )

        # Escapes of reserved characters (?, &, /) must stay escaped.
        self.assertEqual(
            urls.uri_to_iri('http://test.com/%3Fmeh?foo=%26%2F'),
            'http://test.com/%3Fmeh?foo=%26%2F'
        )

        # A plain ASCII path passes through unchanged.
        self.assertEqual(urls.iri_to_uri('/foo'), '/foo')

        # An explicit port is kept next to the punycoded host.
        self.assertEqual(
            urls.iri_to_uri('http://föö.com:8080/bam/baz'),
            'http://xn--f-1gaa.com:8080/bam/baz'
        )
Example #2
0
    def test_iri_support(self):
        """Exercise ``uri_to_iri`` and ``iri_to_uri`` on representative URLs."""
        # URI -> IRI: the punycode host becomes its unicode form.
        self.assertEqual(
            urls.uri_to_iri('http://xn--n3h.net/'),
            'http://\u2603.net/'
        )
        # URI -> IRI: percent-encoded credentials and path are unquoted.
        self.assertEqual(
            urls.uri_to_iri(
                'http://%C3%BCser:p%C3%A4ssword@xn--n3h.net/p%C3%A5th'
            ),
            'http://\xfcser:p\xe4ssword@\u2603.net/p\xe5th'
        )
        # IRI -> URI: the unicode host is punycoded.
        self.assertEqual(
            urls.iri_to_uri('http://☃.net/'),
            'http://xn--n3h.net/'
        )
        # IRI -> URI: non-ASCII credentials and path are percent-encoded.
        self.assertEqual(
            urls.iri_to_uri('http://üser:pässword@☃.net/påth'),
            'http://%C3%BCser:p%C3%A4ssword@xn--n3h.net/p%C3%A5th'
        )

        # Percent-escapes of reserved characters are left untouched.
        self.assertEqual(
            urls.uri_to_iri('http://test.com/%3Fmeh?foo=%26%2F'),
            'http://test.com/%3Fmeh?foo=%26%2F'
        )

        # Pure ASCII input needs no conversion.
        self.assertEqual(urls.iri_to_uri('/foo'), '/foo')

        # The port survives host punycoding.
        self.assertEqual(
            urls.iri_to_uri('http://föö.com:8080/bam/baz'),
            'http://xn--f-1gaa.com:8080/bam/baz'
        )
Example #3
0
def get_current_url(
    environ, root_only=False, strip_querystring=False,
    host_only=False, trusted_hosts=None,
):
    """Rebuild the URL of the current request, or a part of it, as an IRI.

    Example:

    >>> from verktyg.test import create_environ
    >>> env = create_environ("/?param=foo", "http://localhost/script")
    >>> get_current_url(env)
    'http://localhost/script/?param=foo'
    >>> get_current_url(env, root_only=True)
    'http://localhost/script/'
    >>> get_current_url(env, host_only=True)
    'http://localhost/'
    >>> get_current_url(env, strip_querystring=True)
    'http://localhost/script/'

    Optionally the host is verified against a list of trusted hosts.  If
    the host is not in that list a
    :exc:`~verktyg.exceptions.SecurityError` is raised.

    The returned string is an IRI rather than a URI, so it may contain
    unicode characters.  Use :func:`~verktyg.urls.iri_to_uri` when an
    ASCII-only representation is required:

    >>> from verktyg.urls import iri_to_uri
    >>> iri_to_uri(get_current_url(env))
    'http://localhost/script/?param=foo'

    :param environ:
        The WSGI environment to read the current URL from.
    :param root_only:
        Set to `True` to get only the root URL (up to and including the
        script name).
    :param strip_querystring:
        Set to `True` to leave the query string out.
    :param host_only:
        Set to `True` to get only the host URL.
    :param trusted_hosts:
        A list of trusted hosts, see :func:`host_is_trusted` for more
        information.
    """
    pieces = [
        environ['wsgi.url_scheme'],
        '://',
        get_host(environ, trusted_hosts),
    ]
    if host_only:
        return uri_to_iri(''.join(pieces) + '/')

    # The script name is emitted without a trailing slash so that exactly
    # one slash separates it from the path info.
    script_name = wsgi_get_bytes(environ.get('SCRIPT_NAME', ''))
    pieces += [urlquote(script_name).rstrip('/'), '/']

    if not root_only:
        path_info = wsgi_get_bytes(environ.get('PATH_INFO', ''))
        pieces.append(urlquote(path_info.lstrip(b'/')))
        # Only consult the query string when the caller wants it.
        query = None if strip_querystring else get_query_string(environ)
        if query:
            pieces.append('?' + query)

    return uri_to_iri(''.join(pieces))
Example #4
0
def get_current_url(environ,
                    root_only=False,
                    strip_querystring=False,
                    host_only=False,
                    trusted_hosts=None):
    """Recreate the full URL of the current request (or parts of it) as an
    IRI.  For example:

    >>> from verktyg.test import create_environ
    >>> env = create_environ("/?param=foo", "http://localhost/script")
    >>> get_current_url(env)
    'http://localhost/script/?param=foo'
    >>> get_current_url(env, root_only=True)
    'http://localhost/script/'
    >>> get_current_url(env, host_only=True)
    'http://localhost/'
    >>> get_current_url(env, strip_querystring=True)
    'http://localhost/script/'

    The host can optionally be checked against a list of trusted hosts; a
    host that is not in the list raises
    :exc:`~verktyg.exceptions.SecurityError`.

    Because the result is an IRI and not a URI it may contain unicode
    characters.  Pass it through :func:`~verktyg.urls.iri_to_uri` to obtain
    an ASCII-only representation:

    >>> from verktyg.urls import iri_to_uri
    >>> iri_to_uri(get_current_url(env))
    'http://localhost/script/?param=foo'

    :param environ: the WSGI environment to get the current URL from.
    :param root_only: set to `True` if you only want the root URL.
    :param strip_querystring: set to `True` if you don't want the querystring.
    :param host_only: set to `True` if only the host URL should be returned.
    :param trusted_hosts: a list of trusted hosts, see :func:`host_is_trusted`
                          for more information.
    """
    url = [environ['wsgi.url_scheme'], '://', get_host(environ, trusted_hosts)]
    if host_only:
        url.append('/')
        return uri_to_iri(''.join(url))

    # Script name without trailing slashes, followed by a single separator.
    quoted_script = urlquote(wsgi_get_bytes(environ.get('SCRIPT_NAME', '')))
    url.append(quoted_script.rstrip('/'))
    url.append('/')
    if root_only:
        return uri_to_iri(''.join(url))

    # Leading slashes of the path info are dropped; the separator above
    # already provides one.
    raw_path = wsgi_get_bytes(environ.get('PATH_INFO', ''))
    url.append(urlquote(raw_path.lstrip(b'/')))
    if strip_querystring:
        return uri_to_iri(''.join(url))

    query_string = get_query_string(environ)
    if query_string:
        url.append('?' + query_string)
    return uri_to_iri(''.join(url))
Example #5
0
    def test_uri_iri_normalization(self):
        """Every spelling of one URL must normalize to the same URI and IRI."""
        uri = 'http://xn--f-rgao.com/%E2%98%90/fred?utf8=%E2%9C%93'
        iri = 'http://föñ.com/\N{BALLOT BOX}/fred?utf8=\u2713'

        # The same resource written as a full IRI, a full URI, and two
        # mixtures of punycode/unicode hosts with quoted/unquoted parts.
        tests = [
            'http://föñ.com/\N{BALLOT BOX}/fred?utf8=\u2713',
            'http://xn--f-rgao.com/\u2610/fred?utf8=\N{CHECK MARK}',
            'http://xn--f-rgao.com/%E2%98%90/fred?utf8=%E2%9C%93',
            'http://föñ.com/\u2610/fred?utf8=%E2%9C%93',
        ]

        for test in tests:
            # subTest names the failing spelling and lets the remaining
            # spellings run instead of stopping at the first failure.
            with self.subTest(url=test):
                self.assertEqual(urls.uri_to_iri(test), iri)
                self.assertEqual(urls.iri_to_uri(test), uri)
                # Round trips in both directions.
                self.assertEqual(urls.uri_to_iri(urls.iri_to_uri(test)), iri)
                self.assertEqual(urls.iri_to_uri(urls.uri_to_iri(test)), uri)
                # Applying the same conversion twice changes nothing.
                self.assertEqual(urls.uri_to_iri(urls.uri_to_iri(test)), iri)
                self.assertEqual(urls.iri_to_uri(urls.iri_to_uri(test)), uri)
Example #6
0
    def test_uri_iri_normalization(self):
        """Equivalent URL spellings must all map to one URI and one IRI."""
        uri = 'http://xn--f-rgao.com/%E2%98%90/fred?utf8=%E2%9C%93'
        iri = 'http://föñ.com/\N{BALLOT BOX}/fred?utf8=\u2713'

        # Each entry is a distinct spelling of the same URL; the host is
        # either unicode or punycode, path and query either raw or quoted.
        tests = [
            'http://föñ.com/\N{BALLOT BOX}/fred?utf8=\u2713',
            'http://xn--f-rgao.com/\u2610/fred?utf8=\N{CHECK MARK}',
            'http://xn--f-rgao.com/%E2%98%90/fred?utf8=%E2%9C%93',
            'http://föñ.com/\u2610/fred?utf8=%E2%9C%93',
        ]

        for test in tests:
            # Report failures per input so the broken spelling is obvious.
            with self.subTest(url=test):
                # Direct conversion.
                self.assertEqual(urls.uri_to_iri(test), iri)
                self.assertEqual(urls.iri_to_uri(test), uri)
                # Conversion there and back again.
                self.assertEqual(urls.uri_to_iri(urls.iri_to_uri(test)), iri)
                self.assertEqual(urls.iri_to_uri(urls.uri_to_iri(test)), uri)
                # Repeated conversion in the same direction.
                self.assertEqual(urls.uri_to_iri(urls.uri_to_iri(test)), iri)
                self.assertEqual(urls.iri_to_uri(urls.iri_to_uri(test)), uri)
Example #7
0
 def test_uri_to_iri_to_uri(self):
     """A URI converted to an IRI and back must equal the original URI."""
     original = 'http://xn--f-rgao.com/%C3%9E'
     round_tripped = urls.iri_to_uri(urls.uri_to_iri(original))
     self.assertEqual(round_tripped, original)
Example #8
0
 def test_iri_to_uri_to_iri(self):
     """An IRI converted to a URI and back must equal the original IRI."""
     original = 'http://föö.com/'
     round_tripped = urls.uri_to_iri(urls.iri_to_uri(original))
     self.assertEqual(round_tripped, original)
Example #9
0
 def test_uri_to_iri_idempotence_non_ascii(self):
     """Converting an already converted non-ASCII IRI must be a no-op."""
     # First pass turns the punycode/percent-encoded URI into an IRI.
     iri = urls.uri_to_iri('http://xn--n3h/%E2%98%83')
     # Second pass must leave that IRI unchanged.
     self.assertEqual(urls.uri_to_iri(iri), iri)
Example #10
0
 def test_uri_to_iri_idempotence_ascii_only(self):
     """Converting an already converted ASCII-only URL must be a no-op."""
     converted = urls.uri_to_iri('http://www.idempoten.ce')
     # A second conversion must not alter the first result.
     self.assertEqual(urls.uri_to_iri(converted), converted)
Example #11
0
 def test_uri_to_iri_idempotence_ascii_only(self):
     """``uri_to_iri`` applied twice to an ASCII URL equals applying it once."""
     once = urls.uri_to_iri('http://www.idempoten.ce')
     twice = urls.uri_to_iri(once)
     self.assertEqual(twice, once)
Example #12
0
 def test_uri_to_iri_to_uri(self):
     """URI -> IRI -> URI must reproduce the starting URI."""
     source_uri = 'http://xn--f-rgao.com/%C3%9E'
     as_iri = urls.uri_to_iri(source_uri)
     back_to_uri = urls.iri_to_uri(as_iri)
     self.assertEqual(back_to_uri, source_uri)
Example #13
0
 def test_iri_to_uri_to_iri(self):
     """IRI -> URI -> IRI must reproduce the starting IRI."""
     source_iri = 'http://föö.com/'
     as_uri = urls.iri_to_uri(source_iri)
     back_to_iri = urls.uri_to_iri(as_uri)
     self.assertEqual(back_to_iri, source_iri)
Example #14
0
 def test_uri_to_iri_idempotence_non_ascii(self):
     """``uri_to_iri`` applied twice to a non-ASCII URL equals applying it once."""
     once = urls.uri_to_iri('http://xn--n3h/%E2%98%83')
     twice = urls.uri_to_iri(once)
     self.assertEqual(twice, once)
Example #15
0
def extract_path_info(environ_or_baseurl,
                      path_or_url,
                      errors='replace',
                      collapse_http_schemes=True):
    """Extracts the path info from the given URL (or WSGI environment) and
    path.  The path info returned is a unicode string, not a bytestring
    suitable for a WSGI environment.  The URLs might also be IRIs.

    If the path info could not be determined, `None` is returned.  That is
    the case when either URL is not on an HTTP scheme, when the network
    locations differ, or when the path does not lie below the base path.

    Some examples:

    >>> extract_path_info('http://example.com/app', '/app/hello')
    '/hello'
    >>> extract_path_info('http://example.com/app',
    ...                   'https://example.com/app/hello')
    '/hello'
    >>> extract_path_info('http://example.com/app',
    ...                   'https://example.com/app/hello',
    ...                   collapse_http_schemes=False) is None
    True

    A path that merely shares a textual prefix with the base path is not
    considered to be below it:

    >>> extract_path_info('http://example.com/app',
    ...                   '/application/hello') is None
    True

    Instead of providing a base URL you can also pass a WSGI environment.

    :param environ_or_baseurl:
        A WSGI environment dict, a base URL or base IRI.  This is the root of
        the application.
    :param path_or_url:
        An absolute path from the server root, a relative path (in which case
        it's the path info) or a full URL.  Also accepts IRIs and unicode
        parameters.
    :param errors:
        The error handling on decode.
    :param collapse_http_schemes:
        If set to `False` the algorithm does not assume that http and https on
        the same server point to the same resource.
    :return:
        The path info starting with a single ``/``, or `None`.
    """
    http_schemes = ('http', 'https')

    def _normalize_netloc(scheme, netloc):
        # Drop any userinfo, then drop the port when it is the default
        # port of the scheme so equivalent netlocs compare equal.
        host, colon, port = netloc.split('@', 1)[-1].partition(':')
        if not colon or (scheme, port) in (('http', '80'), ('https', '443')):
            return host
        return host + ':' + port

    # make sure whatever we are working on is an IRI and parse it
    path = uri_to_iri(path_or_url, errors=errors)
    if isinstance(environ_or_baseurl, dict):
        environ_or_baseurl = get_current_url(environ_or_baseurl,
                                             root_only=True)
    base_iri = uri_to_iri(environ_or_baseurl, errors=errors)
    base_scheme, base_netloc, base_path = urlsplit(base_iri)[:3]
    cur_scheme, cur_netloc, cur_path = urlsplit(urljoin(base_iri, path))[:3]

    # is that IRI even on a known HTTP scheme?
    if base_scheme not in http_schemes or cur_scheme not in http_schemes:
        return None
    if not collapse_http_schemes and base_scheme != cur_scheme:
        return None

    # are the (normalized) network locations compatible?
    base_netloc = _normalize_netloc(base_scheme, base_netloc)
    cur_netloc = _normalize_netloc(cur_scheme, cur_netloc)
    if base_netloc != cur_netloc:
        return None

    # are we below the application path?  The match has to end on a path
    # segment boundary, otherwise a base of '/app' would wrongly claim
    # '/application/...'.
    base_path = base_path.rstrip('/')
    if cur_path != base_path and not cur_path.startswith(base_path + '/'):
        return None

    return '/' + cur_path[len(base_path):].lstrip('/')
Example #16
0
 def test_iri_safe_quoting(self):
     """Escapes of reserved characters must survive a URI -> IRI -> URI trip."""
     # %2F, %25 and %3D stay quoted in the IRI; only %C3%B6 and the
     # punycode host are turned into unicode.
     quoted_uri = 'http://xn--f-1gaa.com/%2F%25?q=%C3%B6&x=%3D%25#%25'
     expected_iri = 'http://föö.com/%2F%25?q=ö&x=%3D%25#%25'
     self.assertEqual(urls.uri_to_iri(quoted_uri), expected_iri)
     self.assertEqual(
         urls.iri_to_uri(urls.uri_to_iri(quoted_uri)),
         quoted_uri
     )
Example #17
0
 def test_iri_safe_quoting(self):
     """Reserved-character escapes are kept when converting URI <-> IRI."""
     source = 'http://xn--f-1gaa.com/%2F%25?q=%C3%B6&x=%3D%25#%25'
     # Only the host and the UTF-8 escape %C3%B6 appear decoded here.
     wanted = 'http://föö.com/%2F%25?q=ö&x=%3D%25#%25'
     self.assertEqual(urls.uri_to_iri(source), wanted)
     # Converting back must give the exact starting URI.
     self.assertEqual(urls.iri_to_uri(urls.uri_to_iri(source)), source)