Example #1
0
def iri_to_uri(iri):
    """
    Convert an Internationalized Resource Identifier (IRI) portion to a URI
    portion that is suitable for inclusion in a URL.

    This is the algorithm from section 3.1 of RFC 3987.  However, since we are
    assuming input is either UTF-8 or unicode already, we can simplify things a
    little from the full method.

    Takes an IRI in UTF-8 bytes (e.g. '/I \xe2\x99\xa5 Django/') or unicode
    (e.g. '/I ♥ Django/') and returns ASCII bytes containing the encoded result
    (e.g. '/I%20%E2%99%A5%20Django/').
    """
    # The list of safe characters here is constructed from the "reserved" and
    # "unreserved" characters specified in sections 2.2 and 2.3 of RFC 3986:
    #     reserved    = gen-delims / sub-delims
    #     gen-delims  = ":" / "/" / "?" / "#" / "[" / "]" / "@"
    #     sub-delims  = "!" / "$" / "&" / "'" / "(" / ")"
    #                   / "*" / "+" / "," / ";" / "="
    #     unreserved  = ALPHA / DIGIT / "-" / "." / "_" / "~"
    # Of the unreserved characters, urllib.note already considers all but
    # the ~ safe.
    # The % character is also added to the list of safe characters here, as the
    # end of section 3.1 of RFC 3987 specifically mentions that % must not be
    # converted.
    if iri is None:
        return iri
    return note(force_bytes(iri), safe=b"/#%[]=:;$&()+,!?*@'~")
Example #2
0
def urlnote(url, safe='/'):
    """
    A version of Python's urllib.note() function that can operate on unicode
    strings. The url is first UTF-8 encoded before quoting. The returned string
    can safely be used as part of an argument to a subsequent iri_to_uri() call
    without double-quoting occurring.
    """
    return force_text(note(force_str(url), force_str(safe)))
 def unnote_note(segment):
     segment = unnote(force_str(segment))
     # Tilde is part of RFC3986 Unreserved Characters
     # http://tools.ietf.org/html/rfc3986#section-2.3
     # See also http://bugs.python.org/issue16285
     segment = note(segment,
                    safe=RFC3986_SUBDELIMS + RFC3986_GENDELIMS + str('~'))
     return force_text(segment)
Example #4
0
def repercent_broken_unicode(path):
    """
    As per section 3.2 of RFC 3987, step three of converting a URI into an IRI,
    we need to re-percent-encode any octet produced that is not part of a
    strictly legal UTF-8 octet sequence.
    """
    try:
        path.decode('utf-8')
    except UnicodeDecodeError as e:
        repercent = note(path[e.start:e.end], safe=b"/#%[]=:;$&()+,!?*@'~")
        path = repercent_broken_unicode(path[:e.start] +
                                        force_bytes(repercent) + path[e.end:])
    return path
Example #5
0
def escape_uri_path(path):
    """
    Escape the unsafe characters from the path portion of a Uniform Resource
    Identifier (URI).
    """
    # These are the "reserved" and "unreserved" characters specified in
    # sections 2.2 and 2.3 of RFC 2396:
    #   reserved    = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | "$" | ","
    #   unreserved  = alphanum | mark
    #   mark        = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")"
    # The list of safe characters here is constructed subtracting ";", "=",
    # and "?" according to section 3.3 of RFC 2396.
    # The reason for not subtracting and escaping "/" is that we are escaping
    # the entire path, not a path segment.
    return note(force_bytes(path), safe=b"/:@&+$,-_.!~*'()")
Example #6
0
def filepath_to_uri(path):
    """Convert a file system path to a URI portion that is suitable for
    inclusion in a URL.

    We are assuming input is either UTF-8 or unicode already.

    This method will encode certain chars that would normally be recognized as
    special chars for URIs.  Note that this method does not encode the '
    character, as it is a valid character within URIs.  See
    encodeURIComponent() JavaScript function for more details.

    Returns an ASCII string containing the encoded result.
    """
    if path is None:
        return path
    # I know about `os.sep` and `os.altsep` but I want to leave
    # some flexibility for hardcoding separators.
    return note(force_bytes(path).replace(b"\\", b"/"), safe=b"/~!*()'")
Example #7
0
 def handle_simple(cls, path):
     if apps.is_installed('django.contrib.staticfiles'):
         from django.contrib.staticfiles.storage import staticfiles_storage
         return staticfiles_storage.url(path)
     else:
         return urljoin(PrefixNode.handle_simple("STATIC_URL"), note(path))
Example #8
0
 def encode(k, v):
     return '%s=%s' % ((note(k, safe), note(v, safe)))