def uri_to_iri(uri, charset='utf-8', errors='replace'):
    r"""Converts a URI in a given charset to a IRI.

    Examples for URI versus IRI

    >>> uri_to_iri('http://xn--n3h.net/')
    u'http://\u2603.net/'

    >>> uri_to_iri('http://%C3%BCser:p%C3%A4ssword@xn--n3h.net/p%C3%A5th')
    u'http://\xfcser:p\xe4ssword@\u2603.net/p\xe5th'

    Query strings are left unchanged:

    >>> uri_to_iri('/?foo=24&x=%26%2f')
    u'/?foo=24&x=%26%2f'

    .. versionadded:: 0.6

    :param uri: the URI to convert
    :param charset: the charset of the URI
    :param errors: the error handling on decode
    """
    # Normalize the URI first so splitting sees well-formed percent escapes.
    uri = url_fix(str(uri), charset)
    scheme, auth, hostname, port, path, query, fragment = _uri_split(uri)
    # The scheme is ASCII by definition; decode it with the caller's
    # error handling.
    scheme = _decode_unicode(scheme, 'ascii', errors)
    try:
        hostname = hostname.decode('idna')
    except UnicodeError:
        # dammit, that codec raised an error.  Because it does not support
        # any error handling we have to fake it....  badly
        if errors not in ('ignore', 'replace'):
            raise
        hostname = hostname.decode('ascii', errors)
    if auth:
        # The userinfo component may carry a ``user:password`` pair;
        # decode the two halves separately so the ':' separator survives.
        if ':' in auth:
            auth, password = auth.split(':', 1)
        else:
            password = None
        auth = _decode_unicode(_unquote(auth), charset, errors)
        if password:
            auth += u':' + _decode_unicode(_unquote(password),
                                           charset, errors)
        hostname = auth + u'@' + hostname
    if port:
        # port should be numeric, but you never know...
        hostname += u':' + port.decode(charset, errors)
    # Unquote path and query but keep their structural delimiters
    # ('/', ';', '?', and the query sub-delimiters) percent-encoded
    # boundaries intact.
    path = _decode_unicode(_unquote(path, '/;?'), charset, errors)
    query = _decode_unicode(_unquote(query, ';/?:@&=+,$'),
                            charset, errors)
    return urlparse.urlunsplit([scheme, hostname, path, query, fragment])
def from_file(cls, file, charset='utf-8', errors='strict',
              unicode_mode=True):
    """Load a template from a file.

    .. versionchanged:: 0.5
       The encoding parameter was renamed to charset.

    :param file: a filename or file object to load the template from.
    :param charset: the charset of the template to load.
    :param errors: the error behavior of the charset decoding.
    :param unicode_mode: set to `False` to disable unicode mode.
    :return: a template
    """
    if isinstance(file, basestring):
        # We opened the file ourselves, so we are responsible for
        # closing it again.
        f = open(file, 'r')
        try:
            data = _decode_unicode(f.read(), charset, errors)
        finally:
            f.close()
    else:
        # A file-like object was passed in; leave closing to the caller.
        f = file
        data = _decode_unicode(f.read(), charset, errors)
    name = getattr(f, 'name', '<template>')
    return cls(data, name, charset, errors, unicode_mode)
def start_file_streaming(self, filename, headers, total_content_length):
    """Set up a writable container for an uploaded file part.

    Decodes and sanitizes the filename, then asks the stream factory
    for a stream the part's payload can be written into.

    :param filename: the raw filename from the part headers.
    :param headers: the (lower-cased, hyphenated) part headers.
    :param total_content_length: the content length of the whole request.
    :return: a ``(filename, container)`` tuple where `container` is the
             writable stream produced by :attr:`stream_factory`.
    """
    filename = _decode_unicode(filename, self.charset, self.errors)
    filename = self._fix_ie_filename(filename)
    # BUG FIX: headers are keyed with hyphens ('content-length',
    # 'content-disposition' elsewhere in this file); the previous
    # 'content_type' lookup always returned None.
    content_type = headers.get('content-type')
    try:
        content_length = int(headers['content-length'])
    except (KeyError, ValueError):
        # missing or malformed length: fall back to zero rather than fail
        content_length = 0
    container = self.stream_factory(total_content_length, content_type,
                                    filename, content_length)
    return filename, container
def __init__(self, source, filename='<template>', charset='utf-8',
             errors='strict', unicode_mode=True):
    """Compile *source* into a template.

    :param source: the template source as a byte or unicode string.
    :param filename: the name used in error messages and code objects.
    :param charset: charset used to decode byte-string sources.
    :param errors: error handling for the charset decoding.
    :param unicode_mode: set to `False` to disable unicode mode.
    """
    # Byte strings are decoded first; filenames must be byte strings
    # for the code generator.
    if isinstance(source, str):
        source = _decode_unicode(source, charset, errors)
    if isinstance(filename, unicode):
        filename = filename.encode('utf-8')
    # Normalize all line endings to '\n' before tokenizing.
    normalized = u'\n'.join(source.splitlines())
    node = Parser(tokenize(normalized, filename), filename).parse()
    self.code = TemplateCodeGenerator(node, filename).getCode()
    self.filename = filename
    self.charset = charset
    self.errors = errors
    self.unicode_mode = unicode_mode
def url_unquote(s, charset='utf-8', errors='replace'):
    """URL decode a single string with a given decoding.

    Per default encoding errors are ignored.  If you want a different
    behavior you can set `errors` to ``'replace'`` or ``'strict'``.
    In strict mode a `HTTPUnicodeError` is raised.

    :param s: the string to unquote.
    :param charset: the charset to be used.
    :param errors: the error handling for the charset decoding.
    """
    # Work on bytes: encode unicode input first so percent escapes
    # resolve to raw bytes before decoding.
    if isinstance(s, unicode):
        s = s.encode(charset)
    unquoted = _unquote(s)
    return _decode_unicode(unquoted, charset, errors)
def _url_decode_impl(pair_iter, charset, decode_keys, include_empty, errors):
    """Yield decoded ``(key, value)`` pairs from raw query-string pairs.

    Pairs without an ``'='`` are skipped unless `include_empty` is true,
    in which case they yield an empty value.  Keys are only decoded to
    unicode when `decode_keys` is set.
    """
    for raw in pair_iter:
        if not raw:
            continue
        key, eq, value = raw.partition('=')
        if not eq:
            # no '=' at all: the whole pair is the key
            if not include_empty:
                continue
            value = ''
        key = _unquote_plus(key)
        if decode_keys:
            key = _decode_unicode(key, charset, errors)
        yield key, url_unquote_plus(value, charset, errors)
def parse_cookie(header, charset='utf-8', errors='replace', cls=None):
    """Parse a cookie.  Either from a string or WSGI environ.

    Per default encoding errors are ignored.  If you want a different
    behavior you can set `errors` to ``'replace'`` or ``'strict'``.
    In strict mode a :exc:`HTTPUnicodeError` is raised.

    .. versionchanged:: 0.5
       This function now returns a :class:`TypeConversionDict` instead
       of a regular dict.  The `cls` parameter was added.

    :param header: the header to be used to parse the cookie.
                   Alternatively this can be a WSGI environment.
    :param charset: the charset for the cookie values.
    :param errors: the error behavior for the charset decoding.
    :param cls: an optional dict class to use.  If this is not specified
                or `None` the default :class:`TypeConversionDict` is used.
    """
    # A dict is treated as a WSGI environ and the raw header extracted.
    if isinstance(header, dict):
        header = header.get('HTTP_COOKIE', '')
    if cls is None:
        cls = TypeConversionDict
    jar = _ExtendedCookie()
    jar.load(header)
    # The extended morsel/cookie classes turn CookieErrors into `None`
    # values; skip those broken items while decoding the rest.
    decoded = {}
    for key, morsel in jar.iteritems():
        if morsel.value is None:
            continue
        decoded[key] = _decode_unicode(
            unquote_header_value(morsel.value), charset, errors)
    return cls(decoded)
def parse(self, file, boundary, content_length):
    # Parse a multipart body: returns ``(form, files)`` mappings built
    # from the parts delimited by *boundary*.
    next_part = '--' + boundary
    last_part = next_part + '--'
    form = []
    files = []
    in_memory = 0

    # Append a sentinel empty-string iterator so running off the end of
    # the input is detectable inside the loop below.
    iterator = chain(make_line_iter(file, limit=content_length,
                                    buffer_size=self.buffer_size),
                     _empty_string_iter)

    terminator = self._find_terminator(iterator)
    if terminator != next_part:
        self.fail('Expected boundary at start of multipart data')

    while terminator != last_part:
        headers = parse_multipart_headers(iterator)
        disposition = headers.get('content-disposition')
        if disposition is None:
            self.fail('Missing Content-Disposition header')
        disposition, extra = parse_options_header(disposition)
        transfer_encoding = self.get_part_encoding(headers)
        name = extra.get('name')
        filename = extra.get('filename')
        part_charset = self.get_part_charset(headers)

        # if no content type is given we stream into memory.  A list is
        # used as a temporary container.
        if filename is None:
            is_file = False
            container = []
            _write = container.append
            guard_memory = self.max_form_memory_size is not None

        # otherwise we parse the rest of the headers and ask the stream
        # factory for something we can write in.
        else:
            is_file = True
            guard_memory = False
            filename, container = self.start_file_streaming(
                filename, headers, content_length)
            _write = container.write

        buf = ''
        for line in iterator:
            if not line:
                # hit the sentinel: the input ended before a boundary
                self.fail('unexpected end of stream')

            if line[:2] == '--':
                terminator = line.rstrip()
                if terminator in (next_part, last_part):
                    break

            if transfer_encoding is not None:
                try:
                    line = line.decode(transfer_encoding)
                except Exception:
                    self.fail('could not decode transfer encoded chunk')

            # we have something in the buffer from the last iteration.
            # this is usually a newline delimiter.
            if buf:
                _write(buf)
                buf = ''

            # If the line ends with windows CRLF we write everything except
            # the last two bytes.  In all other cases however we write
            # everything except the last byte.  If it was a newline, that's
            # fine, otherwise it does not matter because we will write it
            # the next iteration.  this ensures we do not write the
            # final newline into the stream.  That way we do not have to
            # truncate the stream.  However we do have to make sure that
            # if something else than a newline is in there we write it
            # out.
            if line[-2:] == '\r\n':
                buf = '\r\n'
                cutoff = -2
            else:
                buf = line[-1]
                cutoff = -1
            _write(line[:cutoff])

            # if we write into memory and there is a memory size limit we
            # count the number of bytes in memory and raise an exception if
            # there is too much data in memory.
            if guard_memory:
                in_memory += len(line)
                if in_memory > self.max_form_memory_size:
                    self.in_memory_threshold_reached(in_memory)
        else:  # pragma: no cover
            # the for-loop ran off the iterator without ever breaking on
            # a boundary line
            raise ValueError('unexpected end of part')

        # if we have a leftover in the buffer that is not a newline
        # character we have to flush it, otherwise we will chop of
        # certain values.
        if buf not in ('', '\r', '\n', '\r\n'):
            _write(buf)

        if is_file:
            container.seek(0)
            files.append((name, FileStorage(container, filename, name,
                                            headers=headers)))
        else:
            form.append((name, _decode_unicode(''.join(container),
                                               part_charset, self.errors)))

    return self.cls(form), self.cls(files)
def to_unicode(self, value):
    """Coerce *value* to unicode, decoding byte strings with the
    configured charset and error handling."""
    if not isinstance(value, str):
        return unicode(value)
    return _decode_unicode(value, self.charset, self.errors)