Example #1
0
def to_unicode(obj, encoding='utf-8', fallback='latin1', **decode_args):
    r"""
    Convert ``obj`` to text (``text_type``).

    Conversion rules, in order:

    * ``binary_type`` instances are decoded with ``encoding``; if that raises
      ``UnicodeDecodeError`` they are decoded with ``fallback`` instead.
    * ``text_type`` instances, and objects exposing the attribute named by
      ``text_type_magicmethod``, are converted with ``text_type(obj)``.
    * Anything else is first turned into bytes with ``binary_type(obj)`` and
      then decoded as described in the first rule.

    Any extra keyword arguments (``decode_args``) are forwarded to the
    ``text_type`` constructor on both decode attempts.

    Examples::

        >>> r(to_unicode(b'\xe1\x88\xb4'))
        u'\u1234'
        >>> r(to_unicode(b'\xff'))
        u'\xff'
        >>> r(to_unicode(u'\u1234'))
        u'\u1234'
        >>> r(to_unicode(Exception(u'\u1234')))
        u'\u1234'
        >>> r(to_unicode([42]))
        u'[42]'
    """

    # The bytes check must come first: on py3 ``bytes`` defines an unhelpful
    # "__str__", so it would otherwise be caught by the magic-method test
    # below (see the comments in ``to_str``).
    if isinstance(obj, binary_type):
        raw = obj
    elif isinstance(obj, text_type) or hasattr(obj, text_type_magicmethod):
        # Already text, or knows how to render itself as text.
        return text_type(obj)
    else:
        raw = binary_type(obj)

    try:
        decoded = text_type(raw, encoding, **decode_args)
    except UnicodeDecodeError:
        # Preferred encoding could not decode the bytes; retry with fallback.
        decoded = text_type(raw, fallback, **decode_args)
    return decoded
Example #2
0
def to_unicode(obj, encoding='utf-8', fallback='latin1', **decode_args):
    r"""
    Convert ``obj`` to text (``text_type``).

    Conversion rules, in order:

    * ``binary_type`` instances are decoded with ``encoding``; if that raises
      ``UnicodeDecodeError`` they are decoded with ``fallback`` instead.
    * ``text_type`` instances, and objects exposing the attribute named by
      ``text_type_magicmethod``, are converted with ``text_type(obj)``.
    * Anything else is first turned into bytes with ``binary_type(obj)`` and
      then decoded as described in the first rule.

    Any extra keyword arguments (``decode_args``) are forwarded to the
    ``text_type`` constructor on both decode attempts.

    Examples::

        >>> r(to_unicode(b'\xe1\x88\xb4'))
        u'\u1234'
        >>> r(to_unicode(b'\xff'))
        u'\xff'
        >>> r(to_unicode(u'\u1234'))
        u'\u1234'
        >>> r(to_unicode(Exception(u'\u1234')))
        u'\u1234'
        >>> r(to_unicode([42]))
        u'[42]'
    """

    # The bytes check must come first: on py3 ``bytes`` defines an unhelpful
    # "__str__", so it would otherwise be caught by the magic-method test
    # below (see the comments in ``to_str``).
    if isinstance(obj, binary_type):
        raw = obj
    elif isinstance(obj, text_type) or hasattr(obj, text_type_magicmethod):
        # Already text, or knows how to render itself as text.
        return text_type(obj)
    else:
        raw = binary_type(obj)

    try:
        decoded = text_type(raw, encoding, **decode_args)
    except UnicodeDecodeError:
        # Preferred encoding could not decode the bytes; retry with fallback.
        decoded = text_type(raw, fallback, **decode_args)
    return decoded
Example #3
0
def to_str(obj, encoding='utf-8', **encode_args):
    r"""
    Convert ``obj`` to a byte string (``binary_type``).

    Conversion rules, in order:

    * ``binary_type`` instances are returned unchanged.
    * ``text_type`` instances, and objects exposing the attribute named by
      ``text_type_magicmethod``, are converted with ``text_type(obj)`` and
      then encoded using ``encoding``; extra keyword arguments
      (``encode_args``) are forwarded to ``.encode()``.
    * Anything else is converted with ``binary_type(obj)``.

    For example::

        >>> some_str = b"\xff"
        >>> some_unicode = u"\u1234"
        >>> some_exception = Exception(u'Error: ' + some_unicode)
        >>> r(to_str(some_str))
        b'\xff'
        >>> r(to_str(some_unicode))
        b'\xe1\x88\xb4'
        >>> r(to_str(some_exception))
        b'Error: \xe1\x88\xb4'
        >>> r(to_str([42]))
        b'[42]'
    """
    # Bytes must be handled before anything else: on py3
    # ``b'x'.__str__()`` yields ``"b'x'"``, which is not what we want.
    if isinstance(obj, binary_type):
        return obj

    # Prefer going through text whenever the object supports it, because
    # ``str(obj)`` offers no way to choose an encoding -- eg,
    # ``str(Exception(u'\u1234'))`` will explode.
    has_text_form = (
        isinstance(obj, text_type) or hasattr(obj, text_type_magicmethod)
    )
    if not has_text_form:
        return binary_type(obj)

    # Original author's note: unicode(u'foo') is O(1) (by experimentation).
    as_text = text_type(obj)
    return as_text.encode(encoding, **encode_args)
Example #4
0
def to_str(obj, encoding='utf-8', **encode_args):
    r"""
    Convert ``obj`` to a byte string (``binary_type``).

    Conversion rules, in order:

    * ``binary_type`` instances are returned unchanged.
    * ``text_type`` instances, and objects exposing the attribute named by
      ``text_type_magicmethod``, are converted with ``text_type(obj)`` and
      then encoded using ``encoding``; extra keyword arguments
      (``encode_args``) are forwarded to ``.encode()``.
    * Anything else is converted with ``binary_type(obj)``.

    For example::

        >>> some_str = b"\xff"
        >>> some_unicode = u"\u1234"
        >>> some_exception = Exception(u'Error: ' + some_unicode)
        >>> r(to_str(some_str))
        b'\xff'
        >>> r(to_str(some_unicode))
        b'\xe1\x88\xb4'
        >>> r(to_str(some_exception))
        b'Error: \xe1\x88\xb4'
        >>> r(to_str([42]))
        b'[42]'
    """
    # Bytes must be handled before anything else: on py3
    # ``b'x'.__str__()`` yields ``"b'x'"``, which is not what we want.
    if isinstance(obj, binary_type):
        return obj

    # Prefer going through text whenever the object supports it, because
    # ``str(obj)`` offers no way to choose an encoding -- eg,
    # ``str(Exception(u'\u1234'))`` will explode.
    has_text_form = (
        isinstance(obj, text_type) or hasattr(obj, text_type_magicmethod)
    )
    if not has_text_form:
        return binary_type(obj)

    # Original author's note: unicode(u'foo') is O(1) (by experimentation).
    as_text = text_type(obj)
    return as_text.encode(encoding, **encode_args)