Ejemplo n.º 1
0
    def _encode_field(self, value, param="field"):
        """convert field to internal representation.

        internal representation is always bytes. byte strings are left as-is,
        unicode strings encoding using file's default encoding (or ``utf-8``
        if no encoding has been specified).

        :raises UnicodeEncodeError:
            if unicode value cannot be encoded using default encoding.

        :raises ValueError:
            if resulting byte string contains a forbidden character,
            or is too long (>255 bytes).

        :returns:
            encoded identifer as bytes
        """
        if isinstance(value, unicode):
            value = value.encode(self.encoding)
        elif not isinstance(value, bytes):
            raise ExpectedStringError(value, param)
        if len(value) > 255:
            raise ValueError("%s must be at most 255 characters: %r" %
                             (param, value))
        if any(c in _INVALID_FIELD_CHARS for c in value):
            raise ValueError("%s contains invalid characters: %r" % (
                param,
                value,
            ))
        return value
Ejemplo n.º 2
0
def to_unicode(source, encoding="utf-8", param="value"):
    """Helper to normalize input to unicode.

    :arg source:
        source bytes/unicode to process.

    :arg encoding:
        encoding to use when decoding bytes instances.

    :param param:
        optional name of variable/noun to reference when raising errors.

    :raises TypeError: if source is not unicode or bytes.

    :returns:
        * returns unicode strings unchanged.
        * returns bytes strings decoded using *encoding*
    """
    assert encoding
    if isinstance(source, unicode):
        return source
    elif isinstance(source, bytes):
        return source.decode(encoding)
    else:
        raise ExpectedStringError(source, param)
Ejemplo n.º 3
0
 def to_native_str(source, encoding="utf-8", param="value"):
     if isinstance(source, bytes):
         return source
     elif isinstance(source, unicode):
         return source.encode(encoding)
     else:
         raise ExpectedStringError(source, param)
Ejemplo n.º 4
0
def to_bytes(source, encoding="utf-8", param="value", source_encoding=None):
    """Helper to normalize input to bytes.

    :arg source:
        Source bytes/unicode to process.

    :arg encoding:
        Target encoding (defaults to ``"utf-8"``).

    :param param:
        Optional name of variable/noun to reference when raising errors

    :param source_encoding:
        If this is specified, and the source is bytes,
        the source will be transcoded from *source_encoding* to *encoding*
        (via unicode).

    :raises TypeError: if source is not unicode or bytes.

    :returns:
        * unicode strings will be encoded using *encoding*, and returned.
        * if *source_encoding* is not specified, byte strings will be
          returned unchanged.
        * if *source_encoding* is specified, byte strings will be transcoded
          to *encoding*.
    """
    assert encoding
    if isinstance(source, bytes):
        if source_encoding and not is_same_codec(source_encoding, encoding):
            return source.decode(source_encoding).encode(encoding)
        else:
            return source
    elif isinstance(source, unicode):
        return source.encode(encoding)
    else:
        raise ExpectedStringError(source, param)