Example #1
0
def stringify(
    value: Any, encoding_default: str = DEFAULT_ENCODING, encoding: Optional[str] = None
) -> Optional[str]:
    """Coerce an arbitrary object into text, or return ``None``.

    Conversions are tried from the most specific to the most generic:

    * ``None`` is returned unchanged.
    * ``str`` values are handed to ``_clean_empty``.
    * ``date``/``datetime`` values become their ISO 8601 form.
    * ``float``/``Decimal`` values are rendered via
      ``Decimal.to_eng_string``.
    * ``bytes`` are decoded (undecodable bytes are replaced), stripped of
      unsafe characters and handed to ``_clean_empty``.
    * Anything else goes through ``str()`` and ``_clean_empty``.

    :param value: the object to convert.
    :param encoding_default: fallback passed to ``guess_encoding`` when the
        encoding of a ``bytes`` value has to be detected.
    :param encoding: explicit encoding for ``bytes`` values; detection is
        skipped when it is given.
    :return: the text form of ``value``, or ``None``.
    """
    if value is None:
        return None

    if isinstance(value, str):
        # presumably _clean_empty maps blank strings to None -- see helper
        return _clean_empty(value)

    if isinstance(value, (date, datetime)):
        return value.isoformat()

    if isinstance(value, (float, Decimal)):
        return Decimal(value).to_eng_string()

    if isinstance(value, bytes):
        codec = encoding
        if codec is None:
            # Only guess when the caller did not name an encoding.
            codec = guess_encoding(value, default=encoding_default)
        decoded = remove_unsafe_chars(value.decode(codec, "replace"))
        return None if decoded is None else _clean_empty(decoded)

    # Last resort: rely on the object's own string conversion.
    return _clean_empty(str(value))
Example #2
0
def stringify(value, encoding_default='utf-8', encoding=None):
    """Coerce an arbitrary object into a text string, or return ``None``.

    ``date``/``datetime`` values are returned in ISO 8601 form and
    ``float``/``Decimal`` values via ``Decimal.to_eng_string``; both are
    returned as-is, without the whitespace stripping below. Byte strings
    are decoded (using ``encoding`` when given, otherwise an encoding
    guessed with ``encoding_default`` as fallback; undecodable bytes are
    replaced) and then passed through ``remove_byte_order_mark`` and
    ``remove_unsafe_chars``. Every other non-text object is converted with
    ``six.text_type``.

    Text results are stripped of surrounding whitespace; an empty result
    is reported as ``None``.
    """
    if value is None:
        return None

    if isinstance(value, six.text_type):
        text = value
    elif isinstance(value, (date, datetime)):
        return value.isoformat()
    elif isinstance(value, (float, Decimal)):
        return Decimal(value).to_eng_string()
    elif isinstance(value, six.binary_type):
        codec = encoding
        if codec is None:
            # No explicit encoding supplied: detect one from the bytes.
            codec = guess_encoding(value, default=encoding_default)
        text = value.decode(codec, 'replace')
        text = remove_byte_order_mark(text)
        text = remove_unsafe_chars(text)
    else:
        text = six.text_type(value)

    # XXX: is this really a good idea?
    stripped = text.strip()
    return stripped if stripped else None
Example #3
0
 def _convert_value(self, value, table, column):
     """Normalise a single cell value according to its column type.

     For date/datetime columns the all-zero placeholders
     ``'0000-00-00 00:00:00'`` and ``'0000-00-00'`` are turned into
     ``None``. For string/unicode columns, ``str`` values are passed
     through ``remove_unsafe_chars``. All other values are returned
     untouched. ``table`` is not used by this method.
     """
     column_type = column.type

     is_temporal = isinstance(column_type, (types.DateTime, types.Date))
     if is_temporal and value in ('0000-00-00 00:00:00', '0000-00-00'):
         # presumably a database "zero date" placeholder -- treat as missing
         value = None

     is_textual = isinstance(column_type, (types.String, types.Unicode))
     if is_textual and isinstance(value, str):
         value = remove_unsafe_chars(value)

     return value
Example #4
0
def sanitize_text(text, encoding=DEFAULT_ENCODING):
    """Convert ``text`` to a string that survives a round trip in ``encoding``.

    The input is stringified (``encoding`` is used as the default when byte
    input has to be decoded) and stripped of unsafe characters. If nothing
    is left, ``None`` is returned. Otherwise the text is encoded to
    ``encoding`` with unrepresentable characters replaced, and decoded back.
    """
    cleaned = remove_unsafe_chars(stringify(text, encoding_default=encoding))
    if cleaned is None:
        return None
    # Encode with 'replace' so characters the codec cannot represent are
    # substituted; the strict decode then yields the sanitised text.
    encoded = cleaned.encode(encoding, 'replace')
    return encoded.decode(encoding, 'strict')
Example #5
0
def safe_string(data, encoding_default='utf-8', encoding=None):
    """Stringify ``data`` and round-trip it through ``encoding_default``.

    ``encoding`` and ``encoding_default`` are forwarded to ``stringify``.
    The result is stripped of unsafe characters; if nothing is left,
    ``None`` is returned. Otherwise the text is encoded to
    ``encoding_default`` (unrepresentable characters are replaced) and
    decoded back.
    """
    text = remove_unsafe_chars(
        stringify(data, encoding_default=encoding_default, encoding=encoding)
    )
    if text is None:
        return None
    # The round trip always uses encoding_default, never the source
    # ``encoding`` that was used to decode byte input.
    as_bytes = text.encode(encoding_default, 'replace')
    return as_bytes.decode(encoding_default, 'strict')
Example #6
0
def sanitize_text(text, encoding=DEFAULT_ENCODING):
    """Return ``text`` as NFC-composed, safe text, or ``None``.

    The input is stringified (``encoding`` is the default used when byte
    input has to be decoded), composed to NFC, stripped of unsafe
    characters and finally round-tripped through ``DEFAULT_ENCODING`` with
    unrepresentable characters replaced.

    :param text: any object accepted by ``stringify``.
    :param encoding: default encoding handed to ``stringify``.
    :return: the sanitised string, or ``None`` when the input is empty,
        NFC composition fails, or nothing remains after removing unsafe
        characters.
    """
    text = stringify(text, encoding_default=encoding)
    if text is None:
        return None
    try:
        text = compose_nfc(text)
    except Exception as ex:  # SystemError is a subclass of Exception
        log.warning("Cannot NFC text: %s", ex)
        return None
    text = remove_unsafe_chars(text)
    if text is None:
        # Other callers of remove_unsafe_chars in this code base check for
        # a None result; without this guard the encode below would raise
        # AttributeError.
        return None
    encoded = text.encode(DEFAULT_ENCODING, "replace")
    return encoded.decode(DEFAULT_ENCODING, "replace")
Example #7
0
def sanitize_text(text: Any, encoding: str = DEFAULT_ENCODING) -> Optional[str]:
    """Return ``text`` as NFC-composed, safe text, or ``None``.

    Stages, each of which may end the function with ``None``:

    1. ``stringify`` the input (``encoding`` is its default encoding).
    2. Compose to NFC; a failure is logged as a warning.
    3. Remove unsafe characters.
    4. Round-trip through ``DEFAULT_ENCODING``, replacing anything the
       codec cannot represent.
    """
    as_text = stringify(text, encoding_default=encoding)
    if as_text is None:
        return None

    try:
        composed = compose_nfc(as_text)
    except Exception as ex:  # SystemError is a subclass of Exception
        log.warning("Cannot NFC text: %s", ex)
        return None

    safe = remove_unsafe_chars(composed)
    if safe is None:
        return None

    round_tripped = safe.encode(DEFAULT_ENCODING, "replace").decode(
        DEFAULT_ENCODING, "replace"
    )
    return cast(str, round_tripped)
Example #8
0
    def _getStringStream(self, filename):
        """Return the string stored in the stream named by ``filename``.

        The Unicode representation (stream suffix ``001F``) is tried
        first. When it yields nothing, the 8-bit representation (stream
        suffix ``001E``) is read and decoded: first as UTF-7 and, if that
        fails, with a guessed encoding (undecodable bytes are replaced).

        :param filename: stream name without the type suffix, either as a
            string or as a list of path segments that are joined with ``/``.
        :return: the decoded text with unsafe characters removed, or
            ``None`` when no non-empty value could be read.
        """
        if isinstance(filename, list):
            # Join with slashes to make it easier to append the type
            filename = "/".join(filename)

        value = windowsUnicode(self._getStream(filename + '001F'))
        if value is None:
            raw = self._getStream(filename + '001E')
            if raw is None:
                # NOTE(review): assumes _getStream returns None for a
                # missing stream -- confirm. Without this guard the
                # fallback below would call ``None.decode``.
                return None
            try:
                value = decode_utf7(raw)
            except Exception:
                # Best effort: fall back to a detected encoding.
                encoding = guess_encoding(raw)
                value = raw.decode(encoding, 'replace')

        if value is None or not len(value):
            return None
        return remove_unsafe_chars(value)