def _convertText(self, text):
        """Decode the byte string *text* into a Unicode string.

        If ``self._charset`` is empty, the charset is first guessed from
        *text* with guessBytesCharset() (FALLBACK_CHARSET being the default
        of the guess) and stored on the instance.

        Decoding steps, in order:
          1. strict decoding with ``self._charset``;
          2. if that failed because of truncated data at the very end of a
             UTF-16-LE string, strict decoding again with one nul byte
             appended (a warning is emitted on success);
          3. otherwise a warning is emitted and *text* is decoded strictly
             with FALLBACK_CHARSET.

        The last step is not guarded: a UnicodeDecodeError raised by it
        propagates to the caller.
        """
        if not self._charset:
            # First use: no charset known yet, guess one and remember it
            self._charset = guessBytesCharset(text, default=FALLBACK_CHARSET)

        # Step 1: plain strict decoding
        try:
            return unicode(text, self._charset, "strict")
        except UnicodeDecodeError as first_error:
            failure = first_error

        # Step 2: a UTF-16 string like 'B\0e' (3 bytes) lacks its final
        # nul byte; 'B\0e\0' (4 bytes) is decodable
        is_truncated_utf16 = (
            failure.reason == "truncated data"
            and failure.end == len(text)
            and self._charset == "UTF-16-LE"
        )
        if is_truncated_utf16:
            try:
                repaired = unicode(text + "\0", self._charset, "strict")
                self.warning("Fix truncated %s string: add missing nul byte" % self._charset)
                return repaired
            except UnicodeDecodeError as retry_error:
                # Report the most recent failure in the warning below
                failure = retry_error

        # Step 3: give up on the detected charset and use the fallback
        self.warning(u"Unable to convert string to Unicode: %s" % failure)
        return unicode(text, FALLBACK_CHARSET, "strict")
    def _convertText(self, text):
        """Decode the byte string *text* into a Unicode string.

        When ``self._charset`` is empty, the charset is guessed from *text*
        with guessBytesCharset() (FALLBACK_CHARSET is the default of the
        guess) and stored on the instance before decoding.

        NOTE(review): this excerpt stops right after the first decoding
        attempt. As written, a UnicodeDecodeError is swallowed and the
        method returns None; the remaining error handling is not visible
        here -- confirm against the complete source.
        """
        if not self._charset:
            # charset is still unknown: guess the charset
            self._charset = guessBytesCharset(text, default=FALLBACK_CHARSET)

        # Try to convert to Unicode
        try:
            return unicode(text, self._charset, "strict")
        except UnicodeDecodeError as err:
            # "as" is the exception-binding form accepted by Python 2.6+
            # and Python 3 alike
            pass
Beispiel #3
0
    def _convertText(self, text):
        """Convert the byte string *text* to Unicode using the known charset.

        An empty ``self._charset`` is filled in first: the charset is
        guessed from *text* by guessBytesCharset(), with FALLBACK_CHARSET
        as the default of the guess, and kept on the instance.

        NOTE(review): the excerpt ends after the first strict decoding
        attempt. In the visible code a UnicodeDecodeError is caught and
        ignored, so the method returns None on failure; whatever recovery
        follows in the full source is not shown -- verify before relying
        on this.
        """
        if not self._charset:
            # charset is still unknown: guess the charset
            self._charset = guessBytesCharset(text, default=FALLBACK_CHARSET)

        # Try to convert to Unicode
        try:
            return unicode(text, self._charset, "strict")
        except UnicodeDecodeError as err:
            # Exception bound with "as": valid on Python 2.6+ and Python 3
            pass
Beispiel #4
0
    def _convertText(self, text, charset):
        """Decode the byte string *text* into Unicode with *charset*.

        If *charset* is empty, a charset is guessed from *text* with
        guessBytesCharset(); if the guess yields nothing either, *text* is
        decoded strictly with FALLBACK_CHARSET and returned immediately.
        Otherwise *text* is decoded strictly with the given or guessed
        charset.

        NOTE(review): this excerpt ends after the first decoding attempt.
        As written, a UnicodeDecodeError is swallowed and the method
        returns None; the rest of the error handling is not visible here
        -- confirm against the complete source.
        """
        # No charset given: try to guess one, and only use the fallback
        # charset when the guess fails too
        if not charset:
            charset = guessBytesCharset(text, default=None)
            if not charset:
                return unicode(text, FALLBACK_CHARSET, "strict")

        # Try to convert to Unicode
        try:
            return unicode(text, charset, "strict")
        except UnicodeDecodeError as err:
            # "as" is the exception-binding form accepted by Python 2.6+
            # and Python 3 alike
            pass
def guessStreamCharset(stream, address, size, default=None):
    """Guess the charset of a region of *stream*.

    *size* is capped at 1024 * 8, divided by 8 (presumably a bit count
    turned into a byte count -- TODO confirm the unit with the stream
    API) and that many units are read at *address* through
    ``stream.readBytes()``. The data read is handed to
    guessBytesCharset() together with *default*, and its result is
    returned unchanged.
    """
    capped_size = min(size, 1024 * 8)
    sample = stream.readBytes(address, capped_size // 8)
    return guessBytesCharset(sample, default)
Beispiel #6
0
def guessStreamCharset(stream, address, size, default=None):
    """Return the charset guessed from a sample read out of *stream*.

    The sample starts at *address*; its length is ``min(size, 1024 * 8)``
    divided by 8 (this looks like a bits-to-bytes conversion -- verify
    against the stream API). guessBytesCharset() does the actual
    guessing and receives *default* as its second argument.
    """
    # Never sample more than 1024 * 8, whatever the requested size
    sample_size = min(size, 1024 * 8)
    raw = stream.readBytes(address, sample_size // 8)
    return guessBytesCharset(raw, default)
 def _guessCharset(self):
     """Guess the charset of this object's content.

     Reads ``self._content_size`` units from the parent's stream,
     starting at ``self.absolute_address`` plus ``self._content_offset``
     multiplied by 8 (presumably a byte offset converted to bits -- TODO
     confirm), and returns what guessBytesCharset() makes of that data,
     with FALLBACK_CHARSET as the default.
     """
     base = self.absolute_address
     start = base + self._content_offset * 8
     source = self._parent.stream
     sample = source.readBytes(start, self._content_size)
     return guessBytesCharset(sample, default=FALLBACK_CHARSET)
Beispiel #8
0
 def _guessCharset(self):
     """Return the charset guessed from the content read off the parent stream.

     The read starts at ``self.absolute_address + self._content_offset * 8``
     (the factor 8 suggests a byte-to-bit conversion -- verify against the
     stream API) and covers ``self._content_size`` units. The guess is
     delegated to guessBytesCharset(), which gets FALLBACK_CHARSET as its
     default.
     """
     content_address = self.absolute_address + self._content_offset * 8
     parent_stream = self._parent.stream
     content = parent_stream.readBytes(content_address, self._content_size)
     return guessBytesCharset(content, default=FALLBACK_CHARSET)