Example #1
0
    def _convertText(self, text):
        """Decode the raw bytes ``text`` into a Unicode string.

        The charset stored in ``self._charset`` is used; when it is not set
        yet, it is guessed from ``text`` (defaulting to FALLBACK_CHARSET) and
        stored on the instance for later calls.

        Decoding is strict. If it fails because a UTF-16-LE string is missing
        its final byte, one nul byte is appended and decoding is retried.
        Any other failure is reported through ``self.warning`` and the bytes
        are decoded with FALLBACK_CHARSET instead.

        Returns the decoded ``str``.
        """
        if not self._charset:
            # Nothing recorded yet: guess from the data and remember it.
            self._charset = guessBytesCharset(text, default=FALLBACK_CHARSET)

        try:
            return str(text, self._charset, "strict")
        except UnicodeDecodeError as exc:
            # The "as" target is unbound once the clause ends, so keep the
            # error under another name for the recovery logic below.
            decode_error = exc

        # A truncated UTF-16 string such as 'B\0e' (3 bytes) fails right at
        # the end of the input; padding it to 'B\0e\0' (4 bytes) repairs it.
        looks_truncated = (
            decode_error.reason == "truncated data"
            and decode_error.end == len(text)
            and self._charset == "UTF-16-LE"
        )
        if looks_truncated:
            try:
                padded = str(text + b"\0", self._charset, "strict")
                self.warning("Fix truncated %s string: add missing nul byte" %
                             self._charset)
                return padded
            except UnicodeDecodeError:
                # Padding did not help: continue with the fallback charset.
                pass

        # Last resort: report the original error and use FALLBACK_CHARSET.
        self.warning("Unable to convert string to Unicode: %s" % decode_error)
        return str(text, FALLBACK_CHARSET, "strict")
Example #2
0
    def _convertText(self, text):
        """Decode the byte string ``text`` to Unicode.

        The charset stored in ``self._charset`` is used; when it is not set
        yet, it is guessed from ``text`` (defaulting to FALLBACK_CHARSET) and
        stored on the instance.

        Decoding is strict. If it fails because a UTF-16-LE string is missing
        its final byte, one nul byte is appended and decoding is retried.
        Any other failure is reported through ``self.warning`` and the bytes
        are decoded with FALLBACK_CHARSET, so the method always returns a
        Unicode string instead of falling off the end and returning None.
        """
        if not self._charset:
            # charset is still unknown: guess the charset
            self._charset = guessBytesCharset(text, default=FALLBACK_CHARSET)

        # Try to convert to Unicode
        try:
            return unicode(text, self._charset, "strict")
        except UnicodeDecodeError as exc:
            # Bind the error to a separate name so it is still available
            # after the except clause, whatever the interpreter version.
            err = exc

        # --- Conversion error ---

        # Fix truncated UTF-16 string like 'B\0e' (3 bytes)
        # => Add missing nul byte: 'B\0e\0' (4 bytes)
        if err.reason == "truncated data" \
                and err.end == len(text) \
                and self._charset == "UTF-16-LE":
            try:
                fixed = unicode(text + b"\0", self._charset, "strict")
                self.warning("Fix truncated %s string: add missing nul byte" % self._charset)
                return fixed
            except UnicodeDecodeError:
                # Padding did not help. The first error is kept (not
                # rebound) so the warning below reports the original cause.
                pass

        # On error, use FALLBACK_CHARSET
        self.warning("Unable to convert string to Unicode: %s" % err)
        return unicode(text, FALLBACK_CHARSET, "strict")
Example #3
0
 def _guessCharset(self):
     """Guess the charset of this object's content.

     Reads ``self._content_size`` units from the parent's stream, starting
     at ``self.absolute_address + self._content_offset * 8``, and hands the
     data to ``guessBytesCharset`` with FALLBACK_CHARSET as the default.
     """
     # NOTE(review): the "* 8" presumably converts a byte offset into a bit
     # address -- confirm against the stream API.
     start = self.absolute_address + self._content_offset * 8
     stream = self._parent.stream
     raw = stream.readBytes(start, self._content_size)
     return guessBytesCharset(raw, default=FALLBACK_CHARSET)
Example #4
0
def guessStreamCharset(stream, address, size, default=None):
    """Guess the charset of data read from ``stream`` at ``address``.

    ``size`` is capped at ``1024 * 8`` and divided by 8 to obtain the count
    passed to ``stream.readBytes``, so at most 1024 units are sampled. The
    sample is handed to ``guessBytesCharset`` together with ``default``,
    and that function's result is returned unchanged.
    """
    # NOTE(review): presumably ``size`` is expressed in bits and readBytes
    # takes a byte count -- confirm against the stream API.
    capped_size = min(size, 1024 * 8)
    sample = stream.readBytes(address, capped_size // 8)
    return guessBytesCharset(sample, default)
Example #5
0
 def _guessCharset(self):
     """Guess the charset of this object's content.

     Reads ``self._content_size`` units from the parent's stream, starting
     at ``self.absolute_address + self._content_offset * 8``, and hands the
     data to ``guessBytesCharset`` with FALLBACK_CHARSET as the default.
     """
     # NOTE(review): the "* 8" presumably converts a byte offset into a bit
     # address -- confirm against the stream API.
     start = self.absolute_address + self._content_offset * 8
     stream = self._parent.stream
     raw = stream.readBytes(start, self._content_size)
     return guessBytesCharset(raw, default=FALLBACK_CHARSET)
Example #6
0
def guessStreamCharset(stream, address, size, default=None):
    """Guess the charset of data read from ``stream`` at ``address``.

    ``size`` is capped at ``1024 * 8`` and divided by 8 to obtain the count
    passed to ``stream.readBytes``, so at most 1024 units are sampled. The
    sample is handed to ``guessBytesCharset`` together with ``default``,
    and that function's result is returned unchanged.
    """
    # NOTE(review): presumably ``size`` is expressed in bits and readBytes
    # takes a byte count -- confirm against the stream API.
    capped_size = min(size, 1024 * 8)
    sample = stream.readBytes(address, capped_size // 8)
    return guessBytesCharset(sample, default)