def _convertText(self, text): if not self._charset: # charset is still unknown: guess the charset self._charset = guessBytesCharset(text, default=FALLBACK_CHARSET) # Try to convert to Unicode try: return str(text, self._charset, "strict") except UnicodeDecodeError as exc: err = exc # --- Conversion error --- # Fix truncated UTF-16 string like 'B\0e' (3 bytes) # => Add missing nul byte: 'B\0e\0' (4 bytes) if err.reason == "truncated data" \ and err.end == len(text) \ and self._charset == "UTF-16-LE": try: text = str(text + b"\0", self._charset, "strict") self.warning("Fix truncated %s string: add missing nul byte" % self._charset) return text except UnicodeDecodeError: pass # On error, use FALLBACK_CHARSET self.warning("Unable to convert string to Unicode: %s" % err) return str(text, FALLBACK_CHARSET, "strict")
def _convertText(self, text): if not self._charset: # charset is still unknown: guess the charset self._charset = guessBytesCharset(text, default=FALLBACK_CHARSET) # Try to convert to Unicode try: return unicode(text, self._charset, "strict") except UnicodeDecodeError as err: pass # --- Conversion error --- # Fix truncated UTF-16 string like 'B\0e' (3 bytes) # => Add missing nul byte: 'B\0e\0' (4 bytes) if err.reason == "truncated data" \ and err.end == len(text) \ and self._charset == "UTF-16-LE": try: text = unicode(text + "\0", self._charset, "strict") self.warning("Fix truncated %s string: add missing nul byte" % self._charset) return text except UnicodeDecodeError, err: pass
def _guessCharset(self):
    """Guess the charset of this field's content bytes.

    Reads ``self._content_size`` bytes from the parent's stream, starting
    at ``self.absolute_address + self._content_offset * 8``, and passes
    them to ``guessBytesCharset`` with ``FALLBACK_CHARSET`` as default.

    NOTE(review): the ``* 8`` suggests the address is in bits while the
    content offset is in bytes -- confirm against the field classes.
    """
    content_address = self.absolute_address + self._content_offset * 8
    stream = self._parent.stream
    raw_content = stream.readBytes(content_address, self._content_size)
    return guessBytesCharset(raw_content, default=FALLBACK_CHARSET)
def guessStreamCharset(stream, address, size, default=None):
    """Guess the charset of a region of *stream*.

    *size* is capped at ``1024 * 8``; ``size // 8`` bytes are then read
    from *stream* at *address* and handed to ``guessBytesCharset``
    together with *default*, whose result is returned.

    NOTE(review): the ``// 8`` suggests *size* is expressed in bits, so
    at most 1024 bytes would be sampled -- confirm against the stream API.
    """
    capped_size = min(size, 1024 * 8)
    sample = stream.readBytes(address, capped_size // 8)
    return guessBytesCharset(sample, default)