def _convertText(self, text):
    """Decode the byte string ``text`` to Unicode.

    The charset stored in ``self._charset`` is used; when it is unset it
    is first guessed from ``text`` with ``guessBytesCharset`` (defaulting
    to ``FALLBACK_CHARSET``) and stored back on the instance.

    If strict decoding fails, one repair is attempted: a UTF-16-LE string
    whose only problem is a missing final byte gets a nul byte appended
    and is decoded again. If that does not apply or also fails, a warning
    is emitted through ``self.warning`` and ``text`` is decoded with
    ``FALLBACK_CHARSET`` (still in strict mode).

    Returns the decoded Unicode string.
    """
    if not self._charset:
        # Charset is still unknown: guess it from the data.
        self._charset = guessBytesCharset(text, default=FALLBACK_CHARSET)

    # Try to convert to Unicode.
    try:
        return unicode(text, self._charset, "strict")
    except UnicodeDecodeError as exc:
        # Keep the error under a second name: the ``as`` target is not
        # guaranteed to stay bound once the except clause ends.
        err = exc

    # --- Conversion error ---

    # Fix truncated UTF-16 string like 'B\0e' (3 bytes)
    # => add the missing nul byte: 'B\0e\0' (4 bytes)
    is_truncated_utf16 = (
        err.reason == "truncated data"
        and err.end == len(text)
        and self._charset == "UTF-16-LE"
    )
    if is_truncated_utf16:
        try:
            fixed = unicode(text + "\0", self._charset, "strict")
        except UnicodeDecodeError as exc:
            # Report the most recent failure in the warning below.
            err = exc
        else:
            self.warning("Fix truncated %s string: add missing nul byte" % self._charset)
            return fixed

    # On error, use FALLBACK_CHARSET.
    self.warning(u"Unable to convert string to Unicode: %s" % err)
    return unicode(text, FALLBACK_CHARSET, "strict")
def _convertText(self, text):
    """Decode the byte string ``text`` to Unicode using ``self._charset``.

    When ``self._charset`` is unset, it is first guessed from ``text``
    with ``guessBytesCharset`` (defaulting to ``FALLBACK_CHARSET``) and
    stored on the instance.

    Returns the decoded Unicode string when strict decoding succeeds.
    """
    if not self._charset:
        # Charset is still unknown: guess it from the data.
        self._charset = guessBytesCharset(text, default=FALLBACK_CHARSET)

    # Try to convert to Unicode.
    try:
        return unicode(text, self._charset, "strict")
    except UnicodeDecodeError:
        # NOTE(review): this excerpt ends right after the except clause,
        # so a failed decode is swallowed and the method returns None.
        # The error handling that should follow appears to be missing --
        # confirm against the complete method before relying on this.
        return None
def _convertText(self, text):
    """Decode the byte string ``text`` to Unicode using ``self._charset``.

    When ``self._charset`` is unset, it is first guessed from ``text``
    with ``guessBytesCharset`` (defaulting to ``FALLBACK_CHARSET``) and
    stored on the instance.

    Returns the decoded Unicode string when strict decoding succeeds.
    """
    if not self._charset:
        # Charset is still unknown: guess it from the data.
        self._charset = guessBytesCharset(text, default=FALLBACK_CHARSET)

    # Try to convert to Unicode.
    try:
        return unicode(text, self._charset, "strict")
    except UnicodeDecodeError:
        # NOTE(review): this excerpt ends right after the except clause,
        # so a failed decode is swallowed and the method returns None.
        # The error handling that should follow appears to be missing --
        # confirm against the complete method before relying on this.
        return None
def _convertText(self, text, charset):
    """Decode the byte string ``text`` to Unicode using ``charset``.

    When ``charset`` is empty, it is guessed from ``text`` with
    ``guessBytesCharset``; if no charset can be guessed either, ``text``
    is decoded with ``FALLBACK_CHARSET`` in strict mode.

    Returns the decoded Unicode string when strict decoding succeeds.
    """
    if not charset:
        # No charset given: try to guess one from the data.
        charset = guessBytesCharset(text, default=None)
        if not charset:
            # Nothing could be guessed: use the fallback charset.
            return unicode(text, FALLBACK_CHARSET, "strict")

    # Try to convert to Unicode.
    try:
        return unicode(text, charset, "strict")
    except UnicodeDecodeError:
        # NOTE(review): this excerpt ends right after the except clause,
        # so a failed decode is swallowed and the method returns None.
        # The error handling that should follow appears to be missing --
        # confirm against the complete method before relying on this.
        return None
def guessStreamCharset(stream, address, size, default=None):
    """Guess the charset of the data found at ``address`` in ``stream``.

    ``size`` is capped at ``1024 * 8`` and then divided by 8 to get the
    count passed to ``stream.readBytes`` (presumably a bit size converted
    to bytes -- confirm against the stream API). The bytes read are
    handed to ``guessBytesCharset`` together with ``default``.

    Returns whatever ``guessBytesCharset`` returns.
    """
    max_size = 1024 * 8
    capped_size = min(size, max_size)
    sample = stream.readBytes(address, capped_size // 8)
    return guessBytesCharset(sample, default)
def guessStreamCharset(stream, address, size, default=None):
    """Guess the charset of the data found at ``address`` in ``stream``.

    ``size`` is capped at ``1024 * 8`` and then divided by 8 to get the
    count passed to ``stream.readBytes`` (presumably a bit size converted
    to bytes -- confirm against the stream API). The bytes read are
    handed to ``guessBytesCharset`` together with ``default``.

    Returns whatever ``guessBytesCharset`` returns.
    """
    max_size = 1024 * 8
    capped_size = min(size, max_size)
    sample = stream.readBytes(address, capped_size // 8)
    return guessBytesCharset(sample, default)
def _guessCharset(self):
    """Guess the charset of this object's content.

    Reads ``self._content_size`` bytes from the parent's stream, starting
    at ``self.absolute_address`` plus ``self._content_offset * 8``, and
    passes them to ``guessBytesCharset`` with ``FALLBACK_CHARSET`` as the
    default.

    Returns whatever ``guessBytesCharset`` returns.
    """
    # The offset is multiplied by 8 before being added to the address
    # (presumably bytes -> bits; confirm against the stream API).
    content_address = self.absolute_address + self._content_offset * 8
    stream = self._parent.stream
    content = stream.readBytes(content_address, self._content_size)
    return guessBytesCharset(content, default=FALLBACK_CHARSET)
def _guessCharset(self):
    """Guess the charset of this object's content.

    Reads ``self._content_size`` bytes from the parent's stream, starting
    at ``self.absolute_address`` plus ``self._content_offset * 8``, and
    passes them to ``guessBytesCharset`` with ``FALLBACK_CHARSET`` as the
    default.

    Returns whatever ``guessBytesCharset`` returns.
    """
    # The offset is multiplied by 8 before being added to the address
    # (presumably bytes -> bits; confirm against the stream API).
    content_address = self.absolute_address + self._content_offset * 8
    stream = self._parent.stream
    content = stream.readBytes(content_address, self._content_size)
    return guessBytesCharset(content, default=FALLBACK_CHARSET)