def get_coding(text): """ Function to get the coding of a text. @param text text to inspect (string) @return coding string """ for line in text.splitlines()[:2]: try: result = CODING_RE.search(to_text_string(line)) except UnicodeDecodeError: # This could fail because to_text_string assume the text is # utf8-like and we don't know the encoding to give it to # to_text_string pass else: if result: codec = result.group(1) # sometimes we find a false encoding that can result in errors if codec in CODECS: return codec # Fallback using chardet if is_binary_string(text): detector = UniversalDetector() for line in text.splitlines()[:2]: detector.feed(line) if detector.done: break detector.close() return detector.result['encoding'] return None
def to_unicode_from_fs(string): """ Return a unicode version of string decoded using the file system encoding. """ if not is_string(string): # string is a QString string = to_text_string(string.toUtf8(), 'utf-8') else: if is_binary_string(string): try: unic = string.decode(FS_ENCODING) except (UnicodeError, TypeError): pass else: return unic return string