Ejemplo n.º 1
0
def get_coding(text):
    """
    Determine the encoding name declared in, or detected from, a text.

    The first two lines of ``text`` are searched with ``CODING_RE``; the
    first captured name that is also listed in ``CODECS`` is returned.
    If none is found and ``text`` is a binary string, the same two lines
    are fed to a ``UniversalDetector`` and its guess is returned.

    @param text text to inspect (string)
    @return coding string, or None when nothing could be determined
    """
    head = text.splitlines()[:2]

    for raw_line in head:
        try:
            match = CODING_RE.search(to_text_string(raw_line))
        except UnicodeDecodeError:
            # to_text_string assumes utf8-like input; the real encoding is
            # unknown at this point, so just move on to the next line.
            continue
        if not match:
            continue
        declared = match.group(1)
        # A matched name may be bogus; only trust known codecs.
        if declared in CODECS:
            return declared

    # Only binary strings are handed to the detector.
    if not is_binary_string(text):
        return None

    # Fallback using chardet
    detector = UniversalDetector()
    for raw_line in head:
        detector.feed(raw_line)
        if detector.done:
            break
    detector.close()
    return detector.result['encoding']
Ejemplo n.º 2
0
def to_unicode_from_fs(string):
    """
    Return a unicode version of string decoded using the file system encoding.

    Objects that are not strings (treated as QString) are converted through
    their UTF-8 representation. Binary strings are decoded with
    ``FS_ENCODING``; if decoding fails, the input is returned unchanged.
    Any other string is returned as is.
    """
    if not is_string(string):
        # string is a QString
        return to_text_string(string.toUtf8(), 'utf-8')

    if not is_binary_string(string):
        return string

    try:
        return string.decode(FS_ENCODING)
    except (UnicodeError, TypeError):
        # Best effort: hand back the undecoded input.
        return string