Example #1
0
        def _unicode(content):
            """Turn ``content`` into text via ``_str``, replacing bad bytes.

            The encoding is taken from ``self.encoding`` of the enclosing
            scope; when that is ``None`` the encoding is guessed with
            ``chardet``. Empty content short-circuits to ``_str('')``.
            """
            # Preferred encoding: the one already known on the enclosing object
            # (per the original comment, the charset from the content-type).
            codec = self.encoding

            # Nothing to decode.
            if not content:
                return _str('')

            # No declared encoding: fall back to auto-detection.
            if self.encoding is None:
                codec = chardet.detect(content)['encoding']

            try:
                return _str(content, codec, errors='replace')
            except (LookupError, TypeError):
                # LookupError: the encoding name is unknown (for example a
                # misspelled charset). TypeError: the encoding is None.
                # Either way, retry without naming an encoding at all.
                return _str(content, errors='replace')
Example #2
0
def auto_detect(text):
    """Return the lowercase charset name detected for ``text``, or ``None``.

    A quick check with filemagic is tried first; if its description
    mentions "UTF-8" the answer is ``"utf-8"``. Otherwise the slower but
    more precise ``chardet`` detection is used, and its result (which may
    be ``None``) is lowercased when present.
    """
    # Fast path: ask filemagic to describe the buffer.
    with magic.Magic() as detector:
        description = detector.id_buffer(text)

    if description is not None and "UTF-8" in description:
        return "utf-8"

    # Anything that is not clearly UTF-8 goes through chardet.
    guessed = chardet.detect(text)['encoding']
    return None if guessed is None else guessed.lower()
Example #3
0
def _autodetect_encoding(binary_data):
    """Return the encoding that ``chardet`` reports for ``binary_data``."""
    detection = chardet.detect(binary_data)
    return detection["encoding"]
Example #4
0
 def _encoding(content):
     """Return the ``'encoding'`` entry of ``chardet``'s detection result."""
     detected = chardet.detect(content)
     return detected['encoding']
Example #5
0
def _autodetect_encoding(binary_data):
    """Guess the encoding of ``binary_data`` using ``chardet.detect``."""
    report = chardet.detect(binary_data)
    guessed_encoding = report['encoding']
    return guessed_encoding
Example #6
0
 def guess_encoding(s):
     """Guess the encoding of ``s``, falling back to ``'utf-8'``.

     The detector is obtained from ``get_chardet_module()``. When that
     returns a falsy value, or when detection yields no encoding,
     ``'utf-8'`` is returned.
     """
     chardet = get_chardet_module()
     # Bind the name up front: previously it was only assigned inside the
     # branch below, so a missing chardet module raised UnboundLocalError
     # instead of falling back to the default.
     encoding = None
     if chardet:
         encoding = chardet.detect(s)['encoding']
     return encoding or 'utf-8'