Example #1
0
def detect_encoding(f):
    """Guess the encoding of the byte chunks produced by iterating *f*.

    Each item yielded by *f* is fed to a chardet ``UniversalDetector``;
    iteration stops as soon as the detector reports it is done, or when
    *f* is exhausted.

    Returns the value stored under ``"encoding"`` in the detector result.
    """
    sniffer = UniversalDetector()
    sniffer.reset()
    chunks = iter(f)
    # Pull chunks one at a time so no more input is consumed than needed.
    while not sniffer.done:
        try:
            chunk = next(chunks)
        except StopIteration:
            break
        sniffer.feed(chunk)
    sniffer.close()
    verdict = sniffer.result
    return verdict["encoding"]
Example #2
0
def detect_encoding(path):
    """Guess the encoding of the file located at *path*.

    The file is opened in binary mode and fed line by line to a chardet
    ``UniversalDetector`` until the detector reports it is done or the
    file ends.

    Returns the value stored under ``"encoding"`` in the detector result.
    """
    sniffer = UniversalDetector()
    sniffer.reset()
    with open(path, "rb") as handle:
        # readline() yields b"" only at end of file, which ends the loop.
        for chunk in iter(handle.readline, b""):
            sniffer.feed(chunk)
            if sniffer.done:
                break
    sniffer.close()
    verdict = sniffer.result
    return verdict["encoding"]
Example #3
0
def detect_convert(filename):
    """Read *filename*, decode it with its detected encoding, and convert it.

    The file is fed line by line to a chardet ``UniversalDetector`` until
    the detector is done or the file ends.  The whole file content is then
    decoded with the detected encoding, falling back to
    ``args.fallback_enc`` when the detector reports no encoding;
    undecodable bytes are ignored.

    Returns the result of calling the converter obtained from
    ``convertfunc(text, args.locale, args.locale_only)`` on the decoded
    text.
    """
    detector = UniversalDetector()
    detector.reset()
    sniffed = []
    with open(filename, 'rb') as f:
        for line in f:
            detector.feed(line)
            sniffed.append(line)
            if detector.done:
                break
        detector.close()
        # Rejoin the sniffed prefix with the unread remainder *before*
        # decoding: decoding the two pieces separately corrupts the text
        # when the split lands inside a multi-byte unit (e.g. UTF-16,
        # where b'\n' is only half of the newline character) or when a
        # BOM / shift state only applies to the first piece.
        raw = b''.join(sniffed) + f.read()
    encoding = detector.result['encoding'] or args.fallback_enc
    text = raw.decode(encoding, errors='ignore')
    cf = convertfunc(text, args.locale, args.locale_only)
    return cf(text)
Example #4
0
def detect_convert(filename):
    """Decode the file *filename* using encoding detection, then convert it.

    Lines are read in binary mode and passed to a chardet
    ``UniversalDetector`` until it is done or the file is exhausted.  The
    complete file content is decoded in a single pass with the detected
    encoding (``args.fallback_enc`` when the detector reports none),
    ignoring undecodable bytes.

    Returns the output of the converter built by
    ``convertfunc(text, args.locale, args.locale_only)`` applied to the
    decoded text.
    """
    detector = UniversalDetector()
    detector.reset()
    head = []
    with open(filename, 'rb') as f:
        for line in f:
            detector.feed(line)
            head.append(line)
            if detector.done:
                break
        detector.close()
        # Decode everything at once.  Splitting the bytes at the point
        # where detection stopped and decoding each half on its own can
        # cut a multi-byte character in two (binary line splitting breaks
        # UTF-16/UTF-32 after the 0x0A byte) and loses BOM / shift state
        # for the second half.
        raw = b''.join(head) + f.read()
    encoding = detector.result['encoding'] or args.fallback_enc
    text = raw.decode(encoding, errors='ignore')
    cf = convertfunc(text, args.locale, args.locale_only)
    return cf(text)
Example #5
0
    from chardet.universaldetector import UniversalDetector

# Script: write each input to stdout as text, decoded with the encoding
# chardet detects for it ('utf-8' when nothing is detected).
import codecs

detector = UniversalDetector()

# Inputs are the command-line arguments that name existing files; with no
# arguments at all, read standard input (represented by '-').
arg = sys.argv[1:]
if arg:
    fns = list(filter(os.path.isfile, arg))
else:
    fns = ['-']

for fn in fns:
    from_stdin = fn == '-'
    stream = sys.stdin.buffer if from_stdin else open(fn, 'rb')
    try:
        detector.reset()
        # Lines consumed while sniffing must be kept so they can be
        # written out once the encoding is known.
        cache = []
        for line in stream:
            detector.feed(line)
            cache.append(line)
            if detector.done:
                break
        detector.close()
        # One incremental decoder per input: it carries partial
        # multi-byte sequences and BOM state across binary line splits,
        # which decoding every line independently would corrupt.
        decoder = codecs.getincrementaldecoder(
            detector.result['encoding'] or 'utf-8')(errors='replace')
        # First the cached prefix, then whatever is still unread.
        for source in (cache, stream):
            for line in source:
                sys.stdout.write(decoder.decode(line))
        # Flush any bytes still buffered inside the decoder.
        sys.stdout.write(decoder.decode(b'', final=True))
    finally:
        # Close files this script opened; leave standard input alone.
        if not from_stdin:
            stream.close()