def detect_encoding(f):
    """Guess the encoding of the data yielded by ``f``.

    Items produced by iterating ``f`` are fed to a ``UniversalDetector``
    one at a time; iteration stops early once the detector reports that
    it is done.

    Args:
        f: Iterable of chunks to inspect (presumably a file object opened
            in binary mode -- confirm against callers).

    Returns:
        The ``"encoding"`` entry of the detector's result.
    """
    sniffer = UniversalDetector()
    sniffer.reset()
    for chunk in f:
        sniffer.feed(chunk)
        if sniffer.done:
            # The detector is confident enough; no need to read further.
            break
    sniffer.close()
    outcome = sniffer.result
    return outcome["encoding"]
def detect_encoding(path):
    """Guess the encoding of the file at ``path``.

    The file is opened in binary mode and its lines are fed to a
    ``UniversalDetector`` until the detector reports it is done or the
    file is exhausted.

    Args:
        path: Location of the file to inspect; passed straight to ``open``.

    Returns:
        The ``"encoding"`` entry of the detector's result.
    """
    sniffer = UniversalDetector()
    sniffer.reset()
    with open(path, "rb") as handle:
        for raw_line in handle:
            sniffer.feed(raw_line)
            if sniffer.done:
                # Enough evidence gathered; skip the rest of the file.
                break
    sniffer.close()
    guess = sniffer.result
    return guess["encoding"]
def detect_convert(filename):
    """Read ``filename``, guess its encoding, and convert the decoded text.

    The file is read in binary mode. Lines are fed to a
    ``UniversalDetector`` until it reports it is done, then the rest of the
    file is read. The complete byte content is decoded in ONE call using
    the detected encoding, or ``args.fallback_enc`` when detection yields
    nothing; undecodable bytes are dropped (``errors='ignore'``).

    Decoding once matters: binary line iteration splits on the newline
    byte, which can fall in the middle of a multi-byte code unit (for
    example in UTF-16). Decoding the two pieces separately would corrupt
    the text at the seam.

    Args:
        filename: Path of the file to read.

    Returns:
        The result of calling the function returned by
        ``convertfunc(text, args.locale, args.locale_only)`` on the
        decoded text.
    """
    detector = UniversalDetector()
    detector.reset()
    pieces = []
    with open(filename, 'rb') as f:
        for line in f:
            detector.feed(line)
            pieces.append(line)
            if detector.done:
                break
        detector.close()
        # Whatever the detector did not need is still part of the content.
        pieces.append(f.read())
    encoding = detector.result['encoding'] or args.fallback_enc
    # Join first, decode once: never decode across an arbitrary byte split.
    text = b''.join(pieces).decode(encoding, errors='ignore')
    cf = convertfunc(text, args.locale, args.locale_only)
    return cf(text)
def detect_convert(filename):
    """Decode ``filename`` with a detected encoding and convert its text.

    Lines of the file (opened in binary mode) are fed to a
    ``UniversalDetector`` until it reports it is done. All bytes of the
    file -- those inspected and those remaining -- are gathered into a
    single buffer and decoded once with the detected encoding, falling
    back to ``args.fallback_enc`` when the detector gives no encoding.
    Bytes that cannot be decoded are dropped (``errors='ignore'``).

    The buffer is decoded as a whole rather than in two parts because the
    point where detection stops is a newline *byte*, which is not
    necessarily a character boundary (e.g. UTF-16/UTF-32 data).

    Args:
        filename: Path of the file to read.

    Returns:
        The value returned by the converter obtained from
        ``convertfunc(text, args.locale, args.locale_only)`` when applied
        to the decoded text.
    """
    detector = UniversalDetector()
    detector.reset()
    raw = bytearray()
    with open(filename, 'rb') as f:
        for line in f:
            detector.feed(line)
            raw += line
            if detector.done:
                break
        detector.close()
        # Append the part of the file the detector never looked at.
        raw += f.read()
    encoding = detector.result['encoding'] or args.fallback_enc
    # Single decode over the full content keeps multi-byte units intact.
    text = raw.decode(encoding, errors='ignore')
    cf = convertfunc(text, args.locale, args.locale_only)
    return cf(text)
import codecs
import itertools
import os
import sys

from chardet.universaldetector import UniversalDetector

# Script: decode each input with its detected encoding and write the text
# to stdout. Inputs are the command-line arguments that name existing
# regular files, or stdin ('-') when no arguments are given.
detector = UniversalDetector()

arg = sys.argv[1:]
if arg:
    # Arguments that are not existing regular files are silently skipped.
    fns = list(filter(os.path.isfile, arg))
else:
    fns = ['-']

for fn in fns:
    if fn == '-':
        stream = sys.stdin.buffer
    else:
        stream = open(fn, 'rb')
    try:
        detector.reset()
        # Lines consumed for detection are kept so they can be emitted too.
        cache = []
        for line in stream:
            detector.feed(line)
            cache.append(line)
            if detector.done:
                break
        detector.close()

        encoding = detector.result['encoding'] or 'utf-8'
        # An incremental decoder carries partial multi-byte sequences over
        # from one line to the next. Decoding each line independently
        # breaks encodings where the newline byte is not a character
        # boundary (e.g. UTF-16), while this still streams the output.
        decoder = codecs.getincrementaldecoder(encoding)(errors='replace')
        for line in itertools.chain(cache, stream):
            sys.stdout.write(decoder.decode(line))
        # Flush any incomplete trailing sequence held by the decoder.
        sys.stdout.write(decoder.decode(b'', final=True))
    finally:
        # Close files we opened ourselves; leave stdin alone.
        if fn != '-':
            stream.close()