def testVim(self):
    """Check detection of Vim ``fileencoding`` modelines by ``guess_encoding``."""
    # NOTE(review): the line breaks inside the triple-quoted literals below
    # were reconstructed -- confirm them against the intended test data.

    # Declaration on the very first line.
    e = guess_encoding('# vim:fileencoding=UTF-8')
    self.assertEqual(e, 'UTF-8')

    # Declaration on the second line, behind several '#' characters.
    e = guess_encoding('''
    ### vim:fileencoding=ISO-8859-1
    ''')
    self.assertEqual(e, 'ISO-8859-1')

    # Negative case: a space follows '=' and the declaration is pushed down
    # to the third line.
    # NOTE(review): presumably only the first two lines are inspected;
    # verify which of the two differences is meant to trigger the rejection.
    e = guess_encoding('''

    ### vim:fileencoding= ISO-8859-1
    ''')
    self.assertEqual(e, None)
def testEmacs(self):
    """Check detection of Emacs ``-*- coding -*-`` cookies by ``guess_encoding``."""
    # NOTE(review): the line breaks inside the triple-quoted literals below
    # were reconstructed -- confirm them against the intended test data.

    # Cookie on the first line, with a space after the colon.
    e = guess_encoding('# -*- coding: UTF-8 -*-')
    self.assertEqual(e, 'UTF-8')

    # Cookie on the first line, without a space after the colon.
    e = guess_encoding('# -*- coding:UTF-8 -*-')
    self.assertEqual(e, 'UTF-8')

    # Cookie on the second line, behind several '#' characters.
    e = guess_encoding('''
    ### -*- coding: ISO-8859-1 -*-
    ''')
    self.assertEqual(e, 'ISO-8859-1')

    # Same cookie pushed down to the third line: expected to be ignored.
    # NOTE(review): assumes only the first two lines are inspected, as
    # PEP 263 specifies for source-file encoding declarations -- confirm.
    e = guess_encoding('''

    ### -*- coding: ISO-8859-1 -*-
    ''')
    self.assertEqual(e, None)
def process_module(self, stream):
    """Tokenize *stream* and hand the tokens to ``self.process_tokens``.

    On Python 2 the whole stream is read once; when it holds non-ASCII
    data and ``guess_encoding`` reports an encoding, each line is decoded
    (with ``'replace'`` error handling) before being tokenized, so that
    international text's length is properly calculated.
    """
    read_line = stream.readline
    if sys.version_info < (3, 0):
        source = stream.read()
        # is_ascii() returns a sequence whose first item is the ASCII flag.
        plain_ascii = is_ascii(source)[0]
        declared = None if plain_ascii else guess_encoding(source)
        del source
        if declared is not None:
            def read_line():
                return stream.readline().decode(declared, 'replace')
    # Rewind so the tokenizer starts from the beginning of the stream.
    stream.seek(0)
    self.process_tokens(tokenize.generate_tokens(read_line))
def process_module(self, stream):
    """Tokenize *stream* and pass the tokens to ``self.process_tokens``.

    The whole stream is read once to look for non-ASCII content. When some
    is found and ``guess_encoding`` reports an encoding, every line is
    decoded (with ``'replace'`` error handling) before being tokenized, so
    that international text's length is properly calculated.

    :param stream: seekable file-like object providing ``read``,
        ``readline`` and ``seek``.
    """
    line_generator = stream.readline
    data = stream.read()
    # is_ascii() returns a pair; only its first item (the flag) is used.
    only_ascii = is_ascii(data)[0]
    encoding = None if only_ascii else guess_encoding(data)
    # Drop the full copy; the stream is re-read line by line below.
    del data
    if encoding is not None:
        def line_generator():
            line = stream.readline()
            # Text lines (Python 3 ``str``) have no decode() and are
            # already usable by the tokenizer; only decode byte strings.
            if hasattr(line, 'decode'):
                line = line.decode(encoding, 'replace')
            return line
    # Rewind so the tokenizer starts from the beginning of the stream.
    stream.seek(0)
    self.process_tokens(tokenize.generate_tokens(line_generator))
def testUTF8(self):
    """Check that ``guess_encoding`` honors a UTF-8 BOM only at the start."""
    # '\xef\xbb\xbf' is the UTF-8 byte-order mark; leading -> 'UTF-8'.
    e = guess_encoding('\xef\xbb\xbf any UTF-8 data')
    self.assertEqual(e, 'UTF-8')

    # The same bytes at the end of the data must not count as a BOM.
    e = guess_encoding(' any UTF-8 data \xef\xbb\xbf')
    self.assertEqual(e, None)