コード例 #1
0
ファイル: test_csv_reader.py プロジェクト: miarka/zeus
 def test007_sample_picker_does_not_break_utf16be(self):
     # Single-byte encodings such as iso8859-7 need no test of this kind:
     # each character is exactly one byte, so it cannot be split.
     # utf-16le and utf-16be are tested because .rpartition can leave the
     # string with an odd (i.e. broken) number of bytes.
     # pick_sample is also tested with a broken utf-8 input to check that
     # it can repair it.
     raw_bytes = 'a\nb'.encode('utf-16be')
     detected = get_encoding(pick_sample(raw_bytes))
     assert detected == 'utf-16be'
コード例 #2
0
ファイル: test_csv_reader.py プロジェクト: grnet/zeus
 def test007_sample_picker_does_not_break_utf16be(self):
     # No check is needed for iso8859-7: all of its characters are encoded
     # in a single byte, so none of them can be cut in half.
     # The utf-16 variants (le and be) are checked because .rpartition can
     # produce a string with an odd, and therefore broken, byte count.
     # A broken utf-8 input is also passed to pick_sample to see whether
     # it gets fixed.
     text = u'a\nb'
     payload = text.encode('utf-16be')
     # Detect the encoding of the sample taken from the encoded bytes.
     self.assertEqual(get_encoding(pick_sample(payload)), 'utf-16be')
コード例 #3
0
ファイル: test_csv_reader.py プロジェクト: miarka/zeus
 def test009_sample_picker_fixes_utf_8(self):
     # The text ends with a character that is multibyte in utf-8; dropping
     # the final byte leaves that character incomplete.
     encoded = ("The last char is multibyte in utf-8\n"
                "will be trancated ->\u0a01").encode('utf-8')
     truncated = encoded[:-1]
     # Record whether the truncated bytes really fail to decode, so the
     # final assertion also confirms the input was broken.
     try:
         truncated.decode('utf-8')
     except UnicodeDecodeError:
         undecodable = True
     else:
         undecodable = False
     detected = get_encoding(pick_sample(truncated))
     assert (detected, undecodable) == ('utf-8', True)
コード例 #4
0
ファイル: test_csv_reader.py プロジェクト: grnet/zeus
 def test009_sample_picker_fixes_utf_8(self):
     # Build utf-8 bytes whose last character is multibyte, then cut off
     # the final byte so that the trailing character is incomplete.
     full_bytes = (u"The last char is multibyte in utf-8\n"
                   u"will be trancated ->\u0a01").encode('utf-8')
     cut_bytes = full_bytes[:-1]
     # Confirm the cut input cannot be decoded as-is; the flag is checked
     # together with the detected encoding below.
     try:
         cut_bytes.decode('utf-8')
     except UnicodeDecodeError:
         decode_failed = True
     else:
         decode_failed = False
     picked = pick_sample(cut_bytes)
     outcome = (get_encoding(picked), decode_failed)
     self.assertEqual(outcome, ('utf-8', True))
コード例 #5
0
ファイル: test_csv_reader.py プロジェクト: miarka/zeus
 def test008_sample_picker_does_not_break_utf16le(self):
     # Encode text containing a non-ASCII character as utf-16le and check
     # that the encoding detected from the picked sample is utf-16le.
     raw_bytes = 'test\u0a01input'.encode('utf-16le')
     detected = get_encoding(pick_sample(raw_bytes))
     assert detected == 'utf-16le'
コード例 #6
0
ファイル: test_csv_reader.py プロジェクト: grnet/zeus
 def test008_sample_picker_does_not_break_utf16le(self):
     # The input mixes ASCII with a non-ASCII character and is encoded as
     # utf-16le; the sample picked from it must be detected as utf-16le.
     text = u'test\u0a01input'
     payload = text.encode('utf-16le')
     self.assertEqual(get_encoding(pick_sample(payload)), 'utf-16le')