class Test_EUCKR(test_multibytecodec_support.TestBase, unittest.TestCase):
    """Codec test data for the ``euc_kr`` encoding."""

    encoding = 'euc_kr'
    tstring = test_multibytecodec_support.load_teststring('euc_kr')

    # Rows look like (input, error-handler name, expected result); a None
    # result presumably means the conversion must fail -- confirm in TestBase.
    codectests = (
        # invalid bytes
        ("abc\x80\x80\xc1\xc4", "strict", None),
        ("abc\xc8", "strict", None),
        ("abc\x80\x80\xc1\xc4", "replace", u"abc\ufffd\uc894"),
        ("abc\x80\x80\xc1\xc4\xc8", "replace", u"abc\ufffd\uc894\ufffd"),
        ("abc\x80\x80\xc1\xc4", "ignore", u"abc\uc894"),

        # composed make-up sequence errors
        # -- every truncated prefix of the 8-byte sequence
        ("\xa4\xd4", "strict", None),
        ("\xa4\xd4\xa4", "strict", None),
        ("\xa4\xd4\xa4\xb6", "strict", None),
        ("\xa4\xd4\xa4\xb6\xa4", "strict", None),
        ("\xa4\xd4\xa4\xb6\xa4\xd0", "strict", None),
        ("\xa4\xd4\xa4\xb6\xa4\xd0\xa4", "strict", None),
        # -- the complete sequence, alone and followed by ASCII
        ("\xa4\xd4\xa4\xb6\xa4\xd0\xa4\xd4", "strict", u"\uc4d4"),
        ("\xa4\xd4\xa4\xb6\xa4\xd0\xa4\xd4x", "strict", u"\uc4d4x"),
        ("a\xa4\xd4\xa4\xb6\xa4", "replace", u"a\ufffd"),
        # -- a wrong lead byte (0xa3) in each of the three component pairs
        ("\xa4\xd4\xa3\xb6\xa4\xd0\xa4\xd4", "strict", None),
        ("\xa4\xd4\xa4\xb6\xa3\xd0\xa4\xd4", "strict", None),
        ("\xa4\xd4\xa4\xb6\xa4\xd0\xa3\xd4", "strict", None),
        # -- a 0xff trail byte in each of the three component pairs
        ("\xa4\xd4\xa4\xff\xa4\xd0\xa4\xd4", "replace", u"\ufffd"),
        ("\xa4\xd4\xa4\xb6\xa4\xff\xa4\xd4", "replace", u"\ufffd"),
        ("\xa4\xd4\xa4\xb6\xa4\xd0\xa4\xff", "replace", u"\ufffd"),
        ("\xc1\xc4", "strict", u"\uc894"),
    )
class Test_SJIS_COMPAT(test_multibytecodec_support.TestBase,
                       unittest.TestCase):
    """Codec test data for the ``shift_jis`` encoding."""

    encoding = 'shift_jis'
    tstring = test_multibytecodec_support.load_teststring('shift_jis')

    # The shared Shift-JIS cases are extended with two rows specific to this
    # codec: backslash/tilde bytes and three double-byte symbols.
    codectests = shiftjis_commonenctests + (
        (b"\\\x7e", "strict", "\\\x7e"),
        (b"\x81\x5f\x81\x61\x81\x7c", "strict", "\uff3c\u2016\u2212"),
    )
class Test_HZ(test_multibytecodec_support.TestBase, unittest.TestCase):
    """Codec test data for the ``hz`` encoding."""

    encoding = 'hz'
    tstring = test_multibytecodec_support.load_teststring('hz')

    # Rows look like (input bytes, error-handler name, expected text).
    codectests = (
        # test '~\n' (3 lines)
        (
            b'This sentence is in ASCII.\n'
            b'The next sentence is in GB.~{<:Ky2;S{#,~}~\n'
            b'~{NpJ)l6HK!#~}Bye.\n',
            'strict',
            u'This sentence is in ASCII.\n'
            u'The next sentence is in GB.'
            u'\u5df1\u6240\u4e0d\u6b32\uff0c\u52ff\u65bd\u65bc\u4eba\u3002'
            u'Bye.\n',
        ),
        # test '~\n' (4 lines)
        (
            b'This sentence is in ASCII.\n'
            b'The next sentence is in GB.~\n'
            b'~{<:Ky2;S{#,NpJ)l6HK!#~}~\n'
            b'Bye.\n',
            'strict',
            u'This sentence is in ASCII.\n'
            u'The next sentence is in GB.'
            u'\u5df1\u6240\u4e0d\u6b32\uff0c\u52ff\u65bd\u65bc\u4eba\u3002'
            u'Bye.\n',
        ),
        # invalid bytes
        (b'ab~cd', 'replace', u'ab\uFFFDd'),
        (b'ab\xffcd', 'replace', u'ab\uFFFDcd'),
        (b'ab~{\x81\x81\x41\x44~}cd', 'replace', u'ab\uFFFD\uFFFD\u804Acd'),
    )
# NOTE(review): a second class named Test_Big5 is visible further down; if both
# live in the same module the later definition replaces this one -- confirm.
class Test_Big5(test_multibytecodec_support.TestBase, unittest.TestCase):
    """Codec test data for the ``big5`` encoding.

    The expected output for the invalid ``\\x80\\x80`` pair depends on the
    module-level ``RESYNC_FASTER`` flag: one replacement character per byte
    when it is set, a single replacement character otherwise.
    """

    encoding = 'big5'
    tstring = test_multibytecodec_support.load_teststring('big5')

    if RESYNC_FASTER:
        # Version from CPython 3.6 where \x80\x80 is two invalid sequences.
        # Java 8 agrees with this interpretation.
        codectests = (
            # invalid bytes
            (b"abc\x80\x80\xc1\xc4", "strict", None),
            (b"abc\xc8", "strict", None),
            (b"abc\x80\x80\xc1\xc4", "replace", u"abc\ufffd\ufffd\u8b10"),
            (b"abc\x80\x80\xc1\xc4\xc8", "replace",
             u"abc\ufffd\ufffd\u8b10\ufffd"),
            (b"abc\x80\x80\xc1\xc4", "ignore", u"abc\u8b10"),
        )
    else:
        # Standard version of test from CPython 2.7
        codectests = (
            # invalid bytes
            ("abc\x80\x80\xc1\xc4", "strict", None),
            ("abc\xc8", "strict", None),
            ("abc\x80\x80\xc1\xc4", "replace", u"abc\ufffd\u8b10"),
            ("abc\x80\x80\xc1\xc4\xc8", "replace", u"abc\ufffd\u8b10\ufffd"),
            ("abc\x80\x80\xc1\xc4", "ignore", u"abc\u8b10"),
        )
class Test_EUC_JP_COMPAT(test_multibytecodec_support.TestBase,
                         unittest.TestCase):
    """Codec test data for the ``euc_jp`` encoding."""

    encoding = 'euc_jp'
    tstring = test_multibytecodec_support.load_teststring('euc_jp')

    # Shared EUC-JP cases plus three codec-specific rows.  The first row has
    # a byte-string input; the last two have unicode inputs (U+00A5, U+203E)
    # paired with single-byte results.
    codectests = eucjp_commontests + (
        ("\xa1\xc0\\", "strict", u"\uff3c\\"),
        (u"\xa5", "strict", "\x5c"),
        (u"\u203e", "strict", "\x7e"),
    )
class Test_ISO2022_KR(test_multibytecodec_support.TestBase,
                      unittest.TestCase):
    """Codec test data for the ``iso2022_kr`` encoding."""

    encoding = 'iso2022_kr'
    tstring = test_multibytecodec_support.load_teststring('iso2022_kr')

    # Shared cases plus one row where the ESC-N bytes are expected to come
    # through unchanged under the "replace" handler.
    codectests = COMMON_CODEC_TESTS + (
        (b'ab\x1BNdef', 'replace', 'ab\x1BNdef'),
    )

    # iso2022_kr.txt cannot be used to test "chunk coding": the escape
    # sequence is only written on the first line
    def test_chunkcoding(self):
        # Intentionally empty: defining the name here turns the check into
        # a no-op for this codec.
        pass
# NOTE(review): another class named Test_Big5 is visible earlier; if both live
# in the same module this definition replaces the earlier one -- confirm.
class Test_Big5(test_multibytecodec_support.TestBase, unittest.TestCase):
    """Codec test data for the ``big5`` encoding."""

    encoding = 'big5'
    tstring = test_multibytecodec_support.load_teststring('big5')

    # Rows look like (input bytes, error-handler name, expected text); a None
    # result presumably means decoding must fail -- confirm in TestBase.
    codectests = (
        # invalid bytes
        (b"abc\x80\x80\xc1\xc4", "strict", None),
        (b"abc\xc8", "strict", None),
        (b"abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\u8b10"),
        (b"abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\u8b10\ufffd"),
        (b"abc\x80\x80\xc1\xc4", "ignore", "abc\u8b10"),
    )
class Test_JOHAB(test_multibytecodec_support.TestBase, unittest.TestCase):
    """Codec test data for the ``johab`` encoding."""

    encoding = 'johab'
    tstring = test_multibytecodec_support.load_teststring('johab')

    # Rows look like (input, error-handler name, expected result); a None
    # result presumably means the conversion must fail -- confirm in TestBase.
    codectests = (
        # invalid bytes
        ("abc\x80\x80\xc1\xc4", "strict", None),
        ("abc\xc8", "strict", None),
        ("abc\x80\x80\xc1\xc4", "replace", u"abc\ufffd\ucd27"),
        ("abc\x80\x80\xc1\xc4\xc8", "replace", u"abc\ufffd\ucd27\ufffd"),
        ("abc\x80\x80\xc1\xc4", "ignore", u"abc\ucd27"),
    )
class Test_CP949(test_multibytecodec_support.TestBase, unittest.TestCase):
    """Codec test data for the ``cp949`` encoding."""

    encoding = 'cp949'
    tstring = test_multibytecodec_support.load_teststring('cp949')

    # Rows look like (input, error-handler name, expected result); a None
    # result presumably means the conversion must fail -- confirm in TestBase.
    codectests = (
        # invalid bytes
        ("abc\x80\x80\xc1\xc4", "strict", None),
        ("abc\xc8", "strict", None),
        ("abc\x80\x80\xc1\xc4", "replace", u"abc\ufffd\uc894"),
        ("abc\x80\x80\xc1\xc4\xc8", "replace", u"abc\ufffd\uc894\ufffd"),
        ("abc\x80\x80\xc1\xc4", "ignore", u"abc\uc894"),
    )
class Test_GB2312(test_multibytecodec_support.TestBase, unittest.TestCase):
    """Codec test data for the ``gb2312`` encoding."""

    encoding = 'gb2312'
    tstring = test_multibytecodec_support.load_teststring('gb2312')

    # Rows look like (input, error-handler name, expected result); a None
    # result presumably means the conversion must fail -- confirm in TestBase.
    codectests = (
        # invalid bytes
        ("abc\x81\x81\xc1\xc4", "strict", None),
        ("abc\xc8", "strict", None),
        ("abc\x81\x81\xc1\xc4", "replace", u"abc\ufffd\u804a"),
        ("abc\x81\x81\xc1\xc4\xc8", "replace", u"abc\ufffd\u804a\ufffd"),
        ("abc\x81\x81\xc1\xc4", "ignore", u"abc\u804a"),
        # lead byte 0xc1 followed by an ASCII trail byte
        ("\xc1\x64", "strict", None),
    )
class Test_GBK(test_multibytecodec_support.TestBase, unittest.TestCase):
    """Codec test data for the ``gbk`` encoding."""

    encoding = 'gbk'
    tstring = test_multibytecodec_support.load_teststring('gbk')

    # Rows look like (input, error-handler name, expected result); a None
    # result presumably means the conversion must fail -- confirm in TestBase.
    codectests = (
        # invalid bytes
        ("abc\x80\x80\xc1\xc4", "strict", None),
        ("abc\xc8", "strict", None),
        ("abc\x80\x80\xc1\xc4", "replace", u"abc\ufffd\u804a"),
        ("abc\x80\x80\xc1\xc4\xc8", "replace", u"abc\ufffd\u804a\ufffd"),
        ("abc\x80\x80\xc1\xc4", "ignore", u"abc\u804a"),
        # a four-byte input, then a unicode input (U+30FB); both expect None
        ("\x83\x34\x83\x31", "strict", None),
        (u"\u30fb", "strict", None),
    )
class Test_GB18030(test_multibytecodec_support.TestBase, unittest.TestCase):
    """Codec test data for the ``gb18030`` encoding."""

    encoding = 'gb18030'
    tstring = test_multibytecodec_support.load_teststring('gb18030')

    # Rows look like (input, error-handler name, expected result); a None
    # result presumably means the conversion must fail -- confirm in TestBase.
    codectests = (
        # invalid bytes
        ("abc\x80\x80\xc1\xc4", "strict", None),
        ("abc\xc8", "strict", None),
        ("abc\x80\x80\xc1\xc4", "replace", u"abc\ufffd\u804a"),
        ("abc\x80\x80\xc1\xc4\xc8", "replace", u"abc\ufffd\u804a\ufffd"),
        ("abc\x80\x80\xc1\xc4", "ignore", u"abc\u804a"),
        # four bytes \x84\x39\x84\x39 yield a single replacement character
        ("abc\x84\x39\x84\x39\xc1\xc4", "replace", u"abc\ufffd\u804a"),
    )

    # Flag presumably read by TestBase -- its exact effect is not visible here.
    has_iso10646 = True
class Test_CP932(test_multibytecodec_support.TestBase, unittest.TestCase):
    """Codec test data for the ``cp932`` encoding."""

    encoding = 'cp932'
    # The sample text loaded here is the 'shift_jis' one, not a
    # cp932-specific file.
    tstring = test_multibytecodec_support.load_teststring('shift_jis')

    # Rows look like (input, error-handler name, expected result); a None
    # result presumably means the conversion must fail -- confirm in TestBase.
    codectests = (
        # invalid bytes
        ("abc\x81\x00\x81\x00\x82\x84", "strict", None),
        ("abc\xf8", "strict", None),
        ("abc\x81\x00\x82\x84", "replace", u"abc\ufffd\uff44"),
        ("abc\x81\x00\x82\x84\x88", "replace", u"abc\ufffd\uff44\ufffd"),
        ("abc\x81\x00\x82\x84", "ignore", u"abc\uff44"),
        # sjis vs cp932
        ("\\\x7e", "replace", u"\\\x7e"),
        ("\x81\x5f\x81\x61\x81\x7c", "replace", u"\uff3c\u2225\uff0d"),
    )
class Test_SJISX0213(test_multibytecodec_support.TestBase, unittest.TestCase):
    """Codec test data for the ``shift_jisx0213`` encoding."""

    encoding = 'shift_jisx0213'
    tstring = test_multibytecodec_support.load_teststring('shift_jisx0213')

    # Rows look like (input, error-handler name, expected result); a None
    # result presumably means the conversion must fail -- confirm in TestBase.
    codectests = (
        # invalid bytes
        ("abc\x80\x80\x82\x84", "strict", None),
        ("abc\xf8", "strict", None),
        ("abc\x80\x80\x82\x84", "replace", u"abc\ufffd\uff44"),
        ("abc\x80\x80\x82\x84\x88", "replace", u"abc\ufffd\uff44\ufffd"),
        ("abc\x80\x80\x82\x84def", "ignore", u"abc\uff44def"),
        # sjis vs cp932
        ("\\\x7e", "replace", u"\xa5\u203e"),
        ("\x81\x5f\x81\x61\x81\x7c", "replace", u"\x5c\u2016\u2212"),
    )

    # (unicode input, expected encoded bytes).  U+211C, U+2329, U+1234 and
    # U+232A are written as ASCII character references in the expected
    # bytes; raw non-ASCII characters are not valid in this byte string.
    # Presumably consumed by a TestBase check that substitutes character
    # references for unencodable characters -- confirm against TestBase.
    xmlcharnametest = (
        u"\xab\u211c\xbb = \u2329\u1234\u232a",
        "\x85G&real;\x85Q = &lang;&#4660;&rang;",
    )
class Test_EUC_JISX0213(test_multibytecodec_support.TestBase,
                        unittest.TestCase):
    """Codec test data for the ``euc_jisx0213`` encoding."""

    encoding = 'euc_jisx0213'
    tstring = test_multibytecodec_support.load_teststring('euc_jisx0213')

    # Rows look like (input, error-handler name, expected result); a None
    # result presumably means the conversion must fail -- confirm in TestBase.
    codectests = (
        # invalid bytes
        ("abc\x80\x80\xc1\xc4", "strict", None),
        ("abc\xc8", "strict", None),
        ("abc\x80\x80\xc1\xc4", "replace", u"abc\ufffd\u7956"),
        ("abc\x80\x80\xc1\xc4\xc8", "replace", u"abc\ufffd\u7956\ufffd"),
        ("abc\x80\x80\xc1\xc4", "ignore", u"abc\u7956"),
        ("abc\x8f\x83\x83", "replace", u"abc\ufffd"),
        ("\xc1\x64", "strict", None),
        ("\xa1\xc0", "strict", u"\uff3c"),
    )

    # (unicode input, expected encoded bytes).  U+211C, U+2329, U+1234 and
    # U+232A are written as ASCII character references in the expected
    # bytes; raw non-ASCII characters are not valid in this byte string.
    # Presumably consumed by a TestBase check that substitutes character
    # references for unencodable characters -- confirm against TestBase.
    xmlcharnametest = (
        u"\xab\u211c\xbb = \u2329\u1234\u232a",
        "\xa9\xa8&real;\xa9\xb2 = &lang;&#4660;&rang;",
    )
class Test_ISO2022_JP2(test_multibytecodec_support.TestBase,
                       unittest.TestCase):
    """Codec test data for the ``iso2022_jp_2`` encoding."""

    encoding = 'iso2022_jp_2'
    # The sample text loaded here is the 'iso2022_jp' one.
    tstring = test_multibytecodec_support.load_teststring('iso2022_jp')

    # Shared cases plus one row where the ESC-N bytes are expected to vanish
    # from the output under the "replace" handler.
    codectests = COMMON_CODEC_TESTS + (
        (b'ab\x1BNdef', 'replace', 'abdef'),
    )