コード例 #1
0
class Test_HZ(multibytecodec_support.TestBase, unittest.TestCase):
    """Test vectors for the 'hz' codec.

    Each ``codectests`` entry is an ``(input, errors, expected)`` triple.
    """

    encoding = "hz"
    tstring = multibytecodec_support.load_teststring("hz")
    codectests = (
        # '~\n' handling, input written across three lines.
        (
            b"This sentence is in ASCII.\n"
            b"The next sentence is in GB.~{<:Ky2;S{#,~}~\n"
            b"~{NpJ)l6HK!#~}Bye.\n",
            "strict",
            "This sentence is in ASCII.\n"
            "The next sentence is in GB."
            "\u5df1\u6240\u4e0d\u6b32\uff0c"
            "\u52ff\u65bd\u65bc\u4eba\u3002"
            "Bye.\n",
        ),
        # '~\n' handling, input written across four lines.
        (
            b"This sentence is in ASCII.\n"
            b"The next sentence is in GB.~\n"
            b"~{<:Ky2;S{#,NpJ)l6HK!#~}~\n"
            b"Bye.\n",
            "strict",
            "This sentence is in ASCII.\n"
            "The next sentence is in GB."
            "\u5df1\u6240\u4e0d\u6b32\uff0c"
            "\u52ff\u65bd\u65bc\u4eba\u3002"
            "Bye.\n",
        ),
        # Invalid byte sequences decoded with the 'replace' handler.
        (b"ab~cd", "replace", "ab\ufffdcd"),
        (b"ab\xffcd", "replace", "ab\ufffdcd"),
        (b"ab~{\x81\x81AD~}cd", "replace", "ab\ufffd\ufffd\u804acd"),
        (b"ab~{AD~}cd", "replace", "ab\u804acd"),
        (b"ab~{yyAD~}cd", "replace", "ab\ufffd\ufffd\u804acd"),
        # Issue 30003.
        ("ab~cd", "strict", b"ab~~cd"),  # '~' is escaped when encoding
        (b"~{Dc~~:C~}", "strict", None),  # '~~' only in ASCII mode
        (b"~{Dc~\n:C~}", "strict", None),  # '~\n' only in ASCII mode
    )
コード例 #2
0
class Test_SJIS_COMPAT(multibytecodec_support.TestBase, unittest.TestCase):
    """Extra 'shift_jis' vectors appended to ``shiftjis_commonenctests``.

    Each entry is an ``(input, errors, expected)`` triple.
    """

    encoding = 'shift_jis'
    tstring = multibytecodec_support.load_teststring('shift_jis')
    codectests = shiftjis_commonenctests + (
        # The inputs carry an explicit b'' prefix: on Python 2.6+ this is the
        # same object as a plain str literal, and on Python 3 it keeps the
        # input a byte string instead of silently turning it into text.
        (b"\\\x7e", "strict", u"\\\x7e"),
        (b"\x81\x5f\x81\x61\x81\x7c", "strict", u"\uff3c\u2016\u2212"),
    )
コード例 #3
0
class Test_GB2312(multibytecodec_support.TestBase, unittest.TestCase):
    """Test vectors for the 'gb2312' codec.

    Each ``codectests`` entry is an ``(input, errors, expected)`` triple.
    """

    encoding = "gb2312"
    tstring = multibytecodec_support.load_teststring("gb2312")
    codectests = (
        (b"abc\x81\x81\xc1\xc4", "strict", None),
        (b"abc\xc8", "strict", None),
        (b"abc\x81\x81\xc1\xc4", "replace", "abc\ufffd\ufffd\u804a"),
        (b"abc\x81\x81\xc1\xc4\xc8", "replace", "abc\ufffd\ufffd\u804a\ufffd"),
        (b"abc\x81\x81\xc1\xc4", "ignore", "abc\u804a"),
        (b"\xc1\x64", "strict", None),
    )
コード例 #4
0
class Test_EUC_JP_COMPAT(multibytecodec_support.TestBase, unittest.TestCase):
    """Extra 'euc_jp' vectors appended to ``euc_commontests``."""

    encoding = "euc_jp"
    tstring = multibytecodec_support.load_teststring("euc_jp")
    # Both additions take text input and expect a single ASCII byte:
    # U+00A5 -> backslash, U+203E -> tilde.
    codectests = euc_commontests + (
        ('\xa5', 'strict', b'\\'),
        ('\u203e', 'strict', b'~'),
    )
コード例 #5
0
class Test_CP949(multibytecodec_support.TestBase, unittest.TestCase):
    """Test vectors for the 'cp949' codec.

    Each ``codectests`` entry is an ``(input, errors, expected)`` triple.
    """

    encoding = "cp949"
    tstring = multibytecodec_support.load_teststring("cp949")
    codectests = (
        (b"abc\x80\x80\xc1\xc4", "strict", None),
        (b"abc\xc8", "strict", None),
        (b"abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\ufffd\uc894"),
        (b"abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\ufffd\uc894\ufffd"),
        (b"abc\x80\x80\xc1\xc4", "ignore", "abc\uc894"),
    )
コード例 #6
0
class Test_EUCKR(multibytecodec_support.TestBase, unittest.TestCase):
    """Test vectors for the 'euc_kr' codec.

    Each ``codectests`` entry is an ``(input, errors, expected)`` triple.
    """

    encoding = 'euc_kr'
    tstring = multibytecodec_support.load_teststring('euc_kr')
    codectests = (
        # Invalid byte sequences.
        (b'abc\x80\x80\xc1\xc4', 'strict', None),
        (b'abc\xc8', 'strict', None),
        (b'abc\x80\x80\xc1\xc4', 'replace', 'abc\ufffd\ufffd\uc894'),
        (b'abc\x80\x80\xc1\xc4\xc8', 'replace', 'abc\ufffd\ufffd\uc894\ufffd'),
        (b'abc\x80\x80\xc1\xc4', 'ignore', 'abc\uc894'),

        # Composed make-up sequences: the same 8-byte sequence cut off at
        # every shorter length, then complete.
        (b'\xa4\xd4', 'strict', None),
        (b'\xa4\xd4\xa4', 'strict', None),
        (b'\xa4\xd4\xa4\xb6', 'strict', None),
        (b'\xa4\xd4\xa4\xb6\xa4', 'strict', None),
        (b'\xa4\xd4\xa4\xb6\xa4\xd0', 'strict', None),
        (b'\xa4\xd4\xa4\xb6\xa4\xd0\xa4', 'strict', None),
        (b'\xa4\xd4\xa4\xb6\xa4\xd0\xa4\xd4', 'strict', '\uc4d4'),
        (b'\xa4\xd4\xa4\xb6\xa4\xd0\xa4\xd4x', 'strict', '\uc4d4x'),
        (b'a\xa4\xd4\xa4\xb6\xa4', 'replace', 'a\ufffd'),
        # One lead byte changed from 0xa4 to 0xa3.
        (b'\xa4\xd4\xa3\xb6\xa4\xd0\xa4\xd4', 'strict', None),
        (b'\xa4\xd4\xa4\xb6\xa3\xd0\xa4\xd4', 'strict', None),
        (b'\xa4\xd4\xa4\xb6\xa4\xd0\xa3\xd4', 'strict', None),
        # A 0xff byte at various positions, decoded with 'replace'.
        (
            b'\xa4\xd4\xa4\xff\xa4\xd0\xa4\xd4',
            'replace',
            '\ufffd\u6e21\ufffd\u3160\ufffd',
        ),
        (
            b'\xa4\xd4\xa4\xb6\xa4\xff\xa4\xd4',
            'replace',
            '\ufffd\u6e21\ub544\ufffd\ufffd',
        ),
        (
            b'\xa4\xd4\xa4\xb6\xa4\xd0\xa4\xff',
            'replace',
            '\ufffd\u6e21\ub544\u572d\ufffd',
        ),
        (
            b'\xa4\xd4\xff\xa4\xd4\xa4\xb6\xa4\xd0\xa4\xd4',
            'replace',
            '\ufffd\ufffd\ufffd\uc4d4',
        ),
        (b'\xc1\xc4', 'strict', '\uc894'),
    )
コード例 #7
0
class Test_SJISX0213(multibytecodec_support.TestBase, unittest.TestCase):
    """Extra 'shift_jisx0213' vectors appended to ``shiftjis_commonenctests``.

    Each ``codectests`` entry is an ``(input, errors, expected)`` triple.
    Non-ASCII expectations are spelled with escapes so that visually similar
    characters (ASCII 'd' vs. U+FF44, U+3008 vs. U+2329) cannot be confused
    or silently altered by text normalisation.
    """

    encoding = 'shift_jisx0213'
    tstring = multibytecodec_support.load_teststring('shift_jisx0213')
    codectests = shiftjis_commonenctests + (
        # b'\x82\x84' is FULLWIDTH LATIN SMALL LETTER D (U+FF44), not 'd'.
        (b"abc\x80\x80\x82\x84", "replace", "abc\ufffd\ufffd\uff44"),
        (b"abc\x80\x80\x82\x84\x88", "replace", "abc\ufffd\ufffd\uff44\ufffd"),

        # sjis vs cp932
        (b"\\\x7e", "replace", "\xa5\u203e"),
        (b"\x81\x5f\x81\x61\x81\x7c", "replace", "\x5c\u2016\u2212"),
    )
    # The angle brackets must be U+2329/U+232A to pair with the
    # &lang;/&rang; references in the expected bytes.
    xmlcharnametest = ("\xab\u211c\xbb = \u2329\u1234\u232a",
                       b"\x85G&real;\x85Q = &lang;&#4660;&rang;")
コード例 #8
0
class Test_Big5HKSCS(multibytecodec_support.TestBase, unittest.TestCase):
    """Test vectors for the 'big5hkscs' codec.

    Each ``codectests`` entry is an ``(input, errors, expected)`` triple.
    """

    encoding = "big5hkscs"
    tstring = multibytecodec_support.load_teststring("big5hkscs")
    codectests = (
        (b"abc\x80\x80\xc1\xc4", "strict", None),
        (b"abc\xc8", "strict", None),
        (b"abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\ufffd\u8b10"),
        (b"abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\ufffd\u8b10\ufffd"),
        (b"abc\x80\x80\xc1\xc4", "ignore", "abc\u8b10"),
    )
コード例 #9
0
class Test_ISO2022_KR(multibytecodec_support.TestBase, unittest.TestCase):
    """Test vectors for the 'iso2022_kr' codec."""

    encoding = "iso2022_kr"
    tstring = multibytecodec_support.load_teststring("iso2022_kr")
    # One codec-specific vector on top of the shared COMMON_CODEC_TESTS.
    codectests = COMMON_CODEC_TESTS + (
        (b"ab\x1bNdef", "replace", "ab\x1bNdef"),
    )

    @unittest.skip('iso2022_kr.txt cannot be used to test "chunk coding"')
    def test_chunkcoding(self):
        # Never executed: the decorator above skips this test.
        # Presumably this shadows a TestBase method of the same name; confirm.
        pass
コード例 #10
0
class Test_JOHAB(multibytecodec_support.TestBase, unittest.TestCase):
    """Test vectors for the 'johab' codec.

    Each ``codectests`` entry is an ``(input, errors, expected)`` triple.
    """

    encoding = "johab"
    tstring = multibytecodec_support.load_teststring("johab")
    codectests = (
        (b"abc\x80\x80\xc1\xc4", "strict", None),
        (b"abc\xc8", "strict", None),
        (b"abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\ufffd\ucd27"),
        (b"abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\ufffd\ucd27\ufffd"),
        (b"abc\x80\x80\xc1\xc4", "ignore", "abc\ucd27"),
        (b"\xd8abc", "replace", "\ufffdabc"),
        (b"\xd8\xffabc", "replace", "\ufffd\ufffdabc"),
        (b"\x84bxy", "replace", "\ufffdbxy"),
        (b"\x8cBxy", "replace", "\ufffdBxy"),
    )
コード例 #11
0
class Test_SJIS_COMPAT(multibytecodec_support.TestBase, unittest.TestCase):
    """Extra 'shift_jis' vectors appended to ``shiftjis_commonenctests``.

    Each ``codectests`` entry is an ``(input, errors, expected)`` triple.
    Non-ASCII expectations are written as escapes: fullwidth forms
    (U+FF44, U+FF3C) are easy to confuse with their ASCII look-alikes, and a
    bare backslash before a non-escape character is an invalid escape
    sequence.
    """

    encoding = 'shift_jis'
    tstring = multibytecodec_support.load_teststring('shift_jis')
    codectests = shiftjis_commonenctests + (
        # b'\x82\x84' is FULLWIDTH LATIN SMALL LETTER D (U+FF44), not 'd'.
        (b"abc\x80\x80\x82\x84", "replace", "abc\ufffd\ufffd\uff44"),
        (b"abc\x80\x80\x82\x84\x88", "replace", "abc\ufffd\ufffd\uff44\ufffd"),
        (b"\\\x7e", "strict", "\\\x7e"),
        # b'\x81\x5f' is FULLWIDTH REVERSE SOLIDUS (U+FF3C), not '\\'.
        (b"\x81\x5f\x81\x61\x81\x7c", "strict", "\uff3c\u2016\u2212"),
        (b"abc\x81\x39", "replace", "abc\ufffd9"),
        (b"abc\xea\xfc", "replace", "abc\ufffd\ufffd"),
        (b"abc\xff\x58", "replace", "abc\ufffdX"),
    )
コード例 #12
0
class Test_JOHAB(multibytecodec_support.TestBase, unittest.TestCase):
    """Test vectors for the 'johab' codec.

    Each ``codectests`` entry is an ``(input, errors, expected)`` triple.
    """
    # NOTE(review): inputs are un-prefixed str literals and expectations use
    # u'' literals, which looks like Python 2 era data; confirm the target
    # interpreter before relying on these values.

    encoding = 'johab'
    tstring = multibytecodec_support.load_teststring('johab')
    codectests = (
        # Invalid byte sequences.
        ('abc\x80\x80\xc1\xc4', 'strict', None),
        ('abc\xc8', 'strict', None),
        ('abc\x80\x80\xc1\xc4', 'replace', u'abc\ufffd\ucd27'),
        ('abc\x80\x80\xc1\xc4\xc8', 'replace', u'abc\ufffd\ucd27\ufffd'),
        ('abc\x80\x80\xc1\xc4', 'ignore', u'abc\ucd27'),
    )
コード例 #13
0
class Test_SJIS_2004(multibytecodec_support.TestBase, unittest.TestCase):
    """Extra 'shift_jis_2004' vectors appended to ``shiftjis_commonenctests``.

    Each ``codectests`` entry is an ``(input, errors, expected)`` triple.
    Non-ASCII expectations are spelled with escapes so that visually similar
    characters (ASCII 'd' vs. U+FF44, U+3008 vs. U+2329) cannot be confused
    or silently altered by text normalisation.
    """

    encoding = 'shift_jis_2004'
    tstring = multibytecodec_support.load_teststring('shift_jis')
    codectests = shiftjis_commonenctests + (
        (b"\\\x7e", "strict", "\xa5\u203e"),
        (b"\x81\x5f\x81\x61\x81\x7c", "strict", "\\\u2016\u2212"),
        (b"abc\xea\xfc", "strict", "abc\u64bf"),
        (b"\x81\x39xy", "replace", "\ufffd9xy"),
        (b"\xff\x58xy", "replace", "\ufffdXxy"),
        # b'\x82\x84' is FULLWIDTH LATIN SMALL LETTER D (U+FF44), not 'd'.
        (b"\x80\x80\x82\x84xy", "replace", "\ufffd\ufffd\uff44xy"),
        (b"\x80\x80\x82\x84\x88xy", "replace", "\ufffd\ufffd\uff44\u5864y"),
        (b"\xfc\xfbxy", "replace", "\ufffd\u95b4y"),
    )
    # The angle brackets must be U+2329/U+232A to pair with the
    # &lang;/&rang; references in the expected bytes.
    xmlcharnametest = ("\xab\u211c\xbb = \u2329\u1234\u232a",
                       b"\x85G&real;\x85Q = &lang;&#4660;&rang;")
コード例 #14
0
class Test_ISO2022_KR(multibytecodec_support.TestBase, unittest.TestCase):
    """Test vectors for the 'iso2022_kr' codec."""

    encoding = "iso2022_kr"
    tstring = multibytecodec_support.load_teststring("iso2022_kr")
    # One codec-specific vector on top of the shared COMMON_CODEC_TESTS.
    codectests = COMMON_CODEC_TESTS + (
        (b"ab\x1bNdef", "replace", "ab\x1bNdef"),
    )

    # The chunk-coding test is skipped for this codec: iso2022_kr.txt only
    # carries the escape sequence on its first line, so it cannot be used
    # for that test.
    @unittest.skip('iso2022_kr.txt cannot be used to test "chunk coding"')
    def test_chunkcoding(self):
        pass
コード例 #15
0
class Test_CP949(multibytecodec_support.TestBase, unittest.TestCase):
    """Test vectors for the 'cp949' codec.

    Each ``codectests`` entry is an ``(input, errors, expected)`` triple.
    """

    encoding = 'cp949'
    tstring = multibytecodec_support.load_teststring('cp949')
    codectests = (
        # Invalid byte sequences.
        (b'abc\x80\x80\xc1\xc4', 'strict', None),
        (b'abc\xc8', 'strict', None),
        (b'abc\x80\x80\xc1\xc4', 'replace', 'abc\ufffd\ufffd\uc894'),
        (b'abc\x80\x80\xc1\xc4\xc8', 'replace', 'abc\ufffd\ufffd\uc894\ufffd'),
        (b'abc\x80\x80\xc1\xc4', 'ignore', 'abc\uc894'),
    )
コード例 #16
0
class Test_Big5HKSCS(multibytecodec_support.TestBase, unittest.TestCase):
    """Test vectors for the 'big5hkscs' codec.

    Each ``codectests`` entry is an ``(input, errors, expected)`` triple.
    """
    # NOTE(review): inputs are un-prefixed str literals and expectations use
    # u'' literals, which looks like Python 2 era data; confirm the target
    # interpreter before relying on these values.

    encoding = 'big5hkscs'
    tstring = multibytecodec_support.load_teststring('big5hkscs')
    codectests = (
        # Invalid byte sequences.
        ('abc\x80\x80\xc1\xc4', 'strict', None),
        ('abc\xc8', 'strict', None),
        ('abc\x80\x80\xc1\xc4', 'replace', u'abc\ufffd\u8b10'),
        ('abc\x80\x80\xc1\xc4\xc8', 'replace', u'abc\ufffd\u8b10\ufffd'),
        ('abc\x80\x80\xc1\xc4', 'ignore', u'abc\u8b10'),
    )
コード例 #17
0
class Test_SJIS_COMPAT(multibytecodec_support.TestBase, unittest.TestCase):
    """Extra 'shift_jis' vectors appended to ``shiftjis_commonenctests``.

    Each ``codectests`` entry is an ``(input, errors, expected)`` triple.
    """

    encoding = 'shift_jis'
    tstring = multibytecodec_support.load_teststring('shift_jis')
    codectests = shiftjis_commonenctests + (
        (b'abc\x80\x80\x82\x84', 'replace', 'abc\ufffd\ufffd\uff44'),
        (b'abc\x80\x80\x82\x84\x88', 'replace', 'abc\ufffd\ufffd\uff44\ufffd'),
        # Backslash and tilde decode unchanged under 'strict'.
        (b'\\~', 'strict', '\\~'),
        (b'\x81_\x81a\x81|', 'strict', '\uff3c\u2016\u2212'),
        (b'abc\x819', 'replace', 'abc\ufffd9'),
        (b'abc\xea\xfc', 'replace', 'abc\ufffd\ufffd'),
        (b'abc\xffX', 'replace', 'abc\ufffdX'),
    )
コード例 #18
0
class Test_GB2312(multibytecodec_support.TestBase, unittest.TestCase):
    """Test vectors for the 'gb2312' codec.

    Each ``codectests`` entry is an ``(input, errors, expected)`` triple.
    """

    encoding = 'gb2312'
    tstring = multibytecodec_support.load_teststring('gb2312')
    codectests = (
        # Invalid byte sequences.
        (b'abc\x81\x81\xc1\xc4', 'strict', None),
        (b'abc\xc8', 'strict', None),
        (b'abc\x81\x81\xc1\xc4', 'replace', 'abc\ufffd\ufffd\u804a'),
        (b'abc\x81\x81\xc1\xc4\xc8', 'replace', 'abc\ufffd\ufffd\u804a\ufffd'),
        (b'abc\x81\x81\xc1\xc4', 'ignore', 'abc\u804a'),
        (b'\xc1d', 'strict', None),
    )
コード例 #19
0
class Test_SJISX0213(multibytecodec_support.TestBase, unittest.TestCase):
    """Extra 'shift_jisx0213' vectors appended to ``shiftjis_commonenctests``.

    Each ``codectests`` entry is an ``(input, errors, expected)`` triple.
    """

    encoding = 'shift_jisx0213'
    tstring = multibytecodec_support.load_teststring('shift_jisx0213')
    codectests = shiftjis_commonenctests + (
        (b'abc\x80\x80\x82\x84', 'replace', 'abc\ufffd\ufffd\uff44'),
        (b'abc\x80\x80\x82\x84\x88', 'replace', 'abc\ufffd\ufffd\uff44\ufffd'),

        # Points where sjis and cp932 differ.
        (b'\\~', 'replace', '\xa5\u203e'),
        (b'\x81_\x81a\x81|', 'replace', '\\\u2016\u2212'),
    )
    xmlcharnametest = (
        '\xab\u211c\xbb = \u2329\u1234\u232a',
        b'\x85G&real;\x85Q = &lang;&#4660;&rang;',
    )
コード例 #20
0
class Test_GBK(multibytecodec_support.TestBase, unittest.TestCase):
    """Test vectors for the 'gbk' codec.

    Each ``codectests`` entry is an ``(input, errors, expected)`` triple.
    """

    encoding = 'gbk'
    tstring = multibytecodec_support.load_teststring('gbk')
    codectests = (
        # Invalid byte sequences.
        (b'abc\x80\x80\xc1\xc4', 'strict', None),
        (b'abc\xc8', 'strict', None),
        (b'abc\x80\x80\xc1\xc4', 'replace', 'abc\ufffd\ufffd\u804a'),
        (b'abc\x80\x80\xc1\xc4\xc8', 'replace', 'abc\ufffd\ufffd\u804a\ufffd'),
        (b'abc\x80\x80\xc1\xc4', 'ignore', 'abc\u804a'),
        (b'\x834\x831', 'strict', None),
        # The only vector here whose input is text rather than bytes.
        ('\u30fb', 'strict', None),
    )
コード例 #21
0
class Test_CP932(multibytecodec_support.TestBase, unittest.TestCase):
    """Test vectors for the 'cp932' codec.

    Each ``codectests`` entry is an ``(input, errors, expected)`` triple.
    Non-ASCII expectations are written as escapes: fullwidth forms
    (U+FF44, U+FF3C, U+FF0D) are easy to confuse with their ASCII
    look-alikes, and a bare backslash before a non-escape character is an
    invalid escape sequence.
    """

    encoding = 'cp932'
    tstring = multibytecodec_support.load_teststring('shift_jis')
    codectests = (
        # invalid bytes
        (b"abc\x81\x00\x81\x00\x82\x84", "strict", None),
        (b"abc\xf8", "strict", None),
        # b'\x82\x84' is FULLWIDTH LATIN SMALL LETTER D (U+FF44), not 'd'.
        (b"abc\x81\x00\x82\x84", "replace", "abc\ufffd\x00\uff44"),
        (b"abc\x81\x00\x82\x84\x88", "replace", "abc\ufffd\x00\uff44\ufffd"),
        (b"abc\x81\x00\x82\x84", "ignore", "abc\x00\uff44"),
        (b"ab\xebxy", "replace", "ab\ufffdxy"),
        (b"ab\xf0\x39xy", "replace", "ab\ufffd9xy"),
        (b"ab\xea\xf0xy", "replace", "ab\ufffd\ue038y"),
        # sjis vs cp932
        (b"\\\x7e", "replace", "\\\x7e"),
        # FULLWIDTH REVERSE SOLIDUS, PARALLEL TO, FULLWIDTH HYPHEN-MINUS.
        (b"\x81\x5f\x81\x61\x81\x7c", "replace", "\uff3c\u2225\uff0d"),
    )
コード例 #22
0
class Test_CP932(multibytecodec_support.TestBase, unittest.TestCase):
    """Test vectors for the 'cp932' codec.

    Each ``codectests`` entry is an ``(input, errors, expected)`` triple.
    """
    # NOTE(review): inputs are un-prefixed str literals and expectations use
    # u'' literals, which looks like Python 2 era data; confirm the target
    # interpreter before relying on these values.

    encoding = 'cp932'
    tstring = multibytecodec_support.load_teststring('shift_jis')
    codectests = (
        # Invalid byte sequences.
        ('abc\x81\x00\x81\x00\x82\x84', 'strict', None),
        ('abc\xf8', 'strict', None),
        ('abc\x81\x00\x82\x84', 'replace', u'abc\ufffd\uff44'),
        ('abc\x81\x00\x82\x84\x88', 'replace', u'abc\ufffd\uff44\ufffd'),
        ('abc\x81\x00\x82\x84', 'ignore', u'abc\uff44'),
        # Points where sjis and cp932 differ.
        ('\\~', 'replace', u'\\~'),
        ('\x81_\x81a\x81|', 'replace', u'\uff3c\u2225\uff0d'),
    )
コード例 #23
0
class Test_SJIS_2004(multibytecodec_support.TestBase, unittest.TestCase):
    """Extra 'shift_jis_2004' vectors appended to ``shiftjis_commonenctests``.

    Each ``codectests`` entry is an ``(input, errors, expected)`` triple.
    """

    encoding = 'shift_jis_2004'
    tstring = multibytecodec_support.load_teststring('shift_jis')
    codectests = shiftjis_commonenctests + (
        (b'\\~', 'strict', '\xa5\u203e'),
        (b'\x81_\x81a\x81|', 'strict', '\\\u2016\u2212'),
        (b'abc\xea\xfc', 'strict', 'abc\u64bf'),
        (b'\x819xy', 'replace', '\ufffd9xy'),
        (b'\xffXxy', 'replace', '\ufffdXxy'),
        (b'\x80\x80\x82\x84xy', 'replace', '\ufffd\ufffd\uff44xy'),
        (b'\x80\x80\x82\x84\x88xy', 'replace', '\ufffd\ufffd\uff44\u5864y'),
        (b'\xfc\xfbxy', 'replace', '\ufffd\u95b4y'),
    )
    xmlcharnametest = (
        '\xab\u211c\xbb = \u2329\u1234\u232a',
        b'\x85G&real;\x85Q = &lang;&#4660;&rang;',
    )
コード例 #24
0
class Test_JOHAB(multibytecodec_support.TestBase, unittest.TestCase):
    """Test vectors for the 'johab' codec.

    Each ``codectests`` entry is an ``(input, errors, expected)`` triple.
    """

    encoding = 'johab'
    tstring = multibytecodec_support.load_teststring('johab')
    codectests = (
        # Invalid byte sequences.
        (b'abc\x80\x80\xc1\xc4', 'strict', None),
        (b'abc\xc8', 'strict', None),
        (b'abc\x80\x80\xc1\xc4', 'replace', 'abc\ufffd\ufffd\ucd27'),
        (b'abc\x80\x80\xc1\xc4\xc8', 'replace', 'abc\ufffd\ufffd\ucd27\ufffd'),
        (b'abc\x80\x80\xc1\xc4', 'ignore', 'abc\ucd27'),
        (b'\xd8abc', 'replace', '\ufffdabc'),
        (b'\xd8\xffabc', 'replace', '\ufffd\ufffdabc'),
        (b'\x84bxy', 'replace', '\ufffdbxy'),
        (b'\x8cBxy', 'replace', '\ufffdBxy'),
    )
コード例 #25
0
class Test_GB18030(multibytecodec_support.TestBase, unittest.TestCase):
    """Test vectors for the 'gb18030' codec.

    Each ``codectests`` entry is an ``(input, errors, expected)`` triple.
    """

    encoding = "gb18030"
    tstring = multibytecodec_support.load_teststring("gb18030")
    codectests = (
        (b"abc\x80\x80\xc1\xc4", "strict", None),
        (b"abc\xc8", "strict", None),
        (b"abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\ufffd\u804a"),
        (b"abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\ufffd\u804a\ufffd"),
        (b"abc\x80\x80\xc1\xc4", "ignore", "abc\u804a"),
        (b"abc\x84\x39\x84\x39\xc1\xc4", "replace", "abc\ufffd9\ufffd9\u804a"),
        # The only vector here whose input is text rather than bytes.
        ("\u30fb", "strict", b"\x81\x39\xa7\x39"),
        (b"abc\x84\x32\x80\x80def", "replace", "abc\ufffd2\ufffd\ufffddef"),
        (b"abc\x81\x30\x81\x30def", "strict", "abc\x80def"),
        (b"abc\x86\x30\x81\x30def", "replace", "abc\ufffd0\ufffd0def"),
        (b"\xff\x30\x81\x30", "strict", None),
        (b"\x81\x30\xff\x30", "strict", None),
        (b"abc\x81\x39\xff\x39\xc1\xc4", "replace", "abc\ufffd9\ufffd9\u804a"),
        (b"abc\xab\x36\xff\x30def", "replace", "abc\ufffd6\ufffd0def"),
        (b"abc\xbf\x38\xff\x32\xc1\xc4", "ignore", "abc82\u804a"),
    )
    has_iso10646 = True
コード例 #26
0
class Test_EUC_JISX0213(multibytecodec_support.TestBase, unittest.TestCase):
    """Test vectors for the 'euc_jisx0213' codec.

    Each ``codectests`` entry is an ``(input, errors, expected)`` triple.
    """
    # NOTE(review): inputs are un-prefixed str literals and expectations use
    # u'' literals, which looks like Python 2 era data; confirm the target
    # interpreter before relying on these values.

    encoding = 'euc_jisx0213'
    tstring = multibytecodec_support.load_teststring('euc_jisx0213')
    codectests = (
        # Invalid byte sequences.
        ('abc\x80\x80\xc1\xc4', 'strict', None),
        ('abc\xc8', 'strict', None),
        ('abc\x80\x80\xc1\xc4', 'replace', u'abc\ufffd\u7956'),
        ('abc\x80\x80\xc1\xc4\xc8', 'replace', u'abc\ufffd\u7956\ufffd'),
        ('abc\x80\x80\xc1\xc4', 'ignore', u'abc\u7956'),
        ('abc\x8f\x83\x83', 'replace', u'abc\ufffd'),
        ('\xc1d', 'strict', None),
        ('\xa1\xc0', 'strict', u'\uff3c'),
    )
    xmlcharnametest = (
        u'\xab\u211c\xbb = \u2329\u1234\u232a',
        '\xa9\xa8&real;\xa9\xb2 = &lang;&#4660;&rang;',
    )
コード例 #27
0
class Test_SJISX0213(multibytecodec_support.TestBase, unittest.TestCase):
    """Test vectors for the 'shift_jisx0213' codec.

    Each ``codectests`` entry is an ``(input, errors, expected)`` triple.
    """
    # NOTE(review): inputs are un-prefixed str literals and expectations use
    # u'' literals, which looks like Python 2 era data; confirm the target
    # interpreter before relying on these values.

    encoding = 'shift_jisx0213'
    tstring = multibytecodec_support.load_teststring('shift_jisx0213')
    codectests = (
        # Invalid byte sequences.
        ('abc\x80\x80\x82\x84', 'strict', None),
        ('abc\xf8', 'strict', None),
        ('abc\x80\x80\x82\x84', 'replace', u'abc\ufffd\uff44'),
        ('abc\x80\x80\x82\x84\x88', 'replace', u'abc\ufffd\uff44\ufffd'),
        ('abc\x80\x80\x82\x84def', 'ignore', u'abc\uff44def'),
        # Points where sjis and cp932 differ.
        ('\\~', 'replace', u'\xa5\u203e'),
        ('\x81_\x81a\x81|', 'replace', u'\\\u2016\u2212'),
    )
    xmlcharnametest = (
        u'\xab\u211c\xbb = \u2329\u1234\u232a',
        '\x85G&real;\x85Q = &lang;&#4660;&rang;',
    )
コード例 #28
0
class Test_CP932(multibytecodec_support.TestBase, unittest.TestCase):
    """Test vectors for the 'cp932' codec.

    Each ``codectests`` entry is an ``(input, errors, expected)`` triple.
    """

    encoding = 'cp932'
    tstring = multibytecodec_support.load_teststring('shift_jis')
    codectests = (
        # Invalid byte sequences.
        (b'abc\x81\x00\x81\x00\x82\x84', 'strict', None),
        (b'abc\xf8', 'strict', None),
        (b'abc\x81\x00\x82\x84', 'replace', 'abc\ufffd\x00\uff44'),
        (b'abc\x81\x00\x82\x84\x88', 'replace', 'abc\ufffd\x00\uff44\ufffd'),
        (b'abc\x81\x00\x82\x84', 'ignore', 'abc\x00\uff44'),
        (b'ab\xebxy', 'replace', 'ab\ufffdxy'),
        (b'ab\xf09xy', 'replace', 'ab\ufffd9xy'),
        (b'ab\xea\xf0xy', 'replace', 'ab\ufffd\ue038y'),
        # Points where sjis and cp932 differ.
        (b'\\~', 'replace', '\\~'),
        (b'\x81_\x81a\x81|', 'replace', '\uff3c\u2225\uff0d'),
    )
コード例 #29
0
class Test_GB18030(multibytecodec_support.TestBase, unittest.TestCase):
    """Test vectors for the 'gb18030' codec.

    Each ``codectests`` entry is an ``(input, errors, expected)`` triple.
    """

    encoding = 'gb18030'
    tstring = multibytecodec_support.load_teststring('gb18030')
    codectests = (
        # Invalid byte sequences.
        (b'abc\x80\x80\xc1\xc4', 'strict', None),
        (b'abc\xc8', 'strict', None),
        (b'abc\x80\x80\xc1\xc4', 'replace', 'abc\ufffd\ufffd\u804a'),
        (b'abc\x80\x80\xc1\xc4\xc8', 'replace', 'abc\ufffd\ufffd\u804a\ufffd'),
        (b'abc\x80\x80\xc1\xc4', 'ignore', 'abc\u804a'),
        (b'abc\x849\x849\xc1\xc4', 'replace', 'abc\ufffd9\ufffd9\u804a'),
        # The only vector here whose input is text rather than bytes.
        ('\u30fb', 'strict', b'\x819\xa79'),
        (b'abc\x842\x80\x80def', 'replace', 'abc\ufffd2\ufffd\ufffddef'),
        (b'abc\x810\x810def', 'strict', 'abc\x80def'),
        (b'abc\x860\x810def', 'replace', 'abc\ufffd0\ufffd0def'),
    )
    has_iso10646 = True
コード例 #30
0
class Test_HZ(multibytecodec_support.TestBase, unittest.TestCase):
    """Test vectors for the 'hz' codec.

    Each ``codectests`` entry is an ``(input, errors, expected)`` triple.
    Non-ASCII expectations are written as escapes so that the fullwidth
    comma (U+FF0C) in the GB text cannot be mistaken for, or normalised to,
    an ASCII ','.
    """

    encoding = 'hz'
    tstring = multibytecodec_support.load_teststring('hz')
    codectests = (
        # test '~\n' (3 lines)
        (b'This sentence is in ASCII.\n'
         b'The next sentence is in GB.~{<:Ky2;S{#,~}~\n'
         b'~{NpJ)l6HK!#~}Bye.\n',
         'strict',
         'This sentence is in ASCII.\n'
         'The next sentence is in GB.'
         # The GB pair '#,' is FULLWIDTH COMMA (U+FF0C).
         '\u5df1\u6240\u4e0d\u6b32\uff0c\u52ff\u65bd\u65bc\u4eba\u3002'
         'Bye.\n'),
        # test '~\n' (4 lines)
        (b'This sentence is in ASCII.\n'
         b'The next sentence is in GB.~\n'
         b'~{<:Ky2;S{#,NpJ)l6HK!#~}~\n'
         b'Bye.\n',
         'strict',
         'This sentence is in ASCII.\n'
         'The next sentence is in GB.'
         '\u5df1\u6240\u4e0d\u6b32\uff0c\u52ff\u65bd\u65bc\u4eba\u3002'
         'Bye.\n'),
        # invalid bytes
        (b'ab~cd', 'replace', 'ab\ufffdcd'),
        (b'ab\xffcd', 'replace', 'ab\ufffdcd'),
        (b'ab~{\x81\x81\x41\x44~}cd', 'replace', 'ab\ufffd\ufffd\u804acd'),
        (b'ab~{\x41\x44~}cd', 'replace', 'ab\u804acd'),
        (b'ab~{\x79\x79\x41\x44~}cd', 'replace', 'ab\ufffd\ufffd\u804acd'),
        # '~' handling
        ('ab~cd', 'strict', b'ab~~cd'),  # '~' is escaped when encoding
        (b'~{Dc~~:C~}', 'strict', None),
        (b'~{Dc~\n:C~}', 'strict', None),
    )