Python load_teststring 예제들, test.test_multibytecodec_support.load_teststring Python 예제들

예제 #1

0

파일 보기

class Test_EUCKR(test_multibytecodec_support.TestBase, unittest.TestCase):
    """Test data for the 'euc_kr' codec.

    This class defines no test methods itself; it only sets class
    attributes, presumably consumed by the TestBase mixin (not visible
    here).
    """
    # Name of the codec under test.
    encoding = 'euc_kr'
    # Test string for this codec as returned by the shared helper.
    tstring = test_multibytecodec_support.load_teststring('euc_kr')
    # Each entry is (input, error-handler name, expected output).
    # NOTE(review): an expected value of None presumably means the codec
    # must raise for that input -- confirm against TestBase.
    codectests = (
        # invalid bytes
        ("abc\x80\x80\xc1\xc4", "strict", None),
        ("abc\xc8", "strict", None),
        ("abc\x80\x80\xc1\xc4", "replace", u"abc\ufffd\uc894"),
        ("abc\x80\x80\xc1\xc4\xc8", "replace", u"abc\ufffd\uc894\ufffd"),
        ("abc\x80\x80\xc1\xc4", "ignore", u"abc\uc894"),

        # composed make-up sequence errors
        # Every proper prefix of the 8-byte sequence \xa4\xd4\xa4\xb6\xa4\xd0\xa4\xd4
        # expects None under "strict"; only the complete sequence yields a
        # character (U+C4D4).
        ("\xa4\xd4", "strict", None),
        ("\xa4\xd4\xa4", "strict", None),
        ("\xa4\xd4\xa4\xb6", "strict", None),
        ("\xa4\xd4\xa4\xb6\xa4", "strict", None),
        ("\xa4\xd4\xa4\xb6\xa4\xd0", "strict", None),
        ("\xa4\xd4\xa4\xb6\xa4\xd0\xa4", "strict", None),
        ("\xa4\xd4\xa4\xb6\xa4\xd0\xa4\xd4", "strict", u"\uc4d4"),
        ("\xa4\xd4\xa4\xb6\xa4\xd0\xa4\xd4x", "strict", u"\uc4d4x"),
        ("a\xa4\xd4\xa4\xb6\xa4", "replace", u"a\ufffd"),
        # Same 8-byte sequence with one \xa4 lead byte changed to \xa3.
        ("\xa4\xd4\xa3\xb6\xa4\xd0\xa4\xd4", "strict", None),
        ("\xa4\xd4\xa4\xb6\xa3\xd0\xa4\xd4", "strict", None),
        ("\xa4\xd4\xa4\xb6\xa4\xd0\xa3\xd4", "strict", None),
        # Same sequence with one trailing byte changed to \xff; under
        # "replace" the whole input is expected to become a single U+FFFD.
        ("\xa4\xd4\xa4\xff\xa4\xd0\xa4\xd4", "replace", u"\ufffd"),
        ("\xa4\xd4\xa4\xb6\xa4\xff\xa4\xd4", "replace", u"\ufffd"),
        ("\xa4\xd4\xa4\xb6\xa4\xd0\xa4\xff", "replace", u"\ufffd"),
        # A plain two-byte character, for comparison.
        ("\xc1\xc4", "strict", u"\uc894"),
    )

예제 #2

0

파일 보기

class Test_SJIS_COMPAT(test_multibytecodec_support.TestBase, unittest.TestCase):
    """Test data for the 'shift_jis' codec.

    Only class attributes are set here; the test methods are presumably
    supplied by the TestBase mixin (not visible here).
    """
    # Name of the codec under test.
    encoding = 'shift_jis'
    # Test string for this codec as returned by the shared helper.
    tstring = test_multibytecodec_support.load_teststring('shift_jis')
    # The shared shiftjis_commonenctests tuple (defined outside this block)
    # extended with two extra (input, error-handler name, expected) entries.
    codectests = shiftjis_commonenctests + (
        # Backslash and 0x7e are expected to come through unchanged.
        (b"\\\x7e", "strict", "\\\x7e"),
        # Three two-byte sequences expected as U+FF3C, U+2016, U+2212.
        (b"\x81\x5f\x81\x61\x81\x7c", "strict", "\uff3c\u2016\u2212"),
    )

예제 #3

0

파일 보기

class Test_HZ(test_multibytecodec_support.TestBase, unittest.TestCase):
    """Test data for the 'hz' codec.

    Only class attributes are set here; the test methods are presumably
    supplied by the TestBase mixin (not visible here).
    """
    # Name of the codec under test.
    encoding = 'hz'
    # Test string for this codec as returned by the shared helper.
    tstring = test_multibytecodec_support.load_teststring('hz')
    # Each entry is (input bytes, error-handler name, expected text).
    # The two multi-line inputs below differ only in where the '~\n' pairs
    # and the ~{ ... ~} sections are split; both expect the same text, and
    # the '~\n' pairs have no counterpart in that expected text.
    codectests = (
        # test '~\n' (3 lines)
        (b'This sentence is in ASCII.\n'
         b'The next sentence is in GB.~{<:Ky2;S{#,~}~\n'
         b'~{NpJ)l6HK!#~}Bye.\n',
         'strict',
         u'This sentence is in ASCII.\n'
         u'The next sentence is in GB.'
         u'\u5df1\u6240\u4e0d\u6b32\uff0c\u52ff\u65bd\u65bc\u4eba\u3002'
         u'Bye.\n'),
        # test '~\n' (4 lines)
        (b'This sentence is in ASCII.\n'
         b'The next sentence is in GB.~\n'
         b'~{<:Ky2;S{#,NpJ)l6HK!#~}~\n'
         b'Bye.\n',
         'strict',
         u'This sentence is in ASCII.\n'
         u'The next sentence is in GB.'
         u'\u5df1\u6240\u4e0d\u6b32\uff0c\u52ff\u65bd\u65bc\u4eba\u3002'
         u'Bye.\n'),
        # invalid bytes
        # With 'replace', each offending piece is expected as U+FFFD: '~c'
        # becomes one U+FFFD, and \x81\x81 inside ~{ ~} becomes two.
        (b'ab~cd', 'replace', u'ab\uFFFDd'),
        (b'ab\xffcd', 'replace', u'ab\uFFFDcd'),
        (b'ab~{\x81\x81\x41\x44~}cd', 'replace', u'ab\uFFFD\uFFFD\u804Acd'),
    )

예제 #4

0

파일 보기

class Test_Big5(test_multibytecodec_support.TestBase, unittest.TestCase):
    """Test data for the 'big5' codec, with two alternative expectation sets.

    Which codectests table is used depends on RESYNC_FASTER, a flag defined
    outside this block. The tables differ in how many U+FFFD characters the
    invalid pair \\x80\\x80 is expected to produce under "replace": two in
    the first table, one in the second.
    """
    # Name of the codec under test.
    encoding = 'big5'
    # Test string for this codec as returned by the shared helper.
    tstring = test_multibytecodec_support.load_teststring('big5')
    # Each codectests entry is (input, error-handler name, expected output).
    # NOTE(review): an expected value of None presumably means the codec
    # must raise for that input -- confirm against TestBase.
    if RESYNC_FASTER:
        # Version from CPython 3.6 where \0x80\0x80 is two invalid sequences.
        # Java 8 agrees with this interpretation.
        codectests = (
            # invalid bytes
            (b"abc\x80\x80\xc1\xc4", "strict", None),
            (b"abc\xc8", "strict", None),
            (b"abc\x80\x80\xc1\xc4", "replace", u"abc\ufffd\ufffd\u8b10"),
            (b"abc\x80\x80\xc1\xc4\xc8", "replace",
             u"abc\ufffd\ufffd\u8b10\ufffd"),
            (b"abc\x80\x80\xc1\xc4", "ignore", u"abc\u8b10"),
        )
    else:
        # Standard version of test from CPython 2.7
        codectests = (
            # invalid bytes
            ("abc\x80\x80\xc1\xc4", "strict", None),
            ("abc\xc8", "strict", None),
            ("abc\x80\x80\xc1\xc4", "replace", u"abc\ufffd\u8b10"),
            ("abc\x80\x80\xc1\xc4\xc8", "replace", u"abc\ufffd\u8b10\ufffd"),
            ("abc\x80\x80\xc1\xc4", "ignore", u"abc\u8b10"),
        )

예제 #5

0

파일 보기

class Test_EUC_JP_COMPAT(test_multibytecodec_support.TestBase,
                         unittest.TestCase):
    """Test data for the 'euc_jp' codec.

    Only class attributes are set here; the test methods are presumably
    supplied by the TestBase mixin (not visible here).
    """
    # Name of the codec under test.
    encoding = 'euc_jp'
    # Test string for this codec as returned by the shared helper.
    tstring = test_multibytecodec_support.load_teststring('euc_jp')
    # The shared eucjp_commontests tuple (defined outside this block)
    # extended with three extra (input, error-handler name, expected) entries.
    codectests = eucjp_commontests + (
        # Byte-string input, unicode expected.
        ("\xa1\xc0\\", "strict", u"\uff3c\\"),
        # Unicode input, byte-string expected.
        # NOTE(review): presumably TestBase picks the encode direction from
        # the input type -- confirm.
        (u"\xa5", "strict", "\x5c"),
        (u"\u203e", "strict", "\x7e"),
    )

예제 #6

0

파일 보기

class Test_ISO2022_KR(test_multibytecodec_support.TestBase, unittest.TestCase):
    """Test data for the 'iso2022_kr' codec.

    Sets the class attributes for this codec and disables the chunk-coding
    test, which cannot work with this codec's test file (see below).
    """
    # Name of the codec under test.
    encoding = 'iso2022_kr'
    # Test string for this codec as returned by the shared helper.
    tstring = test_multibytecodec_support.load_teststring('iso2022_kr')
    # The shared COMMON_CODEC_TESTS tuple (defined outside this block) plus
    # one extra entry: with 'replace', the ESC N sequence is expected to be
    # passed through unchanged.
    codectests = COMMON_CODEC_TESTS + (
        (b'ab\x1BNdef', 'replace', 'ab\x1BNdef'), )

    # iso2022_kr.txt cannot be used to test "chunk coding": the escape
    # sequence is only written on the first line
    # Reported as skipped (with the reason) instead of silently passing, so
    # test output does not claim coverage that does not exist.
    # NOTE(review): presumably this overrides a test_chunkcoding inherited
    # from TestBase -- confirm.
    @unittest.skip('iso2022_kr.txt cannot be used to test "chunk coding"')
    def test_chunkcoding(self):
        pass

예제 #7

0

파일 보기

파일: test_codecencodings_tw.py 프로젝트: moussmaw1/python3.0-experiments

class Test_Big5(test_multibytecodec_support.TestBase, unittest.TestCase):
    """Test data for the 'big5' codec.

    Only class attributes are set here; the test methods are presumably
    supplied by the TestBase mixin (not visible here).
    """
    # Name of the codec under test.
    encoding = 'big5'
    # Test string for this codec as returned by the shared helper.
    tstring = test_multibytecodec_support.load_teststring('big5')
    # Each entry is (input bytes, error-handler name, expected text).
    # NOTE(review): an expected value of None presumably means the codec
    # must raise for that input -- confirm against TestBase.
    codectests = (
        # invalid bytes
        (b"abc\x80\x80\xc1\xc4", "strict", None),
        (b"abc\xc8", "strict", None),
        (b"abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\u8b10"),
        (b"abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\u8b10\ufffd"),
        (b"abc\x80\x80\xc1\xc4", "ignore", "abc\u8b10"),
    )

예제 #8

0

파일 보기

class Test_JOHAB(test_multibytecodec_support.TestBase, unittest.TestCase):
    """Test data for the 'johab' codec.

    Only class attributes are set here; the test methods are presumably
    supplied by the TestBase mixin (not visible here).
    """
    # Name of the codec under test.
    encoding = 'johab'
    # Test string for this codec as returned by the shared helper.
    tstring = test_multibytecodec_support.load_teststring('johab')
    # Each entry is (input, error-handler name, expected output).
    # NOTE(review): an expected value of None presumably means the codec
    # must raise for that input -- confirm against TestBase.
    codectests = (
        # invalid bytes
        ("abc\x80\x80\xc1\xc4", "strict", None),
        ("abc\xc8", "strict", None),
        ("abc\x80\x80\xc1\xc4", "replace", u"abc\ufffd\ucd27"),
        ("abc\x80\x80\xc1\xc4\xc8", "replace", u"abc\ufffd\ucd27\ufffd"),
        ("abc\x80\x80\xc1\xc4", "ignore", u"abc\ucd27"),
    )

예제 #9

0

파일 보기

class Test_CP949(test_multibytecodec_support.TestBase, unittest.TestCase):
    """Test data for the 'cp949' codec.

    Only class attributes are set here; the test methods are presumably
    supplied by the TestBase mixin (not visible here).
    """
    # Name of the codec under test.
    encoding = 'cp949'
    # Test string for this codec as returned by the shared helper.
    tstring = test_multibytecodec_support.load_teststring('cp949')
    # Each entry is (input, error-handler name, expected output).
    # NOTE(review): an expected value of None presumably means the codec
    # must raise for that input -- confirm against TestBase.
    codectests = (
        # invalid bytes
        ("abc\x80\x80\xc1\xc4", "strict", None),
        ("abc\xc8", "strict", None),
        ("abc\x80\x80\xc1\xc4", "replace", u"abc\ufffd\uc894"),
        ("abc\x80\x80\xc1\xc4\xc8", "replace", u"abc\ufffd\uc894\ufffd"),
        ("abc\x80\x80\xc1\xc4", "ignore", u"abc\uc894"),
    )

예제 #10

0

파일 보기

파일: test_codecencodings_cn.py 프로젝트: heroarthur/Minix

class Test_GB2312(test_multibytecodec_support.TestBase, unittest.TestCase):
    """Test data for the 'gb2312' codec.

    Only class attributes are set here; the test methods are presumably
    supplied by the TestBase mixin (not visible here).
    """
    # Name of the codec under test.
    encoding = 'gb2312'
    # Test string for this codec as returned by the shared helper.
    tstring = test_multibytecodec_support.load_teststring('gb2312')
    # Each entry is (input, error-handler name, expected output).
    # NOTE(review): an expected value of None presumably means the codec
    # must raise for that input -- confirm against TestBase.
    codectests = (
        # invalid bytes
        ("abc\x81\x81\xc1\xc4", "strict", None),
        ("abc\xc8", "strict", None),
        ("abc\x81\x81\xc1\xc4", "replace", u"abc\ufffd\u804a"),
        ("abc\x81\x81\xc1\xc4\xc8", "replace", u"abc\ufffd\u804a\ufffd"),
        ("abc\x81\x81\xc1\xc4", "ignore", u"abc\u804a"),
        # \xc1 followed by 0x64 ('d') is also expected to be rejected.
        ("\xc1\x64", "strict", None),
    )

예제 #11

0

파일 보기

파일: test_codecencodings_cn.py 프로젝트: heroarthur/Minix

class Test_GBK(test_multibytecodec_support.TestBase, unittest.TestCase):
    """Test data for the 'gbk' codec.

    Only class attributes are set here; the test methods are presumably
    supplied by the TestBase mixin (not visible here).
    """
    # Name of the codec under test.
    encoding = 'gbk'
    # Test string for this codec as returned by the shared helper.
    tstring = test_multibytecodec_support.load_teststring('gbk')
    # Each entry is (input, error-handler name, expected output).
    # NOTE(review): an expected value of None presumably means the codec
    # must raise for that input -- confirm against TestBase.
    codectests = (
        # invalid bytes
        ("abc\x80\x80\xc1\xc4", "strict", None),
        ("abc\xc8", "strict", None),
        ("abc\x80\x80\xc1\xc4", "replace", u"abc\ufffd\u804a"),
        ("abc\x80\x80\xc1\xc4\xc8", "replace", u"abc\ufffd\u804a\ufffd"),
        ("abc\x80\x80\xc1\xc4", "ignore", u"abc\u804a"),
        # A four-byte input that is expected to be rejected.
        ("\x83\x34\x83\x31", "strict", None),
        # Unicode input (U+30FB) rather than a byte string.
        # NOTE(review): presumably exercised in the encode direction --
        # confirm against TestBase.
        (u"\u30fb", "strict", None),
    )

예제 #12

0

파일 보기

class Test_GB18030(test_multibytecodec_support.TestBase, unittest.TestCase):
    """Test data for the 'gb18030' codec.

    Only class attributes are set here; the test methods are presumably
    supplied by the TestBase mixin (not visible here).
    """
    # Name of the codec under test.
    encoding = 'gb18030'
    # Test string for this codec as returned by the shared helper.
    tstring = test_multibytecodec_support.load_teststring('gb18030')
    # Each entry is (input, error-handler name, expected output).
    # NOTE(review): an expected value of None presumably means the codec
    # must raise for that input -- confirm against TestBase.
    codectests = (
        # invalid bytes
        ("abc\x80\x80\xc1\xc4", "strict", None),
        ("abc\xc8", "strict", None),
        ("abc\x80\x80\xc1\xc4", "replace", u"abc\ufffd\u804a"),
        ("abc\x80\x80\xc1\xc4\xc8", "replace", u"abc\ufffd\u804a\ufffd"),
        ("abc\x80\x80\xc1\xc4", "ignore", u"abc\u804a"),
        # The four bytes \x84\x39\x84\x39 are expected to collapse to a
        # single U+FFFD under "replace".
        ("abc\x84\x39\x84\x39\xc1\xc4", "replace", u"abc\ufffd\u804a"),
    )
    # NOTE(review): flag presumably read by TestBase; its exact effect is
    # not visible in this block -- confirm.
    has_iso10646 = True

예제 #13

0

파일 보기

class Test_CP932(test_multibytecodec_support.TestBase, unittest.TestCase):
    """Test data for the 'cp932' codec.

    Only class attributes are set here; the test methods are presumably
    supplied by the TestBase mixin (not visible here).
    """
    # Name of the codec under test.
    encoding = 'cp932'
    # Note: the test string is loaded under the name 'shift_jis', not 'cp932'.
    tstring = test_multibytecodec_support.load_teststring('shift_jis')
    # Each entry is (input, error-handler name, expected output).
    # NOTE(review): an expected value of None presumably means the codec
    # must raise for that input -- confirm against TestBase.
    codectests = (
        # invalid bytes
        ("abc\x81\x00\x81\x00\x82\x84", "strict", None),
        ("abc\xf8", "strict", None),
        ("abc\x81\x00\x82\x84", "replace", u"abc\ufffd\uff44"),
        ("abc\x81\x00\x82\x84\x88", "replace", u"abc\ufffd\uff44\ufffd"),
        ("abc\x81\x00\x82\x84", "ignore", u"abc\uff44"),
        # sjis vs cp932
        # Backslash and 0x7e are expected unchanged; the three two-byte
        # sequences are expected as U+FF3C, U+2225, U+FF0D.
        ("\\\x7e", "replace", u"\\\x7e"),
        ("\x81\x5f\x81\x61\x81\x7c", "replace", u"\uff3c\u2225\uff0d"),
    )

예제 #14

0

파일 보기

class Test_SJISX0213(test_multibytecodec_support.TestBase, unittest.TestCase):
    """Test data for the 'shift_jisx0213' codec.

    Only class attributes are set here; the test methods are presumably
    supplied by the TestBase mixin (not visible here).
    """
    # Name of the codec under test.
    encoding = 'shift_jisx0213'
    # Test string for this codec as returned by the shared helper.
    tstring = test_multibytecodec_support.load_teststring('shift_jisx0213')
    # Each entry is (input, error-handler name, expected output).
    # NOTE(review): an expected value of None presumably means the codec
    # must raise for that input -- confirm against TestBase.
    codectests = (
        # invalid bytes
        ("abc\x80\x80\x82\x84", "strict", None),
        ("abc\xf8", "strict", None),
        ("abc\x80\x80\x82\x84", "replace", u"abc\ufffd\uff44"),
        ("abc\x80\x80\x82\x84\x88", "replace", u"abc\ufffd\uff44\ufffd"),
        ("abc\x80\x80\x82\x84def", "ignore", u"abc\uff44def"),
        # sjis vs cp932
        # Here backslash and 0x7e are expected as U+00A5 and U+203E, and the
        # three two-byte sequences as U+005C, U+2016, U+2212.
        ("\\\x7e", "replace", u"\xa5\u203e"),
        ("\x81\x5f\x81\x61\x81\x7c", "replace", u"\x5c\u2016\u2212"),
    )
    # Pair of (unicode text, byte string); the byte string spells some of
    # the characters as XML entities (&real;, &lang;, &#4660;, &rang;).
    # NOTE(review): presumably used by TestBase with an XML character-name
    # error handler -- confirm.
    xmlcharnametest = (u"\xab\u211c\xbb = \u2329\u1234\u232a",
                       "\x85G&real;\x85Q = &lang;&#4660;&rang;")

예제 #15

0

파일 보기

class Test_EUC_JISX0213(test_multibytecodec_support.TestBase,
                        unittest.TestCase):
    """Test data for the 'euc_jisx0213' codec.

    Only class attributes are set here; the test methods are presumably
    supplied by the TestBase mixin (not visible here).
    """
    # Name of the codec under test.
    encoding = 'euc_jisx0213'
    # Test string for this codec as returned by the shared helper.
    tstring = test_multibytecodec_support.load_teststring('euc_jisx0213')
    # Each entry is (input, error-handler name, expected output).
    # NOTE(review): an expected value of None presumably means the codec
    # must raise for that input -- confirm against TestBase.
    codectests = (
        # invalid bytes
        ("abc\x80\x80\xc1\xc4", "strict", None),
        ("abc\xc8", "strict", None),
        ("abc\x80\x80\xc1\xc4", "replace", u"abc\ufffd\u7956"),
        ("abc\x80\x80\xc1\xc4\xc8", "replace", u"abc\ufffd\u7956\ufffd"),
        ("abc\x80\x80\xc1\xc4", "ignore", u"abc\u7956"),
        # The three bytes \x8f\x83\x83 are expected to collapse to a single
        # U+FFFD under "replace".
        ("abc\x8f\x83\x83", "replace", u"abc\ufffd"),
        ("\xc1\x64", "strict", None),
        # A valid two-byte sequence, expected as U+FF3C.
        ("\xa1\xc0", "strict", u"\uff3c"),
    )
    # Pair of (unicode text, byte string); the byte string spells some of
    # the characters as XML entities (&real;, &lang;, &#4660;, &rang;).
    # NOTE(review): presumably used by TestBase with an XML character-name
    # error handler -- confirm.
    xmlcharnametest = (u"\xab\u211c\xbb = \u2329\u1234\u232a",
                       "\xa9\xa8&real;\xa9\xb2 = &lang;&#4660;&rang;")

예제 #16

0

파일 보기

class Test_ISO2022_JP2(test_multibytecodec_support.TestBase,
                       unittest.TestCase):
    """Test data for the 'iso2022_jp_2' codec.

    Only class attributes are set here; the test methods are presumably
    supplied by the TestBase mixin (not visible here).
    """
    # Name of the codec under test.
    encoding = 'iso2022_jp_2'
    # Note: the test string is loaded under the name 'iso2022_jp', not
    # 'iso2022_jp_2'.
    tstring = test_multibytecodec_support.load_teststring('iso2022_jp')
    # The shared COMMON_CODEC_TESTS tuple (defined outside this block) plus
    # one extra entry: with 'replace', the ESC N sequence is expected to be
    # dropped from the output.
    codectests = COMMON_CODEC_TESTS + ((b'ab\x1BNdef', 'replace', 'abdef'), )