예제 #1
0
 def setUp(self):
     """Build the UCD instance that the test methods reach through ``self.ucd``."""
     fresh_ucd = UCD()
     self.ucd = fresh_ucd
예제 #2
0
class UCDTests(unittest.TestCase):
    """Exercise the character-property and normalization queries of ``UCD``.

    Every test runs against a fresh ``UCD`` instance created in ``setUp``.
    Methods whose names start with ``ignore_`` are not collected by unittest
    (they lack the ``test`` prefix); rename them to ``test_...`` to enable
    them.

    Individual tests are annotated with ``#`` comments rather than
    docstrings so that unittest's verbose output keeps showing method names.
    """

    def setUp(self):
        """Create the ``UCD`` instance used by each test."""
        self.ucd = UCD()

    def tearDown(self):
        """Nothing to clean up; kept as an explicit hook."""
        pass

    def ignore_findit(self):
        """Diagnostic scan, disabled by its name; not a real test.

        Prints (as hex) every scanned code point that ``self.ucd.isformat``
        accepts but that ICU does not flag as DEFAULT_IGNORABLE_CODE_POINT,
        then fails on purpose so the printed output is shown by the runner.

        An earlier variant of the scan listed characters that were neither
        script-specific, nor word-break exemplars, nor ICU-alphabetic.
        """
        # PyICU is imported locally so the rest of the suite does not need it.
        from icu import Char, UProperty

        # Only the BMP is scanned; use 0x10ffff to cover every plane.
        maxchar = 0xffff
        # +1 so that maxchar itself is included in the scan.
        for usv in range(maxchar + 1):
            char = chr(usv)
            if self.ucd.isformat(char) and not Char.hasBinaryProperty(
                    char, UProperty.DEFAULT_IGNORABLE_CODE_POINT):
                print('%04X' % usv)

        self.fail('diagnostic scan only: failing deliberately so the '
                  'printed code points are displayed')

    # marks

    def test_mark_true(self):
        # U+0301 is a combining accent.
        self.assertTrue(self.ucd.ismark(u'\u0301'))

    def test_mark_false(self):
        self.assertFalse(self.ucd.ismark(u'e'))

    def test_nukta_true(self):
        # U+093C is the Devanagari nukta.
        self.assertTrue(self.ucd.isnukta(u'\u093c'))

    def test_nukta_false(self):
        # U+0915 is a Devanagari base letter, not a nukta.
        self.assertFalse(self.ucd.isnukta(u'\u0915'))

    # always_combine
    # The three *_combine groups below use the same four marks:
    # nukta U+093C, diacritic U+0300, virama U+0CCD and matra U+093E.
    # Each mark is expected to be accepted by exactly one of the groups.

    def test_nukta_always_combine(self):
        self.assertTrue(self.ucd.is_always_combine(u'\u093c'))

    def test_diacritic_always_combine(self):
        self.assertFalse(self.ucd.is_always_combine(u'\u0300'))

    def test_virama_always_combine(self):
        self.assertFalse(self.ucd.is_always_combine(u'\u0ccd'))

    def test_matra_always_combine(self):
        self.assertFalse(self.ucd.is_always_combine(u'\u093e'))

    # sometimes_combine

    def test_nukta_sometimes_combine(self):
        self.assertFalse(self.ucd.is_sometimes_combine(u'\u093c'))

    def test_diacritic_sometimes_combine(self):
        self.assertTrue(self.ucd.is_sometimes_combine(u'\u0300'))

    def test_virama_sometimes_combine(self):
        self.assertFalse(self.ucd.is_sometimes_combine(u'\u0ccd'))

    def test_matra_sometimes_combine(self):
        self.assertFalse(self.ucd.is_sometimes_combine(u'\u093e'))

    # never_combine

    def test_nukta_never_combine(self):
        self.assertFalse(self.ucd.is_never_combine(u'\u093c'))

    def test_diacritic_never_combine(self):
        self.assertFalse(self.ucd.is_never_combine(u'\u0300'))

    def test_virama_never_combine(self):
        self.assertTrue(self.ucd.is_never_combine(u'\u0ccd'))

    def test_matra_never_combine(self):
        self.assertTrue(self.ucd.is_never_combine(u'\u093e'))

    # other tests

    def test_zwnj_true(self):
        # U+200C is ZERO WIDTH NON-JOINER.
        self.assertTrue(self.ucd.is_zwnj(u'\u200c'))

    def test_zwnj_false(self):
        # U+200D is the joiner, not the non-joiner.
        self.assertFalse(self.ucd.is_zwnj(u'\u200d'))

    def test_zwj_true(self):
        # U+200D is ZERO WIDTH JOINER.
        self.assertTrue(self.ucd.is_zwj(u'\u200d'))

    def test_zwj_false(self):
        self.assertFalse(self.ucd.is_zwj(u'\u200c'))

    def test_vs_true(self):
        # U+FE00 is the first variation selector.
        self.assertTrue(self.ucd.is_vs(u'\ufe00'))

    def test_vs_false(self):
        self.assertFalse(self.ucd.is_vs(u'\u1000'))

    def test_number_true(self):
        self.assertTrue(self.ucd.isnumber(u'1'))

    def test_number_false(self):
        self.assertFalse(self.ucd.isnumber(u'a'))

    def test_format_true(self):
        # U+2060 is WORD JOINER.
        self.assertTrue(self.ucd.isformat(u'\u2060'))

    def test_format_false(self):
        self.assertFalse(self.ucd.isformat(u'a'))

    def test_space_separator_true(self):
        # U+200A is HAIR SPACE.
        self.assertTrue(self.ucd.is_space_separator(u'\u200a'))

    def test_space_separator_false(self):
        self.assertFalse(self.ucd.is_space_separator(u'a'))

    def test_pua_false_bmp(self):
        self.assertFalse(self.ucd.is_pua(u'a'))

    def test_pua_true_bmp(self):
        # U+E000 is the first code point of the BMP Private Use Area.
        self.assertTrue(self.ucd.is_pua(u'\ue000'))

    def test_pua_false_nonbmp(self):
        # A supplementary-plane character outside any private use area.
        self.assertFalse(self.ucd.is_pua(u'\U0001D510'))

    def test_pua_true_nonbmp_a(self):
        # Supplementary Private Use Area-A (plane 15).
        self.assertTrue(self.ucd.is_pua(u'\U000fff80'))

    def test_pua_true_nonbmp_b(self):
        # Supplementary Private Use Area-B (plane 16).  This previously
        # repeated the plane 15 code point, leaving area B untested.
        self.assertTrue(self.ucd.is_pua(u'\U0010ff80'))

    def test_script_specific_true_latin(self):
        self.assertTrue(self.ucd.is_specific_script(u'\ua78c'))

    def test_script_specific_false_latin(self):
        self.assertFalse(self.ucd.is_specific_script(u'\u02bc'))

    def test_script_specific_false_chinese(self):
        self.assertFalse(self.ucd.is_specific_script(u'\ua700'))

    def test_script_specific_false_vedic(self):
        self.assertFalse(self.ucd.is_specific_script(u'\u1CD1'))

    def test_wordbreak_katakana(self):
        self.assertTrue(self.ucd.is_exemplar_wordbreak(u'\u309b'))

    def test_wordbreak_aletter(self):
        self.assertTrue(self.ucd.is_exemplar_wordbreak(u'\u05f3'))

    def test_wordbreak_midletter(self):
        self.assertFalse(self.ucd.is_exemplar_wordbreak(u'\u05f4'))

    def test_wordbreak_chinese(self):
        self.assertFalse(self.ucd.is_exemplar_wordbreak(u'\ua700'))

    # normalization

    def test_nfc(self):
        # 'e' + combining acute composes to the precomposed U+00E9.
        text = u'e\u0301'
        self.assertEqual(u'\u00e9', self.ucd.normalize('NFC', text))

    def test_nfd(self):
        # U+00E9 decomposes to 'e' + combining acute.
        text = u'\u00e9'
        self.assertEqual(u'e\u0301', self.ucd.normalize('NFD', text))

    # The *_tusNN tests feed a base letter followed by several combining
    # marks in non-canonical order and expect them reordered (and, for NFC,
    # the grave accent composed with the base letter where possible).
    # NOTE(review): "tusNN" presumably refers to The Unicode Standard
    # version NN, and the ignore_ variants are presumably disabled until the
    # underlying Unicode data covers that version -- confirm.

    def test_nfc_tus10(self):
        text = u'\u0061\u035C\u0315\u0300\u1DF6\u0062'
        self.assertEqual(u'\u00E0\u0315\u1DF6\u035C\u0062',
                         self.ucd.normalize('NFC', text))

    def test_nfd_tus10(self):
        text = u'\u0061\u035C\u0315\u0300\u1DF6\u0062'
        self.assertEqual(u'\u0061\u0300\u0315\u1DF6\u035C\u0062',
                         self.ucd.normalize('NFD', text))

    def ignore_nfc_tus11(self):
        text = u'\u0061\u0315\u0300\u05AE\u09FE\u0062'
        self.assertEqual(u'\u00E0\u05AE\u09FE\u0315\u0062',
                         self.ucd.normalize('NFC', text))

    def ignore_nfd_tus11(self):
        text = u'\u0061\u0315\u0300\u05AE\u09FE\u0062'
        self.assertEqual(u'\u0061\u05AE\u0300\u09FE\u0315\u0062',
                         self.ucd.normalize('NFD', text))

    def ignore_nfc_tus12(self):
        text = u'\u0061\u0315\u0300\u05AE\U0001E136\u0062'
        self.assertEqual(u'\u00E0\u05AE\U0001E136\u0315\u0062',
                         self.ucd.normalize('NFC', text))

    def ignore_nfd_tus12(self):
        text = u'\u0061\u0315\u0300\u05AE\U0001E136\u0062'
        self.assertEqual(u'\u0061\u05AE\u0300\U0001E136\u0315\u0062',
                         self.ucd.normalize('NFD', text))

    def ignore_nfc_tus13(self):
        # Uses self.ucd like its siblings; the previous ``tf`` name is not
        # defined in this class and would raise NameError once enabled.
        text = u'\u0061\u3099\u093C\U00016FF0\u09BC\u0062'
        self.assertEqual(u'\u0061\U00016FF0\u093C\u09BC\u3099\u0062',
                         self.ucd.normalize('NFC', text))

    def ignore_nfd_tus13(self):
        # Uses self.ucd like its siblings; the previous ``tf`` name is not
        # defined in this class and would raise NameError once enabled.
        text = u'\u0061\u3099\u093C\U00016FF0\u09BC\u0062'
        self.assertEqual(u'\u0061\U00016FF0\u093C\u09BC\u3099\u0062',
                         self.ucd.normalize('NFD', text))