コード例 #1
0
    def testNormalize(self):
        """Run ``assertNorm`` once for each Normalizer2 factory, in order.

        The test returns without checking anything when ``Normalizer2``
        cannot be imported from ``icu``.
        """
        try:
            from icu import Normalizer2
        except ImportError:
            # Normalizer2 is unavailable: nothing to exercise.
            return

        # Generic factory: "nfkc_cf" data requested in COMPOSE mode.
        nfkc_cf_compose = Normalizer2.getInstance(
            None, "nfkc_cf", UNormalizationMode2.COMPOSE)
        self.assertNorm(nfkc_cf_compose, u'hi there', "Hi There")

        # Dedicated factories; each normalizer is created right before the
        # assertion that uses it.
        nfc = Normalizer2.getNFCInstance()
        self.assertNorm(nfc, u"äßáW", u"äßa\u0301W")

        nfd = Normalizer2.getNFDInstance()
        self.assertNorm(nfd, u"a\u0308ßa\u0301W", u"äßa\u0301W")

        nfkc = Normalizer2.getNFKCInstance()
        self.assertNorm(nfkc, u"äßáW", u"äßa\u0301W")

        nfkd = Normalizer2.getNFKDInstance()
        self.assertNorm(nfkd, u"a\u0308ßa\u0301W", u"äßa\u0301W")

        nfkc_casefold = Normalizer2.getNFKCCasefoldInstance()
        self.assertNorm(nfkc_casefold, u"ässáw", u"äßa\u0301W")
コード例 #2
0
ファイル: test_Normalizer.py プロジェクト: sciyoshi/pyicu
    def testNormalize(self):
        """Check every Normalizer2 factory through ``assertNorm``.

        Nothing is checked when ``Normalizer2`` cannot be imported from
        ``icu``; the method simply returns.
        """
        try:
            from icu import Normalizer2
        except ImportError:
            return

        # Each row is (factory, second assertNorm argument, third assertNorm
        # argument).  Factories are wrapped in lambdas so that a normalizer
        # is only created immediately before its own assertion runs.
        cases = (
            (lambda: Normalizer2.getInstance(None, "nfkc_cf",
                                             UNormalizationMode2.COMPOSE),
             u'hi there', "Hi There"),
            (lambda: Normalizer2.getNFCInstance(),
             u"äßáW", u"äßa\u0301W"),
            (lambda: Normalizer2.getNFDInstance(),
             u"a\u0308ßa\u0301W", u"äßa\u0301W"),
            (lambda: Normalizer2.getNFKCInstance(),
             u"äßáW", u"äßa\u0301W"),
            (lambda: Normalizer2.getNFKDInstance(),
             u"a\u0308ßa\u0301W", u"äßa\u0301W"),
            (lambda: Normalizer2.getNFKCCasefoldInstance(),
             u"ässáw", u"äßa\u0301W"),
        )

        for make_normalizer, second, third in cases:
            self.assertNorm(make_normalizer(), second, third)
コード例 #3
0
ファイル: sample.py プロジェクト: iharh/prj
from icu import Normalizer2

# Module-level normalizers shared by compDecomp(): NFC and NFD instances.
composer = Normalizer2.getNFCInstance()
decomposer = Normalizer2.getNFDInstance()


def compDecomp(orig):
    """Print *orig*, then its NFC and NFD normalizations, space-separated."""
    nfc_form = composer.normalize(orig)
    nfd_form = decomposer.normalize(orig)
    print(f"{orig} {nfc_form} {nfd_form}")


# Demo run on a Vietnamese word carrying stacked diacritics.
compDecomp('lội')
コード例 #4
0
ファイル: translit.py プロジェクト: ayum/ayum.translit
def to_latin(string, locale=locale):
    """Transliterate Cyrillic text to Latin using a custom ICU rule set.

    The input is wrapped in a ``UnicodeString``, NFC-normalized, and then run
    through a ``Transliterator`` built from the rules below.

    Args:
        string: Text to transliterate.
        locale: Accepted for interface compatibility; this function does not
            use it.

    Returns:
        The result of ``Transliterator.transliterate`` on the normalized
        ``UnicodeString`` (presumably a ``UnicodeString`` -- confirm against
        the PyICU version in use).
    """
    ustring = UnicodeString(string)
    nfc = Normalizer2.getNFCInstance()
    ustring = nfc.normalize(ustring)

    # The left-hand context sets mix Cyrillic and Latin letters, presumably
    # because earlier characters have already been rewritten in place by the
    # time a later rule inspects its preceding context.
    trans = Transliterator.createFromRules(
        "",
        "$wb = [^[:Letter:]] ;"
        # е
        "$wb { е > ye ;"
        "[ыq] { е } $wb > e ;"
        "[уеёыаоэяиюьъiuoeaq] { е > ye ;"
        "е > e ;"
        # э
        "$wb { э > e ;"
        # The replacement must be Latin "e"; a Cyrillic "е" (U+0435) here is
        # never rewritten by a later rule and would leak into the output.
        "[жшцйjwcy] { э > e ;"
        "э > qe ;"
        # ы
        "[жшцйjwcy] { ы > i ;"
        "ы > q ;"
        # ё
        "$wb { ё > yo ;"
        "[жшцйjwcy] { ё > o ;"
        "[уеёыаоэяиюьъiuoeaq] { ё > yo ;"
        "ё > ho ;"
        # ю
        "$wb { ю > yu ;"
        "[жшцйjwcy] { ю > u ;"
        "[уеёыаоэяиюьъiuoeaq] { ю > yu ;"
        "ю > hu ;"
        # я
        "$wb { я > ya ;"
        "[жшцйjwcy] { я > a ;"
        "[уеёыаоэяиюьъiuoeaq] { я > ya ;"
        "я > ha ;"
        # The letter combination "ьо", found only in loanwords
        "ньо > nyo ;"
        "льо > lyo ;"
        "мьо > myo ;"
        "рьо > ryo ;"
        # Remaining letters
        "а > a ;"
        "б > b ;"
        "в > v ;"
        "г > g ;"
        "д > d ;"
        "ж > j ;"
        "з > z ;"
        "и > i ;"
        "й > y ;"
        "к > k ;"
        "л > l ;"
        "м > m ;"
        "н > n ;"
        "о > o ;"
        "п > p ;"
        "р > r ;"
        "с > s ;"
        "т > t ;"
        "у > u ;"
        "ф > f ;"
        "х > x ;"
        "ц > c ;"
        "ч > ch ;"
        "ш > w ;"
        "щ > wh ;"
        # New pass from the start of the text: handle the soft/hard signs
        ":: Any-Null ;"
        "[nlmr] { ь } y[aueioq] > ;"
        "ь > h ;"
        "[nlmr] { ъ } y[aueioq] > y;"
        "ъ > ;"
        # New pass from the start of the text: collapse runs of "h"
        ":: Any-Null ;"
        "h+ > h ;")
    ustring = trans.transliterate(ustring)
    return ustring