    def __init__(self):
        # Newer versions of PyICU define shorthand constructors such as
        #     self.normalizer_nfc = Normalizer2.getNFCInstance()
        # Since this version does not, use the generic getInstance() call
        # with explicit normalization data and mode.  Note that NFD comes
        # from the 'nfc' data with DECOMPOSE mode, and NFKD from the
        # 'nfkc' data with DECOMPOSE mode.
        self.normalizer_nfc = Normalizer2.getInstance(None, 'nfc', UNormalizationMode2.COMPOSE)
        self.normalizer_nfd = Normalizer2.getInstance(None, 'nfc', UNormalizationMode2.DECOMPOSE)
        self.normalizer_nfkc = Normalizer2.getInstance(None, 'nfkc', UNormalizationMode2.COMPOSE)
        self.normalizer_nfkd = Normalizer2.getInstance(None, 'nfkc', UNormalizationMode2.DECOMPOSE)
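If the shorthand constructors mentioned in the comment are available (they are used in the test examples further down), the same four normalizers can be obtained more directly. A minimal standalone sketch, assuming a PyICU build that exposes them:

from icu import Normalizer2

nfc = Normalizer2.getNFCInstance()
nfd = Normalizer2.getNFDInstance()
nfkc = Normalizer2.getNFKCInstance()
nfkd = Normalizer2.getNFKDInstance()

# 'a' + combining acute accent composes to the single code point U+00E1.
assert nfc.normalize(u"a\u0301") == u"\u00e1"
assert nfd.normalize(u"\u00e1") == u"a\u0301"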
Example #3
    def createComponents(_self, fieldName):
        # PyLucene analyzer: whitespace tokenization followed by NFD
        # normalization ('nfc' data with DECOMPOSE mode).
        source = WhitespaceTokenizer()
        return Analyzer.TokenStreamComponents(
            source,
            ICUNormalizer2Filter(
                source,
                Normalizer2.getInstance(
                    None, "nfc", UNormalizationMode2.DECOMPOSE)))
Example #5
    def __init__(self, input, normalizer=None):
        super(ICUNormalizer2Filter, self).__init__(input)

        self.input = input
        self.termAtt = self.addAttribute(CharTermAttribute.class_)

        if normalizer is None:
            # Default to NFKC_Casefold: NFKC composition plus Unicode
            # case folding (the 'nfkc_cf' normalization data).
            normalizer = Normalizer2.getInstance(None, "nfkc_cf",
                                                 UNormalizationMode2.COMPOSE)
        self.normalizer = normalizer
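The 'nfkc_cf' default used here is the same normalizer that the tests further down obtain via Normalizer2.getNFKCCasefoldInstance(); a minimal standalone sketch of its effect:

from icu import Normalizer2

n = Normalizer2.getNFKCCasefoldInstance()
print(n.normalize(u"Hi There"))     # -> 'hi there'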
    def testNormalize(self):

        try:
            # Normalizer2 requires ICU >= 4.4; skip the test when the
            # underlying ICU library is too old to provide it.
            from icu import Normalizer2
        except ImportError:
            return

        normalizer = Normalizer2.getInstance(None, "nfkc_cf",
                                             UNormalizationMode2.COMPOSE)

        self.assertTrue(normalizer.normalize("Hi There") == u'hi there')

        a = UnicodeString()
        normalizer.normalize("Hi There", a)
        self.assertTrue(a == UnicodeString(u'hi there'))
    def testNormalize(self):

        try:
            from icu import Normalizer2
        except ImportError:
            return

        self.assertNorm(
            Normalizer2.getInstance(None, "nfkc_cf",
                                    UNormalizationMode2.COMPOSE), u'hi there',
            "Hi There")
        self.assertNorm(Normalizer2.getNFCInstance(), u"äßáW", u"äßa\u0301W")
        self.assertNorm(Normalizer2.getNFDInstance(), u"a\u0308ßa\u0301W",
                        u"äßa\u0301W")
        self.assertNorm(Normalizer2.getNFKCInstance(), u"äßáW", u"äßa\u0301W")
        self.assertNorm(Normalizer2.getNFKDInstance(), u"a\u0308ßa\u0301W",
                        u"äßa\u0301W")
        self.assertNorm(Normalizer2.getNFKCCasefoldInstance(), u"ässáw",
                        u"äßa\u0301W")
Example #10
    def tokenStream(_self, fieldName, reader):
        # Older PyLucene Analyzer API: override tokenStream() instead of
        # createComponents().  Again, 'nfc' data with DECOMPOSE mode
        # yields NFD normalization.
        return ICUNormalizer2Filter(
            WhitespaceTokenizer(Version.LUCENE_CURRENT, reader),
            Normalizer2.getInstance(None, "nfc",
                                    UNormalizationMode2.DECOMPOSE))
Example #11
    def __init__(self, input):
        # UTR #30 character foldings (accent removal, case folding, etc.);
        # the 'utr30' normalization data must be available to ICU.
        normalizer = Normalizer2.getInstance("utr30", "utr30",
                                             UNormalizationMode2.COMPOSE)
        super(ICUFoldingFilter, self).__init__(input, normalizer)
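When the utr30 data is not available, plain NFKC_Casefold from PyICU gives a roughly similar effect (lowercasing plus compatibility composition), although unlike UTR #30 folding it does not strip diacritics; a minimal sketch for comparison:

from icu import Normalizer2

fold = Normalizer2.getNFKCCasefoldInstance()
print(fold.normalize(u"Äffin"))     # -> 'äffin' (case folded, accent kept)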