Esempio n. 1
0
 def test_Deasciify(self):
     fsm = FsmMorphologicalAnalyzer("../turkish_dictionary.txt",
                                    "../turkish_misspellings.txt",
                                    "../turkish_finite_state_machine.xml")
     simpleDeasciifier = SimpleDeasciifier(fsm)
     simpleAsciifier = SimpleAsciifier()
     for i in range(fsm.getDictionary().size()):
         word = fsm.getDictionary().getWordWithIndex(i)
         count = 0
         for j in range(len(word.getName())):
             if word.getName()[j] == 'ç' or word.getName()[j] == 'ö' or word.getName()[j] == 'ğ' or \
                     word.getName()[j] == 'ü' or word.getName()[j] == 'ş' or word.getName()[j] == 'ı':
                 count = count + 1
         if (count > 0 and not word.getName().endswith("fulü")
                 and (word.isNominal() or word.isAdjective()
                      or word.isAdverb() or word.isVerb())):
             asciified = simpleAsciifier.asciifyWord(word)
             if len(simpleDeasciifier.candidateList(Word(asciified))) == 1:
                 deasciified = simpleDeasciifier.deasciify(
                     Sentence(asciified)).toString()
                 self.assertEqual(word.getName(), deasciified)
class FsmMorphologicalAnalyzerTest(unittest.TestCase):

    fsm: FsmMorphologicalAnalyzer

    def setUp(self) -> None:
        self.fsm = FsmMorphologicalAnalyzer(
            "../turkish_dictionary.txt", "../turkish_misspellings.txt",
            "../turkish_finite_state_machine.xml")

    def test_morphologicalAnalysisDataTimeNumber(self):
        self.assertTrue(self.fsm.morphologicalAnalysis("3/4").size() != 0)
        self.assertTrue(self.fsm.morphologicalAnalysis("3\\/4").size() != 0)
        self.assertTrue(self.fsm.morphologicalAnalysis("4/2/1973").size() != 0)
        self.assertTrue(
            self.fsm.morphologicalAnalysis("14/2/1993").size() != 0)
        self.assertTrue(
            self.fsm.morphologicalAnalysis("14/12/1933").size() != 0)
        self.assertTrue(
            self.fsm.morphologicalAnalysis("6/12/1903").size() != 0)
        self.assertTrue(self.fsm.morphologicalAnalysis("%34.5").size() != 0)
        self.assertTrue(self.fsm.morphologicalAnalysis("%3").size() != 0)
        self.assertTrue(self.fsm.morphologicalAnalysis("%56").size() != 0)
        self.assertTrue(self.fsm.morphologicalAnalysis("2:3").size() != 0)
        self.assertTrue(self.fsm.morphologicalAnalysis("12:3").size() != 0)
        self.assertTrue(self.fsm.morphologicalAnalysis("4:23").size() != 0)
        self.assertTrue(self.fsm.morphologicalAnalysis("11:56").size() != 0)
        self.assertTrue(self.fsm.morphologicalAnalysis("1:2:3").size() != 0)
        self.assertTrue(self.fsm.morphologicalAnalysis("3:12:3").size() != 0)
        self.assertTrue(self.fsm.morphologicalAnalysis("5:4:23").size() != 0)
        self.assertTrue(self.fsm.morphologicalAnalysis("7:11:56").size() != 0)
        self.assertTrue(self.fsm.morphologicalAnalysis("12:2:3").size() != 0)
        self.assertTrue(self.fsm.morphologicalAnalysis("10:12:3").size() != 0)
        self.assertTrue(self.fsm.morphologicalAnalysis("11:4:23").size() != 0)
        self.assertTrue(self.fsm.morphologicalAnalysis("22:11:56").size() != 0)
        self.assertTrue(self.fsm.morphologicalAnalysis("45").size() != 0)
        self.assertTrue(self.fsm.morphologicalAnalysis("34.23").size() != 0)

    def test_morphologicalAnalysisProperNoun(self):
        dictionary = self.fsm.getDictionary()
        for i in range(dictionary.size()):
            word = dictionary.getWordWithIndex(i)
            if isinstance(word, TxtWord):
                if word.isProperNoun():
                    self.assertTrue(
                        self.fsm.morphologicalAnalysis(word.getName().replace(
                            "i", "İ").upper()).size() != 0)

    def test_morphologicalAnalysisNounSoftenDuringSuffixation(self):
        dictionary = self.fsm.getDictionary()
        for i in range(dictionary.size()):
            word = dictionary.getWordWithIndex(i)
            if isinstance(word, TxtWord):
                if word.isNominal() and word.nounSoftenDuringSuffixation():
                    transitionState = State("Possessive", False, False)
                    startState = State("NominalRoot", True, False)
                    transition = Transition("yH", transitionState, "ACC")
                    surfaceForm = transition.makeTransition(
                        word, word.getName(), startState)
                    self.assertTrue(
                        self.fsm.morphologicalAnalysis(surfaceForm).size() != 0
                    )

    def test_morphologicalAnalysisVowelAChangesToIDuringYSuffixation(self):
        dictionary = self.fsm.getDictionary()
        for i in range(dictionary.size()):
            word = dictionary.getWordWithIndex(i)
            if isinstance(word, TxtWord):
                if word.isVerb() and word.vowelAChangesToIDuringYSuffixation():
                    transitionState = State("VerbalStem", False, False)
                    startState = State("VerbalRoot", True, False)
                    transition = Transition("Hyor", transitionState, "PROG1")
                    surfaceForm = transition.makeTransition(
                        word, word.getName(), startState)
                    self.assertTrue(
                        self.fsm.morphologicalAnalysis(surfaceForm).size() != 0
                    )

    def test_morphologicalAnalysisIsPortmanteau(self):
        dictionary = self.fsm.getDictionary()
        for i in range(dictionary.size()):
            word = dictionary.getWordWithIndex(i)
            if isinstance(word, TxtWord):
                if word.isNominal() and word.isPortmanteau() and not word.isPlural() and \
                        not word.isPortmanteauFacedVowelEllipsis():
                    transitionState = State("CompoundNounRoot", True, False)
                    startState = State("CompoundNounRoot", True, False)
                    transition = Transition("lArH", transitionState,
                                            "A3PL+P3PL")
                    exceptLast2 = word.getName()[:len(word.getName()) - 2]
                    exceptLast = word.getName()[:len(word.getName()) - 1]
                    if word.isPortmanteauFacedSoftening():
                        if word.getName()[len(word.getName()) - 2] == "b":
                            rootForm = exceptLast2 + 'p'
                        elif word.getName()[len(word.getName()) - 2] == "c":
                            rootForm = exceptLast2 + 'ç'
                        elif word.getName()[len(word.getName()) - 2] == "d":
                            rootForm = exceptLast2 + 't'
                        elif word.getName()[len(word.getName()) - 2] == "ğ":
                            rootForm = exceptLast2 + 'k'
                        else:
                            rootForm = exceptLast
                    else:
                        if word.isPortmanteauEndingWithSI():
                            rootForm = exceptLast2
                        else:
                            rootForm = exceptLast
                    surfaceForm = transition.makeTransition(
                        word, rootForm, startState)
                    self.assertTrue(
                        self.fsm.morphologicalAnalysis(surfaceForm).size() != 0
                    )

    def test_morphologicalAnalysisNotObeysVowelHarmonyDuringAgglutination(
            self):
        dictionary = self.fsm.getDictionary()
        for i in range(dictionary.size()):
            word = dictionary.getWordWithIndex(i)
            if isinstance(word, TxtWord):
                if word.isNominal(
                ) and word.notObeysVowelHarmonyDuringAgglutination():
                    transitionState = State("Possessive", False, False)
                    startState = State("NominalRoot", True, False)
                    transition = Transition("yH", transitionState, "ACC")
                    surfaceForm = transition.makeTransition(
                        word, word.getName(), startState)
                    self.assertTrue(
                        self.fsm.morphologicalAnalysis(surfaceForm).size() != 0
                    )

    def test_morphologicalAnalysisLastIdropsDuringSuffixation(self):
        dictionary = self.fsm.getDictionary()
        for i in range(dictionary.size()):
            word = dictionary.getWordWithIndex(i)
            if isinstance(word, TxtWord):
                if word.isNominal() and word.lastIdropsDuringSuffixation():
                    transitionState = State("Possessive", False, False)
                    startState = State("NominalRoot", True, False)
                    transition = Transition("yH", transitionState, "ACC")
                    surfaceForm = transition.makeTransition(
                        word, word.getName(), startState)
                    self.assertTrue(
                        self.fsm.morphologicalAnalysis(surfaceForm).size() != 0
                    )

    def test_morphologicalAnalysisVerbSoftenDuringSuffixation(self):
        dictionary = self.fsm.getDictionary()
        for i in range(dictionary.size()):
            word = dictionary.getWordWithIndex(i)
            if isinstance(word, TxtWord):
                if word.isVerb() and word.verbSoftenDuringSuffixation():
                    transitionState = State("VerbalStem", False, False)
                    startState = State("VerbalRoot", True, False)
                    transition = Transition("Hyor", transitionState, "PROG1")
                    surfaceForm = transition.makeTransition(
                        word, word.getName(), startState)
                    self.assertTrue(
                        self.fsm.morphologicalAnalysis(surfaceForm).size() != 0
                    )

    def test_morphologicalAnalysisDuplicatesDuringSuffixation(self):
        dictionary = self.fsm.getDictionary()
        for i in range(dictionary.size()):
            word = dictionary.getWordWithIndex(i)
            if isinstance(word, TxtWord):
                if word.isNominal() and word.duplicatesDuringSuffixation():
                    transitionState = State("Possessive", False, False)
                    startState = State("NominalRoot", True, False)
                    transition = Transition("yH", transitionState, "ACC")
                    surfaceForm = transition.makeTransition(
                        word, word.getName(), startState)
                    self.assertTrue(
                        self.fsm.morphologicalAnalysis(surfaceForm).size() != 0
                    )

    def test_morphologicalAnalysisEndingKChangesIntoG(self):
        dictionary = self.fsm.getDictionary()
        for i in range(dictionary.size()):
            word = dictionary.getWordWithIndex(i)
            if isinstance(word, TxtWord):
                if word.isNominal() and word.endingKChangesIntoG():
                    transitionState = State("Possessive", False, False)
                    startState = State("NominalRoot", True, False)
                    transition = Transition("yH", transitionState, "ACC")
                    surfaceForm = transition.makeTransition(
                        word, word.getName(), startState)
                    self.assertTrue(
                        self.fsm.morphologicalAnalysis(surfaceForm).size() != 0
                    )

    def test_morphologicalAnalysisLastIdropsDuringPassiveSuffixation(self):
        dictionary = self.fsm.getDictionary()
        for i in range(dictionary.size()):
            word = dictionary.getWordWithIndex(i)
            if isinstance(word, TxtWord):
                if word.isVerb() and word.lastIdropsDuringPassiveSuffixation():
                    transitionState = State("VerbalStem", False, False)
                    startState = State("VerbalRoot", True, False)
                    transition = Transition("Hl", transitionState,
                                            "^DB+VERB+PASS")
                    surfaceForm = transition.makeTransition(
                        word, word.getName(), startState)
                    self.assertTrue(
                        self.fsm.morphologicalAnalysis(surfaceForm).size() != 0
                    )