def transitionPossibleForWord(self, root: TxtWord,
                              fromState: State) -> bool:
    """
    Checks whether this transition may be applied to the given root word when it is entered from the given state.

    PARAMETERS
    ----------
    root : TxtWord
        Root word whose lexical flags are consulted.
    fromState : State
        State the transition is taken from.

    RETURNS
    -------
    bool
        False when one of the adjective restrictions below applies; for the "ki", "kü", "dHr" and "Hr" transitions
        the answer of the corresponding root flag; True in every other case.
    """
    withName = self.__with
    if root.isAdjective():
        # Adjective that is a (non-exceptional) nominal or a pronoun may not take the empty transition
        # into NominalRoot(ADJ).
        nominalOrPronoun = (root.isNominal() and not root.isExceptional()) or root.isPronoun()
        if nominalOrPronoun and self.__toState.getName() == "NominalRoot(ADJ)" and withName == "0":
            return False
        # Nominal adjectives leaving AdjectiveRoot may take neither of these two transitions.
        if root.isNominal() and withName in ("^DB+VERB+ZERO+PRES+A3PL", "SH") \
                and fromState.getName() == "AdjectiveRoot":
            return False
    if withName == "ki":
        return root.takesRelativeSuffixKi()
    if withName == "kü":
        return root.takesRelativeSuffixKu()
    if withName == "dHr":
        # Always allowed towards Adverb; otherwise the root itself decides.
        return self.__toState.getName() == "Adverb" or root.takesSuffixDIRAsFactitive()
    if withName == "Hr" and self.__toState.getName() in ("AdjectiveRoot(VERB)", "OtherTense", "OtherTense2"):
        return root.takesSuffixIRAsAorist()
    return True
    def addSuffix(self, suffix: State, form: str, transition: str,
                  withName: str, toPos: str):
        """
        Records one more suffix step of the parse.

        The part of speech is updated from the suffix state when that state carries one; otherwise from toPos when
        toPos is given; otherwise it is left as it was. The suffix state, the form and the transition are appended
        to their lists, withName is appended to the with list unless it is the string "0", and the current form is
        replaced by the given form.

        PARAMETERS
        ----------
        suffix : State
            State reached by the suffix.
        form : str
            Surface form after the suffix has been attached.
        transition : str
            Transition string to record.
        withName : str
            Name of the transition; "0" is not recorded.
        toPos : str
            Fallback part of speech, used only when the suffix state has none.
        """
        suffixPos = suffix.getPos()
        if suffixPos is not None:
            self.__pos = suffixPos
        elif toPos is not None:
            self.__pos = toPos
        self.__suffixList.append(suffix)
        self.__formList.append(form)
        self.__transitionList.append(transition)
        if not withName == "0":
            self.__withList.append(withName)
        self.__form = form
 def makeTransitionNoStartState(self, root: TxtWord, stem: str) -> str:
     """
     Applies this transition to the stem using a default start state: "VerbalRoot" when the root is a verb,
     "NominalRoot" otherwise. Returns whatever makeTransition returns for that start state.
     """
     startName = "VerbalRoot" if root.isVerb() else "NominalRoot"
     return self.makeTransition(root, stem, State(startName, True, False))
 def test_morphologicalAnalysisIsPortmanteau(self):
     """
     For every nominal, non-plural portmanteau dictionary entry without vowel ellipsis, rebuilds the root form,
     attaches the "lArH" transition and checks that the analyser returns at least one parse for the result.
     """
     # Softened consonant found before the final character -> consonant restored in the root form.
     hardenedBy = {"b": "p", "c": "ç", "d": "t", "ğ": "k"}
     dictionary = self.fsm.getDictionary()
     for index in range(dictionary.size()):
         word = dictionary.getWordWithIndex(index)
         if not isinstance(word, TxtWord):
             continue
         if not (word.isNominal() and word.isPortmanteau()) or word.isPlural() \
                 or word.isPortmanteauFacedVowelEllipsis():
             continue
         toState = State("CompoundNounRoot", True, False)
         fromState = State("CompoundNounRoot", True, False)
         transition = Transition("lArH", toState, "A3PL+P3PL")
         name = word.getName()
         withoutLastTwo = name[:len(name) - 2]
         withoutLast = name[:len(name) - 1]
         if word.isPortmanteauFacedSoftening():
             restored = hardenedBy.get(name[len(name) - 2])
             if restored is None:
                 rootForm = withoutLast
             else:
                 rootForm = withoutLastTwo + restored
         elif word.isPortmanteauEndingWithSI():
             rootForm = withoutLastTwo
         else:
             rootForm = withoutLast
         surfaceForm = transition.makeTransition(word, rootForm, fromState)
         parses = self.fsm.morphologicalAnalysis(surfaceForm)
         self.assertTrue(parses.size() != 0)
Exemple #5
0
 def addSuffix(self, suffix: State, form: str, transition: str,
               withName: str, toPos: str):
     """
     Appends a suffix step to the parse: updates the part of speech (from the suffix state, falling back to
     toPos, leaving it untouched when both are None), records suffix, form and transition in their lists,
     records withName unless it equals "0", and makes form the current form.
     """
     newPos = suffix.getPos()
     if newPos is None:
         newPos = toPos
     if newPos is not None:
         self.__pos = newPos
     self.__suffixList.append(suffix)
     self.__formList.append(form)
     self.__transitionList.append(transition)
     if withName != "0":
         self.__withList.append(withName)
     self.__form = form
Exemple #6
0
 def __init__(self, fileName: str):
     """
     Builds the finite state machine from the XML file at fileName.

     First pass: every child element of the document root becomes a State built from its "name", "start" and
     "end" attributes (plus "originalpos" for start states). Second pass: for every state element with a
     "name", each child element names a target state; when that state is known, one transition is added per
     grandchild element, using the grandchild's text, the applicable transition name and the applicable
     part of speech ("topos" of the grandchild when it also has a "name", otherwise "topos" of the child).
     Children whose target state is unknown are skipped.
     """
     self.__transitions = {}
     self.__states = []
     root = xml.etree.ElementTree.parse(fileName).getroot()
     # Pass 1: register all states.
     for stateNode in root:
         attributes = stateNode.attrib
         name = attributes["name"]
         isStart = attributes["start"] == "yes"
         isEnd = attributes["end"] == "yes"
         if isStart:
             newState = State(name, True, isEnd, attributes["originalpos"])
         else:
             newState = State(name, False, isEnd)
         self.__states.append(newState)
     # Pass 2: register the transitions between the states.
     for stateNode in root:
         if "name" not in stateNode.attrib:
             continue
         state = self.getState(stateNode.attrib["name"])
         for transitionNode in stateNode:
             targetName = transitionNode.attrib["name"]
             withName = transitionNode.attrib.get("transitionname")
             rootToPos = transitionNode.attrib.get("topos")
             toState = self.getState(targetName)
             if toState is None:
                 continue
             for withNode in transitionNode:
                 toPos = None
                 if "name" in withNode.attrib:
                     # The name stays in effect for later siblings that carry no name of their own.
                     withName = withNode.attrib["name"]
                     toPos = withNode.attrib.get("topos")
                 if toPos is None:
                     toPos = rootToPos
                 if toPos is None:
                     self.addTransition(state, toState, withNode.text, withName)
                 else:
                     self.addTransition(state, toState, withNode.text, withName, toPos)
 def test_morphologicalAnalysisVowelAChangesToIDuringYSuffixation(self):
     """
     For every verb entry flagged vowelAChangesToIDuringYSuffixation, attaches the "Hyor" transition to the
     root and checks that the analyser returns at least one parse for the resulting surface form.
     """
     lexicon = self.fsm.getDictionary()
     for position in range(lexicon.size()):
         entry = lexicon.getWordWithIndex(position)
         if not isinstance(entry, TxtWord):
             continue
         if not (entry.isVerb() and entry.vowelAChangesToIDuringYSuffixation()):
             continue
         toState = State("VerbalStem", False, False)
         fromState = State("VerbalRoot", True, False)
         progressive = Transition("Hyor", toState, "PROG1")
         surface = progressive.makeTransition(entry, entry.getName(), fromState)
         parses = self.fsm.morphologicalAnalysis(surface)
         self.assertTrue(parses.size() != 0)
 def test_morphologicalAnalysisNounSoftenDuringSuffixation(self):
     """
     For every nominal entry flagged nounSoftenDuringSuffixation, attaches the "yH" transition to the root
     and checks that the analyser returns at least one parse for the resulting surface form.
     """
     lexicon = self.fsm.getDictionary()
     entries = (lexicon.getWordWithIndex(k) for k in range(lexicon.size()))
     for entry in entries:
         if not isinstance(entry, TxtWord):
             continue
         if entry.isNominal() and entry.nounSoftenDuringSuffixation():
             toState = State("Possessive", False, False)
             fromState = State("NominalRoot", True, False)
             accusative = Transition("yH", toState, "ACC")
             surface = accusative.makeTransition(entry, entry.getName(), fromState)
             parseCount = self.fsm.morphologicalAnalysis(surface).size()
             self.assertTrue(parseCount != 0)
 def test_morphologicalAnalysisLastIdropsDuringPassiveSuffixation(self):
     """
     For every verb entry flagged lastIdropsDuringPassiveSuffixation, attaches the "Hl" transition to the
     root and checks that the analyser returns at least one parse for the resulting surface form.
     """
     lexicon = self.fsm.getDictionary()
     total = lexicon.size()
     for position in range(total):
         entry = lexicon.getWordWithIndex(position)
         isCandidate = isinstance(entry, TxtWord) and entry.isVerb() \
             and entry.lastIdropsDuringPassiveSuffixation()
         if not isCandidate:
             continue
         toState = State("VerbalStem", False, False)
         fromState = State("VerbalRoot", True, False)
         passive = Transition("Hl", toState, "^DB+VERB+PASS")
         surface = passive.makeTransition(entry, entry.getName(), fromState)
         parses = self.fsm.morphologicalAnalysis(surface)
         self.assertTrue(parses.size() != 0)
    def makeTransitionNoStartState(self, root: TxtWord, stem: str) -> str:
        """
        Applies this transition to the given stem when the caller has no start state at hand. A start state is
        chosen from the root: "VerbalRoot" for verbs, "NominalRoot" for everything else, and the work is delegated
        to makeTransition.

        PARAMETERS
        ----------
        root : TxtWord
            Root word the stem belongs to.
        stem : str
            Stem the transition is attached to.

        RETURNS
        -------
        str
            The result of makeTransition for the chosen start state.
        """
        if root.isVerb():
            rootStateName = "VerbalRoot"
        else:
            rootStateName = "NominalRoot"
        defaultStart = State(rootStateName, True, False)
        return self.makeTransition(root, stem, defaultStart)
 def makeTransition(self, root: TxtWord, stem: str, startState: State) -> str:
     """
     Attaches this transition's suffix template (self.__with) to the given stem and returns the surface form.

     The method works in four steps:
     1. Shortcuts: the template "0" returns the stem unchanged; "bu"/"şu"/"o" with "ylA" and "ben" with "yA"
        return fixed forms.
     2. Stem adjustment: at most one alteration of the stem end is applied, chosen by the nested else/if chain
        below from the root's flags, the template and the start state.
     3. Start index: decides how many leading characters of the template are skipped (0, 1 or 2), adding an
        apostrophe to the formation when the template starts with one.
     4. Resolution: walks the rest of the template; the meta characters D, A, H, C, S and Ş are resolved through
        the private __resolve* helpers, every other character is appended as is (a final "s" is appended as "ş").

     Side effect: self.__formationToCheck is updated along the way. NOTE(review): presumably the __resolve*
     helpers read it; they are not visible here, so the statement order should not be changed without checking.

     PARAMETERS
     ----------
     root : TxtWord
         Root word the stem is derived from; its flags drive the stem adjustments.
     stem : str
         Current stem the suffix is attached to.
     startState : State
         State the analysis started from; its name disables some adjustments.

     RETURNS
     -------
     str
         The stem with the suffix attached.
     """
     # True when the stem is still the bare root (optionally followed by an apostrophe).
     rootWord = root.getName() == stem or (root.getName() + "'") == stem
     formation = stem
     # Index of the first template character that will be resolved in the final loop.
     i = 0
     if self.__with == "0":
         return stem
     # Fixed forms for the roots bu/şu/o with "ylA" and for "ben" with "yA".
     if (stem == "bu" or stem == "şu" or stem == "o") and rootWord and self.__with == "ylA":
         return stem + "nunla"
     if self.__with == "yA":
         if stem == "ben":
             return "bana"
     self.__formationToCheck = stem
     # Template starts with "y" (but not "yH") and the root is flagged vowelEChangesToIDuringYSuffixation:
     # the last character of the stem is replaced by "i".
     if rootWord and self.__withFirstChar() == "y" and root.vowelEChangesToIDuringYSuffixation() \
             and self.__with[1] != "H":
         formation = stem[:len(stem) - 1] + "i"
         self.__formationToCheck = formation
     else:
         # "Hl"/"Hn" on a root flagged lastIdropsDuringPassiveSuffixation: the second-to-last character of the
         # stem is removed; __formationToCheck keeps the unmodified stem.
         if rootWord and (self.__with == "Hl" or self.__with == "Hn") and root.lastIdropsDuringPassiveSuffixation():
             formation = stem[:len(stem) - 2] + stem[len(stem) - 1]
             self.__formationToCheck = stem
         else:
             # Root flagged showsSuRegularities: a "y" is appended to the stem unless the template itself
             # starts with "y".
             if rootWord and root.showsSuRegularities() and self.__startWithVowelorConsonantDrops() and \
                     not self.__with.startswith("y"):
                 formation = stem + 'y'
                 self.__formationToCheck = formation
             else:
                 # Root flagged duplicatesDuringSuffixation: the final character is doubled; with softening,
                 # p becomes "bb" and t becomes "dd" (any other final phoneme leaves the stem unchanged).
                 if rootWord and root.duplicatesDuringSuffixation() and TurkishLanguage.isConsonantDrop(
                         self.__with[0]):
                     if self.softenDuringSuffixation(root):
                         if self.__lastPhoneme(stem) == "p":
                             formation = stem[:len(stem) - 1] + "bb"
                         elif self.__lastPhoneme(stem) == "t":
                             formation = stem[:len(stem) - 1] + "dd"
                     else:
                         formation = stem + stem[len(stem) - 1]
                     self.__formationToCheck = formation
                 else:
                     # Root flagged lastIdropsDuringSuffixation, start state name not beginning with
                     # "VerbalRoot" or "ProperRoot": the last two characters are replaced by the (possibly
                     # softened) final consonant; __formationToCheck keeps the unmodified stem.
                     if rootWord and root.lastIdropsDuringSuffixation() and \
                             not startState.getName().startswith(
                                 "VerbalRoot") and not startState.getName().startswith("ProperRoot") \
                             and self.__startWithVowelorConsonantDrops():
                         if self.softenDuringSuffixation(root):
                             if self.__lastPhoneme(stem) == "p":
                                 formation = stem[:len(stem) - 2] + 'b'
                             elif self.__lastPhoneme(stem) == "t":
                                 formation = stem[:len(stem) - 2] + 'd'
                             elif self.__lastPhoneme(stem) == "ç":
                                 formation = stem[:len(stem) - 2] + 'c'
                         else:
                             formation = stem[:len(stem) - 2] + stem[len(stem) - 1]
                         self.__formationToCheck = stem
                     else:
                         # Plain softening of the final consonant: p->b, t->d, ç->c, g->ğ, and k->g or k->ğ
                         # depending on the root flags.
                         if self.__lastPhoneme(stem) == "p":
                             if self.__startWithVowelorConsonantDrops() and rootWord and \
                                     self.softenDuringSuffixation(root):
                                 formation = stem[:len(stem) - 1] + 'b'
                         elif self.__lastPhoneme(stem) == "t":
                             if self.__startWithVowelorConsonantDrops() and rootWord and \
                                     self.softenDuringSuffixation(root):
                                 formation = stem[:len(stem) - 1] + 'd'
                         elif self.__lastPhoneme(stem) == "ç":
                             if self.__startWithVowelorConsonantDrops() and rootWord and \
                                     self.softenDuringSuffixation(root):
                                 formation = stem[:len(stem) - 1] + 'c'
                         elif self.__lastPhoneme(stem) == "g":
                             if self.__startWithVowelorConsonantDrops() and rootWord and \
                                     self.softenDuringSuffixation(root):
                                 formation = stem[:len(stem) - 1] + 'ğ'
                         elif self.__lastPhoneme(stem) == "k":
                             if self.__startWithVowelorConsonantDrops() and rootWord and root.endingKChangesIntoG() \
                                     and not root.isProperNoun():
                                 formation = stem[:len(stem) - 1] + 'g'
                             else:
                                 # k->ğ also applies to stems that are no longer the bare root.
                                 if self.__startWithVowelorConsonantDrops() and (not rootWord or (
                                         self.softenDuringSuffixation(root) and (
                                         not root.isProperNoun() or startState.__str__() != "ProperRoot"))):
                                     formation = stem[:len(stem) - 1] + 'ğ'
                         self.__formationToCheck = formation
     # Numeric-like roots whose name ends in one of the listed digits or digit pairs, with a stem that does not
     # end in a vowel: the first template character is skipped (two characters when the template starts with an
     # apostrophe, which is copied into the formation).
     if TurkishLanguage.isConsonantDrop(self.__withFirstChar()) and not TurkishLanguage.isVowel(stem[len(stem) - 1])\
             and (root.isNumeral() or root.isReal() or root.isFraction() or root.isTime() or root.isDate()
                  or root.isPercent() or root.isRange()) \
             and (root.getName().endswith("1") or root.getName().endswith("3") or root.getName().endswith("4")
                  or root.getName().endswith("5") or root.getName().endswith("8") or root.getName().endswith("9")
                  or root.getName().endswith("10") or root.getName().endswith("30") or root.getName().endswith("40")
                  or root.getName().endswith("60") or root.getName().endswith("70") or root.getName().endswith("80")
                  or root.getName().endswith("90") or root.getName().endswith("00")):
         if self.__with[0] == "'":
             formation = formation + "'"
             i = 2
         else:
             i = 1
     else:
         # Same skipping for any stem ending in a consonant, or for a bare root flagged
         # consonantSMayInsertedDuringPossesiveSuffixation; abbreviations skip only the apostrophe.
         if (TurkishLanguage.isConsonantDrop(self.__withFirstChar()) and TurkishLanguage.isConsonant(
                 self.__lastPhoneme(stem))) or (rootWord and root.consonantSMayInsertedDuringPossesiveSuffixation()):
             if self.__with[0] == "'":
                 formation = formation + "'"
                 if root.isAbbreviation():
                     i = 1
                 else:
                     i = 2
             else:
                 i = 1
     # Resolve the remaining template characters one by one.
     while i < len(self.__with):
         if self.__with[i] == "D":
             formation = self.__resolveD(root, formation)
         elif self.__with[i] == "A":
             formation = self.__resolveA(root, formation, rootWord)
         elif self.__with[i] == "H":
             # The third argument tells the helper whether this H is the first character of the suffix
             # (index 1 when the template starts with an apostrophe).
             if self.__with[0] != "'":
                 formation = self.__resolveH(root, formation, i == 0, self.__with.startswith("Hyor"), rootWord)
             else:
                 formation = self.__resolveH(root, formation, i == 1, False, rootWord)
         elif self.__with[i] == "C":
             formation = self.__resolveC(formation)
         elif self.__with[i] == "S":
             formation = self.__resolveS(formation)
         elif self.__with[i] == "Ş":
             formation = self.__resolveSh(formation)
         else:
             # Literal character; a template-final "s" is written as "ş".
             if i == len(self.__with) - 1 and self.__with[i] == "s":
                 formation += "ş"
             else:
                 formation += self.__with[i]
         self.__formationToCheck = formation
         i = i + 1
     return formation
    def __init__(self, fileName: str):
        """
        Constructor reads the finite state machine from the given XML file.

        The file is parsed with xml.etree.ElementTree and the children of the document root are visited twice.
        In the first visit a State is created for every child from its attributes 'name', 'start' and 'end'
        ('start' and 'end' are true when they equal 'yes'); start states additionally read 'originalpos'.

        In the second visit, for every state element that has a 'name', each of its child elements names the target
        state in its own 'name' attribute and may carry a default transition name ('transitionname') and a default
        part of speech ('topos'). When the target state is known, one transition is added for every grandchild
        element: the grandchild's text is the suffix, its 'name' attribute (when present) replaces the transition
        name for itself and for the siblings after it, and its 'topos' (only read when 'name' is present) takes
        precedence over the default part of speech. Child elements whose target state is unknown are skipped.

        PARAMETERS
        ----------
        fileName : str
            Path of the XML file that describes the finite state machine.
        """
        self.__transitions = {}
        self.__states = []
        root = xml.etree.ElementTree.parse(fileName).getroot()
        for stateNode in root:
            stateName = stateNode.attrib["name"]
            isStartState = stateNode.attrib["start"] == "yes"
            isEndState = stateNode.attrib["end"] == "yes"
            if not isStartState:
                self.__states.append(State(stateName, False, isEndState))
                continue
            originalPos = stateNode.attrib["originalpos"]
            self.__states.append(State(stateName, True, isEndState, originalPos))
        for stateNode in root:
            if "name" not in stateNode.attrib:
                continue
            fromState = self.getState(stateNode.attrib["name"])
            for transitionNode in stateNode:
                transitionAttributes = transitionNode.attrib
                toStateName = transitionAttributes["name"]
                withName = transitionAttributes.get("transitionname")
                defaultToPos = transitionAttributes.get("topos")
                toState = self.getState(toStateName)
                if toState is None:
                    continue
                for withNode in transitionNode:
                    if "name" in withNode.attrib:
                        withName = withNode.attrib["name"]
                        ownToPos = withNode.attrib.get("topos")
                    else:
                        ownToPos = None
                    effectiveToPos = ownToPos if ownToPos is not None else defaultToPos
                    if effectiveToPos is None:
                        self.addTransition(fromState, toState, withNode.text, withName)
                    else:
                        self.addTransition(fromState, toState, withNode.text, withName, effectiveToPos)