def transitionPossibleForWord(self, root: TxtWord, fromState: State) -> bool:
    """
    Decides whether this transition may be taken for the given root word when it is reached from
    fromState.

    Three adjective-specific combinations of root flags, suffix form and state name are rejected
    first. After that the suffix forms "ki", "kü", "dHr" and "Hr" defer to the corresponding
    ``takes...`` query of the root. Everything else is accepted.

    PARAMETERS
    ----------
    root : TxtWord
        Root word the transition would be applied to.
    fromState : State
        State the transition would be taken from.

    RETURNS
    -------
    bool
        False when a blocking rule matches, the answer of the root's own query for the guarded
        suffix forms, True in all remaining cases.
    """
    suffix = self.__with

    if root.isAdjective():
        # Nominal (non-exceptional) or pronoun adjectives may not take the empty transition into
        # the "NominalRoot(ADJ)" state.
        if ((root.isNominal() and not root.isExceptional()) or root.isPronoun()) \
                and self.__toState.getName() == "NominalRoot(ADJ)" and suffix == "0":
            return False
        # Nominal adjectives leaving "AdjectiveRoot" may not use these two suffix forms.
        if root.isNominal() and suffix in ("^DB+VERB+ZERO+PRES+A3PL", "SH") \
                and fromState.getName() == "AdjectiveRoot":
            return False

    if suffix == "ki":
        return root.takesRelativeSuffixKi()
    if suffix == "kü":
        return root.takesRelativeSuffixKu()
    if suffix == "dHr":
        # Always allowed towards "Adverb"; otherwise the root decides.
        return self.__toState.getName() == "Adverb" or root.takesSuffixDIRAsFactitive()
    if suffix == "Hr" and self.__toState.getName() in ("AdjectiveRoot(VERB)", "OtherTense", "OtherTense2"):
        return root.takesSuffixIRAsAorist()
    return True
def addSuffix(self, suffix: State, form: str, transition: str, withName: str, toPos: str):
    """
    Records one more suffix step on this object.

    The part of speech is updated from the suffix state when that state carries one, and from
    toPos otherwise; when both are None the current value is kept. The suffix, form and transition
    are then appended to their lists, withName is appended to the with-list unless it equals "0",
    and form becomes the current form.

    PARAMETERS
    ----------
    suffix : State
        State reached by the step.
    form : str
        Form produced by the step.
    transition : str
        Transition text of the step.
    withName : str
        Name of the transition; "0" means it is not recorded.
    toPos : str
        Fallback part of speech used when the suffix state has none.
    """
    pos = suffix.getPos()
    if pos is None:
        pos = toPos
    if pos is not None:
        self.__pos = pos

    self.__suffixList.append(suffix)
    self.__formList.append(form)
    self.__transitionList.append(transition)
    if withName != "0":
        self.__withList.append(withName)
    self.__form = form
def makeTransitionNoStartState(self, root: TxtWord, stem: str) -> str:
    """
    Applies makeTransition with a start state chosen from the root: a "VerbalRoot" start state
    when the root is a verb, a "NominalRoot" start state otherwise.

    PARAMETERS
    ----------
    root : TxtWord
        Root word of the stem.
    stem : str
        Stem the transition is applied to.

    RETURNS
    -------
    str
        Whatever makeTransition returns for the chosen start state.
    """
    startName = "VerbalRoot" if root.isVerb() else "NominalRoot"
    return self.makeTransition(root, stem, State(startName, True, False))
def test_morphologicalAnalysisIsPortmanteau(self):
    """
    For every TxtWord in the dictionary that is nominal, portmanteau, not plural and not flagged
    as portmanteau-with-vowel-ellipsis, derives a root form from the word's name, applies the
    "lArH" transition to it, and asserts that analysing the resulting surface form yields at
    least one parse.
    """
    # Replacement for the second-to-last character of words flagged as softened portmanteaus.
    hardenedConsonant = {"b": "p", "c": "ç", "d": "t", "ğ": "k"}

    dictionary = self.fsm.getDictionary()
    for index in range(dictionary.size()):
        word = dictionary.getWordWithIndex(index)
        if not isinstance(word, TxtWord):
            continue
        eligible = word.isNominal() and word.isPortmanteau() and not word.isPlural() and \
            not word.isPortmanteauFacedVowelEllipsis()
        if not eligible:
            continue

        transitionState = State("CompoundNounRoot", True, False)
        startState = State("CompoundNounRoot", True, False)
        transition = Transition("lArH", transitionState, "A3PL+P3PL")

        name = word.getName()
        withoutLastTwo = name[:len(name) - 2]
        withoutLast = name[:len(name) - 1]
        if word.isPortmanteauFacedSoftening():
            penultimate = name[len(name) - 2]
            if penultimate in hardenedConsonant:
                rootForm = withoutLastTwo + hardenedConsonant[penultimate]
            else:
                rootForm = withoutLast
        elif word.isPortmanteauEndingWithSI():
            rootForm = withoutLastTwo
        else:
            rootForm = withoutLast

        surfaceForm = transition.makeTransition(word, rootForm, startState)
        self.assertTrue(self.fsm.morphologicalAnalysis(surfaceForm).size() != 0)
def addSuffix(self, suffix: State, form: str, transition: str, withName: str, toPos: str):
    """
    Appends one suffix step to the lists kept on this object and makes form the current form.

    The stored part of speech is replaced by the suffix state's part of speech if it has one,
    else by toPos if that is not None; otherwise it is left untouched. withName is only stored
    when it differs from "0".

    PARAMETERS
    ----------
    suffix : State
        State reached by the step.
    form : str
        Form produced by the step.
    transition : str
        Transition text of the step.
    withName : str
        Name of the transition; skipped when equal to "0".
    toPos : str
        Part of speech to fall back on when the suffix state provides none.
    """
    suffixPos = suffix.getPos()
    newPos = toPos if suffixPos is None else suffixPos
    if newPos is not None:
        self.__pos = newPos

    self.__suffixList.append(suffix)
    self.__formList.append(form)
    self.__transitionList.append(transition)
    if withName != "0":
        self.__withList.append(withName)
    self.__form = form
def __init__(self, fileName: str):
    """
    Builds the finite state machine from an XML file.

    The children of the XML root are read twice. The first pass creates one State per child from
    its "name", "start" and "end" attributes (start states additionally read "originalpos"). The
    second pass walks the children of every state node as transitions towards the state named by
    their own "name" attribute, and registers one transition per grandchild node through
    addTransition. Transitions whose target state is unknown to getState are skipped.

    PARAMETERS
    ----------
    fileName : str
        Path of the XML file describing the states and transitions.
    """
    self.__transitions = {}
    self.__states = []
    root = xml.etree.ElementTree.parse(fileName).getroot()

    # Pass 1: create every state.
    for stateNode in root:
        attributes = stateNode.attrib
        stateName = attributes["name"]
        isStart = attributes["start"] == "yes"
        isEnd = attributes["end"] == "yes"
        if isStart:
            self.__states.append(State(stateName, True, isEnd, attributes["originalpos"]))
        else:
            self.__states.append(State(stateName, False, isEnd))

    # Pass 2: connect the states.
    for stateNode in root:
        if "name" not in stateNode.attrib:
            continue
        state = self.getState(stateNode.attrib["name"])
        for transitionNode in stateNode:
            targetName = transitionNode.attrib["name"]
            # Defaults taken from the transition node; a with-node may override them below.
            withName = transitionNode.attrib.get("transitionname")
            rootToPos = transitionNode.attrib.get("topos")
            toState = self.getState(targetName)
            if toState is None:
                continue
            for withNode in transitionNode:
                toPos = None
                # "topos" of a with-node is only honoured when the node also carries a "name";
                # withName deliberately carries over to later with-nodes that have no "name".
                if "name" in withNode.attrib:
                    withName = withNode.attrib["name"]
                    toPos = withNode.attrib.get("topos")
                effectivePos = rootToPos if toPos is None else toPos
                if effectivePos is None:
                    self.addTransition(state, toState, withNode.text, withName)
                else:
                    self.addTransition(state, toState, withNode.text, withName, effectivePos)
def test_morphologicalAnalysisVowelAChangesToIDuringYSuffixation(self):
    """
    For every verb TxtWord in the dictionary whose vowelAChangesToIDuringYSuffixation flag is
    set, applies the "Hyor" transition to the word's name and asserts that analysing the
    resulting surface form yields at least one parse.
    """
    dictionary = self.fsm.getDictionary()
    for index in range(dictionary.size()):
        word = dictionary.getWordWithIndex(index)
        if not isinstance(word, TxtWord):
            continue
        if not (word.isVerb() and word.vowelAChangesToIDuringYSuffixation()):
            continue
        transitionState = State("VerbalStem", False, False)
        startState = State("VerbalRoot", True, False)
        transition = Transition("Hyor", transitionState, "PROG1")
        surfaceForm = transition.makeTransition(word, word.getName(), startState)
        parseCount = self.fsm.morphologicalAnalysis(surfaceForm).size()
        self.assertTrue(parseCount != 0)
def test_morphologicalAnalysisNounSoftenDuringSuffixation(self):
    """
    For every nominal TxtWord in the dictionary whose nounSoftenDuringSuffixation flag is set,
    applies the "yH" transition to the word's name and asserts that analysing the resulting
    surface form yields at least one parse.
    """
    dictionary = self.fsm.getDictionary()
    for index in range(dictionary.size()):
        word = dictionary.getWordWithIndex(index)
        if not isinstance(word, TxtWord):
            continue
        if not (word.isNominal() and word.nounSoftenDuringSuffixation()):
            continue
        transitionState = State("Possessive", False, False)
        startState = State("NominalRoot", True, False)
        transition = Transition("yH", transitionState, "ACC")
        surfaceForm = transition.makeTransition(word, word.getName(), startState)
        parseCount = self.fsm.morphologicalAnalysis(surfaceForm).size()
        self.assertTrue(parseCount != 0)
def test_morphologicalAnalysisLastIdropsDuringPassiveSuffixation(self):
    """
    For every verb TxtWord in the dictionary whose lastIdropsDuringPassiveSuffixation flag is
    set, applies the "Hl" transition to the word's name and asserts that analysing the resulting
    surface form yields at least one parse.
    """
    dictionary = self.fsm.getDictionary()
    for index in range(dictionary.size()):
        word = dictionary.getWordWithIndex(index)
        if not isinstance(word, TxtWord):
            continue
        if not (word.isVerb() and word.lastIdropsDuringPassiveSuffixation()):
            continue
        transitionState = State("VerbalStem", False, False)
        startState = State("VerbalRoot", True, False)
        transition = Transition("Hl", transitionState, "^DB+VERB+PASS")
        surfaceForm = transition.makeTransition(word, word.getName(), startState)
        parseCount = self.fsm.morphologicalAnalysis(surfaceForm).size()
        self.assertTrue(parseCount != 0)
def makeTransitionNoStartState(self, root: TxtWord, stem: str) -> str:
    """
    Convenience wrapper around makeTransition for callers that have no start state at hand.

    A fresh start state is created for the call: "VerbalRoot" if the root is a verb,
    "NominalRoot" in every other case.

    PARAMETERS
    ----------
    root : TxtWord
        Root word of the stem.
    stem : str
        Stem the transition is applied to.

    RETURNS
    -------
    str
        Result of makeTransition for the root, the stem and the chosen start state.
    """
    if root.isVerb():
        startState = State("VerbalRoot", True, False)
    else:
        startState = State("NominalRoot", True, False)
    return self.makeTransition(root, stem, startState)
def makeTransition(self, root: TxtWord, stem: str, startState: State) -> str:
    """
    Attaches this transition's suffix pattern (self.__with) to the given stem and returns the
    resulting form.

    The method works in three stages:
    1. Shortcuts: the "0" pattern returns the stem unchanged; "bu"/"şu"/"o" with "ylA" and
       "ben" with "yA" return fixed forms.
    2. Stem adjustment: depending on flags of the root, the pattern and the last phoneme of the
       stem, the end of the stem is rewritten (stored in formation) and the index i of the first
       pattern character to process is chosen (0, 1 or 2).
    3. Pattern expansion: the remaining pattern characters are walked one by one; the
       meta-characters D, A, H, C, S and Ş are resolved by the matching __resolve* helper, all
       other characters are appended literally.

    Side effect: self.__formationToCheck is updated along the way.

    PARAMETERS
    ----------
    root : TxtWord
        Root word the stem belongs to; its flags drive the stem adjustments.
    stem : str
        Current stem the suffix is attached to.
    startState : State
        Start state of the analysis; its name is consulted for some adjustments.

    RETURNS
    -------
    str
        The stem with the suffix pattern applied.
    """
    # True when the stem is still the bare root (optionally followed by an apostrophe).
    rootWord = root.getName() == stem or (root.getName() + "'") == stem
    formation = stem
    # Index of the first character of self.__with that the final loop will process.
    i = 0
    if self.__with == "0":
        return stem
    if (stem == "bu" or stem == "şu" or stem == "o") and rootWord and self.__with == "ylA":
        return stem + "nunla"
    if self.__with == "yA":
        if stem == "ben":
            return "bana"
    self.__formationToCheck = stem
    # Stage 2: the nested else-branches form a priority chain; only the first matching rule runs.
    if rootWord and self.__withFirstChar() == "y" and root.vowelEChangesToIDuringYSuffixation() \
            and self.__with[1] != "H":
        # Replace the last character of the stem with "i".
        formation = stem[:len(stem) - 1] + "i"
        self.__formationToCheck = formation
    else:
        if rootWord and (self.__with == "Hl" or self.__with == "Hn") and root.lastIdropsDuringPassiveSuffixation():
            # Drop the second-to-last character of the stem.
            formation = stem[:len(stem) - 2] + stem[len(stem) - 1]
            self.__formationToCheck = stem
        else:
            if rootWord and root.showsSuRegularities() and self.__startWithVowelorConsonantDrops() and \
                    not self.__with.startswith("y"):
                # Insert a "y" between stem and suffix.
                formation = stem + 'y'
                self.__formationToCheck = formation
            else:
                if rootWord and root.duplicatesDuringSuffixation() and TurkishLanguage.isConsonantDrop(
                        self.__with[0]):
                    # Double the final consonant; softened as well when the root softens.
                    if self.softenDuringSuffixation(root):
                        if self.__lastPhoneme(stem) == "p":
                            formation = stem[:len(stem) - 1] + "bb"
                        elif self.__lastPhoneme(stem) == "t":
                            formation = stem[:len(stem) - 1] + "dd"
                    else:
                        formation = stem + stem[len(stem) - 1]
                    self.__formationToCheck = formation
                else:
                    if rootWord and root.lastIdropsDuringSuffixation() and \
                            not startState.getName().startswith(
                                "VerbalRoot") and not startState.getName().startswith("ProperRoot") \
                            and self.__startWithVowelorConsonantDrops():
                        # Drop the second-to-last character; the last one is softened when the
                        # root softens and ends in p, t or ç.
                        if self.softenDuringSuffixation(root):
                            if self.__lastPhoneme(stem) == "p":
                                formation = stem[:len(stem) - 2] + 'b'
                            elif self.__lastPhoneme(stem) == "t":
                                formation = stem[:len(stem) - 2] + 'd'
                            elif self.__lastPhoneme(stem) == "ç":
                                formation = stem[:len(stem) - 2] + 'c'
                        else:
                            formation = stem[:len(stem) - 2] + stem[len(stem) - 1]
                        self.__formationToCheck = stem
                    else:
                        # Plain softening of the final consonant: p->b, t->d, ç->c, g->ğ, k->g/ğ.
                        if self.__lastPhoneme(stem) == "p":
                            if self.__startWithVowelorConsonantDrops() and rootWord and \
                                    self.softenDuringSuffixation(root):
                                formation = stem[:len(stem) - 1] + 'b'
                        elif self.__lastPhoneme(stem) == "t":
                            if self.__startWithVowelorConsonantDrops() and rootWord and \
                                    self.softenDuringSuffixation(root):
                                formation = stem[:len(stem) - 1] + 'd'
                        elif self.__lastPhoneme(stem) == "ç":
                            if self.__startWithVowelorConsonantDrops() and rootWord and \
                                    self.softenDuringSuffixation(root):
                                formation = stem[:len(stem) - 1] + 'c'
                        elif self.__lastPhoneme(stem) == "g":
                            if self.__startWithVowelorConsonantDrops() and rootWord and \
                                    self.softenDuringSuffixation(root):
                                formation = stem[:len(stem) - 1] + 'ğ'
                        elif self.__lastPhoneme(stem) == "k":
                            if self.__startWithVowelorConsonantDrops() and rootWord and root.endingKChangesIntoG() \
                                    and not root.isProperNoun():
                                formation = stem[:len(stem) - 1] + 'g'
                            else:
                                # Unlike the other consonants, k also softens on non-root stems.
                                if self.__startWithVowelorConsonantDrops() and (not rootWord or (
                                        self.softenDuringSuffixation(root) and (
                                        not root.isProperNoun() or startState.__str__() != "ProperRoot"))):
                                    formation = stem[:len(stem) - 1] + 'ğ'
                        self.__formationToCheck = formation
    # Decide how many leading pattern characters to skip. First case: numeric-like roots
    # (numeral, real, fraction, time, date, percent, range) whose name ends in one of the listed
    # digit strings and whose stem does not end in a vowel.
    if TurkishLanguage.isConsonantDrop(self.__withFirstChar()) and not TurkishLanguage.isVowel(stem[len(stem) - 1])\
            and (root.isNumeral() or root.isReal() or root.isFraction() or root.isTime() or root.isDate()
                 or root.isPercent() or root.isRange()) \
            and (root.getName().endswith("1") or root.getName().endswith("3") or root.getName().endswith("4")
                 or root.getName().endswith("5") or root.getName().endswith("8") or root.getName().endswith("9")
                 or root.getName().endswith("10") or root.getName().endswith("30")
                 or root.getName().endswith("40") or root.getName().endswith("60")
                 or root.getName().endswith("70") or root.getName().endswith("80")
                 or root.getName().endswith("90") or root.getName().endswith("00")):
        if self.__with[0] == "'":
            # Keep the apostrophe, then skip it and the character after it.
            formation = formation + "'"
            i = 2
        else:
            i = 1
    else:
        if (TurkishLanguage.isConsonantDrop(self.__withFirstChar()) and TurkishLanguage.isConsonant(
                self.__lastPhoneme(stem))) or (rootWord and root.consonantSMayInsertedDuringPossesiveSuffixation()):
            if self.__with[0] == "'":
                formation = formation + "'"
                # Abbreviations skip only the apostrophe; other roots also skip the next character.
                if root.isAbbreviation():
                    i = 1
                else:
                    i = 2
            else:
                i = 1
    # Stage 3: expand the remaining pattern characters one at a time.
    while i < len(self.__with):
        if self.__with[i] == "D":
            formation = self.__resolveD(root, formation)
        elif self.__with[i] == "A":
            formation = self.__resolveA(root, formation, rootWord)
        elif self.__with[i] == "H":
            # The third argument tells the helper whether H is the first real pattern character
            # (index 0, or index 1 when the pattern starts with an apostrophe).
            if self.__with[0] != "'":
                formation = self.__resolveH(root, formation, i == 0, self.__with.startswith("Hyor"), rootWord)
            else:
                formation = self.__resolveH(root, formation, i == 1, False, rootWord)
        elif self.__with[i] == "C":
            formation = self.__resolveC(formation)
        elif self.__with[i] == "S":
            formation = self.__resolveS(formation)
        elif self.__with[i] == "Ş":
            formation = self.__resolveSh(formation)
        else:
            # Literal character; a pattern-final "s" is written as "ş".
            if i == len(self.__with) - 1 and self.__with[i] == "s":
                formation += "ş"
            else:
                formation += self.__with[i]
        self.__formationToCheck = formation
        i = i + 1
    return formation
def __init__(self, fileName: str):
    """
    Constructor that reads the finite state machine from the given XML file.

    Every child of the XML root describes one state through the attributes "name", "start" and
    "end" ("yes" marks a start or end state); start states additionally provide "originalpos".
    All states are created first. Then, for each state node, every child is treated as a
    transition node whose "name" attribute identifies the target state and whose optional
    "transitionname" and "topos" attributes give defaults. Each child of a transition node
    contributes one transition via addTransition, using the node text, the current transition
    name and, when available, a part of speech (the with-node's "topos" first, the transition
    node's "topos" second). Transition nodes whose target state cannot be found are ignored.

    PARAMETERS
    ----------
    fileName : str
        Path of the XML file to read the finite state machine from.
    """
    self.__transitions = {}
    self.__states = []
    root = xml.etree.ElementTree.parse(fileName).getroot()

    # First pass: one State per child of the root.
    for stateNode in root:
        attributes = stateNode.attrib
        stateName = attributes["name"]
        isStart = attributes["start"] == "yes"
        isEnd = attributes["end"] == "yes"
        if isStart:
            self.__states.append(State(stateName, True, isEnd, attributes["originalpos"]))
        else:
            self.__states.append(State(stateName, False, isEnd))

    # Second pass: register the transitions between the states created above.
    for stateNode in root:
        if "name" not in stateNode.attrib:
            continue
        state = self.getState(stateNode.attrib["name"])
        for transitionNode in stateNode:
            targetName = transitionNode.attrib["name"]
            withName = transitionNode.attrib.get("transitionname")
            rootToPos = transitionNode.attrib.get("topos")
            toState = self.getState(targetName)
            if toState is None:
                continue
            for withNode in transitionNode:
                toPos = None
                # A with-node's "topos" counts only when the node also has a "name"; a name set
                # here stays in effect for following with-nodes that do not define their own.
                if "name" in withNode.attrib:
                    withName = withNode.attrib["name"]
                    toPos = withNode.attrib.get("topos")
                effectivePos = rootToPos if toPos is None else toPos
                if effectivePos is None:
                    self.addTransition(state, toState, withNode.text, withName)
                else:
                    self.addTransition(state, toState, withNode.text, withName, effectivePos)