def __init__(self, word: str):
    """
    Constructor of the SyllableList class; splits the given word into Syllable objects.

    The word is scanned from left to right while consonants are collected in a buffer. A vowel closes the current
    syllable: it is appended to the buffer (together with the following character when the vowel is the
    second-to-last character of the word) and the buffer is stored as a new Syllable. When a consonant arrives while
    the buffer holds exactly one consonant, the buffered consonant is moved to the end of the previously stored
    syllable, because a syllable should not start with two consonants. If no syllable has been stored yet, the
    consonant is just added to the buffer. Characters that are still in the buffer when the scan ends are not
    stored.

    PARAMETERS
    ----------
    word : str
        String input.
    """
    self.__syllables = []
    pending = ""
    length = len(word)
    pos = 0
    while pos < length:
        letter = word[pos]
        onLastChar = pos == length - 1
        if TurkishLanguage.isVowel(letter):
            pending += letter
            if pos == length - 2:
                # The vowel is followed by exactly one more character: take it into this syllable and skip it.
                pending += word[pos + 1]
                pos += 1
            self.__syllables.append(Syllable(pending))
            pending = ""
            pos += 1
            continue
        # From here on the current character is not a vowel.
        if len(pending) == 1 and not TurkishLanguage.isVowel(pending[0]):
            # The previous character was also a consonant.
            if not self.__syllables:
                # Nothing stored yet to attach the buffered consonant to; keep collecting.
                pending += letter
                pos += 1
                continue
            merged = self.__syllables[-1].getText() + pending
            if onLastChar:
                # If the last char is also a consonant, add it to latest syllable. Ex: 'park'.
                merged += letter
            # Update previous syllable.
            self.__syllables[-1] = Syllable(merged)
            pending = ""
        pending += letter
        pos += 1
def __resolveH(self, root: TxtWord, formation: str, beginningOfSuffix: bool, specialCaseTenseSuffix: bool,
               rootWord: bool):
    """
    Resolves one 'H' character of the suffix template into a concrete character and returns the updated formation.

    The decision is based on the abbreviation/numeral flags of the root, on the vowels found in the formation that is
    currently being checked (self.__formationToCheck) and on the flags passed by the caller.

    PARAMETERS
    ----------
    root : TxtWord
        Root word whose flags (abbreviation, vowel harmony exceptions, numeral kinds) are consulted.
    formation : str
        Surface form built so far.
    beginningOfSuffix : bool
        True when the 'H' being resolved is the first character of the suffix.
    specialCaseTenseSuffix : bool
        True when the suffix needs the special handling in which the last character of the formation may be replaced
        instead of a character being appended.
    rootWord : bool
        True when the stem being suffixed is the root itself.

    RETURNS
    -------
    str
        The formation with the resolved character appended, with its last character replaced, or unchanged.
    """
    checked = self.__formationToCheck

    def replaceLastCharacter():
        # Chooses the replacement by the class of the vowel before the last vowel; None when no class matches.
        prior = self.__beforeLastVowel(checked)
        if TurkishLanguage.isFrontRoundedVowel(prior):
            return formation[:-1] + 'ü'
        if TurkishLanguage.isFrontUnroundedVowel(prior):
            return formation[:-1] + 'i'
        if TurkishLanguage.isBackRoundedVowel(prior):
            return formation[:-1] + 'u'
        if TurkishLanguage.isBackUnroundedVowel(prior):
            return formation[:-1] + 'ı'
        return None

    if root.isAbbreviation():
        return formation + 'i'
    if beginningOfSuffix and TurkishLanguage.isVowel(self.__lastPhoneme(checked)) and not specialCaseTenseSuffix:
        return formation
    if specialCaseTenseSuffix:
        if rootWord and root.vowelAChangesToIDuringYSuffixation():
            replaced = replaceLastCharacter()
            if replaced is not None:
                return replaced
        if TurkishLanguage.isVowel(self.__lastPhoneme(checked)):
            replaced = replaceLastCharacter()
            if replaced is not None:
                return replaced
    # Regular case: append a vowel chosen by the class of the last vowel.
    finalVowel = self.__lastVowel(checked)
    if TurkishLanguage.isFrontRoundedVowel(finalVowel) or (
            TurkishLanguage.isBackRoundedVowel(finalVowel) and root.notObeysVowelHarmonyDuringAgglutination()):
        return formation + 'ü'
    if TurkishLanguage.isFrontUnroundedVowel(finalVowel) or (
            finalVowel == 'a' and root.notObeysVowelHarmonyDuringAgglutination()):
        return formation + 'i'
    if TurkishLanguage.isBackRoundedVowel(finalVowel):
        return formation + 'u'
    if TurkishLanguage.isBackUnroundedVowel(finalVowel):
        return formation + 'ı'
    # No vowel class matched; numeric roots pick the vowel from the trailing digits of the root name.
    if root.isNumeral() or root.isFraction() or root.isReal():
        name = root.getName()
        if name.endswith(("6", "40", "60", "90")):
            return formation + 'ı'
        if name.endswith(("3", "4", "00")):
            return formation + 'ü'
        if name.endswith(("9", "10", "30")):
            return formation + 'u'
        return formation + 'i'
    return formation
def __beforeLastVowel(self, stem: str) -> str:
    """
    The beforeLastVowel method scans the given stem from its end and returns the vowel that precedes the last vowel.

    PARAMETERS
    ----------
    stem : str
        String input.

    RETURNS
    -------
    str
        The second vowel counted from the end of the stem; the only vowel when the stem contains exactly one; "0"
        when the stem contains no vowel.
    """
    # Lazy stream of the vowels of stem, last one first.
    vowelsFromEnd = (ch for ch in reversed(stem) if TurkishLanguage.isVowel(ch))
    final = next(vowelsFromEnd, None)
    if final is None:
        return "0"
    return next(vowelsFromEnd, final)
def resolveSh(self, formation: str) -> str:
    """
    The resolveSh method takes a str formation as an input and resolves it according to its last character.

    If the last character is 't', it is replaced by 'd'. If the last character is a vowel, 'ş' is appended to the
    formation. In every other case the formation is returned unchanged.

    PARAMETERS
    ----------
    formation : str
        String input; must not be empty.

    RETURNS
    -------
    str
        Resolved String.
    """
    last = formation[-1]
    # 't' is handled first; it is a consonant, so checking it before the vowel test does not change the outcome.
    if last == 't':
        # Keep everything except the trailing 't' and put 'd' in its place.
        return formation[:-1] + 'd'
    if TurkishLanguage.isVowel(last):
        return formation + 'ş'
    return formation
def startWithVowelorConsonantDrops(self) -> bool:
    """
    Checks how the suffix template (the with variable) starts.

    Returns True when TurkishLanguage.isConsonantDrop accepts the first character of the template and the template
    is none of "ylA", "ysA", "ymHs", "yDH", "yken". Also returns True when the first character is "A", "H" or a
    vowel.

    RETURNS
    -------
    bool
        True if it starts with vowel or consonant drops, False otherwise.
    """
    first = self.withFirstChar()
    if TurkishLanguage.isConsonantDrop(first) and self.__with not in ("ylA", "ysA", "ymHs", "yDH", "yken"):
        return True
    if first == "A" or first == "H" or TurkishLanguage.isVowel(first):
        return True
    return False
def lastVowel(self, stem: str) -> str:
    """
    The lastVowel method scans the given stem from its end and returns the last vowel.

    PARAMETERS
    ----------
    stem : str
        String input.

    RETURNS
    -------
    str
        The last vowel of the stem; if there is no vowel, the last character between "0" and "9"; if there is
        neither, "0".
    """
    backwards = stem[::-1]
    for ch in backwards:
        if TurkishLanguage.isVowel(ch):
            return ch
    # No vowel at all: fall back to the last digit character.
    for ch in backwards:
        if "0" <= ch <= "9":
            return ch
    return "0"
def beforeLastVowel(self, stem: str) -> str:
    """
    The beforeLastVowel method scans the given stem from its end and returns the vowel that precedes the last vowel.

    PARAMETERS
    ----------
    stem : str
        String input.

    RETURNS
    -------
    str
        The second vowel counted from the end of the stem; the only vowel when the stem contains exactly one; "0"
        when the stem contains no vowel.
    """
    firstSeen = None
    for ch in reversed(stem):
        if not TurkishLanguage.isVowel(ch):
            continue
        if firstSeen is not None:
            # A vowel was already met closer to the end, so this is the one before it.
            return ch
        firstSeen = ch
    return "0" if firstSeen is None else firstSeen
def __startWithVowelorConsonantDrops(self) -> bool:
    """
    The startWithVowelorConsonantDrops method checks how the suffix template (the with variable) starts.

    It returns True when TurkishLanguage.isConsonantDrop accepts the first character of the template and the
    template is not equal to one of the Strings "ylA", "ysA", "ymHs", "yDH", "yken". It also returns True when the
    first character is "A", "H" or any other vowel.

    RETURNS
    -------
    bool
        True if it starts with vowel or consonant drops, False otherwise.
    """
    first = self.__withFirstChar()
    keptTemplates = ("ylA", "ysA", "ymHs", "yDH", "yken")
    if TurkishLanguage.isConsonantDrop(first) and self.__with not in keptTemplates:
        return True
    if first in ("A", "H"):
        return True
    if TurkishLanguage.isVowel(first):
        return True
    return False
def __lastVowel(self, stem: str) -> str:
    """
    The lastVowel method takes a str stem as an input, scans it from its end and returns the last vowel.

    PARAMETERS
    ----------
    stem : str
        String input.

    RETURNS
    -------
    str
        The last vowel of the stem; if there is no vowel, the last character between "0" and "9"; if there is
        neither, "0".
    """
    vowel = next((ch for ch in reversed(stem) if TurkishLanguage.isVowel(ch)), None)
    if vowel is not None:
        return vowel
    # No vowel at all: fall back to the last digit character, or "0" when there is none.
    return next((ch for ch in reversed(stem) if "0" <= ch <= "9"), "0")
def __resolveSh(self, formation: str) -> str:
    """
    The resolveSh method takes a str formation as an input and resolves it according to its last character.

    If the last character is a vowel, it concatenates the given formation with ş. If the last character is not a
    vowel and not 't', it directly returns the given formation. If the last character is 't', it transforms that
    't' to 'd'.

    PARAMETERS
    ----------
    formation : str
        String input; must not be empty.

    RETURNS
    -------
    str
        Resolved String.
    """
    last = formation[-1]
    # 't' is handled first; it is a consonant, so checking it before the vowel test does not change the outcome.
    if last == 't':
        # Keep everything except the trailing 't' and put 'd' in its place.
        return formation[:-1] + 'd'
    if TurkishLanguage.isVowel(last):
        return formation + 'ş'
    return formation
def makeTransition(self, root: TxtWord, stem: str, startState: State) -> str:
    """
    Builds the surface form obtained by attaching this transition's suffix template (the with variable) to the
    given stem.

    The method works in three steps. First it adjusts the end of the stem according to the flags of the root
    (storing the result in formation and the string used for later vowel checks in self.__formationToCheck). Then it
    decides from which index of the template to start (i), optionally appending an apostrophe. Finally it walks over
    the remaining template characters: 'D', 'A', 'H', 'C', 'S' and 'Ş' are resolved through the corresponding
    resolve methods, every other character is appended as it is.

    PARAMETERS
    ----------
    root : TxtWord
        Root word whose flags drive the stem adjustments.
    stem : str
        Current stem the suffix is attached to.
    startState : State
        State the transition starts from; its name is compared against "VerbalRoot" and "ProperRoot".

    RETURNS
    -------
    str
        The resulting surface form.
    """
    # True when the stem is the root itself (optionally followed by an apostrophe).
    rootWord = root.getName() == stem or (root.getName() + "'") == stem
    formation = stem
    # Index in self.__with at which the resolving loop below starts.
    i = 0
    # "0" template: nothing is attached.
    if self.__with == "0":
        return stem
    # Hard-coded forms that bypass the general procedure.
    if (stem == "bu" or stem == "şu" or stem == "o") and rootWord and self.__with == "ylA":
        return stem + "nunla"
    if self.__with == "yA":
        if stem == "ben":
            return "bana"
    self.__formationToCheck = stem
    # Step 1: adjust the end of the stem; exactly one of the branches below applies.
    if rootWord and self.__withFirstChar() == "y" and root.vowelEChangesToIDuringYSuffixation() \
            and self.__with[1] != "H":
        # Replace the last character of the stem with "i".
        formation = stem[:len(stem) - 1] + "i"
        self.__formationToCheck = formation
    else:
        if rootWord and (self.__with == "Hl" or self.__with == "Hn") and root.lastIdropsDuringPassiveSuffixation():
            # Drop the second-to-last character of the stem; vowel checks still use the full stem.
            formation = stem[:len(stem) - 2] + stem[len(stem) - 1]
            self.__formationToCheck = stem
        else:
            if rootWord and root.showsSuRegularities() and self.__startWithVowelorConsonantDrops() and \
                    not self.__with.startswith("y"):
                # Insert 'y' after the stem.
                formation = stem + 'y'
                self.__formationToCheck = formation
            else:
                if rootWord and root.duplicatesDuringSuffixation() and TurkishLanguage.isConsonantDrop(
                        self.__with[0]):
                    # Double the final consonant (softened to "bb"/"dd" when the root softens).
                    if self.softenDuringSuffixation(root):
                        if self.__lastPhoneme(stem) == "p":
                            formation = stem[:len(stem) - 1] + "bb"
                        elif self.__lastPhoneme(stem) == "t":
                            formation = stem[:len(stem) - 1] + "dd"
                    else:
                        formation = stem + stem[len(stem) - 1]
                    self.__formationToCheck = formation
                else:
                    if rootWord and root.lastIdropsDuringSuffixation() and \
                            not startState.getName().startswith(
                                "VerbalRoot") and not startState.getName().startswith("ProperRoot") \
                            and self.__startWithVowelorConsonantDrops():
                        # Drop the second-to-last character; the last consonant is softened when the root softens.
                        if self.softenDuringSuffixation(root):
                            if self.__lastPhoneme(stem) == "p":
                                formation = stem[:len(stem) - 2] + 'b'
                            elif self.__lastPhoneme(stem) == "t":
                                formation = stem[:len(stem) - 2] + 'd'
                            elif self.__lastPhoneme(stem) == "ç":
                                formation = stem[:len(stem) - 2] + 'c'
                        else:
                            formation = stem[:len(stem) - 2] + stem[len(stem) - 1]
                        # Vowel checks still use the full stem.
                        self.__formationToCheck = stem
                    else:
                        # Plain softening of the final consonant: p->b, t->d, ç->c, g->ğ, k->g or k->ğ.
                        if self.__lastPhoneme(stem) == "p":
                            if self.__startWithVowelorConsonantDrops() and rootWord and \
                                    self.softenDuringSuffixation(root):
                                formation = stem[:len(stem) - 1] + 'b'
                        elif self.__lastPhoneme(stem) == "t":
                            if self.__startWithVowelorConsonantDrops() and rootWord and \
                                    self.softenDuringSuffixation(root):
                                formation = stem[:len(stem) - 1] + 'd'
                        elif self.__lastPhoneme(stem) == "ç":
                            if self.__startWithVowelorConsonantDrops() and rootWord and \
                                    self.softenDuringSuffixation(root):
                                formation = stem[:len(stem) - 1] + 'c'
                        elif self.__lastPhoneme(stem) == "g":
                            if self.__startWithVowelorConsonantDrops() and rootWord and \
                                    self.softenDuringSuffixation(root):
                                formation = stem[:len(stem) - 1] + 'ğ'
                        elif self.__lastPhoneme(stem) == "k":
                            if self.__startWithVowelorConsonantDrops() and rootWord and root.endingKChangesIntoG() \
                                    and not root.isProperNoun():
                                formation = stem[:len(stem) - 1] + 'g'
                            else:
                                # Unlike the other consonants, this case is also applied to non-root stems.
                                if self.__startWithVowelorConsonantDrops() and (not rootWord or (
                                        self.softenDuringSuffixation(root) and (
                                        not root.isProperNoun() or startState.__str__() != "ProperRoot"))):
                                    formation = stem[:len(stem) - 1] + 'ğ'
                        self.__formationToCheck = formation
    # Step 2: choose the index of the first template character to resolve. Roots of a numeric kind whose name ends
    # with one of the listed digit sequences are handled first.
    if TurkishLanguage.isConsonantDrop(self.__withFirstChar()) and not TurkishLanguage.isVowel(stem[len(stem) - 1])\
            and (root.isNumeral() or root.isReal() or root.isFraction() or root.isTime() or root.isDate()
                 or root.isPercent() or root.isRange()) \
            and (root.getName().endswith("1") or root.getName().endswith("3") or root.getName().endswith("4")
                 or root.getName().endswith("5") or root.getName().endswith("8") or root.getName().endswith("9")
                 or root.getName().endswith("10") or root.getName().endswith("30")
                 or root.getName().endswith("40") or root.getName().endswith("60")
                 or root.getName().endswith("70") or root.getName().endswith("80")
                 or root.getName().endswith("90") or root.getName().endswith("00")):
        if self.__with[0] == "'":
            # Keep the apostrophe and skip both it and the character after it.
            formation = formation + "'"
            i = 2
        else:
            # Skip the first template character.
            i = 1
    else:
        if (TurkishLanguage.isConsonantDrop(self.__withFirstChar()) and TurkishLanguage.isConsonant(
                self.__lastPhoneme(stem))) or (rootWord and root.consonantSMayInsertedDuringPossesiveSuffixation()):
            if self.__with[0] == "'":
                formation = formation + "'"
                # For abbreviations only the apostrophe is skipped; otherwise the following character too.
                if root.isAbbreviation():
                    i = 1
                else:
                    i = 2
            else:
                i = 1
    # Step 3: resolve the remaining template characters one by one.
    while i < len(self.__with):
        if self.__with[i] == "D":
            formation = self.__resolveD(root, formation)
        elif self.__with[i] == "A":
            formation = self.__resolveA(root, formation, rootWord)
        elif self.__with[i] == "H":
            if self.__with[0] != "'":
                # The third argument tells whether this 'H' is the first character of the suffix.
                formation = self.__resolveH(root, formation, i == 0, self.__with.startswith("Hyor"), rootWord)
            else:
                # With a leading apostrophe the suffix itself starts at index 1.
                formation = self.__resolveH(root, formation, i == 1, False, rootWord)
        elif self.__with[i] == "C":
            formation = self.__resolveC(formation)
        elif self.__with[i] == "S":
            formation = self.__resolveS(formation)
        elif self.__with[i] == "Ş":
            formation = self.__resolveSh(formation)
        else:
            # Literal character; a template-final "s" is written as "ş".
            if i == len(self.__with) - 1 and self.__with[i] == "s":
                formation += "ş"
            else:
                formation += self.__with[i]
        # Later resolve calls inspect the vowels of the form built so far.
        self.__formationToCheck = formation
        i = i + 1
    return formation