def __addWordWhenRootSoften(self, trie: Trie, last: str, root: str, word: TxtWord) -> None:
    """
    The addWordWhenRootSoften is used to add a word to the Trie whose last consonant will be softened.
    For instance, in the case of the Dative Case Suffix, the word is 'müzik'; when '-e' is added to the word, the
    last char is dropped and the root becomes 'müzi', and by changing 'k' into 'ğ' the word is transformed into
    'müziğe' as in the example of 'Herkes müziğe doğru geldi'.

    In the case of accusative, possessive of third person and a derivative suffix, the word is 'kanat'; when '-i'
    is added to the word, the last char is dropped, the root becomes 'kana', then 't' is transformed into 'd' and
    added to the Trie. The word is changed into 'kanadı' as in the case of 'Kuşun kırık kanadı'.

    Only 'p', 'ç', 't', 'k' and 'g' have a softened counterpart here; for any other last char nothing is added.

    PARAMETERS
    ----------
    trie : Trie
        the Trie to add the word to.
    last : str
        the last char of the word, i.e. the consonant to be softened.
    root : str
        the substring of the word whose last one or two chars are omitted from the word to be softened.
    word : TxtWord
        the original word.
    """
    # Hard consonant -> softened counterpart; both 'k' and 'g' map to 'ğ'.
    softenedForms = {'p': 'b', 'ç': 'c', 't': 'd', 'k': 'ğ', 'g': 'ğ'}
    softened = softenedForms.get(last)
    if softened is not None:
        trie.addWord(root + softened, word)
def setUp(self) -> None:
    """
    Prepare the two Trie fixtures.

    simpleTrie is filled with a fixed, hand-picked list of words. complexTrie is filled with every entry of the
    TxtDictionary loaded from the dictionary and misspellings files, each entry stored under its own name.
    """
    # Insertion order is kept exactly as listed.
    simpleEntries = ("azı", "az", "ad", "adi", "adil", "a", "adilane",
                     "ısı", "ısıtıcı", "ölü", "ölüm", "ören", "örgü")
    self.simpleTrie = Trie()
    for entry in simpleEntries:
        self.simpleTrie.addWord(entry, Word(entry))

    self.complexTrie = Trie()
    dictionary = TxtDictionary("../../../turkish_dictionary.txt",
                               "../../../turkish_misspellings.txt")
    for index in range(dictionary.size()):
        entryName = dictionary.getWordWithIndex(index).getName()
        self.complexTrie.addWord(entryName, dictionary.getWordWithIndex(index))
def prepareTrie(self) -> Trie:
    """
    The prepareTrie method creates a Trie from the entries of this dictionary. Entries that are not TxtWord
    instances are skipped. Every TxtWord is added under its own name; in addition, alternative root forms are
    added according to the flags of the word, so that the word can also be reached through them:

    'ben' : also added as 'bana', since it does not fit the consonant softening rule.
    lastIdropsDuringSuffixation or lastIdropsDuringPassiveSuffixation : the name without its next-to-last char is
    added; if the root also softens, addWordWhenRootSoften is used on the name without its last two chars instead.
    Ex : metin + i = metni
    isPortmanteauEndingWithSI : the name without its last two chars is added.
    Ex : ademelması + lar = ademelmaları
    rootSoftenDuringSuffixation : addWordWhenRootSoften is used on the name without its last char.
    isPortmanteau : if the word faced vowel ellipsis, the name with its last two chars swapped is added; else if
    it faced softening, the next-to-last char is hardened ('b'->'p', 'c'->'ç', 'd'->'t', 'ğ'->'k') and the name
    without its last two chars plus that char is added; otherwise the name without its last char is added.
    Ex : mısıryağı + lar = mısıryağları
    vowelEChangesToIDuringYSuffixation or vowelAChangesToIDuringYSuffixation : if the last char is 'e' or 'a', the
    name without its last char is added.
    Ex : ye + iniz - yiyiniz
    endingKChangesIntoG : the name without its last char, followed by 'g', is added.
    Ex : ahenk + i = ahengi

    RETURNS
    -------
    Trie
        the resulting Trie.
    """
    # Softened consonant -> hard counterpart, used for portmanteau words that faced softening.
    hardenedForms = {'b': 'p', 'c': 'ç', 'd': 't', 'ğ': 'k'}
    trie = Trie()
    # Only refreshed for names longer than one char; for shorter names the value of the previous entry is kept.
    penultimate = ' '
    for index in range(self.size()):
        entry = self.getWordWithIndex(index)
        if not isinstance(entry, TxtWord):
            continue
        name = entry.getName()
        size = len(name)
        if name == "ben":
            trie.addWord("bana", entry)
        withoutLast = name[0:size - 1]
        if size > 1:
            withoutLastTwo = name[0:size - 2]
            penultimate = name[size - 2]
        else:
            withoutLastTwo = ""
        final = name[size - 1]
        trie.addWord(name, entry)
        if entry.lastIdropsDuringSuffixation() or entry.lastIdropsDuringPassiveSuffixation():
            if entry.rootSoftenDuringSuffixation():
                self.__addWordWhenRootSoften(trie, final, withoutLastTwo, entry)
            else:
                trie.addWord(withoutLastTwo + final, entry)
        if entry.isPortmanteauEndingWithSI():
            trie.addWord(withoutLastTwo, entry)
        if entry.rootSoftenDuringSuffixation():
            self.__addWordWhenRootSoften(trie, final, withoutLast, entry)
        if entry.isPortmanteau():
            if entry.isPortmanteauFacedVowelEllipsis():
                trie.addWord(withoutLastTwo + final + penultimate, entry)
            elif entry.isPortmanteauFacedSoftening():
                hardened = hardenedForms.get(penultimate)
                if hardened is not None:
                    trie.addWord(withoutLastTwo + hardened, entry)
            else:
                trie.addWord(withoutLast, entry)
        if entry.vowelEChangesToIDuringYSuffixation() or entry.vowelAChangesToIDuringYSuffixation():
            # Both 'e' and 'a' endings add the same form.
            if final in ('e', 'a'):
                trie.addWord(withoutLast, entry)
        if entry.endingKChangesIntoG():
            trie.addWord(withoutLast + 'g', entry)
    return trie
class TrieTest(unittest.TestCase):
    """
    Tests for Trie.getWordsWithPrefix on a small hand-filled Trie and on a Trie filled from the dictionary files.
    The assertions expect getWordsWithPrefix(text) to return the stored words that are prefixes of text
    (including text itself when it is stored).
    """

    simpleTrie: Trie
    complexTrie: Trie

    def setUp(self) -> None:
        """
        Fill simpleTrie with a fixed word list and complexTrie with every entry of the loaded TxtDictionary.
        """
        self.simpleTrie = Trie()
        # Insertion order is kept exactly as listed.
        for text in ("azı", "az", "ad", "adi", "adil", "a", "adilane",
                     "ısı", "ısıtıcı", "ölü", "ölüm", "ören", "örgü"):
            self.simpleTrie.addWord(text, Word(text))
        self.complexTrie = Trie()
        dictionary = TxtDictionary("../../../turkish_dictionary.txt",
                                   "../../../turkish_misspellings.txt")
        for position in range(dictionary.size()):
            key = dictionary.getWordWithIndex(position).getName()
            self.complexTrie.addWord(key, dictionary.getWordWithIndex(position))

    def _assertPrefixCases(self, trie: Trie, cases) -> None:
        """
        For each (text, expectedNames) pair, assert that trie.getWordsWithPrefix(text) equals the set of Words
        built from expectedNames. The expected set is built before the trie is queried.
        """
        for text, expectedNames in cases:
            self.assertEqual({Word(name) for name in expectedNames},
                             trie.getWordsWithPrefix(text))

    def test_GetWordsWithPrefixSimple(self):
        """
        Check the prefix lookups on the hand-filled Trie.
        """
        self._assertPrefixCases(self.simpleTrie, (
            ("a", ("a",)),
            ("ad", ("a", "ad")),
            ("adi", ("a", "ad", "adi")),
            ("adil", ("a", "ad", "adi", "adil")),
            ("adilane", ("a", "ad", "adi", "adilane", "adil")),
            ("ölü", ("ölü",)),
            ("ölüm", ("ölü", "ölüm")),
            ("ısı", ("ısı",)),
            ("ısıtıcı", ("ısıtıcı", "ısı")),
        ))

    def test_GetWordsWithPrefixComplex(self):
        """
        Check the prefix lookups on the Trie filled from the dictionary; it holds extra short words such as
        'öl', 'ı' and 'ısıt' that the hand-filled Trie does not.
        """
        self._assertPrefixCases(self.complexTrie, (
            ("a", ("a",)),
            ("ad", ("a", "ad")),
            ("adi", ("a", "ad", "adi")),
            ("adil", ("a", "ad", "adi", "adil")),
            ("adilane", ("a", "ad", "adi", "adilane", "adil")),
            ("ölü", ("ölü", "öl")),
            ("ölüm", ("ölü", "öl", "ölüm")),
            ("ısı", ("ı", "ısı")),
            ("ısıtıcı", ("ı", "ısıtıcı", "ısıt", "ısı")),
        ))