def find_suffix(self, word):
    """Expand every stem candidate of *word* with its matching suffix analyses.

    Args:
        word: The word to analyse. Anything that is not a ``str`` yields an
            empty result.

    Returns:
        A list of analyses. Candidates returned by ``self.arr_word_list``
        that have a single element are passed through unchanged; for the
        others, the last element (the pending suffix) is replaced by each
        analysis returned by ``self.find_from_list``.
    """
    if not isinstance(word, str):
        return []

    word_list = self.arr_word_list(to_lower(word))
    if not word_list:
        return []

    genel = []
    for mstem in word_list:
        # A candidate with a single element has no suffix part to resolve.
        if len(mstem) <= 1:
            genel.append(mstem)
            continue

        suffix = mstem[-1]
        if not suffix:
            # Keep the bare candidate. There is deliberately no ``continue``
            # here: the lookup below still runs with the empty suffix.
            genel.append(mstem)

        _stem = mstem[0]
        # Strip the decoration matched by ``dstem_temizleyici`` to get the
        # plain stem text, and read the tag between the first '(' and ')'.
        stem = dstem_temizleyici.sub('', _stem)
        stem_type = _stem[_stem.find('(') + 1:_stem.find(')')]

        fs = self.find_from_list(stem[-1], suffix, stem_type)
        if fs:
            genel.extend(mstem[:-1] + x for x in fs)
    return genel
def __init__(self, word):
    """Analyse *word* and store the outcome on the instance.

    The word is lower-cased with ``to_lower`` and normalised with
    ``replace_cap_letter`` before analysis.

    Attributes set:
        word: The normalised word.
        stem_list: Stem candidates returned by ``find_stem``.
        result: Unique analyses, each joined with ``'+'``; empty when
            nothing was found.
        stems: Unique first elements of the analyses; empty when nothing
            was found.

    Args:
        word: The word to analyse.
    """
    word = to_lower(word)
    word = replace_cap_letter(word)

    self.word = word
    # Always initialise every attribute so callers can read them even when
    # the word is rejected or no analysis is found.
    self.result = []
    self.stem_list = []
    self.stems = []

    # ``isalpha`` must be *called*; comparing the bound method itself to
    # ``False`` is never true, which silently disabled this guard.
    if not word or not word.isalpha():
        return

    self.stem_list = self.find_stem()
    self.result = self.find_suffix(word)
    if self.result:
        self.stems = list({x[0] for x in self.result})
        self.result = list({'+'.join(z) for z in self.result})
def find_stem(self):
    """Collect candidate stems of ``self.word`` from the dictionary ``sozluk``.

    Returns:
        A list of unique 3-tuples sorted by their last item. Each tuple is
        ``(stem_text, tag_or_pair, distance)``, where ``distance`` is ``0``
        for exact/special matches and ``lddistance(word, stem)`` for prefix
        matches. Returns an empty list when ``spellword`` reports failure or
        when the softened-consonant retry finds no dictionary entry.
    """
    word = self.word
    temp_stem_list = []

    syllables = spellword(word)
    if syllables is False:
        return []
    first_syllable = syllables[0]

    search_in_dict = [
        entry for entry in sozluk
        if entry[0][:len(first_syllable)] == first_syllable
    ]
    if not search_in_dict and first_syllable[-1] in 'bcdgğ':
        # Retry with the final consonant of the first syllable replaced by
        # its counterpart from this table.
        yumusama_harfleri = {'b': 'p', 'c': 'ç', 'd': 't', 'g': 'k', 'ğ': 'k'}
        first_syllable = (first_syllable[:-1]
                          + yumusama_harfleri[first_syllable[-1]])
        search_in_dict = [
            entry for entry in sozluk
            if entry[0][:len(first_syllable)] == first_syllable
        ]
        if not search_in_dict:
            return []
        word = first_syllable + word[len(first_syllable):]

    # Words starting with "di"/"yi" also get the stems "de"/"ye" tagged 'fiil'.
    if word.startswith('di'):
        temp_stem_list.append(('di', ('de', 'fiil'), 0))
    elif word.startswith('yi'):
        temp_stem_list.append(('yi', ('ye', 'fiil'), 0))

    # "-yor" handling: the vowel right before "yor" is kept in the surface
    # stem (``mstem``) and swapped for 'a'/'e' in the lookup form
    # (``_mstem``). The original special-cased a match at index 0 by
    # searching again with '[ıiuü]*yor'; that second search always matches
    # at index 0 with the same vowel, so one code path covers both cases.
    mstem = ''
    _mstem = ''
    yor_bul = re.search('[ıiuü]yor', word)
    if yor_bul:
        start = yor_bul.start()
        vowel = yor_bul.group()[0]
        if vowel in ('ı', 'u'):
            _mstem = word[:start] + 'a'
        else:  # the pattern guarantees 'i' or 'ü' here
            _mstem = word[:start] + 'e'
        mstem = word[:start + 1]

    # Tags emitted when an 'AKR' entry is followed by one of these endings.
    larler_dict = {
        'mler': '+m(1. Tekil Kişi İyelik Eki){içe-2}',
        'mlar': '+m(1. Tekil Kişi İyelik Eki){içe-2}',
        'nler': '+n(2. Tekil Kişi İyelik Eki){içe-2}',
        'nlar': '+n(2. Tekil Kişi İyelik Eki){içe-2}',
    }
    larler_liste = ('mler', 'nler', 'mlar', 'nlar')

    for stem in search_in_dict:
        if stem[2] == 'AKR' and word.startswith(stem[0]):
            sonrasi = word[len(stem[0]):]
            for larlerek in larler_liste:
                if sonrasi.startswith(larlerek):
                    # Skip the single m/n letter already covered by the tag.
                    suff = word[len(stem[0]) + 1:]
                    temp_stem_list.append(
                        (stem[0] + '(isim)' + larler_dict[larlerek], suff, 0))

        tlstem = to_lower(stem[0])
        rpstem = replace_cap_letter(tlstem)

        # Entries whose type is in ``not_get_suffix`` only count on an
        # exact match with the whole word.
        if stem[1] in not_get_suffix:
            if word == rpstem:
                temp_stem_list.append((stem[0], stem[1], 0))
            continue

        if mstem and stem[1] == 'fiil' and _mstem == rpstem:
            temp_stem_list.append((mstem, (_mstem, stem[1]), 0))

        if word == rpstem:
            temp_stem_list.append((stem[0], stem[1], 0))
        elif word.startswith(rpstem):
            temp_stem_list.append((tlstem, stem[1], lddistance(word, rpstem)))

        # An alternative stem form may be given in parentheses in stem[2].
        # Raw string: '\(' and '\w' are invalid escapes in a plain literal.
        letter_harmony = re.findall(r'\((\w+)\)', stem[2])
        if letter_harmony:
            rp_letter_harmony = replace_cap_letter(letter_harmony[0])
            if word.startswith(rp_letter_harmony):
                levenshtein_dist = lddistance(word, rp_letter_harmony)
                temp_stem_list.append(
                    (letter_harmony[0], (stem[0], stem[1]), levenshtein_dist))

    return sorted(unrepeated_list(temp_stem_list), key=itemgetter(2))