def chose_the_best(array, word):
    """Narrow a list of candidate words to those closest to ``word``.

    ``word`` and every candidate are first reduced by deleting the letters
    о, е, ь, ъ and replacing й with и.  The reduced forms are then compared:

    * candidates whose reduced form equals the reduced ``word`` win outright;
    * failing that, candidates whose reduced form is at ``levenshtein``
      distance 1 are passed on to ``simplify_array``;
    * failing that, ``array`` is handed back untouched.

    ``levenshtein`` and ``simplify_array`` are defined outside this block.

    NOTE(review): a second, textually equivalent definition of this function
    appears later in SOURCE and would shadow this one - consider removing
    one of them.

    Args:
        array: iterable of candidate strings.
        word: the string the candidates are compared against.

    Returns:
        A list of exact matches, or whatever ``simplify_array`` returns for
        the distance-1 matches, or ``array`` itself when nothing matched.
    """
    def reduce_form(text):
        # Single place for the normalisation applied to both sides.
        return (text.replace(u'о', u'').replace(u'е', u'')
                .replace(u'ь', u'').replace(u'ъ', u'')
                .replace(u'й', u'и'))

    target = reduce_form(word)
    exact = []
    near = []
    for candidate in array:
        reduced = reduce_form(candidate)
        if reduced == target:
            exact.append(candidate)
        # Near matches are only used when no exact match exists, so check
        # that first and skip the distance computation once one is found.
        elif not exact and levenshtein(reduced, target) == 1:
            near.append(candidate)

    if exact:
        return exact
    if near:
        return simplify_array(word, near)
    return array
def chose_the_best(array, word):
    """Pick, from ``array``, the candidates that best match ``word``.

    Comparison is done on reduced forms: the letters о, е, ь, ъ are removed
    and й is replaced with и, in both ``word`` and each candidate.

    Selection order:

    1. candidates whose reduced form is identical to that of ``word``;
    2. otherwise, candidates whose reduced form is at ``levenshtein``
       distance 1, further filtered by ``simplify_array``;
    3. otherwise, ``array`` itself.

    ``levenshtein`` and ``simplify_array`` are defined outside this block.

    NOTE(review): SOURCE also contains an earlier, textually equivalent
    definition of this function; being later, this one is the definition
    that stays bound - consider removing the duplicate.

    Args:
        array: iterable of candidate strings.
        word: the string the candidates are compared against.

    Returns:
        A list of exact matches, or the result of ``simplify_array`` for the
        distance-1 matches, or ``array`` unchanged when nothing matched.
    """
    def reduce_form(text):
        # Shared normalisation so word and candidates cannot drift apart.
        for letter in (u'о', u'е', u'ь', u'ъ'):
            text = text.replace(letter, u'')
        return text.replace(u'й', u'и')

    reduced_word = reduce_form(word)
    same = []
    one_off = []
    for item in array:
        reduced_item = reduce_form(item)
        if reduced_item == reduced_word:
            same.append(item)
        # The distance is irrelevant once an exact match exists; test the
        # cheap condition first so levenshtein is not called needlessly.
        elif not same and levenshtein(reduced_item, reduced_word) == 1:
            one_off.append(item)

    if same:
        return same
    if one_off:
        return simplify_array(word, one_off)
    return array
def use_leven(word, up_dict, word_pos, gender=u'-'):
    """Look up lemmas of ``up_dict`` within distance 1 or 2 of ``word``.

    A lemma is considered only when the first entry stored for it
    (``up_dict[lemma][0]``) has a ``pos`` attribute equal to ``word_pos`` and
    a ``gender`` attribute equal to ``gender``.  For the comparison ѣ is
    replaced with е in both ``word`` and the lemma; the distance comes from
    ``levenshtein``, which is defined outside this block.

    NOTE(review): a later definition of ``use_leven`` in SOURCE (reading
    ``['pos']`` as a dict key and ignoring gender) would shadow this one.

    Args:
        word: the string to look up.
        up_dict: mapping from lemma to a sequence whose first item exposes
            ``pos`` and ``gender`` attributes.
        word_pos: part-of-speech value a lemma must have to be considered.
        gender: gender value a lemma must have to be considered.

    Returns:
        ``(1, lemmas)`` if any lemma is at distance 1, else ``(2, lemmas)``
        if any is at distance 2, else ``(u'no', u'no')``.  When more than one
        lemma was collected the list is replaced by the result of
        ``chose_the_best``, returned as is.
    """
    # Loop-invariant: normalise the looked-up word once, not once per lemma.
    plain_word = word.replace(u'ѣ', u'е')
    at_one = []
    at_two = []
    for lemma in up_dict:
        head = up_dict[lemma][0]
        # Only the first entry is inspected.
        # Open question from the original author: what about homonymy at
        # the lemma level?
        if word_pos != head.pos or gender != head.gender:
            continue
        distance = levenshtein(plain_word, lemma.replace(u'ѣ', u'е'))
        if distance == 1:
            at_one.append(lemma)
        # Distance-2 lemmas are only a fallback, so stop collecting them
        # as soon as a distance-1 lemma has been seen.
        elif distance == 2 and not at_one:
            at_two.append(lemma)

    if at_one:
        if len(at_one) > 1:
            at_one = chose_the_best(at_one, word)
        return 1, at_one
    if at_two:
        if len(at_two) > 1:
            at_two = chose_the_best(at_two, word)
        return 2, at_two
    return u'no', u'no'
def simplify_array(word, full_fall0):
    """Filter candidates by comparing them to ``word`` with letters stripped.

    A single-element ``full_fall0`` is returned as is.  Otherwise every
    letter in the class ``[уъыаоэяиюье]`` is removed from ``word`` and from
    each candidate, and the stripped forms are compared:

    * candidates whose stripped form equals the stripped ``word`` are
      returned if there are any;
    * otherwise candidates whose stripped form is at ``levenshtein``
      distance 1 are returned if there are any;
    * otherwise a diagnostic (the word, then each candidate) is printed and
      the string ``u'no'`` is returned.

    ``levenshtein`` is defined outside this block.

    Args:
        word: the string the candidates are compared against.
        full_fall0: list of candidate strings.

    Returns:
        A list of candidates, or the sentinel string ``u'no'`` when none of
        them qualified (this includes an empty ``full_fall0``).
    """
    if len(full_fall0) == 1:
        return full_fall0

    stripped_letters = re.compile(u'[уъыаоэяиюье]')
    target = stripped_letters.sub(u'', word)
    exact = []
    near = []
    for candidate in full_fall0:
        skeleton = stripped_letters.sub(u'', candidate)
        if skeleton == target:
            exact.append(candidate)
        # Distance-1 matches are only a fallback; check for an existing
        # exact match first so the distance is not computed for nothing.
        elif not exact and levenshtein(skeleton, target) == 1:
            near.append(candidate)

    if exact:
        return exact
    if near:
        return near

    # Single-argument print emits the same text on Python 2 and Python 3.
    print(u'is it possible??? ' + word)
    for candidate in full_fall0:
        print(candidate)
    return u'no'
def use_leven(word, up_dict, word_pos, gender=u'-'):
    """Look up lemmas of ``up_dict`` within distance 1 or 2 of ``word``.

    A lemma is considered only when the first entry stored for it
    (``up_dict[lemma][0]``) has a ``'pos'`` key equal to ``word_pos``.  For
    the comparison ѣ is replaced with е in both ``word`` and the lemma; the
    distance comes from ``levenshtein``, which is defined outside this block.

    NOTE(review): SOURCE also contains an earlier definition of ``use_leven``
    that reads ``.pos``/``.gender`` attributes and filters on gender; being
    later, this definition is the one that stays bound.

    Args:
        word: the string to look up.
        up_dict: mapping from lemma to a sequence whose first item is
            subscriptable with the key ``'pos'``.
        word_pos: part-of-speech value a lemma must have to be considered.
        gender: accepted for interface compatibility; not used by this
            implementation.

    Returns:
        ``(1, lemmas)`` if any lemma is at distance 1, else ``(2, lemmas)``
        if any is at distance 2, else ``(u'no', u'no')``.  When more than one
        lemma was collected the list is replaced by the result of
        ``chose_the_best``, returned as is.
    """
    # Loop-invariant: normalise the looked-up word once, not once per lemma.
    plain_word = word.replace(u'ѣ', u'е')
    at_one = []
    at_two = []
    for lemma in up_dict:
        # Only the first entry is inspected.
        # Open question from the original author: what about homonymy at
        # the lemma level?
        if word_pos != up_dict[lemma][0]['pos']:
            continue
        distance = levenshtein(plain_word, lemma.replace(u'ѣ', u'е'))
        if distance == 1:
            at_one.append(lemma)
        # Distance-2 lemmas are only a fallback, so stop collecting them
        # as soon as a distance-1 lemma has been seen.
        elif distance == 2 and not at_one:
            at_two.append(lemma)

    if at_one:
        if len(at_one) > 1:
            at_one = chose_the_best(at_one, word)
        return 1, at_one
    if at_two:
        if len(at_two) > 1:
            at_two = chose_the_best(at_two, word)
        return 2, at_two
    return u'no', u'no'