def test_levenshtein():
    """Check distance.levenshtein against known edit-cost cases.

    NOTE(review): a second ``test_levenshtein`` defined below shadows this
    one, so this copy never runs under a test collector — the two should be
    merged or this one removed. Kept in sync with the later copy meanwhile.
    """
    # Replaced nose's assert_equals with plain asserts: nose is unmaintained
    # and assert_equals is not visibly imported in this file.
    got = distance.levenshtein('mami', 'pami', 1, 1, 1)
    assert got == 1
    # Same pair, but with the third cost parameter 0 the distance is 0.
    got = distance.levenshtein('mami', 'pami', 1, 1, 0)
    assert got == 0
    got = distance.levenshtein('unko', 'unk', 1, 3, 1)
    assert got == 3
    got = distance.levenshtein('chiko', 'chinko', 3, 1, 1)
    assert got == 3
def test_levenshtein():
    """Exercise distance.levenshtein over a table of cost settings.

    Each case is ((string_a, string_b, cost1, cost2, cost3), expected);
    the meaning of the three cost parameters is defined by the distance
    module (presumably insert/delete/substitute — confirm there).
    """
    cases = [
        (('mami', 'pami', 1, 1, 1), 1),
        (('mami', 'pami', 1, 1, 0), 0),
        (('unko', 'unk', 1, 3, 1), 3),
        (('chiko', 'chinko', 3, 1, 1), 3),
    ]
    for args, expected in cases:
        assert distance.levenshtein(*args) == expected
def del_duplicate_word(self, all_words):
    """Merge entries of ``all_words`` whose normalized forms are identical.

    For every pair of same-length keys whose normalized spellings have a
    Levenshtein distance of 0, the entry with the smaller ``count`` is
    folded into the larger one and then removed via ``self.del_word``.

    Returns the (mutated) ``all_words`` mapping.

    Fixes over the previous version:
    - iterate over a snapshot of the keys: ``del_word`` mutates
      ``all_words``, and deleting while iterating the live keys view
      raises RuntimeError;
    - index the dict with the ORIGINAL keys: the normalized form is not
      guaranteed to be a key (the old code raised KeyError whenever
      ``normalize_word`` changed the spelling);
    - skip pairs where either key was already removed by an earlier merge.
    """
    for word_a, word_b in combinations(list(all_words.keys()), 2):
        # An earlier merge in this loop may have deleted either word.
        if word_a not in all_words or word_b not in all_words:
            continue
        if len(word_a) != len(word_b):
            continue
        norm_a = normalize.normalize_word(word_a)
        norm_b = normalize.normalize_word(word_b)
        if levenshtein(norm_a, norm_b) != 0:
            continue
        # Keep the more frequent spelling; fold the other one into it.
        if all_words[word_a].count > all_words[word_b].count:
            big, small = word_a, word_b
        else:
            big, small = word_b, word_a
        all_words[big].count += all_words[small].count
        self.del_word(small, all_words)
    return all_words
def levenshtein_synonym_unify(self, pair):
    """Similarity ratio of two texts after synonym unification.

    Both elements of ``pair`` are passed through ``self.synonym.unify``;
    the result is ``1 - levenshtein / combined_length``, or 0 when both
    unified texts are empty.
    """
    unified = [self.synonym.unify(text) for text in pair]
    combined_length = sum(len(text) for text in unified)
    if combined_length == 0:
        return 0
    return 1 - (levenshtein(*unified) / combined_length)
def levenshtein_per_char_yomi(pair):
    """Similarity ratio of two texts compared by their yomi (readings).

    Each text is converted with ``mecab.to_yomi``; the result is
    ``1 - levenshtein / combined_reading_length``, or 0 when both
    readings are empty.
    """
    readings = [mecab.to_yomi(text) for text in pair]
    denom = sum(len(reading) for reading in readings)
    if denom == 0:
        return 0
    return 1 - (levenshtein(*readings) / denom)
def levenshtein_per_word(pair):
    """Similarity ratio of two texts compared at word granularity.

    Each text is reduced to the words extracted by ``mecab.extract_word``;
    the result is ``1 - levenshtein / combined_length``, or 0 when both
    extractions are empty.
    """
    word_seqs = [mecab.extract_word(text) for text in pair]
    denom = sum(len(seq) for seq in word_seqs)
    if denom == 0:
        return 0
    return 1 - (levenshtein(*word_seqs) / denom)
def levenshtein_per_char(pair):
    """Character-level similarity ratio of two texts.

    Returns ``1 - levenshtein / combined_length``; 0 when both texts
    are empty.
    """
    denom = sum(len(text) for text in pair)
    if denom == 0:
        return 0
    return 1 - (levenshtein(*pair) / denom)