def del_duplicate_word(self, all_words):
    """Merge entries of ``all_words`` whose keys normalize to the same word.

    Every pair of equal-length keys is compared after both keys are passed
    through ``normalize.normalize_word``. When ``levenshtein`` reports a
    distance of 0 between the normalized forms, the two entries are merged:
    the entry with the larger ``count`` absorbs the other one's ``count``
    (on a tie the second key of the pair absorbs the first), and the
    absorbed key is handed to ``self.del_word``.

    Args:
        all_words: Mapping from word to an object exposing a numeric
            ``count`` attribute. It is modified in place.

    Returns:
        The same ``all_words`` object that was passed in.
    """
    # combinations() copies the keys into a tuple before yielding pairs, so
    # entries may be removed from all_words while this loop is running.
    for first, second in combinations(all_words.keys(), 2):
        if len(first) != len(second):
            continue
        # Presumably del_word removes the key from all_words (TODO confirm);
        # a word merged away earlier in this pass must not be merged again.
        if first not in all_words or second not in all_words:
            continue
        # Only the comparison uses the normalized forms. Lookups, merging and
        # deletion keep the original keys, because a normalized form is not
        # guaranteed to be a key of all_words.
        normalized_first = normalize.normalize_word(first)
        normalized_second = normalize.normalize_word(second)
        if levenshtein(normalized_first, normalized_second) != 0:
            continue
        if all_words[first].count > all_words[second].count:
            big, small = first, second
        else:
            big, small = second, first
        all_words[big].count += all_words[small].count
        self.del_word(small, all_words)
    return all_words
def test_normalize_word():
    """Check that ``normalize.normalize_word`` turns u'おマンコ' into u'おまんこ'."""
    # NOTE(review): a later definition in this file reuses the name
    # test_normalize_word; if both live in one module, only the later runs.
    expected = u'おまんこ'
    actual = normalize.normalize_word(u'おマンコ')
    assert actual == expected
def test_normalize_word():
    """Verify via ``assert_equals`` that u'おマンコ' normalizes to u'おまんこ'."""
    # The katakana part of the input is expected to come back as hiragana.
    assert_equals(normalize.normalize_word(u'おマンコ'), u'おまんこ')