예제 #1
0
파일: wordcount.py 프로젝트: 5470x3/atango
 def del_duplicate_word(self, all_words):
     """Merge entries of ``all_words`` whose normalized spellings coincide.

     Every pair of equal-length keys is compared after passing both through
     ``normalize.normalize_word``. When the normalized forms have a
     Levenshtein distance of 0, the ``count`` of the less frequent entry is
     added to the more frequent one (ties keep the second key of the pair)
     and the less frequent key is handed to ``self.del_word``.

     Args:
         all_words: Mapping from word to an object with a mutable ``count``
             attribute. It is modified in place.

     Returns:
         The same ``all_words`` mapping that was passed in.
     """
     # Snapshot the keys so the pairs are fixed before any entry is removed.
     for first, second in combinations(list(all_words), 2):
         if len(first) != len(second):
             continue
         # An earlier merge may already have dropped one of the two words
         # (presumably ``del_word`` removes the key -- confirm against its
         # definition); such stale pairs must not be indexed again.
         if first not in all_words or second not in all_words:
             continue
         norm_first = normalize.normalize_word(first)
         norm_second = normalize.normalize_word(second)
         if levenshtein(norm_first, norm_second) != 0:
             continue
         # Index with the original keys: the normalized spellings are only
         # used for comparison and need not be keys of ``all_words``.
         if all_words[first].count > all_words[second].count:
             big, small = first, second
         else:
             big, small = second, first
         all_words[big].count += all_words[small].count
         self.del_word(small, all_words)
     return all_words
예제 #2
0
def test_normalize_word():
    """Check normalize_word on a sample mixing hiragana and katakana."""
    expected = u'おまんこ'
    actual = normalize.normalize_word(u'おマンコ')
    assert actual == expected
예제 #3
0
def test_normalize_word():
    """The katakana part of the sample word must come back as hiragana."""
    sample = u'おマンコ'
    normalized = normalize.normalize_word(sample)
    assert normalized == u'おまんこ'
예제 #4
0
def test_normalize_word():
    """Compare normalize_word's output for the sample with the expected text."""
    # The actual value stays the first argument and the expected value the
    # second, so failure messages read the same as before.
    assert_equals(normalize.normalize_word(u'おマンコ'), u'おまんこ')