Exemple #1
0
 def decrease_duplicate_count(self, all_words):
     """Remove n-grams of each word that carry the same count as that word.

     Iterates the ``(word, entry)`` pairs produced by
     ``self.sort_by_keys_length(all_words)``. For every group returned by
     ``ngram.to_ngrams(word, len(word))``, each distinct n-gram that is
     currently a key of ``all_words``, is not the word itself, and whose
     entry has a ``count`` equal to the word's ``count`` is removed through
     ``self.del_word``.

     :param all_words: mapping from word to an object exposing ``count``.
     :return: the mapping as returned by the last ``self.del_word`` call,
         or ``all_words`` unchanged when nothing was removed.
     """
     for word, entry in self.sort_by_keys_length(all_words):
         for group in ngram.to_ngrams(word, len(word)):
             for candidate in set(group):
                 # Check membership against the *current* mapping: it is
                 # rebound every time del_word() is called below.
                 if candidate not in all_words:
                     continue
                 # The word is one of its own n-grams; never drop it here.
                 if word == candidate:
                     continue
                 # N-grams whose count differs are left untouched.
                 if entry.count == all_words[candidate].count:
                     all_words = self.del_word(candidate, all_words)
     return all_words
Exemple #2
0
def test_to_ngrams():
    """Check that ``ngram.to_ngrams`` groups a 4-character word by n-gram size.

    The expected value lists the 2-grams, then the 3-grams, then the single
    4-gram of the input.
    """
    got = ngram.to_ngrams(u'おまんこ', 4)
    expected = [
        [u'おま', u'まん', u'んこ'],
        [u'おまん', u'まんこ'],
        [u'おまんこ'],
    ]
    # Compare explicitly: ``assert got, expected`` would only use the list as
    # the failure message and pass for any truthy result.
    assert got == expected