def _cut_text(self, buff):
    """Segment ``buff`` into words and return the filtered result.

    The input is normalised with ``helper.unicodefy`` followed by
    ``helper.unicode2utf8``, handed to ``self._seg.cut``, and the
    segmenter's output is passed through ``self._filter_words``.

    This method itself performs no de-duplication: whatever
    ``self._filter_words`` returns is returned unchanged.
    """
    as_unicode = helper.unicodefy(buff)
    encoded = helper.unicode2utf8(as_unicode)
    segments = self._seg.cut(encoded)
    return self._filter_words(segments)
def _generate_word_key(self, word):
    """Return the key for ``word`` built from ``constants.WORD_FREQ_KEY``.

    ``word`` is converted with ``helper.unicode2utf8`` and substituted
    into the ``constants.WORD_FREQ_KEY`` template via ``%`` formatting.
    """
    key_template = constants.WORD_FREQ_KEY
    encoded_word = helper.unicode2utf8(word)
    return key_template % encoded_word