def test_stop_words_filter(self):
    """Check that stop_words_filter yields only the non-stop-word tokens."""
    # ASCII case: the single token missing from the stop list must survive.
    tokens = ["dd", "uu"]
    stop_words = ["dd"]
    self.assertEqual(list(stop_words_filter(tokens, stop_words)), ["uu"])

    # Unicode case: Chinese function words and punctuation are listed as
    # stop words, so only the remaining token is expected back.
    tokens = u"的 。 , 我们 是".split(" ")
    stop_words = [u"的", u"。", u",", u"是"]
    remaining = list(stop_words_filter(tokens, stop_words))
    self.assertEqual(remaining, [u"我们"])
def text_word(self, text):
    """Count how often each non-stop-word token occurs in ``text``.

    The text is segmented with ``self._segfunc`` and the resulting tokens
    are passed through ``stop_words_filter`` using ``self._stopword`` as the
    stop-word list.

    Returns:
        A dict mapping each remaining token to its occurrence count.
    """
    counts = {}
    filtered = stop_words_filter(
        self._segfunc(text), stopwordList=self._stopword
    )
    for token in filtered:
        # First sighting starts from 0, so every occurrence adds exactly 1.
        counts[token] = counts.get(token, 0) + 1
    return counts
def weight_learning(self, textList=None):
    """Update the per-word records and the text counter from ``textList``.

    Each text is segmented with ``self._segfunc`` and filtered through
    ``stop_words_filter`` using ``self._stopword``. Every distinct remaining
    word is passed once to ``self.add_record``, and ``self.N`` is incremented
    once per text.

    Args:
        textList: Iterable of texts to learn from. ``None`` (the default)
            is treated as "nothing to learn" and leaves the state untouched.

    Returns:
        None.
    """
    # The default used to be iterated directly, which raised TypeError.
    if textList is None:
        return

    for text in textList:
        tokens = stop_words_filter(self._segfunc(text), self._stopword)
        # Deduplicate so a word is recorded at most once per text.
        for word in set(tokens):
            self.add_record(word)
        self.N += 1