def test_stop_words_filter(self):
    """Check that stop_words_filter drops the listed stop words.

    Covers one ASCII case and one case with Chinese tokens and punctuation.
    """
    # ASCII tokens: "dd" is a stop word, so only "uu" should remain.
    tokens = ["dd", "uu"]
    stop_words = ["dd"]
    self.assertEqual(list(stop_words_filter(tokens, stop_words)), ["uu"])

    # Space-separated Chinese tokens: everything but "我们" is a stop word.
    tokens = u"的 。 , 我们 是".split(" ")
    stop_words = [u"的", u"。", u",", u"是"]
    self.assertEqual(list(stop_words_filter(tokens, stop_words)), [u"我们"])
Beispiel #2
0
 def text_word(self, text):
     """Count how often each filtered word occurs in ``text``.

     ``text`` is segmented with ``self._segfunc`` and the result is passed
     through ``stop_words_filter`` with ``self._stopword`` as the
     ``stopwordList``.

     Returns:
         A dict mapping each word yielded by the filter to the number of
         times it was yielded.
     """
     counts = {}
     segments = self._segfunc(text)
     for token in stop_words_filter(segments, stopwordList=self._stopword):
         # A token seen for the first time starts from zero.
         counts[token] = counts.get(token, 0) + 1
     return counts
Beispiel #3
0
    def weight_learning(self, textList=None):
        """Update the word records from a collection of texts.

        For each text, the distinct words produced by
        ``stop_words_filter(self._segfunc(text), self._stopword)`` are each
        passed once to ``self.add_record``, and ``self.N`` is incremented
        once per text.

        Args:
            textList: Iterable of texts. ``None`` (the default) is treated
                as an empty collection, so calling without arguments is a
                no-op instead of raising ``TypeError``.

        Returns:
            None.
        """
        if textList is None:
            # The default must be usable: there is nothing to learn from.
            return

        for text in textList:
            # Deduplicate so each word is recorded at most once per text.
            unique_words = set(
                stop_words_filter(self._segfunc(text), self._stopword)
            )
            for word in unique_words:
                self.add_record(word)
            self.N += 1