예제 #1
0
    def vector(self, text):
        """Return the TF-IDF weight vector for ``text``.

        The text is run through ``preprocess`` and ``segment``. Every
        resulting token that is a key of ``self.dic`` adds
        ``(1 / token_count) * log(doc_count / df[token])`` to slot
        ``self.dic[token]`` of the result.

        Args:
            text: Raw input handed to ``preprocess``.

        Returns:
            A list of length ``self.n``. Slots for tokens that do not occur
            in ``text``, or that have no recorded document frequency in
            ``self.df``, are left at ``0``.
        """
        seg = segment(preprocess(text))
        word_count = len(seg)
        x = [0] * self.n
        if not word_count:
            # Nothing to weigh; also keeps the hoisted division below safe.
            return x

        # Term-frequency contribution of a single occurrence (loop-invariant).
        tf_unit = 1.0 / word_count
        # float() forces true division even where ``/`` on two ints would
        # truncate (Python 2), which would collapse the ratio to 0 or 1.
        doc_count = float(self.doc_count)

        for w in seg:
            if w not in self.dic:
                continue
            df = self.df.get(w)
            if not df:
                # A dictionary word with no recorded document frequency has
                # no defined IDF; skip it rather than raise KeyError or
                # divide by zero.
                continue
            x[self.dic[w]] += tf_unit * math.log(doc_count / df)
        return x
예제 #2
0
 def add(self, text, emotion=0):
     """Record one labelled sample in the running counters.

     Updates, in this order: the sample total (``total_c``), the per-label
     count (``count_c``), and then for each token of the preprocessed and
     segmented text the token total (``total_w``), the per-token count
     (``count_w``) and the per-(label, token) count (``count_cw``).

     Args:
         text: Raw sample text handed to ``preprocess``.
         emotion: Label of the sample; defaults to ``0``.
     """
     def bump(table, key):
         # Increment ``table[key]``, treating a missing key as zero.
         table[key] = table.get(key, 0) + 1

     # Label-level tallies are updated before the text is processed.
     self.total_c += 1
     bump(self.count_c, emotion)

     cleaned = preprocess(text)
     for token in segment(cleaned):
         self.total_w += 1
         bump(self.count_w, token)
         bump(self.count_cw, (emotion, token))
예제 #3
0
    def train(self, text):
        """Fold one document into the corpus statistics.

        Increments ``self.doc_count`` by one and, for every distinct token
        of the preprocessed and segmented text that is a key of
        ``self.dic``, increments that token's entry in ``self.df`` by one.

        Args:
            text: Raw document text handed to ``preprocess``.
        """
        self.doc_count += 1
        # Document frequency counts documents, not occurrences. Dedupe the
        # tokens so a word repeated inside this text is counted once;
        # otherwise df can exceed doc_count and any IDF of the form
        # log(doc_count / df) turns negative.
        for w in set(segment(preprocess(text))):
            if w in self.dic:
                self.df[w] = self.df.get(w, 0) + 1