def vector(self, text):
    """Build a weighted term vector for ``text``.

    The text is passed through ``preprocess`` and then ``segment``. Every
    resulting token that is a key of ``self.dic`` adds
    ``(1.0 / token_count) * log(self.doc_count / self.df[token])`` to the
    slot ``self.dic[token]`` of the result, so repeated tokens accumulate.

    Returns a list of ``self.n`` numbers; slots that no token maps to keep
    their initial value of 0. ``self.df`` is subscripted directly, so a
    token found in ``self.dic`` but absent from ``self.df`` raises instead
    of being skipped.
    """
    tokens = segment(preprocess(text))
    total = len(tokens)
    weights = [0] * self.n
    for token in tokens:
        if token not in self.dic:
            continue
        slot = self.dic[token]
        # The 1.0 / total factor stays inside the loop on purpose: with no
        # tokens the loop body never runs, so there is no division by zero.
        weights[slot] += (1.0 / total) * math.log(self.doc_count / self.df[token])
    return weights
def add(self, text, emotion=0):
    """Record one sample of ``text`` labelled ``emotion`` in the counters.

    Increments ``self.total_c`` and ``self.count_c[emotion]`` once for the
    sample. The text is then passed through ``preprocess`` and ``segment``,
    and for every resulting token ``self.total_w``, ``self.count_w[token]``
    and ``self.count_cw[(emotion, token)]`` are each incremented by one.
    Missing counter entries start from 0. Returns ``None``.
    """
    # Sample-level tallies are updated before the text is processed.
    self.total_c += 1
    seen_for_label = self.count_c.get(emotion, 0)
    self.count_c[emotion] = seen_for_label + 1

    tokens = segment(preprocess(text))
    for token in tokens:
        self.total_w += 1
        seen_overall = self.count_w.get(token, 0)
        self.count_w[token] = seen_overall + 1
        # Joint (label, token) count shares one key for lookup and store.
        pair = (emotion, token)
        seen_jointly = self.count_cw.get(pair, 0)
        self.count_cw[pair] = seen_jointly + 1
def train(self, text):
    """Update the document-frequency statistics with one training document.

    Increments ``self.doc_count`` by one, runs ``text`` through
    ``preprocess`` and ``segment``, and then increments ``self.df[word]``
    once for each *distinct* token of the document that is a key of
    ``self.dic``. Tokens outside ``self.dic`` are ignored. Returns ``None``.

    ``self.df`` feeds the ratio ``self.doc_count / self.df[w]`` inside
    ``vector``, i.e. it is a per-document count. Incrementing it for every
    occurrence of a repeated word would let ``df`` grow past ``doc_count``
    and make that logarithm negative.
    """
    self.doc_count += 1
    tokens = segment(preprocess(text))
    # De-duplicate so a word repeated within this document counts once.
    for word in set(tokens):
        if word in self.dic:
            self.df[word] = self.df.get(word, 0) + 1