def debug(self):
    """Report which feature keys appear only after emoticon removal.

    Extracts the feature vocabulary twice -- before and after passing
    ``self._train_xs`` to ``ST.remove_emoticon`` -- logs both vocabulary
    sizes, and prints every key that is present in the second vocabulary
    but absent from the first.

    NOTE(review): the return value of ``ST.remove_emoticon`` is ignored, so
    it presumably edits ``self._train_xs`` in place; if so, this debug
    helper leaves the training samples modified -- confirm.
    """
    # Vocabulary while the samples are still untouched.
    keys_with_icon = set(self.batch_extract_feature().keys())

    ST.remove_emoticon(self._train_xs)

    # Vocabulary after the emoticon pass.
    keys_without_icon = set(self.batch_extract_feature().keys())

    linfo('uni_bi_icon_feature_cnt: %s. no_icon: %s'
          % (len(keys_with_icon), len(keys_without_icon)))

    # Keys that only show up once emoticons are gone.
    for key in keys_without_icon.difference(keys_with_icon):
        print(key)
def train_discret_model(self, **config):
    """Prepare the helper discrete model from the current training samples.

    Keyword arguments (both required; a missing key raises ``KeyError``):
        emoticon: when falsy, ``ST.remove_emoticon`` is applied to
            ``self._train_xs``.
        parenthesis: when falsy, ``ST.remove_parenthesis`` is applied to
            ``self._train_xs``.

    Afterwards ``self.txt2bags`` is reset to an empty dict and ``self.w2id``
    is set to the result of ``self.batch_extract_feature()``.
    """
    linfo('begin train helper discret model: %s' % config)

    keep_emoticon = config['emoticon']
    if not keep_emoticon:
        ST.remove_emoticon(self._train_xs)

    keep_parenthesis = config['parenthesis']
    if not keep_parenthesis:
        ST.remove_parenthesis(self._train_xs)

    # Reset txt2bags before the feature extraction pass below.
    self.txt2bags = dict()
    self.w2id = self.batch_extract_feature()

    linfo('end train helper discret model')
def train(self):
    """Load the training data, fit ``self.clf`` and run ``real_test``.

    Steps, in order:
      1. Load samples and labels from ``self._path`` via ``ST.load_data``
         and store them on ``self._train_xs`` / ``self._train_ys``.
      2. If ``self._emoticon`` is falsy, pass the samples through
         ``ST.remove_emoticon``.
      3. Build ``self.gram2gid`` with ``self._discretize_gram2gid()``.
      4. Convert the samples with ``self.build_sparse_X`` and fit
         ``self.clf`` on the result and the labels.
      5. Call ``self.real_test()``.
    """
    samples, labels = ST.load_data(self._path)
    self._train_xs, self._train_ys = samples, labels

    if not self._emoticon:
        ST.remove_emoticon(samples)

    self.gram2gid = self._discretize_gram2gid()

    # Attributes are re-read from self here rather than reusing the locals,
    # matching the original code in case earlier calls rebound them.
    feature_matrix = self.build_sparse_X(self._train_xs)
    self.clf.fit(feature_matrix, self._train_ys)

    self.real_test()
def train(self, icon=True, cross=False):
    """Load and preprocess the training data, then delegate to ``_train``.

    Args:
        icon: when falsy, the samples are passed through
            ``ST.remove_emoticon`` before training.
        cross: forwarded to ``self._train`` as ``cross_validation``.

    The samples and labels loaded from ``self._train_path`` are stored on
    ``self._train_xs`` / ``self._train_ys``. ``ST.replace_url`` is always
    applied to the samples with ``fill=True``.
    """
    samples, labels = ST.load_data(self._train_path)
    self._train_xs, self._train_ys = samples, labels

    ST.replace_url(samples, fill=True)

    if not icon:
        ST.remove_emoticon(samples)

    self._train(cross_validation=cross)
def train(self, cross_validation=False, fold_sz=10, test_path='../../test_data/tri_test_data'):
    """Train the classifier, either cross-validated or against a test file.

    Args:
        cross_validation: when truthy, run ``self._cross_train(fold_sz)``
            and stop; no held-out evaluation is performed.
        fold_sz: fold size handed to ``self._cross_train``.
        test_path: location of the held-out data, loaded with
            ``ST.load_data`` when ``cross_validation`` is falsy.

    In both modes the training data is loaded from ``self._path``, passed
    through ``ST.remove_emoticon`` when ``self._emoticon`` is falsy, and
    ``self.gram2gid`` is rebuilt with ``self._discretize_gram2gid()``.

    NOTE(review): the evaluation section is treated as belonging to the
    non-cross-validation branch because ``classifier`` is only bound
    there -- confirm against the original layout.
    """
    self._train_xs, self._train_ys = ST.load_data(self._path)
    if not self._emoticon:
        ST.remove_emoticon(self._train_xs)
    self.gram2gid = self._discretize_gram2gid()

    if cross_validation:
        linfo('begin to cross train')
        self._cross_train(fold_sz)
        return

    classifier = self._train(self._train_xs, self._train_ys)

    # Load the held-out samples and apply the URL / target replacements.
    self._test_xs, self._test_ys = ST.load_data(test_path)
    ST.replace_url(self._test_xs, fill='H')
    ST.replace_target(self._test_xs, fill='T')

    # Pair each encoded test sample with its tag.
    encoded_pairs = []
    for text, label in zip(self._test_xs, self._test_ys):
        encoded_pairs.append((self._feature_encoding(text), label))

    # The value comes from classify.accuracy, although the log label below
    # says "precision".
    score = classify.accuracy(classifier, encoded_pairs)
    linfo('maxent classifier precision: %.4f' % score)