def real_test(self):
    """Score the model on the data set stored at ``self.test_path``.

    The loaded texts and labels are kept on ``self._test_xs`` and
    ``self._test_ys``. The texts are passed through ``ST.replace_url``
    (fill ``'H'``) and ``ST.replace_target`` (fill ``'T'``) -- presumably
    in-place substitutions, since their return values are not used --
    then converted with ``self.build_sparse_X`` and handed to
    ``self.accuracy`` together with the labels.
    """
    self._test_xs, self._test_ys = ST.load_data(self.test_path)
    texts, labels = self._test_xs, self._test_ys

    # Same normalisation placeholders for URLs and targets on every run.
    ST.replace_url(texts, fill='H')
    ST.replace_target(texts, fill='T')

    features = self.build_sparse_X(texts)
    self.accuracy(features, labels)
def train(self, icon=True, cross=False):
    """Load the training data from ``self._train_path`` and fit the model.

    Args:
        icon: When false, ``ST.remove_emoticon`` is applied to the
            training texts before fitting.
        cross: Forwarded to ``self._train`` as ``cross_validation``.
    """
    corpus = ST.load_data(self._train_path)
    self._train_xs, self._train_ys = corpus
    ST.replace_url(self._train_xs, fill=True)

    strip_emoticons = not icon
    if strip_emoticons:
        ST.remove_emoticon(self._train_xs)

    self._train(cross_validation=cross)
def train(self, icon=True, cross=False):
    """Load the training data from ``self._train_path`` and fit the model.

    Args:
        icon: When false, ``ST.remove_emoticon`` is applied to the
            training texts before fitting.
        cross: Forwarded to ``self._train`` as ``cross_validation``.
    """
    corpus = ST.load_data(self._train_path)
    self._train_xs, self._train_ys = corpus
    ST.replace_url(self._train_xs, fill=True)

    strip_emoticons = not icon
    if strip_emoticons:
        ST.remove_emoticon(self._train_xs)

    self._train(cross_validation=cross)
def test():
    """Normalise the objective-statistics texts and append them to a file.

    Reads ``../train_data/stat_obj_train_data`` line by line, parses each
    line as a JSON object and keeps the value of its first entry as the
    text. After logging the number of texts, URLs and targets are
    substituted through ``ST.replace_url`` (fill ``'H'``) and
    ``ST.replace_target`` (fill ``'T'``). Each text is then emitted as a
    JSON line ``{"O": <text>}`` to ``../train_data/Dg_obj_stats`` through
    the ``write`` helper, called with mode ``'a'``.

    Raises:
        IndexError: if a line holds an empty JSON object.
        ValueError: if a line is not valid JSON.
    """
    obj_stats_path = '../train_data/stat_obj_train_data'
    out_path = '../train_data/Dg_obj_stats'

    txts = []
    with open(obj_stats_path, 'r') as f:
        for line in f:
            record = json.loads(line.strip())
            # dict.items()/values() are views on Python 3 and cannot be
            # subscripted; going through list() works on both Python 2
            # and 3 and still raises IndexError for an empty object.
            txts.append(list(record.values())[0])

    linfo('obj stats count: %s' % (len(txts)))
    ST.replace_url(txts, fill='H')
    ST.replace_target(txts, fill='T')

    for x in txts:
        write(out_path, 'a', '%s\n' % json.dumps({'O': x}))
def test():
    """Normalise the objective-statistics texts and append them to a file.

    Reads ``../train_data/stat_obj_train_data`` line by line, parses each
    line as a JSON object and keeps the value of its first entry as the
    text. After logging the number of texts, URLs and targets are
    substituted through ``ST.replace_url`` (fill ``'H'``) and
    ``ST.replace_target`` (fill ``'T'``). Each text is then emitted as a
    JSON line ``{"O": <text>}`` to ``../train_data/Dg_obj_stats`` through
    the ``write`` helper, called with mode ``'a'``.

    Raises:
        IndexError: if a line holds an empty JSON object.
        ValueError: if a line is not valid JSON.
    """
    obj_stats_path = '../train_data/stat_obj_train_data'
    out_path = '../train_data/Dg_obj_stats'

    txts = []
    with open(obj_stats_path, 'r') as f:
        for line in f:
            record = json.loads(line.strip())
            # dict.items()/values() are views on Python 3 and cannot be
            # subscripted; going through list() works on both Python 2
            # and 3 and still raises IndexError for an empty object.
            txts.append(list(record.values())[0])

    linfo('obj stats count: %s' % (len(txts)))
    ST.replace_url(txts, fill='H')
    ST.replace_target(txts, fill='T')

    for x in txts:
        write(out_path, 'a', '%s\n' % json.dumps({'O': x}))
def train(self, cross_validation=False, fold_sz=10, test_path='../../test_data/tri_test_data'):
    """Fit the classifier on the data at ``self._path``.

    The training texts and labels are stored on ``self._train_xs`` and
    ``self._train_ys``; emoticons are removed first when
    ``self._emoticon`` is false, and ``self.gram2gid`` is rebuilt from
    ``self._discretize_gram2gid()``.

    Args:
        cross_validation: When true, only ``self._cross_train(fold_sz)``
            is run and no separate test set is evaluated.
        fold_sz: Passed to ``self._cross_train``.
        test_path: Location of the test data used when
            ``cross_validation`` is false.
    """
    self._train_xs, self._train_ys = ST.load_data(self._path)
    if not self._emoticon:
        ST.remove_emoticon(self._train_xs)
    self.gram2gid = self._discretize_gram2gid()

    if cross_validation:
        linfo('begin to cross train')
        self._cross_train(fold_sz)
        return

    model = self._train(self._train_xs, self._train_ys)

    # Held-out evaluation: load, normalise URLs/targets, then encode.
    self._test_xs, self._test_ys = ST.load_data(test_path)
    ST.replace_url(self._test_xs, fill='H')
    ST.replace_target(self._test_xs, fill='T')

    encoded = []
    for txt, tag in zip(self._test_xs, self._test_ys):
        encoded.append((self._feature_encoding(txt), tag))

    score = classify.accuracy(model, encoded)
    linfo('maxent classifier precision: %.4f' % score)