def real_test(self):
    """Score the model on the data set stored at ``self.test_path``.

    The loaded texts and labels are kept on ``self._test_xs`` and
    ``self._test_ys``.  The texts are passed through ``ST.replace_url``
    (fill ``'H'``) and ``ST.replace_target`` (fill ``'T'``), converted
    with ``self.build_sparse_X`` and handed, together with the labels,
    to ``self.accuracy``.  Nothing is returned.
    """
    texts, labels = ST.load_data(self.test_path)
    self._test_xs = texts
    self._test_ys = labels

    # The ST helpers' return values are not used, so they presumably
    # rewrite the list in place -- confirm against ST.
    ST.replace_url(texts, fill='H')
    ST.replace_target(texts, fill='T')

    sparse_features = self.build_sparse_X(self._test_xs)
    self.accuracy(sparse_features, self._test_ys)
def test():
    """Normalise the objective-stats texts and append them to a new file.

    Reads ``../train_data/stat_obj_train_data`` as one JSON object per
    line, keeps the value of the first key/value pair of every object,
    runs the collected texts through ``ST.replace_url`` (fill ``'H'``)
    and ``ST.replace_target`` (fill ``'T'``), then appends each text to
    ``../train_data/Dg_obj_stats`` as a ``{"O": text}`` JSON line using
    the ``write`` helper.

    Raises:
        IndexError: if a line decodes to an empty JSON object.
        ValueError: if a line is not valid JSON (raised by ``json.loads``).
    """
    obj_stats_path = '../train_data/stat_obj_train_data'
    out_path = '../train_data/Dg_obj_stats'

    txts = []
    with open(obj_stats_path, 'r') as f:
        for line in f:
            record = json.loads(line.strip())
            # dict.items() is a non-subscriptable view on Python 3, so
            # materialise it before indexing; this also works on Python 2.
            _tag, txt = list(record.items())[0]
            txts.append(txt)
    linfo('obj stats count: %s' % len(txts))

    # The helpers' return values are ignored, so they presumably modify
    # ``txts`` in place -- confirm against ST.
    ST.replace_url(txts, fill='H')
    ST.replace_target(txts, fill='T')

    for txt in txts:
        write(out_path, 'a', '%s\n' % json.dumps({'O': txt}))
def test():
    """Normalise the objective-stats texts and append them to a new file.

    Reads ``../train_data/stat_obj_train_data`` as one JSON object per
    line, keeps the value of the first key/value pair of every object,
    runs the collected texts through ``ST.replace_url`` (fill ``'H'``)
    and ``ST.replace_target`` (fill ``'T'``), then appends each text to
    ``../train_data/Dg_obj_stats`` as a ``{"O": text}`` JSON line using
    the ``write`` helper.

    Raises:
        IndexError: if a line decodes to an empty JSON object.
        ValueError: if a line is not valid JSON (raised by ``json.loads``).
    """
    obj_stats_path = '../train_data/stat_obj_train_data'
    out_path = '../train_data/Dg_obj_stats'

    txts = []
    with open(obj_stats_path, 'r') as f:
        for line in f:
            record = json.loads(line.strip())
            # dict.items() is a non-subscriptable view on Python 3, so
            # materialise it before indexing; this also works on Python 2.
            _tag, txt = list(record.items())[0]
            txts.append(txt)
    linfo('obj stats count: %s' % len(txts))

    # The helpers' return values are ignored, so they presumably modify
    # ``txts`` in place -- confirm against ST.
    ST.replace_url(txts, fill='H')
    ST.replace_target(txts, fill='T')

    for txt in txts:
        write(out_path, 'a', '%s\n' % json.dumps({'O': txt}))
def train(self, cross_validation=False, fold_sz=10, test_path='../../test_data/tri_test_data'):
    """Train on the data at ``self._path`` and optionally evaluate.

    Args:
        cross_validation: when true, delegate to ``self._cross_train``
            and skip the held-out evaluation.
        fold_sz: value forwarded to ``self._cross_train``.
        test_path: location of the evaluation data, loaded with
            ``ST.load_data`` when ``cross_validation`` is false.

    The training texts/labels are stored on ``self._train_xs`` and
    ``self._train_ys`` and ``self.gram2gid`` is rebuilt on every call.
    Without cross validation the test texts/labels are stored on
    ``self._test_xs`` / ``self._test_ys`` and the accuracy reported by
    ``classify.accuracy`` is logged through ``linfo``.  Returns nothing.
    """
    train_texts, train_tags = ST.load_data(self._path)
    self._train_xs = train_texts
    self._train_ys = train_tags
    if not self._emoticon:
        ST.remove_emoticon(self._train_xs)
    self.gram2gid = self._discretize_gram2gid()

    if cross_validation:
        linfo('begin to cross train')
        self._cross_train(fold_sz)
        return

    classifier = self._train(self._train_xs, self._train_ys)

    eval_texts, eval_tags = ST.load_data(test_path)
    self._test_xs = eval_texts
    self._test_ys = eval_tags
    ST.replace_url(self._test_xs, fill='H')
    ST.replace_target(self._test_xs, fill='T')

    # Pair every encoded test text with its gold tag for the scorer.
    test_set = []
    for txt, tag in zip(self._test_xs, self._test_ys):
        test_set.append((self._feature_encoding(txt), tag))

    score = classify.accuracy(classifier, test_set)
    linfo('maxent classifier precision: %.4f' % score)