Ejemplo n.º 1
0
 def real_test(self):
     """Evaluate the model on the data set found at ``self.test_path``.

     The texts and labels returned by ``ST.load_data`` are stored on the
     instance as ``_test_xs`` and ``_test_ys``.  The texts are passed through
     ``ST.replace_url`` (fill ``'H'``) and ``ST.replace_target`` (fill ``'T'``)
     before being turned into a matrix with ``self.build_sparse_X`` and
     handed, together with the labels, to ``self.accuracy``.
     """
     texts, labels = ST.load_data(self.test_path)
     self._test_xs = texts
     self._test_ys = labels

     # The return values are ignored, so both helpers presumably rewrite the
     # list in place -- confirm against ST.
     ST.replace_url(texts, fill='H')
     ST.replace_target(texts, fill='T')

     self.accuracy(self.build_sparse_X(texts), labels)
Ejemplo n.º 2
0
def test():
    """Normalize the object-stat training texts and write them back out.

    Reads ``../train_data/stat_obj_train_data`` as one JSON object per line
    and collects the first value of each object as a text.  The collected
    texts are passed through ``ST.replace_url`` (fill ``'H'``) and
    ``ST.replace_target`` (fill ``'T'``); each text is then serialized as a
    ``{"O": text}`` JSON line and handed to ``write`` with mode ``'a'`` for
    ``../train_data/Dg_obj_stats``.

    Raises:
        IndexError: if a line holds an empty JSON object.
        ValueError: if a line is not valid JSON.
    """
    obj_stats_path = '../train_data/stat_obj_train_data'
    out_path = '../train_data/Dg_obj_stats'
    txts = []
    with open(obj_stats_path, 'r') as f:
        for line in f:
            dic = json.loads(line.strip())
            # dict views cannot be indexed on Python 3, so materialize the
            # values first; the key of the pair was never used.
            txts.append(list(dic.values())[0])
    linfo('obj stats count: %s' % (len(txts)))
    # Return values are ignored, so these presumably edit `txts` in place --
    # confirm against ST.
    ST.replace_url(txts, fill='H')
    ST.replace_target(txts, fill='T')
    for x in txts:
        write(out_path, 'a', '%s\n' % json.dumps({'O': x}))
Ejemplo n.º 3
0
def test():
    """Normalize the object-stat training texts and write them back out.

    Reads ``../train_data/stat_obj_train_data`` as one JSON object per line
    and collects the first value of each object as a text.  The collected
    texts are passed through ``ST.replace_url`` (fill ``'H'``) and
    ``ST.replace_target`` (fill ``'T'``); each text is then serialized as a
    ``{"O": text}`` JSON line and handed to ``write`` with mode ``'a'`` for
    ``../train_data/Dg_obj_stats``.

    Raises:
        IndexError: if a line holds an empty JSON object.
        ValueError: if a line is not valid JSON.
    """
    obj_stats_path = '../train_data/stat_obj_train_data'
    out_path = '../train_data/Dg_obj_stats'
    txts = []
    with open(obj_stats_path, 'r') as f:
        for line in f:
            dic = json.loads(line.strip())
            # dict views cannot be indexed on Python 3, so materialize the
            # values first; the key of the pair was never used.
            txts.append(list(dic.values())[0])
    linfo('obj stats count: %s' % (len(txts)))
    # Return values are ignored, so these presumably edit `txts` in place --
    # confirm against ST.
    ST.replace_url(txts, fill='H')
    ST.replace_target(txts, fill='T')
    for x in txts:
        write(out_path, 'a', '%s\n' % json.dumps({'O': x}))
Ejemplo n.º 4
0
    def train(self, cross_validation=False, fold_sz=10, test_path='../../test_data/tri_test_data'):
        """Load the training data, then either cross-train or fit and score once.

        The training texts and labels come from ``ST.load_data(self._path)``.
        When ``self._emoticon`` is falsy the training texts are passed through
        ``ST.remove_emoticon``.  ``self.gram2gid`` is then rebuilt with
        ``self._discretize_gram2gid()``.

        Args:
            cross_validation: when true, only ``self._cross_train(fold_sz)``
                is run and no held-out test file is read.
            fold_sz: value forwarded to ``self._cross_train``.
            test_path: file loaded with ``ST.load_data`` as the test set when
                ``cross_validation`` is false.

        Returns:
            None.  In the non-cross-validation case the result of
            ``classify.accuracy`` is logged through ``linfo`` (the log text
            labels it "precision").
        """
        self._train_xs, self._train_ys = ST.load_data(self._path)
        if not self._emoticon:
            ST.remove_emoticon(self._train_xs)
        self.gram2gid = self._discretize_gram2gid()

        if cross_validation:
            linfo('begin to cross train')
            self._cross_train(fold_sz)
            return

        classifier = self._train(self._train_xs, self._train_ys)

        # The test texts get URL/target placeholders substituted before they
        # are encoded; the return values are ignored, so the helpers
        # presumably modify the list in place -- confirm against ST.
        self._test_xs, self._test_ys = ST.load_data(test_path)
        ST.replace_url(self._test_xs, fill='H')
        ST.replace_target(self._test_xs, fill='T')

        test_set = []
        for txt, tag in zip(self._test_xs, self._test_ys):
            test_set.append((self._feature_encoding(txt), tag))

        score = classify.accuracy(classifier, test_set)
        linfo('maxent classifier precision: %.4f' % score)