Example #1
0
 def real_test(self):
     """Score the model on the data set stored at ``self.test_path``.

     The loaded texts and labels are kept on the instance as ``_test_xs``
     and ``_test_ys``. The texts go through ``ST.replace_url`` (fill ``'H'``)
     and ``ST.replace_target`` (fill ``'T'``) before being turned into a
     matrix by ``build_sparse_X`` and handed to ``self.accuracy``.
     """
     loaded = ST.load_data(self.test_path)
     self._test_xs, self._test_ys = loaded

     # The return values of both helpers are ignored, so this code relies
     # on them rewriting the list of texts in place.
     ST.replace_url(self._test_xs, fill='H')
     ST.replace_target(self._test_xs, fill='T')

     features = self.build_sparse_X(self._test_xs)
     self.accuracy(features, self._test_ys)
    def train(self, icon=True, cross=False):
        """Load the training data, preprocess it and run ``self._train``.

        Args:
            icon: when falsy, ``ST.remove_emoticon`` is applied to the
                training texts before training.
            cross: forwarded to ``self._train`` as ``cross_validation``.
        """
        loaded = ST.load_data(self._train_path)
        self._train_xs, self._train_ys = loaded

        # Return value ignored: the helper is relied on to edit the texts
        # in place.
        ST.replace_url(self._train_xs, fill=True)

        strip_emoticons = not icon
        if strip_emoticons:
            ST.remove_emoticon(self._train_xs)

        self._train(cross_validation=cross)
 def train(self, icon=True, cross=False):
     """Prepare the training set and delegate to ``self._train``.

     The texts and labels read from ``self._train_path`` are stored as
     ``_train_xs`` / ``_train_ys``. ``ST.replace_url`` is always applied
     with ``fill=True``.

     Args:
         icon: truthy keeps the texts as loaded; falsy additionally runs
             ``ST.remove_emoticon`` over them.
         cross: passed through as the ``cross_validation`` keyword of
             ``self._train``.
     """
     data = ST.load_data(self._train_path)
     self._train_xs, self._train_ys = data
     ST.replace_url(self._train_xs, fill=True)
     if icon:
         pass  # emoticons are left untouched
     else:
         ST.remove_emoticon(self._train_xs)
     self._train(cross_validation=cross)
Example #4
0
def test():
    """Re-emit the object-statistics texts after URL/target substitution.

    Reads ``../train_data/stat_obj_train_data`` as one JSON object per
    line and collects the value of the first entry of each object. The
    texts are passed through ``ST.replace_url`` (fill ``'H'``) and
    ``ST.replace_target`` (fill ``'T'``), and each one is appended to
    ``../train_data/Dg_obj_stats`` as a ``{"O": text}`` JSON line.

    Raises:
        IndexError: if a line holds an empty JSON object.
        ValueError: if a non-blank line is not valid JSON.
    """
    obj_stats_path = '../train_data/stat_obj_train_data'
    out_path = '../train_data/Dg_obj_stats'
    txts = []
    with open(obj_stats_path, 'r') as f:
        for line in f:
            line = line.strip()
            if not line:
                # A blank (e.g. trailing) line carries no record; skip it
                # instead of letting json.loads fail on an empty string.
                continue
            dic = json.loads(line)
            # dict views are not subscriptable on Python 3, so materialise
            # them first. Only the text is needed; the tag key is unused.
            txts.append(list(dic.values())[0])
    linfo('obj stats count: %s' % (len(txts)))
    # The helpers' results are not reassigned, so they are relied on to
    # modify ``txts`` in place.
    ST.replace_url(txts, fill='H')
    ST.replace_target(txts, fill='T')
    for x in txts:
        dic = {'O': x}
        write(out_path, 'a', '%s\n' % json.dumps(dic))
Example #5
0
def test():
    """Convert the raw object-statistics file into ``{"O": text}`` lines.

    Each non-blank line of ``../train_data/stat_obj_train_data`` is parsed
    as a JSON object, and the value of its first entry is taken as the
    text. After ``ST.replace_url`` (fill ``'H'``) and ``ST.replace_target``
    (fill ``'T'``) have been applied, every text is appended to
    ``../train_data/Dg_obj_stats`` as one JSON line.

    Raises:
        IndexError: if a parsed JSON object has no entries.
        ValueError: if a non-blank line cannot be parsed as JSON.
    """
    obj_stats_path = '../train_data/stat_obj_train_data'
    out_path = '../train_data/Dg_obj_stats'
    txts = []
    with open(obj_stats_path, 'r') as f:
        for line in f:
            record = line.strip()
            if not record:
                # Blank lines hold no data; json.loads('') would raise.
                continue
            dic = json.loads(record)
            # ``dic.items()[0]`` only works on Python 2; on Python 3 the
            # view must be turned into a list before indexing. The key
            # (tag) was never used, so only the value is extracted.
            first_text = list(dic.values())[0]
            txts.append(first_text)
    linfo('obj stats count: %s' % (len(txts)))
    # Results are not reassigned: both helpers are relied on to edit
    # ``txts`` in place.
    ST.replace_url(txts, fill='H')
    ST.replace_target(txts, fill='T')
    for x in txts:
        dic = {'O': x}
        write(out_path, 'a', '%s\n' % json.dumps(dic))
    def train(self, cross_validation=False, fold_sz=10, test_path='../../test_data/tri_test_data'):
        """Train on the data at ``self._path`` and log held-out accuracy.

        Args:
            cross_validation: when truthy, run ``self._cross_train(fold_sz)``
                and stop; no separate test set is loaded in that case.
            fold_sz: forwarded to ``self._cross_train``.
            test_path: location of the test set used when
                ``cross_validation`` is falsy.
        """
        loaded = ST.load_data(self._path)
        self._train_xs, self._train_ys = loaded
        if not self._emoticon:
            ST.remove_emoticon(self._train_xs)
        self.gram2gid = self._discretize_gram2gid()

        if cross_validation:
            linfo('begin to cross train')
            self._cross_train(fold_sz)
            return

        model = self._train(self._train_xs, self._train_ys)

        self._test_xs, self._test_ys = ST.load_data(test_path)
        # Return values are ignored: the helpers are relied on to rewrite
        # the test texts in place.
        ST.replace_url(self._test_xs, fill='H')
        ST.replace_target(self._test_xs, fill='T')

        # Pair every encoded test text with its gold label.
        labelled = []
        for text, label in zip(self._test_xs, self._test_ys):
            labelled.append((self._feature_encoding(text), label))

        score = classify.accuracy(model, labelled)
        linfo('maxent classifier precision: %.4f' % score)