예제 #1
0
 def debug(self):
     """Compare the extracted feature keys before and after emoticon removal.

     Extracts the feature keys once, calls ``ST.remove_emoticon`` on
     ``self._train_xs``, extracts them again, logs the size of both key
     sets and prints every key that occurs only in the second extraction.

     NOTE(review): the return value of ``ST.remove_emoticon`` is ignored,
     so it presumably strips ``self._train_xs`` in place. If so, running
     this helper permanently alters the training texts -- confirm.
     """
     with_icons = set(self.batch_extract_feature().keys())
     ST.remove_emoticon(self._train_xs)
     without_icons = set(self.batch_extract_feature().keys())
     linfo('uni_bi_icon_feature_cnt: %s. no_icon: %s' % (len(with_icons), len(without_icons)))
     # Keys that exist only after the removal step (second set minus first).
     only_without_icons = without_icons - with_icons
     for feature in only_without_icons:
         # Single-argument parenthesised form: same output on Python 2,
         # and no longer a syntax error on Python 3.
         print(feature)
예제 #2
0
 def train_discret_model(self, **config):
     """Preprocess the training texts and build the feature-id mapping.

     Keyword options (both required; a missing one raises ``KeyError``):
         emoticon: when falsy, ``ST.remove_emoticon`` is applied to
             ``self._train_xs``.
         parenthesis: when falsy, ``ST.remove_parenthesis`` is applied to
             ``self._train_xs``.

     Afterwards ``self.txt2bags`` is reset to an empty dict and
     ``self.w2id`` is set to the result of ``batch_extract_feature()``.
     """
     linfo('begin train helper discret model: %s' % config)

     drop_emoticon = not config['emoticon']
     if drop_emoticon:
         ST.remove_emoticon(self._train_xs)

     # Looked up only after the emoticon step, as in the original flow.
     drop_parenthesis = not config['parenthesis']
     if drop_parenthesis:
         ST.remove_parenthesis(self._train_xs)

     # Reset txt2bags before the features are extracted again.
     self.txt2bags = {}
     self.w2id = self.batch_extract_feature()

     linfo('end train helper discret model')
예제 #3
0
 def train_discret_model(self, **config):
     """Strip optional markup from the training texts, then extract features.

     ``config`` must contain the keys ``'emoticon'`` and ``'parenthesis'``
     (``KeyError`` otherwise). For each key whose value is falsy, the
     matching ``ST.remove_*`` helper is called on ``self._train_xs``.
     Finally ``self.txt2bags`` is emptied and ``self.w2id`` receives the
     result of ``batch_extract_feature()``.
     """
     linfo('begin train helper discret model: %s' % config)

     # (option key, name of the ST helper to run when the option is off).
     # The helper is resolved lazily so nothing is looked up on ST unless
     # the corresponding option is actually disabled.
     optional_strips = (
         ('emoticon', 'remove_emoticon'),
         ('parenthesis', 'remove_parenthesis'),
     )
     for option, helper_name in optional_strips:
         if config[option]:
             continue
         getattr(ST, helper_name)(self._train_xs)

     self.txt2bags = {}
     self.w2id = self.batch_extract_feature()
     linfo('end train helper discret model')
예제 #4
0
 def debug(self):
     """Report how emoticon removal changes the set of extracted feature keys.

     Steps: collect the keys returned by ``batch_extract_feature()``, run
     ``ST.remove_emoticon`` on ``self._train_xs``, collect the keys again,
     log both set sizes, then print each key found only in the second set.

     NOTE(review): ``ST.remove_emoticon`` is called for its side effect
     (its result is discarded), so ``self._train_xs`` is presumably
     modified in place and stays modified after this call -- confirm.
     """
     keys_before = set(self.batch_extract_feature().keys())
     ST.remove_emoticon(self._train_xs)
     keys_after = set(self.batch_extract_feature().keys())
     linfo('uni_bi_icon_feature_cnt: %s. no_icon: %s' %
           (len(keys_before), len(keys_after)))
     # Set difference: keys present after removal but absent before it.
     new_keys = keys_after - keys_before
     for key in new_keys:
         # print(...) with one argument is equivalent to the Python 2
         # print statement and also parses on Python 3.
         print(key)
예제 #5
0
    def train(self):
        """Load the training data, fit ``self.clf`` and run ``real_test()``.

        The data is read from ``self._path`` via ``ST.load_data``. When
        ``self._emoticon`` is falsy, ``ST.remove_emoticon`` is applied to
        the loaded texts before ``self.gram2gid`` is rebuilt and the
        sparse matrix is constructed.
        """
        samples, labels = ST.load_data(self._path)
        self._train_xs, self._train_ys = samples, labels

        keep_emoticon = self._emoticon
        if not keep_emoticon:
            ST.remove_emoticon(self._train_xs)

        # gram2gid is rebuilt before build_sparse_X runs, as in the
        # original ordering.
        self.gram2gid = self._discretize_gram2gid()
        feature_matrix = self.build_sparse_X(self._train_xs)
        self.clf.fit(feature_matrix, self._train_ys)

        self.real_test()
예제 #6
0
    def train(self, icon=True, cross=False):
        """Load and preprocess the training data, then delegate to ``_train``.

        Args:
            icon: when falsy, ``ST.remove_emoticon`` is applied to the
                training texts.
            cross: forwarded to ``self._train`` as ``cross_validation``.

        The data comes from ``ST.load_data(self._train_path)``; URLs are
        always replaced through ``ST.replace_url(..., fill=True)``.
        """
        loaded_xs, loaded_ys = ST.load_data(self._train_path)
        self._train_xs, self._train_ys = loaded_xs, loaded_ys

        ST.replace_url(self._train_xs, fill=True)

        strip_emoticon = not icon
        if strip_emoticon:
            ST.remove_emoticon(self._train_xs)

        self._train(cross_validation=cross)
예제 #7
0
 def train(self, icon=True, cross=False):
     """Prepare ``self._train_xs`` / ``self._train_ys`` and start training.

     Args:
         icon: keep emoticons when truthy; otherwise they are removed
             from the training texts with ``ST.remove_emoticon``.
         cross: passed to ``self._train`` as its ``cross_validation``
             keyword argument.
     """
     texts, tags = ST.load_data(self._train_path)
     self._train_xs, self._train_ys = texts, tags

     # URL replacement happens unconditionally, before the optional
     # emoticon removal.
     ST.replace_url(self._train_xs, fill=True)
     if icon:
         pass
     else:
         ST.remove_emoticon(self._train_xs)

     self._train(cross_validation=cross)
예제 #8
0
    def train(self, cross_validation=False, fold_sz=10,
              test_path='../../test_data/tri_test_data'):
        """Train the classifier and either cross-validate or score a test set.

        Args:
            cross_validation: when truthy, run ``self._cross_train(fold_sz)``
                and stop; no held-out test set is loaded.
            fold_sz: passed to ``self._cross_train``; unused otherwise.
            test_path: file handed to ``ST.load_data`` for the held-out
                test set when ``cross_validation`` is falsy.

        In the non-cross-validation path the result of
        ``classify.accuracy`` is logged (the log message labels it
        "precision").

        NOTE(review): the test texts get URL/target replacement but never
        ``ST.remove_emoticon`` (even when ``self._emoticon`` is falsy),
        while the training texts get no URL/target replacement here.
        Confirm this asymmetry is intended or handled inside helpers
        not visible from this method.
        """
        train_xs, train_ys = ST.load_data(self._path)
        self._train_xs, self._train_ys = train_xs, train_ys
        if not self._emoticon:
            ST.remove_emoticon(self._train_xs)
        self.gram2gid = self._discretize_gram2gid()

        if cross_validation:
            linfo('begin to cross train')
            self._cross_train(fold_sz)
            return

        model = self._train(self._train_xs, self._train_ys)

        test_xs, test_ys = ST.load_data(test_path)
        self._test_xs, self._test_ys = test_xs, test_ys
        ST.replace_url(self._test_xs, fill='H')
        ST.replace_target(self._test_xs, fill='T')

        # Pair each encoded test text with its gold tag.
        labelled_features = []
        for txt, tag in zip(self._test_xs, self._test_ys):
            labelled_features.append((self._feature_encoding(txt), tag))

        score = classify.accuracy(model, labelled_features)
        linfo('maxent classifier precision: %.4f' % score)