import stst

# Define Model
gb = stst.Classifier(stst.GradientBoostingRegression())
model = stst.Model('S1-gb', gb)

# Add features to the Model
model.add(stst.WeightednGramMatchFeature(type='lemma'))
model.add(stst.BOWFeature(stopwords=False))
model.add(stst.AlignmentFeature())
model.add(stst.IdfAlignmentFeature())
model.add(stst.NegativeFeature())

# train, dev and test file paths
train_file = './data/stsbenchmark/sts-train.csv'
dev_file = './data/stsbenchmark/sts-dev.csv'
test_file = './data/stsbenchmark/sts-test.csv'

# initialize the Stanford CoreNLP client with the server address
nlp = stst.StanfordNLP('http://localhost:9000')

# parse data
train_instances = stst.load_parse_data(train_file, nlp)
dev_instances = stst.load_parse_data(dev_file, nlp)

# train and test
model.train(train_instances, train_file)
model.test(dev_instances, dev_file)

# evaluation
dev_pearsonr = stst.eval_output_file(model.output_file)
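
# Optional follow-up (a minimal sketch, not part of the original example).
# The snippet above defines test_file but only evaluates on the dev split.
# Assuming the same stst calls already shown (load_parse_data, Model.test,
# eval_output_file), the held-out test split could be scored the same way:
test_instances = stst.load_parse_data(test_file, nlp)
model.test(test_instances, test_file)
test_pearsonr = stst.eval_output_file(model.output_file)
print(dev_pearsonr, test_pearsonr)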
from __future__ import print_function

import stst
from features.features_unigram import UnigramFeature, FuckFeature, TFFeature, BigramFeature
from features.features_unigram import MinAvgMaxEmbeddingFeature
from stst import config
from main_tools import *

# Define Model
lr = stst.Classifier(stst.LIB_LINEAR_LR())
svm = stst.Classifier(stst.skLearn_svm())
xgb = stst.Classifier(stst.XGBOOST_prob())
boosting = stst.Classifier(stst.sklearn_GradientBoosting())

model = stst.Model('S1-gb', lr)
model.add(FuckFeature(load=False))


def train_model_best(is_training=False, model_name='S2-gb', classifier=lr):
    model_best = stst.Model(model_name, classifier)

    # word- and character-level n-gram count features
    model_best.add(TFFeature(type='word', convey='count', load=True))
    model_best.add(TFFeature(type='char', convey='count', load=True))
    model_best.add(BigramFeature(type='word', convey='count', load=True))
    model_best.add(BigramFeature(type='char', convey='count', load=True))

    # word-embedding files of different dimensions
    emb_wd_50_file = config.EMB_WORD_DIR + '/embedding.50'
    emb_wd_100_file = config.EMB_WORD_DIR + '/embedding.100'
    emb_wd_200_file = config.EMB_WORD_DIR + '/embedding.200'
    emb_wd_300_file = config.EMB_WORD_DIR + '/embedding.300'
    headlines_vec = config.EMB_WORD_DIR + '/headlines.vec'

    model_best.add(MinAvgMaxEmbeddingFeature('headlines', 100, headlines_vec, pooling_type='avg', load=True))
# coding: utf8
from input import data

import stst
import config  # assumed: project-level config module with dataset paths (it may also be exposed by the star imports below)
from features.pmi_feature import *
from features.warrant_feature import *
from metric import evaluation

classifier = stst.Classifier(stst.LIB_LINEAR_LR())
model = stst.Model('NLP', classifier)
model.add(Warrant_Feature(load=False))
# model.add(BowFeature(load=False))
# model.add(BI_feature(load=False))

# load and parse the train / dev / test splits
train_file = config.train_file
train_instances = data.load_parse_data(train_file)

dev_file = config.dev_file
dev_instances = data.load_parse_data(dev_file)

test_file = config.test_file
test_instances = data.load_parse_data(test_file)

# train on the training split and report accuracy
model.train(train_instances, train_file)
acc = evaluation.Evaluation(train_file, model.output_file)
print(acc)

# evaluate on the dev split
model.test(dev_instances, dev_file)
acc = evaluation.Evaluation(dev_file, model.output_file)
print(acc)
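
# Optional follow-up (a minimal sketch, not part of the original script).
# test_instances is loaded above but never used; with the same Model.test and
# evaluation.Evaluation calls already shown, the test split could be scored too:
model.test(test_instances, test_file)
acc = evaluation.Evaluation(test_file, model.output_file)
print(acc)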
import stst
import config           # assumed: project-level config (dataset paths, NN_RUN_DIR), as in the previous script
from input import data  # assumed: same data loader used in the previous script
from features.pmi_feature import *
from features.warrant_feature import *
from features.nn_features import *
from metric import evaluation

# load and parse the train / dev / test splits
train_file = config.train_file
train_instances = data.load_parse_data(train_file)

dev_file = config.dev_file
dev_instances = data.load_parse_data(dev_file)

test_file = config.test_file
test_instances = data.load_parse_data(test_file)

# linear model with the warrant feature
lr = stst.Classifier(stst.LIB_LINEAR_LR())
nlp_model = stst.Model('lr', lr)
nlp_model.add(Warrant_Feature())
nlp_model.train(train_instances, train_file)
nlp_model.test(dev_instances, dev_file)
nlp_model.test(test_instances, test_file)

# voting ensemble over neural-network run outputs (features currently disabled)
vote = stst.Classifier(stst.VoteEnsemble())
model1 = stst.Model('vote1', vote)
# model1.add(NNAVGFeature('intra_attention_cnn_margin', config.NN_RUN_DIR + '/run_intra_attention_cnn_margin_0121_19_04', load=False))  # 0.5
# model1.add(NNAVGFeature('intra_attention_cnn', config.NN_RUN_DIR + '/run_intra_attention_cnn_0121_19_03', load=False))  # 0.46
# model1.add(NNFeature('intra_attention_i', config.NN_RUN_DIR + '/run_intra_attention_i_0121_19_03', load=False))  # 0.5
# model1.add(NNAVGFeature('run_intra_attention_cnn_negclaim', config.NN_RUN_DIR + '/run_intra_attention_cnn_negclaim_0121_20_37', load=False))
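
# Optional follow-up (a minimal sketch, not part of the original script).
# All NN features of the voting ensemble above are commented out, so model1 is
# currently empty. If one of those features is re-enabled (the NNAVGFeature /
# NNFeature names and run directories come from the commented lines and are not
# verified here), the ensemble could be scored with the same Model.test and
# Evaluation calls used for nlp_model:
model1.test(dev_instances, dev_file)
print(evaluation.Evaluation(dev_file, model1.output_file))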