Ejemplo n.º 1
0
def train_model_best(is_training=False, model_name='S2-gb', classifier=lr):
    """Build the "best" feature model and optionally train/evaluate it.

    The model is given, in this order: ``TFFeature`` over words and
    characters, ``BigramFeature`` over words and characters, and two
    ``MinAvgMaxEmbeddingFeature`` instances over the 'headlines' vectors
    (``pooling_type='avg'`` then ``pooling_type='all'``). Every feature is
    constructed with ``load=True``.

    Args:
        is_training: When true, ``train_nlpcc`` and then ``dev_nlpcc`` are
            called on the model before it is returned.
        model_name: Name handed to ``stst.Model``.
        classifier: Classifier handed to ``stst.Model``. The default is the
            module-level ``lr`` as it exists when this function is defined.

    Returns:
        The configured ``stst.Model`` instance.
    """
    model_best = stst.Model(model_name, classifier)

    # Count features; iteration order reproduces TF/word, TF/char,
    # Bigram/word, Bigram/char.
    for feature_cls in (TFFeature, BigramFeature):
        for unit in ('word', 'char'):
            model_best.add(feature_cls(type=unit, convey='count', load=True))

    headlines_vec = config.EMB_WORD_DIR + '/headlines.vec'

    # Same embedding file, two pooling strategies.
    for pooling in ('avg', 'all'):
        model_best.add(
            MinAvgMaxEmbeddingFeature('headlines',
                                      100,
                                      headlines_vec,
                                      pooling_type=pooling,
                                      load=True))

    if is_training:
        train_nlpcc(model_best)
        dev_nlpcc(model_best)
    return model_best
Ejemplo n.º 2
0
def stack():
    """Train the 'Stack1' model on stacked base models, then run dev.

    A ``stst.Model`` named 'Stack1' is built with the module-level
    ``boosting`` classifier. The ``*_stack`` base models are added and
    ``train_nlpcc`` is called; the feature list is then cleared, the
    non-stack counterparts are added, and ``dev_nlpcc`` is called.
    Nothing is returned.
    """
    model_stack = stst.Model('Stack1', boosting)

    # Inputs used while fitting the stacking classifier.
    model_stack.add(model_best_stack)
    model_stack.add(xgb_model_best_stack)
    model_stack.add(model_emb_stack)

    train_nlpcc(model_stack)

    # Replace the training-time inputs with their non-stack counterparts.
    # They are added in the SAME order as above (best, xgb, emb); the
    # previous best/emb/xgb order would swap the last two inputs relative
    # to training.
    # NOTE(review): assumes stst.Model lays features out in insertion
    # order -- confirm against the stst implementation.
    model_stack.feature_list = []
    model_stack.add(model_best)
    model_stack.add(xgb_model_best)
    model_stack.add(model_emb)

    dev_nlpcc(model_stack)
Ejemplo n.º 3
0
        word_count += len(words)
        char_count += len(chars)

    print(word_count / len(instances))

    print(char_count / len(instances))


# Classifier wrappers shared by the models defined in this script.
lr = stst.Classifier(stst.LIB_LINEAR_LR())
svm = stst.Classifier(stst.skLearn_svm())
xgb = stst.Classifier(stst.XGBOOST_prob())
boosting = stst.Classifier(stst.sklearn_GradientBoosting())

# Model 'S-lr-expand', built on the `lr` classifier.
model = stst.Model('S-lr-expand', lr)

# Count features, added as TF/word, TF/char, Bigram/word, Bigram/char.
# Each one is constructed with load=False.
for _feature_cls in (TFFeature, BigramFeature):
    for _unit in ('word', 'char'):
        model.add(_feature_cls(type=_unit, convey='count', load=False))

headlines_vec = config.EMB_WORD_DIR + '/headlines.vec'

# Average-pooled embedding feature over the 'headlines' vectors.
_headlines_avg = MinAvgMaxEmbeddingFeature(
    'headlines',
    100,
    headlines_vec,
    pooling_type='avg',
    load=False)
model.add(_headlines_avg)