Ejemplo n.º 1
0
def pipeline(name):
    """Build and return the pipeline registered under ``name``.

    Every supported name matches the name of a zero-argument factory
    function. All factories are looked up on ``pipelines`` except
    ``naive_bayes_counts_lex``, which is looked up on
    ``pipeline_with_lexicon``.

    Args:
        name: Identifier of the requested pipeline.

    Returns:
        Whatever the matching factory function returns.

    Raises:
        ValueError: If ``name`` matches none of the supported identifiers.
    """
    supported = (
        'naive_bayes_counts',
        'naive_bayes_tfidf',
        'naive_bayes_tfidf_stopwords',
        'naive_bayes_bigram',
        'naive_bayes_trigram',
        'naive_bayes_counts_lex',
        'svm_libsvc_counts',
        'svm_libsvc_tfidf',
        'svm_libsvc_tfidf_stopwords',
        'svm_libsvc_counts_bigram',
        'svm_libsvc_embed',
        'svm_sigmoid_embed',
        'random_forest_tfidf',
        'random_forest_tfidf_stopwords',
        'random_forest_embed',
    )

    # Compare with ``==`` one candidate at a time (rather than hashing
    # ``name``) so that any input type is handled the same way.
    for factory_name in supported:
        if name == factory_name:
            break
    else:
        raise ValueError(
            "pipeline name is unknown. You can add a custom pipeline in 'pipelines'"
        )

    # The providing module is resolved only after a match, so an unknown
    # name never touches either module.
    if factory_name == 'naive_bayes_counts_lex':
        provider = pipeline_with_lexicon
    else:
        provider = pipelines
    return getattr(provider, factory_name)()
Ejemplo n.º 2
0
def test_grid_search():
    """Check that ``utils.grid_search`` yields one output per test instance.

    Loads the Offenseval task from ``offenseval_data_dir``, requests a
    train/dev split with ``proportion_train=0.1`` and
    ``proportion_dev=0.01``, runs a grid search over ``clf__C`` on the
    ``svm_libsvc_counts`` pipeline, and asserts that the returned sequence
    has the same length as the test labels.
    """
    offenseval = of.Offenseval()
    offenseval.load(offenseval_data_dir)

    splits = utils.get_instances(
        offenseval,
        split_train_dev=True,
        proportion_train=0.1,
        proportion_dev=0.01,
    )
    train_X, train_y, test_X, test_y = splits

    # Two candidate values for the classifier step's ``C`` parameter.
    search_space = {'clf__C': (0.1, 1)}
    svm_pipeline = pipelines.svm_libsvc_counts()
    predictions = utils.grid_search(
        svm_pipeline, search_space, train_X, train_y, test_X
    )

    assert len(predictions) == len(test_y)
Ejemplo n.º 3
0
def pipeline(name):
    """Build and return the pipeline registered under ``name``.

    Every supported name matches the name of a zero-argument factory
    function on ``pipelines``; the matching factory is called and its
    result returned.

    Args:
        name: Identifier of the requested pipeline.

    Returns:
        Whatever the matching ``pipelines`` factory function returns.

    Raises:
        ValueError: If ``name`` matches none of the supported identifiers.
    """
    supported = (
        'naive_bayes',
        'svm_libsvc_counts',
        'svm_libsvc_tfidf',
        'svm_libsvc_embed',
        'svm_sigmoid_embed',
    )

    # Compare with ``==`` one candidate at a time (rather than hashing
    # ``name``) so that any input type is handled the same way.
    for factory_name in supported:
        if name == factory_name:
            # ``pipelines`` is only touched once a name has matched.
            return getattr(pipelines, factory_name)()

    raise ValueError("pipeline name is unknown. You can add a custom pipeline in 'pipelines'")