Esempio n. 1
0
def pipeline(name):
    """Build the pipeline registered under ``name``.

    Every recognised name is also the name of a zero-argument factory
    function; the factory is called and its result is returned unchanged.
    All factories are attributes of ``pipelines`` except
    ``naive_bayes_counts_lex``, which is taken from ``pipeline_with_lexicon``.

    Args:
        name: Identifier of the requested pipeline; compared with ``==``
            against each recognised name in turn.

    Returns:
        Whatever the matching factory returns (presumably an estimator
        exposing ``fit``/``predict`` -- confirm against ``pipelines``).

    Raises:
        ValueError: If ``name`` equals none of the recognised names.
    """
    # Recognised names, listed in the order they are compared with `name`.
    recognised = (
        'naive_bayes_counts',
        'naive_bayes_tfidf',
        'naive_bayes_tfidf_stopwords',
        'naive_bayes_bigram',
        'naive_bayes_trigram',
        'naive_bayes_counts_lex',
        'svm_libsvc_counts',
        'svm_libsvc_tfidf',
        'svm_libsvc_tfidf_stopwords',
        'svm_libsvc_counts_bigram',
        'svm_libsvc_embed',
        'svm_sigmoid_embed',
        'random_forest_tfidf',
        'random_forest_tfidf_stopwords',
        'random_forest_embed',
    )
    for candidate in recognised:
        if name == candidate:
            # The module is resolved only after a match, so an unknown name
            # never touches either factory module.
            if candidate == 'naive_bayes_counts_lex':
                factory_home = pipeline_with_lexicon
            else:
                factory_home = pipelines
            return getattr(factory_home, candidate)()
    raise ValueError(
        "pipeline name is unknown. You can add a custom pipeline in 'pipelines'"
    )
Esempio n. 2
0
def test_naive_bayes_pipeline():
    """Smoke-test the count-based naive Bayes pipeline on the Offenseval task.

    Loads the task from ``offenseval_data_dir``, fits the pipeline on the
    training instances and checks that one prediction is produced per test
    label.
    """
    offenseval_task = of.Offenseval()
    offenseval_task.load(offenseval_data_dir)

    # NOTE(review): the small proportions presumably sub-sample the data to
    # keep the test quick -- confirm against utils.get_instances.
    instances = utils.get_instances(
        offenseval_task,
        split_train_dev=True,
        proportion_train=0.1,
        proportion_dev=0.01,
    )
    train_X, train_y, test_X, test_y = instances

    classifier = pipelines.naive_bayes_counts()
    classifier.fit(train_X, train_y)
    predictions = classifier.predict(test_X)

    assert len(predictions) == len(test_y)
Esempio n. 3
0
def test_hate_speech():
    """Smoke-test the count-based naive Bayes pipeline on VUA-format data.

    Loads the task from ``hate_speech_data_dir``, fits the pipeline on the
    training instances and checks that the number of predictions equals the
    number of test labels.
    """
    hate_speech_task = vf.VuaFormat()
    hate_speech_task.load(hate_speech_data_dir)

    # Keyword values are kept exactly as the split helper receives them.
    split_options = {
        'split_train_dev': True,
        'proportion_train': 0.1,
        'proportion_dev': 0.01,
    }
    train_X, train_y, test_X, test_y = utils.get_instances(
        hate_speech_task, **split_options)

    model = pipelines.naive_bayes_counts()
    model.fit(train_X, train_y)
    predicted = model.predict(test_X)

    assert len(predicted) == len(test_y)
Esempio n. 4
0
def test_full_pipelines():
    """Fit and predict with each of several pipelines on the shared data.

    For every pipeline in the list, the pipeline is fitted on the training
    split and must return exactly one prediction per test label.
    """
    train_X, train_y, test_X, test_y = train_test_data()
    pipes = [
        # Every entry must be a built pipeline, so each factory is called.
        pipelines.naive_bayes_counts(),
        pipelines.svm_libsvc_embed(),
        pipelines.naive_bayes_counts_lex(),
    ]

    for pipe in pipes:
        # Use the loop's own pipeline; rebinding `pipe` here would test the
        # same naive Bayes model on every iteration and skip the others.
        pipe.fit(train_X, train_y)
        sys_y = pipe.predict(test_X)
        assert len(sys_y) == len(test_y)
def pipeline(name):
    """Build the pipeline registered under ``name``.

    Every recognised name is also the name of a zero-argument factory
    function on ``pipelines``; that factory is called and its result is
    returned unchanged.

    Args:
        name: Identifier of the requested pipeline; compared with ``==``
            against each recognised name in turn.

    Returns:
        Whatever the matching ``pipelines`` factory returns.

    Raises:
        ValueError: If ``name`` equals none of the recognised names.
    """
    # Recognised names, listed in the order they are compared with `name`.
    recognised = (
        'naive_bayes_counts',
        'naive_bayes_tfidf',
        'svm_libsvc_counts',
        'svm_libsvc_tfidf',
        'svm_libsvc_embed',
        'svm_sigmoid_embed',
    )
    for candidate in recognised:
        if name == candidate:
            # `pipelines` is looked up only after a match, so an unknown
            # name never touches the factory module.
            return getattr(pipelines, candidate)()
    raise ValueError(
        "pipeline name is unknown. You can add a custom pipeline in 'pipelines'"
    )