예제 #1
0
def test_naive_bayes_pipeline():
    """Smoke-test the naive Bayes pipeline on the Offenseval data.

    Loads the task from ``offenseval_data_dir``, obtains train/test
    instances via ``utils.get_instances``, fits the pipeline on the training
    part and checks that one prediction is produced per test label.
    """
    dataset = of.Offenseval()
    dataset.load(offenseval_data_dir)

    # get_instances yields four values: train inputs, train labels,
    # test inputs and test labels (in that order).
    splits = utils.get_instances(
        dataset,
        split_train_dev=True,
        proportion_train=0.1,
        proportion_dev=0.01,
    )
    x_train, y_train, x_test, y_test = splits

    classifier = pipelines.naive_bayes()
    classifier.fit(x_train, y_train)
    predictions = classifier.predict(x_test)

    # Only the output length is verified, not the predicted values.
    assert len(predictions) == len(y_test)
예제 #2
0
def test_hate_speech():
    """Smoke-test the naive Bayes pipeline on the hate-speech data.

    Loads ``testData.csv`` from ``hate_speech_data_dir`` through the
    ``VuaFormat`` task, obtains train/test instances via
    ``utils.get_instances``, fits the pipeline on the training part and
    checks that one prediction is produced per test label.
    """
    dataset = vf.VuaFormat()
    dataset.load(hate_speech_data_dir, ['testData.csv'])

    # get_instances yields four values: train inputs, train labels,
    # test inputs and test labels (in that order).
    splits = utils.get_instances(
        dataset,
        split_train_dev=True,
        proportion_train=0.1,
        proportion_dev=0.01,
    )
    x_train, y_train, x_test, y_test = splits

    classifier = pipelines.naive_bayes()
    classifier.fit(x_train, y_train)
    predictions = classifier.predict(x_test)

    # Only the output length is verified, not the predicted values.
    assert len(predictions) == len(y_test)
예제 #3
0
def pipeline(name):
    """Build and return the pipeline identified by ``name``.

    Args:
        name: Identifier of the pipeline. Recognised values are
            'naive_bayes', 'svm_libsvc_counts', 'svm_libsvc_tfidf',
            'svm_libsvc_embed' and 'svm_sigmoid_embed'.

    Returns:
        The result of calling the same-named factory in ``pipelines``.

    Raises:
        ValueError: If ``name`` equals none of the recognised identifiers.
    """
    # Every recognised identifier doubles as the name of its factory
    # function in the `pipelines` module.
    known_names = (
        'naive_bayes',
        'svm_libsvc_counts',
        'svm_libsvc_tfidf',
        'svm_libsvc_embed',
        'svm_sigmoid_embed',
    )
    for candidate in known_names:
        if name == candidate:
            # Resolve the factory only after a match, so an unknown name
            # never touches `pipelines`.
            return getattr(pipelines, candidate)()
    raise ValueError("pipeline name is unknown. You can add a custom pipeline in 'pipelines'")