def test_naive_bayes_pipeline():
    """Fit the naive Bayes pipeline on Offenseval instances and predict.

    The check is structural only: the pipeline must return exactly one
    prediction per test instance. Prediction quality is not evaluated.
    """
    dataset = of.Offenseval()
    dataset.load(offenseval_data_dir)

    # Small train/dev proportions are requested from the splitter
    # (presumably to keep the test quick -- confirm against utils).
    features_train, labels_train, features_test, labels_test = utils.get_instances(
        dataset,
        split_train_dev=True,
        proportion_train=0.1,
        proportion_dev=0.01,
    )

    classifier = pipelines.naive_bayes()
    classifier.fit(features_train, labels_train)
    predictions = classifier.predict(features_test)

    assert len(predictions) == len(labels_test)
def test_hate_speech():
    """Fit the naive Bayes pipeline on the hate-speech data and predict.

    Only ``testData.csv`` is loaded from the hate-speech data directory.
    The check is structural: the pipeline must return exactly one
    prediction per test instance.
    """
    dataset = vf.VuaFormat()
    dataset.load(hate_speech_data_dir, ['testData.csv'])

    # Small train/dev proportions are requested from the splitter
    # (presumably to keep the test quick -- confirm against utils).
    features_train, labels_train, features_test, labels_test = utils.get_instances(
        dataset,
        split_train_dev=True,
        proportion_train=0.1,
        proportion_dev=0.01,
    )

    classifier = pipelines.naive_bayes()
    classifier.fit(features_train, labels_train)
    predictions = classifier.predict(features_test)

    assert len(predictions) == len(labels_test)
def pipeline(name):
    """Build the pipeline registered under ``name``.

    Args:
        name: One of ``'naive_bayes'``, ``'svm_libsvc_counts'``,
            ``'svm_libsvc_tfidf'``, ``'svm_libsvc_embed'`` or
            ``'svm_sigmoid_embed'``.

    Returns:
        The object returned by calling the same-named factory in the
        ``pipelines`` module.

    Raises:
        ValueError: If ``name`` matches none of the known pipeline names.
    """
    known_names = (
        'naive_bayes',
        'svm_libsvc_counts',
        'svm_libsvc_tfidf',
        'svm_libsvc_embed',
        'svm_sigmoid_embed',
    )
    for candidate in known_names:
        if name == candidate:
            # The factory attribute is looked up only for the matching
            # name, so an unknown name never touches ``pipelines``.
            return getattr(pipelines, candidate)()
    raise ValueError("pipeline name is unknown. You can add a custom pipeline in 'pipelines'")