def test_dataset():
    """Test the Dataset multilabel loaders (folder+TSV and single-file formats)."""
    # Folder-of-documents format: labels come from a .tsv mapping file.
    x_train, y_train = Dataset.load_from_files_multilabel(
        path.join(DATASET_MULTILABEL_PATH, "train_files"),
        path.join(DATASET_MULTILABEL_PATH, "file_labels.tsv")
    )
    assert x_train == ['this is the first document!!\n\n:)', 'and this is the\n\nSECOND!!']
    assert y_train == [['catA', 'catB', 'catC'], ['catA']]

    # Single-file format: documents and labels split by custom separators.
    x_train, y_train = Dataset.load_from_files_multilabel(
        path.join(DATASET_MULTILABEL_PATH, "train/docs.txt"),
        path.join(DATASET_MULTILABEL_PATH, "train/labels.txt"),
        sep_label=",", sep_doc="\n>>>>>\n"
    )
    # FIX: the original asserted ``len(y_train) == len(y_train)`` (always true);
    # the intent is that documents and label lists stay aligned, one per document.
    assert len(x_train) == len(y_train) and len(y_train) == 20
    assert y_train[:8] == [[], ['toxic', 'severe_toxic', 'obscene', 'insult'],
                           [], [], [], [], [], ['toxic']]
def test_multilabel():
    """Test multilabel classification support (fit, classify, membership matrix)."""
    # FIX: use the module-level constant ``DATASET_MULTILABEL_PATH`` for
    # consistency with the other tests (the lowercase name was inconsistent).
    x_train, y_train = Dataset.load_from_files_multilabel(
        path.join(DATASET_MULTILABEL_PATH, "train/docs.txt"),
        path.join(DATASET_MULTILABEL_PATH, "train/labels.txt"),
        sep_label=",", sep_doc="\n>>>>>\n")

    clf = SS3()

    # membership_matrix must reject an untrained/empty model.
    with pytest.raises(ValueError):
        membership_matrix(clf, [])

    clf.fit(x_train, y_train)

    assert sorted(clf.get_categories()) == [
        'insult', 'obscene', 'severe_toxic', 'toxic'
    ]

    # A document with no recognizable content should receive no labels.
    assert clf.classify_multilabel("this is a unknown document!") == []

    y_pred = [[], ['toxic'], ['severe_toxic'], ['obscene'], ['insult'],
              ['toxic', 'insult']]
    y_pred_memmatrix = membership_matrix(clf, y_pred).todense().tolist()
    # One row per prediction; columns follow the learned category order.
    assert y_pred_memmatrix == [
        [0, 0, 0, 0],  # []
        [1, 0, 0, 0],  # ['toxic']
        [0, 1, 0, 0],  # ['severe_toxic']
        [0, 0, 1, 0],  # ['obscene']
        [0, 0, 0, 1],  # ['insult']
        [1, 0, 0, 1]   # ['toxic', 'insult']
    ]

    # Labels unknown to the model must map to an all-zero row.
    y_pred_memmatrix = membership_matrix(clf, y_pred + [["xxx"]]).todense().tolist()
    assert y_pred_memmatrix[-1] == [0, 0, 0, 0]
def test_evaluation(mocker):
    """Test Evaluation class: test, k-fold, grid_search, plot, remove, show_best."""
    # Silence the browser and matplotlib windows the Evaluation API opens.
    mocker.patch("webbrowser.open")
    mocker.patch("matplotlib.pyplot.show")

    kfold_validation = Evaluation.kfold_cross_validation

    # Reset Evaluation's internal (name-mangled) state so the test is hermetic.
    Evaluation.__cache__ = None
    Evaluation.__cache_file__ = None
    Evaluation.__clf__ = None
    Evaluation.__last_eval_tag__ = None
    Evaluation.__last_eval_method__ = None
    Evaluation.__last_eval_def_cat__ = None

    # Hyperparameter value grids used by grid_search below.
    ss = [0, 0.5]
    ll = [0, 1.5]
    pp = [0, 2]

    x_data, y_data = Dataset.load_from_files(DATASET_PATH)
    x_data_ml, y_data_ml = Dataset.load_from_files_multilabel(
        path.join(DATASET_MULTILABEL_PATH, "train/docs.txt"),
        path.join(DATASET_MULTILABEL_PATH, "train/labels.txt"),
        sep_label=",", sep_doc="\n>>>>>\n"
    )

    clf = SS3()
    clf.set_model_path("tests")
    clf_ml = SS3(name="multilabel")
    clf_ml.set_model_path("tests")

    # --- no classifier assigned case ---
    Evaluation.clear_cache()
    with pytest.raises(ValueError):
        Evaluation.get_best_hyperparameters()
    with pytest.raises(ValueError):
        Evaluation.remove()
    with pytest.raises(ValueError):
        Evaluation.show_best()
    with pytest.raises(ValueError):
        Evaluation.plot(TMP_FOLDER)

    # --- not-yet-trained model case ---
    Evaluation.set_classifier(clf)
    Evaluation.clear_cache()
    Evaluation.remove()
    Evaluation.show_best()
    assert Evaluation.plot(TMP_FOLDER) is False
    with pytest.raises(pyss3.EmptyModelError):
        Evaluation.test(clf, x_data, y_data)
    with pytest.raises(pyss3.EmptyModelError):
        kfold_validation(clf, x_data, y_data)
    with pytest.raises(pyss3.EmptyModelError):
        Evaluation.grid_search(clf, x_data, y_data)
    with pytest.raises(LookupError):
        Evaluation.get_best_hyperparameters()

    # --- default argument values ---
    clf.train(x_data, y_data)
    clf_ml.train(x_data_ml, y_data_ml)
    assert Evaluation.test(clf, x_data, y_data, plot=PY3) == 1
    assert Evaluation.test(clf, ['bla bla bla'], ['pos'], plot=PY3) == 0
    assert Evaluation.test(clf, ['bla bla bla', "I love this love movie!"],
                           ['pos', 'pos'], plot=PY3) == 0.5
    assert kfold_validation(clf_ml, x_data_ml, y_data_ml, plot=PY3) > 0
    assert kfold_validation(clf, x_data, y_data, plot=PY3) > 0

    # FIX: the original assigned ``clf.get_hyperparameters()`` twice in a row
    # (copy-paste duplicate); the redundant second assignment was removed.
    s, l, p, a = clf.get_hyperparameters()
    s0, l0, p0, a0 = Evaluation.grid_search(clf, x_data, y_data)
    s1, l1, p1, a1 = Evaluation.get_best_hyperparameters()
    s2, l2, p2, a2 = Evaluation.get_best_hyperparameters("recall")
    # Force a cache-tag miss so get_best_hyperparameters takes the lookup path.
    Evaluation.__last_eval_tag__ = None
    s1, l1, p1, a1 = Evaluation.get_best_hyperparameters()
    assert s0 == s and l0 == l and p0 == p and a0 == a
    assert s0 == s1 and l0 == l1 and p0 == p1 and a0 == a1
    assert s0 == s2 and l0 == l2 and p0 == p2 and a0 == a2
    assert Evaluation.plot(TMP_FOLDER) is True
    Evaluation.remove()
    Evaluation.show_best()
    assert Evaluation.plot(TMP_FOLDER) is False

    # --- test ---
    # OK
    assert Evaluation.test(clf_ml, x_data_ml, y_data_ml, plot=PY3) == .3125
    assert Evaluation.test(clf_ml, x_data_ml, y_data_ml,
                           metric='exact-match', plot=PY3) == .3
    assert Evaluation.test(clf, x_data, y_data, def_cat='unknown', plot=PY3) == 1
    assert Evaluation.test(clf, x_data, y_data, def_cat='neg', plot=PY3) == 1
    assert Evaluation.test(clf, x_data, y_data, metric="f1-score", plot=PY3) == 1
    assert Evaluation.test(clf, x_data, y_data, plot=PY3,
                           metric="recall", metric_target="weighted avg") == 1
    assert Evaluation.test(clf, x_data, y_data, plot=PY3,
                           metric="recall", metric_target="neg") == 1
    # Not OK
    with pytest.raises(InvalidCategoryError):
        Evaluation.test(clf, x_data, y_data, def_cat='xxx', plot=PY3)
    with pytest.raises(KeyError):
        Evaluation.test(clf, x_data, y_data, metric="xxx", plot=PY3)
    with pytest.raises(KeyError):
        Evaluation.test(clf, x_data, y_data, metric="recall",
                        metric_target="xxx", plot=PY3)
    with pytest.raises(ValueError):
        Evaluation.test(clf, x_data, y_data, metric='hamming-loss')

    # --- k-fold ---
    # OK
    assert kfold_validation(clf, x_data, y_data, n_grams=3, plot=PY3) > 0
    assert kfold_validation(clf, x_data, y_data, k=10, plot=PY3) > 0
    assert kfold_validation(clf, x_data, y_data, k=10,
                            def_cat='unknown', plot=PY3) > 0
    assert kfold_validation(clf, x_data, y_data, k=10,
                            def_cat='neg', plot=PY3) > 0
    assert kfold_validation(clf, x_data, y_data, metric="f1-score", plot=PY3) > 0
    assert kfold_validation(clf, x_data, y_data, plot=PY3,
                            metric="recall", metric_target="weighted avg") > 0
    assert kfold_validation(clf, x_data, y_data, plot=PY3,
                            metric="recall", metric_target="neg") > 0
    # Not OK
    with pytest.raises(ValueError):
        kfold_validation(clf, x_data, y_data, n_grams=-1, plot=PY3)
    with pytest.raises(ValueError):
        kfold_validation(clf, x_data, y_data, n_grams=clf, plot=PY3)
    with pytest.raises(ValueError):
        kfold_validation(clf, x_data, y_data, k=-1, plot=PY3)
    with pytest.raises(ValueError):
        kfold_validation(clf, x_data, y_data, k=clf, plot=PY3)
    with pytest.raises(ValueError):
        kfold_validation(clf, x_data, y_data, k=None, plot=PY3)
    with pytest.raises(InvalidCategoryError):
        kfold_validation(clf, x_data, y_data, def_cat='xxx', plot=PY3)
    with pytest.raises(KeyError):
        kfold_validation(clf, x_data, y_data, metric="xxx", plot=PY3)
    with pytest.raises(KeyError):
        kfold_validation(clf, x_data, y_data, metric="recall",
                         metric_target="xxx", plot=PY3)

    # --- grid_search ---
    # OK (the multilabel calls exercise the multilabel path; only the
    # single-label results are kept for the equality checks that follow)
    s0, l0, p0, a0 = Evaluation.grid_search(clf, x_data, y_data, s=ss)
    s1, l1, p1, a1 = Evaluation.grid_search(clf_ml, x_data_ml, y_data_ml,
                                            s=ss, l=ll, p=pp)
    s1, l1, p1, a1 = Evaluation.grid_search(clf, x_data, y_data, s=ss, l=ll, p=pp)
    assert s0 == s1 and l0 == l1 and p0 == p1 and a0 == a1
    s0, l0, p0, a0 = Evaluation.grid_search(clf_ml, x_data_ml, y_data_ml, k_fold=4)
    s0, l0, p0, a0 = Evaluation.grid_search(clf, x_data, y_data, k_fold=4)
    s0, l0, p0, a0 = Evaluation.grid_search(clf, x_data, y_data,
                                            def_cat='unknown', p=pp)
    s1, l1, p1, a1 = Evaluation.grid_search(clf, x_data, y_data,
                                            def_cat='neg', p=pp)
    assert s0 == s1 and l0 == l1 and p0 == p1 and a0 == a1
    s0, l0, p0, a0 = Evaluation.grid_search(clf, x_data, y_data,
                                            metric="f1-score", p=pp)
    s1, l1, p1, a1 = Evaluation.grid_search(clf, x_data, y_data, p=pp,
                                            metric="recall",
                                            metric_target="weighted avg")
    s1, l1, p1, a1 = Evaluation.grid_search(clf, x_data, y_data, p=pp,
                                            metric="recall",
                                            metric_target="neg")
    assert s0 == s1 and l0 == l1 and p0 == p1 and a0 == a1
    # Not OK
    with pytest.raises(TypeError):
        Evaluation.grid_search(clf, x_data, y_data, s='asd')
    with pytest.raises(TypeError):
        Evaluation.grid_search(clf, x_data, y_data, s=clf)
    with pytest.raises(TypeError):
        Evaluation.grid_search(clf, x_data, y_data, k_fold=clf)
    with pytest.raises(TypeError):
        Evaluation.grid_search(clf, x_data, y_data, k_fold="xxx")
    with pytest.raises(InvalidCategoryError):
        Evaluation.grid_search(clf, x_data, y_data, def_cat='xxx')
    with pytest.raises(KeyError):
        Evaluation.grid_search(clf, x_data, y_data, metric="xxx")
    with pytest.raises(KeyError):
        Evaluation.grid_search(clf, x_data, y_data, metric="recall",
                               metric_target="xxx")

    # --- get_best_hyperparameters ---
    s1, l1, p1, a1 = Evaluation.get_best_hyperparameters()
    s2, l2, p2, a2 = Evaluation.get_best_hyperparameters("recall")
    s1, l1, p1, a1 = Evaluation.get_best_hyperparameters("recall", "weighted avg")
    s1, l1, p1, a1 = Evaluation.get_best_hyperparameters("recall", "pos")
    s1, l1, p1, a1 = Evaluation.get_best_hyperparameters(method="10-fold")
    s1, l1, p1, a1 = Evaluation.get_best_hyperparameters(method="10-fold",
                                                         def_cat="neg")
    s1, l1, p1, a1 = Evaluation.get_best_hyperparameters(method="10-fold",
                                                         def_cat="unknown")
    assert s0 == s1 and l0 == l1 and p0 == p1 and a0 == a1
    assert s0 == s2 and l0 == l2 and p0 == p2 and a0 == a2
    # Not OK
    with pytest.raises(KeyError):
        Evaluation.get_best_hyperparameters("xxx")
    with pytest.raises(KeyError):
        Evaluation.get_best_hyperparameters("recall", "xxx")
    with pytest.raises(LookupError):
        Evaluation.get_best_hyperparameters(method="xxx")
    with pytest.raises(LookupError):
        Evaluation.get_best_hyperparameters(def_cat="xxx")
    with pytest.raises(LookupError):
        Evaluation.get_best_hyperparameters(method="4-fold", def_cat="unknown")

    # --- plot OK ---
    assert Evaluation.plot(TMP_FOLDER) is True

    # --- remove ---
    # OK (first element of the returned tuple is the number of removed entries)
    assert Evaluation.remove(s, l, p, a)[0] == 10
    assert Evaluation.remove(def_cat="neg")[0] == 2
    assert Evaluation.remove(method="test")[0] == 12
    assert Evaluation.remove(s=-10)[0] == 0
    assert Evaluation.remove(def_cat="xxx")[0] == 0
    assert Evaluation.remove(method="xxx")[0] == 0
    assert Evaluation.remove()[0] == 1
    assert Evaluation.plot(TMP_FOLDER) is False  # plot not OK (no evaluations)
    # not OK
    with pytest.raises(TypeError):
        Evaluation.remove("xxx")
    with pytest.raises(TypeError):
        Evaluation.remove(clf)

    Evaluation.show_best()
    Evaluation.show_best(method="test")
    Evaluation.show_best(def_cat="unknown")
    Evaluation.show_best(metric="f1-score")
    Evaluation.show_best(metric="f1-score", avg="weighted avg")  # different tag

    # Clean up the model files written by set_model_path("tests") above.
    rmtree("./tests/ss3_models", ignore_errors=True)