def test_predict_proba(example_static_selection):
    X, y, pool = example_static_selection
    expected = np.tile([0.52, 0.48], (y.size, 1))

    static_selection_test = StaticSelection(pool, 0.5)
    static_selection_test.fit(X, y)
    proba = static_selection_test.predict_proba(X)

    assert np.allclose(proba, expected)
def test_static_selection():
    pool_classifiers, X_dsel, y_dsel, X_test, y_test = setup_classifiers()

    static_selection = StaticSelection(pool_classifiers)
    static_selection.fit(X_dsel, y_dsel)

    assert np.isclose(static_selection.score(X_test, y_test),
                      0.90606060606060601)
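# Illustrative sketch (not part of the original suite): the tests above suggest
# StaticSelection follows the scikit-learn estimator API, taking an already
# fitted ensemble, keeping roughly the best `pct_classifiers` fraction of its
# members according to their score on the data passed to fit(), and exposing
# predict / predict_proba / score. A minimal end-to-end usage could look like
# the helper below; the dataset and BaggingClassifier pool are assumptions for
# illustration only, not fixtures used by these tests.
def _example_static_selection_usage():
    from sklearn.datasets import make_classification
    from sklearn.ensemble import BaggingClassifier
    from sklearn.model_selection import train_test_split

    X, y = make_classification(n_samples=300, random_state=0)
    X_train, X_sel, y_train, y_sel = train_test_split(X, y, random_state=0)

    # Pool of base classifiers, fitted on the training split.
    pool = BaggingClassifier(n_estimators=20, random_state=0).fit(X_train, y_train)

    # Keep the best half of the pool according to accuracy on the selection split.
    selector = StaticSelection(pool, pct_classifiers=0.5)
    selector.fit(X_sel, y_sel)

    return selector.score(X_sel, y_sel)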
def test_predict_diff(example_static_selection):
    X, y, pool = example_static_selection

    static_selection_test = StaticSelection(pool, 0.75)
    static_selection_test.fit(X, y)
    predicted_labels = static_selection_test.predict(X)

    assert np.equal(predicted_labels, 1).all()
def test_predict(example_static_selection, create_pool_classifiers):
    X, y, _ = example_static_selection

    static_selection_test = StaticSelection(create_pool_classifiers * 10, 0.25)
    static_selection_test.fit(X, y)
    predicted_labels = static_selection_test.predict(X)

    assert np.equal(predicted_labels, 0).all()
def test_fit(example_static_selection):
    X, y, pool = example_static_selection

    static_selection_test = StaticSelection(pool, 0.5)
    static_selection_test.fit(X, y)

    assert static_selection_test.n_classifiers_ensemble_ == 50
    assert static_selection_test.n_classifiers_ensemble_ == len(
        static_selection_test.clf_indices_)
    assert np.array_equal(np.sort(static_selection_test.clf_indices_),
                          list(range(50, 100)))
def test_label_encoder_base_ensemble():
    from sklearn.ensemble import RandomForestClassifier

    # Non-consecutive float labels ({0.0, 2.0}) exercise the internal label
    # encoding when the pool is a fitted sklearn ensemble.
    X, y = make_classification()
    y[y == 1] = 2
    y = y.astype(float)
    pool = RandomForestClassifier().fit(X, y)
    ss = StaticSelection(pool)
    ss.fit(X, y)
    pred = ss.predict(X)
    assert np.isin(ss.classes_, pred).all()
def test_different_scorer():
    X, y = make_classification(n_samples=100, random_state=42)
    X_val, y_val = make_classification(n_samples=25, random_state=123)
    pool = AdaBoostClassifier(n_estimators=10).fit(X, y)

    # Rank the base classifiers by log loss on the validation set
    # (lowest loss first).
    performances = []
    for clf in pool:
        preds = clf.predict_proba(X_val)
        performances.append(log_loss(y_val.ravel(), preds[:, -1]))
    id_best = np.argsort(performances)

    ss = StaticSelection(pool_classifiers=pool, scoring='neg_log_loss')
    ss.fit(X_val, y_val)

    assert (id_best[:ss.n_classifiers_ensemble_] == ss.clf_indices_).all()
def test_invalid_pct2():
    with pytest.raises(ValueError):
        test = StaticSelection(pct_classifiers=1.2)
        test.fit(np.random.rand(10, 2), np.ones(10))
def test_invalid_pct():
    with pytest.raises(TypeError):
        test = StaticSelection(pct_classifiers='something')
        test.fit(np.random.rand(10, 2), np.ones(10))