def test_estimate_competence_ratio_batch():
    """Check batch competence estimation with the 'ratio' metric.

    The same seven-sample neighborhood is repeated for each of the ten
    query rows. The test asserts accuracies of 2/7, 4/7 and 5/7 and
    diversity values within 0.01 of 2.166, 3.666 and 4.500.
    """
    n_samples = 10
    query = np.ones((n_samples, 2))

    x = np.array([0, 1, 2, 3, 4, 5, 6]).reshape(-1, 1)
    y = np.array([0, 0, 0, 0, 1, 1, 1])

    # One row of fixed predictions per base classifier.
    fixed_predictions = [
        [1, 0, 1, 0, 0, 0, 0],
        [1, 0, 0, 0, 1, 0, 0],
        [0, 0, 1, 0, 1, 1, 0],
    ]
    pool = [create_base_classifier(np.array(row))
            for row in fixed_predictions]

    target = DESKNN(pool, k=7, pct_accuracy=1, pct_diversity=1,
                    metric='ratio')
    target.fit(x, y)

    neighbors = np.tile([0, 1, 2, 3, 4, 5, 6], (n_samples, 1))
    competences, diversity = target.estimate_competence(query, neighbors)

    expected_competences = [2. / 7, 4. / 7, 5. / 7]
    expected_diversity = [2.166, 3.666, 4.500]
    assert np.allclose(competences, expected_competences)
    assert np.allclose(diversity, expected_diversity, atol=0.01)
def test_select_batch():
    """Select classifiers for a batch of identical competence rows.

    Ten base classifiers; five are kept by accuracy (N_ = 5) and then
    three of those by diversity (J_ = 3).

    accuracies (/10): 4 6 1 2 9 8 7 9 3 2  -> top five: 1, 4, 5, 6, 7
    diversity:        0 8 0 0 1 6 7 2 0 0  -> most diverse three: 1, 5, 6
    """
    n_samples = 10
    batch_reps = (n_samples, 1)

    pool = [create_base_classifier(1) for _ in range(10)]

    accuracy_row = [4, 6, 1, 2, 9, 8, 7, 9, 3, 2]
    diversity_row = [0, 8, 0, 0, 1, 6, 7, 2, 0, 0]
    accuracies = np.tile(accuracy_row, batch_reps) / 10.
    diversity = np.tile(diversity_row, batch_reps)

    target = DESKNN(pool, k=7, pct_accuracy=5. / 10,
                    pct_diversity=3. / 10)
    target.N_ = 5
    target.J_ = 3

    chosen = target.select(accuracies, diversity)
    expected = np.tile([1, 5, 6], batch_reps)

    # Compare the sets of distinct indices, ignoring order.
    assert np.array_equal(np.unique(chosen), np.unique(expected))
def test_estimate_competence():
    """Check accuracy and diversity estimates with the default metric.

    Correct labels: 0000111
    classifier 1:   1010000 (2/7 correct)
    classifier 2:   1000100 (4/7 correct)
    classifier 3:   0010110 (5/7 correct)

    The expected diversity is the negated sum of pairwise common errors
    (samples that both classifiers get wrong) divided by 7:
    clf1 x clf2: 3/7
    clf1 x clf3: 2/7
    clf2 x clf3: 1/7

    clf1 diversity = -(3 + 2)/7 = -5/7
    clf2 diversity = -(3 + 1)/7 = -4/7
    clf3 diversity = -(2 + 1)/7 = -3/7
    """
    query = np.ones((1, 2))

    x = np.array([0, 1, 2, 3, 4, 5, 6]).reshape(-1, 1)
    y = np.array([0, 0, 0, 0, 1, 1, 1])

    fixed_predictions = [
        [1, 0, 1, 0, 0, 0, 0],
        [1, 0, 0, 0, 1, 0, 0],
        [0, 0, 1, 0, 1, 1, 0],
    ]
    pool = [create_base_classifier(np.array(row))
            for row in fixed_predictions]

    target = DESKNN(pool, k=7, pct_accuracy=1, pct_diversity=1)
    target.fit(x, y)

    neighbors = np.array([[0, 1, 2, 3, 4, 5, 6]])
    competences, diversity = target.estimate_competence(query, neighbors)

    expected_competences = [2. / 7, 4. / 7, 5. / 7]
    expected_diversity = [-5. / 7, -4. / 7, -3. / 7]
    assert np.allclose(competences, expected_competences)
    assert np.allclose(diversity, expected_diversity)
def test_classify_instance():
    """Classify one query with ``select`` mocked to a fixed index list.

    The predicted label for the query ``[1, -1]`` is expected to be 0.
    """
    query = np.atleast_2d([1, -1])

    pool = create_pool_classifiers() * 4
    des_knn_test = DESKNN(pool, k=2)

    # Bypass the real selection step with a predetermined set of indices.
    selected = [0, 1, 2, 3, 5, 6, 7, 9]
    des_knn_test.select = MagicMock(return_value=selected)

    assert des_knn_test.classify_instance(query) == 0
def test_select_less_diverse():
    """Select the least diverse classifiers (``more_diverse=False``).

    Ten base classifiers; five are kept by accuracy (N_ = 5) and then
    the three least diverse of those (J_ = 3).

    accuracies (/10): 4 6 1 2 9 8 7 9 3 2  -> top five: 1, 4, 5, 6, 7
    diversity:        0 8 0 0 1 6 7 2 0 0  -> least diverse three: 4, 5, 7
    """
    pool = [create_base_classifier(1) for _ in range(10)]

    accuracy_row = [4, 6, 1, 2, 9, 8, 7, 9, 3, 2]
    diversity_row = [0, 8, 0, 0, 1, 6, 7, 2, 0, 0]
    accuracies = np.array([accuracy_row]) / 10.
    diversity = np.array([diversity_row])

    target = DESKNN(pool, k=7, pct_accuracy=5. / 10,
                    pct_diversity=3. / 10, more_diverse=False)
    target.N_ = 5
    target.J_ = 3

    chosen = target.select(accuracies, diversity)
    expected = np.array([[4, 5, 7]])

    # Compare the sets of distinct indices, ignoring order.
    assert np.array_equal(np.unique(chosen), np.unique(expected))
def test_desknn_proba():
    """Compare DFP-enabled ``predict_proba`` output with stored values."""
    pool_classifiers, X_dsel, y_dsel, X_test, _ = setup_classifiers()

    model = DESKNN(pool_classifiers, DFP=True)
    model.fit(X_dsel, y_dsel)
    actual = model.predict_proba(X_test)

    reference_path = 'deslib/tests/expected_values/desknn_probas_DFP.npy'
    reference = np.load(reference_path)
    assert np.allclose(actual, reference)
def test_J_higher_than_N():
    """Fitting with pct_accuracy=0.3 and pct_diversity=0.5 must raise.

    A ``ValueError`` is expected from building and fitting the model on
    a pool of 100 references to a single base classifier.
    """
    X = np.random.rand(10, 2)
    y = np.ones(10)
    settings = dict(pct_accuracy=0.3, pct_diversity=0.5)

    with pytest.raises(ValueError):
        model = DESKNN([create_base_classifier(1)] * 100, **settings)
        model.fit(X, y)
def test_desknn_proba(knn_methods):
    """Compare ``predict_proba`` output with stored integration values.

    ``knn_methods`` is forwarded to DESKNN as ``knn_classifier``.
    """
    pool_classifiers, X_dsel, y_dsel, X_test, _ = setup_classifiers()

    model = DESKNN(pool_classifiers, knn_classifier=knn_methods)
    model.fit(X_dsel, y_dsel)
    actual = model.predict_proba(X_test)

    reference = np.load(
        'deslib/tests/expected_values/desknn_proba_integration.npy')
    assert np.allclose(actual, reference)
def test_classify_instance():
    """Classify one query given precomputed base-classifier predictions.

    ``select`` is mocked to a fixed index list, and each pool member's
    prediction for the query is passed in explicitly. The predicted
    label is expected to be 0.
    """
    query = np.atleast_2d([1, -1])

    des_knn_test = DESKNN(create_pool_classifiers() * 4, k=2)
    des_knn_test.select = MagicMock(return_value=[0, 1, 2, 3, 5, 6, 7, 9])

    # First (and only) prediction of every classifier for the query.
    predictions = np.array(
        [clf.predict(query)[0] for clf in des_knn_test.pool_classifiers])

    predicted = des_knn_test.classify_instance(query,
                                               predictions=predictions)
    assert predicted == 0
def test_classify_with_ds_diff_sizes():
    """Mismatched query and prediction row counts must raise ValueError.

    The query has 10 rows while the predictions array has only 5.
    """
    query = np.ones((10, 2))
    predictions = np.ones((5, 3))

    fixed_predictions = [
        [1, 0, 1, 0, 0, 0, 0],
        [1, 0, 0, 0, 1, 0, 0],
        [0, 0, 1, 0, 1, 1, 0],
    ]
    pool = [create_base_classifier(np.array(row))
            for row in fixed_predictions]
    desknn_test = DESKNN(pool)

    with pytest.raises(ValueError):
        desknn_test.classify_with_ds(query, predictions)
def test_classify_with_ds_single_sample():
    """Run ``classify_with_ds`` on a single 1-D query.

    ``estimate_competence`` and ``select`` are mocked; with classifiers
    0 and 2 selected and predictions ``[0, 1, 0]`` the result is
    expected to be 0.
    """
    query = np.ones(2)
    predictions = np.array([0, 1, 0])

    fixed_predictions = [
        [1, 0, 1, 0, 0, 0, 0],
        [1, 0, 0, 0, 1, 0, 0],
        [0, 0, 1, 0, 1, 1, 0],
    ]
    pool = [create_base_classifier(np.array(row))
            for row in fixed_predictions]
    desknn_test = DESKNN(pool)

    # Replace the competence and selection steps with fixed outputs.
    mocked_competence = (np.ones(3), np.ones(3))
    desknn_test.estimate_competence = MagicMock(
        return_value=mocked_competence)
    desknn_test.select = MagicMock(return_value=np.array([[0, 2]]))

    outcome = desknn_test.classify_with_ds(query, predictions)
    assert np.allclose(outcome, 0)
def test_soft_voting_no_proba(create_X_y):
    """``voting='soft'`` with Perceptron base models must raise.

    A ``ValueError`` is expected when building and fitting DESKNN on a
    pool made of one fitted Perceptron listed twice.
    """
    from sklearn.linear_model import Perceptron

    X, y = create_X_y

    perceptron = Perceptron()
    perceptron.fit(X, y)

    with pytest.raises(ValueError):
        model = DESKNN([perceptron, perceptron], voting='soft')
        model.fit(X, y)
def test_select():
    """Select classifiers using a stubbed ``estimate_competence``.

    Ten base classifiers; pct_accuracy=5/10 and pct_diversity=3/10.

    accuracies (/10): 4 6 1 2 9 8 7 9 3 2  -> top five: 1, 4, 5, 6, 7
    diversity:        0 8 0 0 1 6 7 2 0 0  -> most diverse three: 1, 5, 6
    """
    pool = [create_base_classifier(1) for _ in range(10)]

    accuracies = np.array([4, 6, 1, 2, 9, 8, 7, 9, 3, 2]) / 10.
    diversity = np.array([0, 8, 0, 0, 1, 6, 7, 2, 0, 0])

    target = DESKNN(pool, k=7, pct_accuracy=5./10, pct_diversity=3./10)

    # Return the fixed vectors above regardless of the argument.
    target.estimate_competence = lambda x: (accuracies, diversity)

    chosen = target.select(2)
    assert set(chosen) == {1, 5, 6}
def test_estimate_competence_Q():
    """Check competence estimation with the 'Q' diversity metric.

    The region of competence is stubbed to the seven fitted samples.
    The test asserts accuracies of 2/7, 4/7 and 5/7 and diversity
    values of 2, 1.2 and 1.2.
    """
    query = np.ones((1, 2))

    x = np.array([0, 1, 2, 3, 4, 5, 6]).reshape(-1, 1)
    y = np.array([0, 0, 0, 0, 1, 1, 1])

    fixed_predictions = [
        [1, 0, 1, 0, 0, 0, 0],
        [1, 0, 0, 0, 1, 0, 0],
        [0, 0, 1, 0, 1, 1, 0],
    ]
    pool = [create_base_classifier(np.array(row))
            for row in fixed_predictions]

    target = DESKNN(pool, k=7, pct_accuracy=1, pct_diversity=1,
                    metric='Q')
    target.fit(x, y)

    target.DFP_mask = np.ones(target.n_classifiers)
    # Stub the neighborhood lookup with fixed neighbor indices.
    target._get_region_competence = lambda x: (
        None, np.array([[0, 1, 2, 3, 4, 5, 6]]))

    competences, diversity = target.estimate_competence(query)

    expected_competences = [2. / 7, 4. / 7, 5. / 7]
    assert np.allclose(competences, expected_competences)
    assert np.allclose(diversity, [2, 1.2, 1.2])
def test_estimate_competence_Q():
    """Check competence estimation with the 'Q' diversity metric.

    The neighbor indices are passed directly to
    ``estimate_competence``. The test asserts accuracies of 2/7, 4/7
    and 5/7 and diversity values of 2, 1.2 and 1.2.
    """
    x = np.array([0, 1, 2, 3, 4, 5, 6]).reshape(-1, 1)
    y = np.array([0, 0, 0, 0, 1, 1, 1])

    fixed_predictions = [
        [1, 0, 1, 0, 0, 0, 0],
        [1, 0, 0, 0, 1, 0, 0],
        [0, 0, 1, 0, 1, 1, 0],
    ]
    pool = [create_base_classifier(np.array(row))
            for row in fixed_predictions]

    target = DESKNN(pool, k=7, pct_accuracy=1, pct_diversity=1,
                    metric='Q')
    target.fit(x, y)

    neighbors = np.array([[0, 1, 2, 3, 4, 5, 6]])
    competences, diversity = target.estimate_competence(neighbors)

    expected_competences = [2. / 7, 4. / 7, 5. / 7]
    assert np.allclose(competences, expected_competences)
    assert np.allclose(diversity, [2, 1.2, 1.2])
def test_estimate_competence_ratio():
    """Check competence estimation with the 'Ratio' diversity metric.

    The region of competence is stubbed to the seven fitted samples.
    The test asserts accuracies of 2/7, 4/7 and 5/7 and diversity
    values within 0.01 of 2.166, 3.666 and 4.500.
    """
    x = np.array([0, 1, 2, 3, 4, 5, 6]).reshape(-1, 1)
    y = np.array([0, 0, 0, 0, 1, 1, 1])

    fixed_predictions = [
        [1, 0, 1, 0, 0, 0, 0],
        [1, 0, 0, 0, 1, 0, 0],
        [0, 0, 1, 0, 1, 1, 0],
    ]
    pool = [create_base_classifier(np.array(row))
            for row in fixed_predictions]

    target = DESKNN(pool, k=7, pct_accuracy=1, pct_diversity=1,
                    metric='Ratio')
    target.fit(x, y)

    target.DFP_mask = np.ones(target.n_classifiers)
    # Stub the neighborhood lookup with fixed neighbor indices.
    target._get_region_competence = lambda x: (None, [0, 1, 2, 3, 4, 5, 6])

    competences, diversity = target.estimate_competence(2)

    expected_competences = [2./7, 4./7, 5./7]
    expected_diversity = [2.166, 3.666, 4.500]
    assert np.allclose(competences, expected_competences)
    assert np.allclose(diversity, expected_diversity, atol=0.01)
def test_J_higher_than_N():
    """Constructing with pct_accuracy=0.3, pct_diversity=0.5 must raise.

    A ``ValueError`` is expected from the constructor call itself; the
    pool is 100 references to a single base classifier.
    """
    settings = dict(pct_accuracy=0.3, pct_diversity=0.5)

    with pytest.raises(ValueError):
        DESKNN([create_base_classifier(1)]*100, **settings)
def test_desknn(knn_methods):
    """Check the test-set score of DESKNN against a stored value.

    ``knn_methods`` is forwarded to DESKNN as ``knn_classifier``.
    """
    pool_classifiers, X_dsel, y_dsel, X_test, y_test = setup_classifiers()

    model = DESKNN(pool_classifiers, knn_classifier=knn_methods)
    model.fit(X_dsel, y_dsel)

    score = model.score(X_test, y_test)
    assert np.isclose(score, 0.97340425531914898)
def test_input_diversity_parameter():
    """Constructing DESKNN with ``metric='abc'`` must raise ValueError."""
    settings = dict(metric='abc')

    with pytest.raises(ValueError):
        DESKNN([create_base_classifier(1)]*100, **settings)
def test_predict_proba():
    """Fit DESKNN on a pool of Perceptron models.

    The test has no assertion: it passes as long as ``fit`` completes
    without raising. The pool is one fitted Perceptron listed three
    times.
    """
    X = np.random.randn(15, 5)
    y = np.array([0, 1, 0, 0, 0] * 3)

    perceptron = Perceptron()
    perceptron.fit(X, y)

    model = DESKNN([perceptron, perceptron, perceptron])
    model.fit(X, y)
def test_pect_zero():
    """Fitting with both percentages set to 0.0 must raise ValueError."""
    X = np.random.rand(10, 2)
    y = np.ones(10)
    settings = dict(pct_accuracy=0.0, pct_diversity=0.0)

    with pytest.raises(ValueError):
        model = DESKNN(**settings)
        model.fit(X, y)
def test_J_higher_than_N():
    """Fitting with pct_accuracy=0.3 and pct_diversity=0.5 must raise.

    No classifier pool is passed to the constructor.
    """
    X = np.random.rand(10, 2)
    y = np.ones(10)
    settings = dict(pct_accuracy=0.3, pct_diversity=0.5)

    with pytest.raises(ValueError):
        model = DESKNN(**settings)
        model.fit(X, y)
def test_input_diversity_parameter():
    """Fitting DESKNN with ``metric='abc'`` must raise ValueError.

    No classifier pool is passed to the constructor.
    """
    X = np.random.rand(10, 2)
    y = np.ones(10)
    settings = dict(metric='abc')

    with pytest.raises(ValueError):
        model = DESKNN(**settings)
        model.fit(X, y)
def test_desknn():
    """Check the test-set score of DFP-enabled DESKNN against a stored value."""
    pool_classifiers, X_dsel, y_dsel, X_test, y_test = setup_classifiers()

    model = DESKNN(pool_classifiers, DFP=True)
    model.fit(X_dsel, y_dsel)

    score = model.score(X_test, y_test)
    assert np.isclose(score, 0.89393939393939392)
def test_input_diversity_parameter():
    """Fitting DESKNN with ``metric='abc'`` must raise ValueError.

    The pool is 100 references to a single base classifier.
    """
    X = np.random.rand(10, 2)
    y = np.ones(10)
    settings = dict(metric='abc')

    with pytest.raises(ValueError):
        model = DESKNN([create_base_classifier(1)] * 100, **settings)
        model.fit(X, y)
def test_wrong_voting_value(voting, create_X_y, create_pool_classifiers):
    """Fitting with the supplied ``voting`` value must raise ValueError."""
    X, y = create_X_y
    pool = create_pool_classifiers

    with pytest.raises(ValueError):
        model = DESKNN(pool, voting=voting)
        model.fit(X, y)
def test_check_estimator():
    """Run ``check_estimator`` on a default-constructed DESKNN."""
    estimator = DESKNN()
    check_estimator(estimator)
def test_J_N_values():
    """Constructing with pct_accuracy=0.5, pct_diversity=0 must raise.

    A ``ValueError`` is expected from the constructor call itself; the
    pool is 100 references to a single base classifier.
    """
    settings = dict(pct_accuracy=0.5, pct_diversity=0)

    with pytest.raises(ValueError):
        DESKNN([create_base_classifier(1)]*100, **settings)