def test_des_clustering():
    """Integration check of DESClustering accuracy on the held-out split.

    Fits on the DSEL data returned by ``setup_classifiers`` with a seeded
    random state and compares the score against the stored reference value.
    """
    pool, dsel_X, dsel_y, test_X, test_y = setup_classifiers()
    model = DESClustering(pool, rng=np.random.RandomState(123456))
    model.fit(dsel_X, dsel_y)

    accuracy = model.score(test_X, test_y)
    assert np.isclose(accuracy, 0.97872340425531912)
def test_diversity_metric_ratio():
    """``metric='ratio'`` must resolve to ``ratio_errors`` after fitting."""
    des = DESClustering(create_pool_classifiers() * 10, metric='ratio')

    # Cluster preprocessing is irrelevant to this check, so it is stubbed out.
    des._preprocess_clusters = MagicMock(return_value=1)
    des.fit(X_dsel_ex1, y_dsel_ex1)

    chosen_metric = des.diversity_func_
    assert chosen_metric == ratio_errors
def test_classify_instance():
    """classify_instance returns label 0 for the stubbed selection."""
    sample = np.atleast_2d([1, -1])
    des = DESClustering(create_pool_classifiers() * 4, k=2)

    # Bypass the real selection step with a fixed list of classifier indices.
    des.select = MagicMock(return_value=[0, 1, 2, 3, 5, 6, 7, 9])

    label = des.classify_instance(sample)
    assert label == 0
def test_fit_heterogeneous_clusters(example_estimate_competence,
                                    create_pool_classifiers):
    """Check per-cluster performance and first selected index after fit.

    The cluster assignment is stubbed with ``return_cluster_index_ex2``.
    """
    des = DESClustering(create_pool_classifiers,
                        clustering=KMeans(n_clusters=2),
                        pct_accuracy=0.5,
                        pct_diversity=0.33)
    X, y = example_estimate_competence[0:2]

    des.clustering.predict = MagicMock(
        return_value=return_cluster_index_ex2)
    des.fit(X, y)

    # Index selected should be of any classifier that predicts the label 0
    performance = des.performance_cluster_
    assert np.isclose(performance[:, 1], [0.428, 0.375], atol=0.01).all()
    assert np.isclose(performance[:, 0], [0.572, 0.625], atol=0.01).all()
    assert des.indices_[0, 0] in (0, 2)
    assert des.indices_[1, 0] in (0, 2)
def test_classify_with_ds_single_sample():
    """classify_with_ds on one 1-D query yields 0 for the stubbed selection."""
    sample = np.ones(2)
    votes = np.array([0, 1, 0])
    des = DESClustering(create_pool_classifiers())

    # Fixed selection of classifiers 0 and 2, both of which vote 0 above.
    des.select = MagicMock(return_value=np.array([[0, 2]]))

    outcome = des.classify_with_ds(sample, votes)
    assert np.allclose(outcome, 0)
def test_classify_with_ds_diff_sizes():
    """classify_with_ds raises ValueError for 10 queries vs. 5 prediction rows."""
    samples = np.ones((10, 2))
    votes = np.ones((5, 3))
    des = DESClustering(create_pool_classifiers())

    with pytest.raises(ValueError):
        des.classify_with_ds(samples, votes)
def test_classify_with_ds_diff_sizes():
    """classify_with_ds raises ValueError for 10 queries vs. 5 prediction rows.

    Uses a DESClustering built with default arguments.
    """
    samples = np.ones((10, 2))
    votes = np.ones((5, 3))
    model = DESClustering()

    with pytest.raises(ValueError):
        model.classify_with_ds(samples, votes)
def test_diversity_metric_DF(create_X_y):
    """``metric='DF'`` must resolve to ``negative_double_fault`` after fit."""
    X, y = create_X_y
    des = DESClustering(metric='DF')

    # Cluster preprocessing is irrelevant to this check, so it is stubbed out.
    des._preprocess_clusters = MagicMock(return_value=1)
    des.fit(X, y)

    chosen_metric = des.diversity_func_
    assert chosen_metric == negative_double_fault
def test_des_clustering_proba():
    """Integration check of predict_proba against stored reference values.

    The reference array is loaded from a path relative to the current
    working directory.
    """
    pool, dsel_X, dsel_y, test_X, test_y = setup_classifiers()
    model = DESClustering(pool, rng=np.random.RandomState(123456))
    model.fit(dsel_X, dsel_y)

    predicted = model.predict_proba(test_X)
    reference = np.load('deslib/tests/expected_values/des_clustering_proba_integration.npy')
    assert np.allclose(predicted, reference)
def test_proba_with_ds_diff_sizes():
    """predict_proba_with_ds raises ValueError for 10 queries vs. 5 rows."""
    samples = np.ones((10, 2))
    votes = np.ones((5, 3))
    scores = np.ones((5, 3, 2))
    model = DESClustering()

    with pytest.raises(ValueError):
        model.predict_proba_with_ds(samples, votes, scores)
def test_des_clustering():
    """Integration check of DESClustering accuracy with a seeded KMeans.

    Fits on the DSEL data returned by ``setup_classifiers`` and compares the
    score on the test split against the stored reference value.
    """
    from sklearn.cluster import KMeans

    pool, dsel_X, dsel_y, test_X, test_y = setup_classifiers()
    kmeans = KMeans(n_clusters=5,
                    random_state=np.random.RandomState(123456))
    model = DESClustering(pool, clustering=kmeans)
    model.fit(dsel_X, dsel_y)

    accuracy = model.score(test_X, test_y)
    assert np.isclose(accuracy, 0.97872340425531912)
def test_select():
    """select returns the row of ``indices`` for the predicted cluster."""
    sample = np.atleast_2d([1, -1])
    des = DESClustering(create_pool_classifiers() * 2, k=2)
    des.indices = np.array([[0, 2], [1, 4]])

    # Force the cluster prediction to 0, then 1, and check the matching row.
    for cluster, expected_row in ((0, [0, 2]), (1, [1, 4])):
        des.roc_algorithm.predict = MagicMock(return_value=[cluster])
        assert np.array_equal(des.select(sample), expected_row)
def test_diversity_metric_ratio(create_X_y):
    """``metric_diversity='ratio'`` must resolve to ``ratio_errors``."""
    X, y = create_X_y
    des = DESClustering(metric_diversity='ratio')

    # Cluster preprocessing is irrelevant to this check, so it is stubbed out.
    des._preprocess_clusters = MagicMock(return_value=1)
    des.fit(X, y)

    chosen_metric = des.diversity_func_
    assert chosen_metric == ratio_errors
def test_classify_instance():
    """classify_instance returns label 0 given the pool's own predictions."""
    sample = np.atleast_2d([1, -1])
    des = DESClustering(create_pool_classifiers() * 4, k=2)

    # Bypass the real selection step with a fixed list of classifier indices.
    des.select = MagicMock(return_value=[0, 1, 2, 3, 5, 6, 7, 9])

    # One prediction per pool member for the single query sample.
    votes = [clf.predict(sample)[0] for clf in des.pool_classifiers]

    label = des.classify_instance(sample, np.array(votes))
    assert label == 0
def test_des_clustering_proba():
    """Integration check of predict_proba with a seeded KMeans.

    The reference array is loaded from a path relative to the current
    working directory.
    """
    from sklearn.cluster import KMeans

    pool, dsel_X, dsel_y, test_X, test_y = setup_classifiers()
    kmeans = KMeans(n_clusters=5,
                    random_state=np.random.RandomState(123456))
    model = DESClustering(pool, clustering=kmeans)
    model.fit(dsel_X, dsel_y)

    predicted = model.predict_proba(test_X)
    reference = np.load(
        'deslib/tests/expected_values/des_clustering_proba_integration.npy')
    assert np.allclose(predicted, reference)
def test_classify_instance(create_pool_classifiers):
    """classify_with_ds returns label 0 given the pool's own predictions."""
    sample = np.ones((1, 2))
    des = DESClustering(create_pool_classifiers * 4,
                        clustering=KMeans(n_clusters=2))

    # Bypass the real selection step with a fixed list of classifier indices.
    des.select = MagicMock(return_value=[0, 1, 2, 3, 5, 6, 7, 9])

    # One prediction per pool member for the single query sample.
    votes = [clf.predict(sample)[0] for clf in des.pool_classifiers]

    label = des.classify_with_ds(sample, np.array(votes))
    assert label == 0
def test_fit_homogeneous_clusters():
    """Check per-cluster accuracy and selected indices for pure clusters.

    The cluster assignment is stubbed with ``return_cluster_index_ex1``.
    Classifier 1 is expected to score 0.0 in cluster 0 and 1.0 in cluster 1,
    with classifiers 0 and 2 scoring the opposite.
    """
    clustering_test = DESClustering(create_pool_classifiers()*2,
                                    k=2,
                                    pct_accuracy=0.5,
                                    pct_diversity=0.33)
    clustering_test.roc_algorithm.fit_predict = MagicMock(
        return_value=return_cluster_index_ex1)
    clustering_test.DFP_mask = np.ones(clustering_test.n_classifiers)
    clustering_test.fit(X_dsel_ex1, y_dsel_ex1)

    accuracy = clustering_test.accuracy_cluster
    # Compare element-wise *before* reducing. The previous form,
    # ``arr.all() == 1.0``, only verified that entries were non-zero, and
    # ``arr.all() == 0.0`` passed as soon as any single entry was zero.
    assert accuracy[0, 1] == 0.0
    assert (accuracy[0, [0, 2]] == 1.0).all()
    assert accuracy[1, 1] == 1.0
    assert (accuracy[1, [0, 2]] == 0.0).all()

    for idx in clustering_test.indices[0, :]:
        assert idx in (0, 2, 3, 5)
def test_fit_heterogeneous_clusters():
    """Check per-cluster accuracy and first selected index after fit.

    The cluster assignment is stubbed with ``return_cluster_index_ex2``.
    """
    des = DESClustering(create_pool_classifiers(),
                        k=2,
                        pct_accuracy=0.5,
                        pct_diversity=0.33)
    des.roc_algorithm.fit_predict = MagicMock(
        return_value=return_cluster_index_ex2)
    des.DFP_mask = np.ones(des.n_classifiers)
    des.fit(X_dsel_ex1, y_dsel_ex1)

    # Index selected should be of any classifier that predicts the class
    # label 0
    accuracy = des.accuracy_cluster
    assert np.isclose(accuracy[:, 1], [0.428, 0.375], atol=0.01).all()
    assert np.isclose(accuracy[:, 0], [0.572, 0.625], atol=0.01).all()
    assert des.indices[0, 0] in (0, 2)
    assert des.indices[1, 0] in (0, 2)
def test_fit_clusters_less_diverse(example_estimate_competence,
                                   create_pool_classifiers):
    """Check performance and selected indices with ``more_diverse=False``.

    The cluster assignment is stubbed to return the labels ``y`` themselves.
    Classifier 1 is expected to score 0.0 in cluster 0 and 1.0 in cluster 1,
    with classifiers 0 and 2 scoring the opposite.
    """
    clustering_test = DESClustering(create_pool_classifiers * 2,
                                    clustering=KMeans(n_clusters=2),
                                    pct_accuracy=0.5,
                                    pct_diversity=0.33,
                                    more_diverse=False)
    X, y = example_estimate_competence[0:2]

    clustering_test.clustering.predict = MagicMock(return_value=y)
    clustering_test.fit(X, y)

    performance = clustering_test.performance_cluster_
    # Compare element-wise *before* reducing. The previous form,
    # ``arr.all() == 1.0``, only verified that entries were non-zero, and
    # ``arr.all() == 0.0`` passed as soon as any single entry was zero.
    assert performance[0, 1] == 0.0
    assert (performance[0, [0, 2]] == 1.0).all()
    assert performance[1, 1] == 1.0
    assert (performance[1, [0, 2]] == 0.0).all()

    for idx in clustering_test.indices_[0, :]:
        assert idx in (1, 3, 4, 5)
def test_fit_clusters_less_diverse():
    """Check accuracy and selected indices with ``more_diverse=False``.

    The cluster assignment is stubbed with ``return_cluster_index_ex1``.
    Classifier 1 is expected to score 0.0 in cluster 0 and 1.0 in cluster 1,
    with classifiers 0 and 2 scoring the opposite.
    """
    clustering_test = DESClustering(create_pool_classifiers() * 2,
                                    clustering=KMeans(n_clusters=2),
                                    pct_accuracy=0.5,
                                    pct_diversity=0.33,
                                    more_diverse=False)

    clustering_test.clustering.predict = MagicMock(
        return_value=return_cluster_index_ex1)
    clustering_test.fit(X_dsel_ex1, y_dsel_ex1)

    accuracy = clustering_test.accuracy_cluster_
    # Compare element-wise *before* reducing. The previous form,
    # ``arr.all() == 1.0``, only verified that entries were non-zero, and
    # ``arr.all() == 0.0`` passed as soon as any single entry was zero.
    assert accuracy[0, 1] == 0.0
    assert (accuracy[0, [0, 2]] == 1.0).all()
    assert accuracy[1, 1] == 1.0
    assert (accuracy[1, [0, 2]] == 0.0).all()

    for idx in clustering_test.indices_[0, :]:
        assert idx in (1, 3, 4, 5)
def test_fit_homogeneous_clusters(create_pool_classifiers,
                                  example_estimate_competence):
    """Check per-cluster accuracy and selected indices for pure clusters.

    The cluster assignment is stubbed to return the labels ``y`` themselves.
    Classifier 1 is expected to score 0.0 in cluster 0 and 1.0 in cluster 1,
    with classifiers 0 and 2 scoring the opposite.
    """
    clustering_test = DESClustering(create_pool_classifiers * 2,
                                    clustering=KMeans(n_clusters=2),
                                    pct_accuracy=0.5,
                                    pct_diversity=0.33)
    X, y = example_estimate_competence[0:2]

    clustering_test.clustering.predict = MagicMock(return_value=y)
    clustering_test.fit(X, y)

    accuracy = clustering_test.accuracy_cluster_
    # Compare element-wise *before* reducing. The previous form,
    # ``arr.all() == 1.0``, only verified that entries were non-zero, and
    # ``arr.all() == 0.0`` passed as soon as any single entry was zero.
    assert accuracy[0, 1] == 0.0
    assert (accuracy[0, [0, 2]] == 1.0).all()
    assert accuracy[1, 1] == 1.0
    assert (accuracy[1, [0, 2]] == 0.0).all()

    for idx in clustering_test.indices_[0, :]:
        assert idx in (0, 2, 3, 5)
def test_fit_homogeneous_clusters():
    """Check per-cluster accuracy and selected indices for pure clusters.

    The cluster assignment is stubbed with ``return_cluster_index_ex1``.
    Classifier 1 is expected to score 0.0 in cluster 0 and 1.0 in cluster 1,
    with classifiers 0 and 2 scoring the opposite.
    """
    clustering_test = DESClustering(create_pool_classifiers() * 2,
                                    clustering=KMeans(n_clusters=2),
                                    pct_accuracy=0.5,
                                    pct_diversity=0.33)

    clustering_test.clustering.predict = MagicMock(
        return_value=return_cluster_index_ex1)
    clustering_test.fit(X_dsel_ex1, y_dsel_ex1)

    accuracy = clustering_test.accuracy_cluster_
    # Compare element-wise *before* reducing. The previous form,
    # ``arr.all() == 1.0``, only verified that entries were non-zero, and
    # ``arr.all() == 0.0`` passed as soon as any single entry was zero.
    assert accuracy[0, 1] == 0.0
    assert (accuracy[0, [0, 2]] == 1.0).all()
    assert accuracy[1, 1] == 1.0
    assert (accuracy[1, [0, 2]] == 0.0).all()

    for idx in clustering_test.indices_[0, :]:
        assert idx in (0, 2, 3, 5)
def test_fit_heterogeneous_clusters():
    """Check per-cluster accuracy and first selected index after fit.

    The cluster assignment is stubbed with ``return_cluster_index_ex2``.
    """
    des = DESClustering(create_pool_classifiers(),
                        clustering=KMeans(n_clusters=2),
                        pct_accuracy=0.5,
                        pct_diversity=0.33)

    des.clustering.predict = MagicMock(
        return_value=return_cluster_index_ex2)
    des.fit(X_dsel_ex1, y_dsel_ex1)

    # Index selected should be of any classifier that predicts the label 0
    accuracy = des.accuracy_cluster_
    assert np.isclose(accuracy[:, 1], [0.428, 0.375], atol=0.01).all()
    assert np.isclose(accuracy[:, 0], [0.572, 0.625], atol=0.01).all()
    assert des.indices_[0, 0] in (0, 2)
    assert des.indices_[1, 0] in (0, 2)
def test_fit_clusters_less_diverse():
    """Check accuracy and selected indices with ``more_diverse=False``.

    The cluster assignment is stubbed with ``return_cluster_index_ex1``.
    Classifier 1 is expected to score 0.0 in cluster 0 and 1.0 in cluster 1,
    with classifiers 0 and 2 scoring the opposite.
    """
    clustering_test = DESClustering(create_pool_classifiers() * 2,
                                    k=2,
                                    pct_accuracy=1.0,
                                    pct_diversity=0.60,
                                    more_diverse=False)
    clustering_test.roc_algorithm.fit_predict = MagicMock(
        return_value=return_cluster_index_ex1)
    clustering_test.DFP_mask = np.ones(clustering_test.n_classifiers)
    clustering_test.fit(X_dsel_ex1, y_dsel_ex1)

    accuracy = clustering_test.accuracy_cluster
    # Compare element-wise *before* reducing. The previous form,
    # ``arr.all() == 1.0``, only verified that entries were non-zero, and
    # ``arr.all() == 0.0`` passed as soon as any single entry was zero.
    assert accuracy[0, 1] == 0.0
    assert (accuracy[0, [0, 2]] == 1.0).all()
    assert accuracy[1, 1] == 1.0
    assert (accuracy[1, [0, 2]] == 0.0).all()

    assert np.isin(clustering_test.indices[0, :],
                   np.array([1, 3, 5, 4])).all()
def test_estimate_competence(create_pool_classifiers,
                             example_estimate_competence):
    """estimate_competence returns the performance row of the query's cluster."""
    sample = np.atleast_2d([1, 1])
    des = DESClustering(create_pool_classifiers * 2,
                        clustering=KMeans(n_clusters=2),
                        pct_accuracy=0.5,
                        pct_diversity=0.33)
    X, y = example_estimate_competence[0:2]

    # Stub the cluster assignment on the estimator passed in before fitting.
    des.clustering.predict = MagicMock(
        return_value=return_cluster_index_ex2)
    des.fit(X, y)

    # After fit, force the predicted cluster to 0, then 1, via ``clustering_``
    # and compare against the matching row of ``performance_cluster_``.
    for cluster in (0, 1):
        des.clustering_.predict = MagicMock(return_value=cluster)
        competences = des.estimate_competence(sample)
        assert np.array_equal(competences,
                              des.performance_cluster_[cluster, :])
def test_select():
    """select returns the row of ``indices_`` for the predicted cluster."""
    sample = np.atleast_2d([1, -1])
    des = DESClustering()
    des.clustering_ = KMeans()
    des.indices_ = np.array([[0, 2], [1, 4]])

    # Force the cluster prediction to 0, then 1, and check the matching row
    # (returned as a 2-D array with a single row).
    for cluster, expected_rows in ((0, [[0, 2]]), (1, [[1, 4]])):
        des.clustering_.predict = MagicMock(return_value=[cluster])
        assert np.array_equal(des.select(sample), expected_rows)
def test_estimate_competence():
    """estimate_competence returns the accuracy row of the query's cluster."""
    sample = np.atleast_2d([1, 1])
    des = DESClustering(create_pool_classifiers(),
                        k=2,
                        pct_accuracy=0.5,
                        pct_diversity=0.33)

    # Stub the cluster assignment used while fitting.
    des.roc_algorithm.fit_predict = MagicMock(
        return_value=return_cluster_index_ex2)
    des.DFP_mask = np.ones(des.n_classifiers)
    des.fit(X_dsel_ex1, y_dsel_ex1)

    # Force the predicted cluster to 0, then 1, and compare against the
    # matching row of ``accuracy_cluster``.
    for cluster in (0, 1):
        des.roc_algorithm.predict = MagicMock(return_value=cluster)
        competences = des.estimate_competence(sample)
        assert np.array_equal(competences, des.accuracy_cluster[cluster, :])
def test_diversity_metric_ratio():
    """``metric='ratio'`` sets ``diversity_func`` to ``ratio_errors``."""
    des = DESClustering(create_pool_classifiers() * 10, metric='ratio')
    chosen_metric = des.diversity_func
    assert chosen_metric == ratio_errors
def test_diversity_metric_DF():
    """``metric='DF'`` sets ``diversity_func`` to ``negative_double_fault``."""
    des = DESClustering(create_pool_classifiers() * 10, metric='DF')
    chosen_metric = des.diversity_func
    assert chosen_metric == negative_double_fault
def test_diversity_metric_Q():
    """``metric='Q'`` sets ``diversity_func`` to ``Q_statistic``."""
    des = DESClustering(create_pool_classifiers() * 10, metric='Q')
    chosen_metric = des.diversity_func
    assert chosen_metric == Q_statistic