def test_des_clustering():
    """Integration check of ``DESClustering.score`` against a stored value.

    The pool and the data splits come from ``setup_classifiers``. The
    technique receives a ``RandomState`` seeded with 123456, is fitted on
    the second and third returned arrays and scored on the last two.
    """
    pool, dsel_X, dsel_y, test_X, test_y = setup_classifiers()
    seeded_rng = np.random.RandomState(123456)

    model = DESClustering(pool, rng=seeded_rng)
    model.fit(dsel_X, dsel_y)

    accuracy = model.score(test_X, test_y)
    assert np.isclose(accuracy, 0.97872340425531912)
def test_diversity_metric_ratio():
    """``metric='ratio'`` must leave ``diversity_func_`` as ``ratio_errors``."""
    technique = DESClustering(create_pool_classifiers() * 10, metric='ratio')

    # The cluster preprocessing step is not what this test is about, so it
    # is replaced by a stub before calling ``fit``.
    technique._preprocess_clusters = MagicMock(return_value=1)
    technique.fit(X_dsel_ex1, y_dsel_ex1)

    assert technique.diversity_func_ == ratio_errors
def test_estimate_competence(create_pool_classifiers,
                             example_estimate_competence):
    """``estimate_competence`` must return the row of the predicted cluster.

    The clustering ``predict`` is mocked during ``fit`` to return
    ``return_cluster_index_ex2``. Afterwards the fitted ``clustering_`` is
    forced to answer cluster 0 and then cluster 1, and the competences for
    the query are compared with the matching row of
    ``performance_cluster_``.
    """
    X, y = example_estimate_competence[0:2]
    sample = np.atleast_2d([1, 1])

    technique = DESClustering(create_pool_classifiers * 2,
                              clustering=KMeans(n_clusters=2),
                              pct_accuracy=0.5,
                              pct_diversity=0.33)

    # Fix the cluster assignment used while fitting.
    technique.clustering.predict = MagicMock(
        return_value=return_cluster_index_ex2)
    technique.fit(X, y)

    for cluster_id in (0, 1):
        # Pretend the query falls into ``cluster_id``.
        technique.clustering_.predict = MagicMock(return_value=cluster_id)
        estimated = technique.estimate_competence(sample)
        assert np.array_equal(estimated,
                              technique.performance_cluster_[cluster_id, :])
def test_fit_heterogeneous_clusters(example_estimate_competence,
                                    create_pool_classifiers):
    """Check per-cluster scores and first selected index for mixed clusters.

    The clustering ``predict`` is mocked to return
    ``return_cluster_index_ex2`` before ``fit``. The test then compares
    columns 1 and 0 of ``performance_cluster_`` with reference values
    (tolerance 0.01) and checks the first selected index of each cluster.
    """
    X, y = example_estimate_competence[0:2]
    technique = DESClustering(create_pool_classifiers,
                              clustering=KMeans(n_clusters=2),
                              pct_accuracy=0.5,
                              pct_diversity=0.33)
    technique.clustering.predict = MagicMock(
        return_value=return_cluster_index_ex2)
    technique.fit(X, y)

    scores = technique.performance_cluster_
    assert np.isclose(scores[:, 1], [0.428, 0.375], atol=0.01).all()
    assert np.isclose(scores[:, 0], [0.572, 0.625], atol=0.01).all()

    # The first index selected in each cluster should belong to a
    # classifier that predicts the label 0, i.e. index 0 or index 2.
    assert technique.indices_[0, 0] in (0, 2)
    assert technique.indices_[1, 0] in (0, 2)
def test_diversity_metric_ratio(create_X_y):
    """``metric_diversity='ratio'`` must select ``ratio_errors`` on fit."""
    features, labels = create_X_y
    technique = DESClustering(metric_diversity='ratio')

    # Stub out the cluster preprocessing: its result is not needed to
    # check which diversity function was chosen.
    technique._preprocess_clusters = MagicMock(return_value=1)
    technique.fit(features, labels)

    assert technique.diversity_func_ == ratio_errors
def test_des_clustering():
    """Integration check of ``score`` with an explicit 5-cluster KMeans.

    The KMeans instance shares a ``RandomState`` seeded with 123456 and is
    handed to ``DESClustering`` through the ``clustering`` argument.
    """
    from sklearn.cluster import KMeans

    pool, dsel_X, dsel_y, test_X, test_y = setup_classifiers()
    seeded_rng = np.random.RandomState(123456)
    kmeans = KMeans(n_clusters=5, random_state=seeded_rng)

    model = DESClustering(pool, clustering=kmeans)
    model.fit(dsel_X, dsel_y)

    accuracy = model.score(test_X, test_y)
    assert np.isclose(accuracy, 0.97872340425531912)
def test_diversity_metric_DF(create_X_y):
    """``metric='DF'`` must select ``negative_double_fault`` on fit."""
    features, labels = create_X_y
    technique = DESClustering(metric='DF')

    # Stub out the cluster preprocessing: its result is not needed to
    # check which diversity function was chosen.
    technique._preprocess_clusters = MagicMock(return_value=1)
    technique.fit(features, labels)

    assert technique.diversity_func_ == negative_double_fault
def test_des_clustering_proba():
    """Compare ``predict_proba`` output with the stored reference array.

    The reference is loaded from a ``.npy`` file using a path relative to
    the current working directory.
    """
    pool, dsel_X, dsel_y, test_X, _ = setup_classifiers()
    seeded_rng = np.random.RandomState(123456)

    model = DESClustering(pool, rng=seeded_rng)
    model.fit(dsel_X, dsel_y)
    predicted = model.predict_proba(test_X)

    reference = np.load(
        'deslib/tests/expected_values/des_clustering_proba_integration.npy')
    assert np.allclose(predicted, reference)
def test_des_clustering_proba():
    """Compare ``predict_proba`` (5-cluster KMeans) with the stored array.

    The KMeans instance shares a ``RandomState`` seeded with 123456. The
    reference is loaded from a ``.npy`` file using a path relative to the
    current working directory.
    """
    from sklearn.cluster import KMeans

    pool, dsel_X, dsel_y, test_X, _ = setup_classifiers()
    seeded_rng = np.random.RandomState(123456)
    kmeans = KMeans(n_clusters=5, random_state=seeded_rng)

    model = DESClustering(pool, clustering=kmeans)
    model.fit(dsel_X, dsel_y)
    predicted = model.predict_proba(test_X)

    reference = np.load(
        'deslib/tests/expected_values/des_clustering_proba_integration.npy')
    assert np.allclose(predicted, reference)
def test_fit_homogeneous_clusters():
    """Check accuracies and selected indices with homogeneous clusters.

    The clustering ``fit_predict`` is mocked to return
    ``return_cluster_index_ex1``. After ``fit`` the test expects, in
    ``accuracy_cluster``:

    * row 0: column 1 equal to 0.0, columns 0 and 2 equal to 1.0;
    * row 1: column 1 equal to 1.0, columns 0 and 2 equal to 0.0;

    and every entry of ``indices[0, :]`` to be one of (0, 2, 3, 5).
    """
    clustering_test = DESClustering(create_pool_classifiers() * 2, k=2,
                                    pct_accuracy=0.5, pct_diversity=0.33)
    clustering_test.roc_algorithm.fit_predict = MagicMock(
        return_value=return_cluster_index_ex1)
    clustering_test.DFP_mask = np.ones(clustering_test.n_classifiers)
    clustering_test.fit(X_dsel_ex1, y_dsel_ex1)

    accuracy = clustering_test.accuracy_cluster
    # Compare element-wise *before* reducing with ``all``. ``arr.all()``
    # yields a bool, so ``arr.all() == 1.0`` only checks for non-zero
    # entries and ``arr.all() == 0.0`` passes as soon as one entry is zero.
    assert accuracy[0, 1] == 0.0
    assert (accuracy[0, [0, 2]] == 1.0).all()
    assert accuracy[1, 1] == 1.0
    assert (accuracy[1, [0, 2]] == 0.0).all()

    for idx in clustering_test.indices[0, :]:
        assert idx in (0, 2, 3, 5)
def test_fit_heterogeneous_clusters():
    """Check accuracies and first selected index with mixed clusters.

    The clustering ``fit_predict`` is mocked to return
    ``return_cluster_index_ex2``. Columns 1 and 0 of ``accuracy_cluster``
    are compared with reference values (tolerance 0.01) and the first
    selected index of each cluster is checked.
    """
    technique = DESClustering(create_pool_classifiers(),
                              k=2,
                              pct_accuracy=0.5,
                              pct_diversity=0.33)
    technique.roc_algorithm.fit_predict = MagicMock(
        return_value=return_cluster_index_ex2)
    technique.DFP_mask = np.ones(technique.n_classifiers)
    technique.fit(X_dsel_ex1, y_dsel_ex1)

    accuracy = technique.accuracy_cluster
    assert np.isclose(accuracy[:, 1], [0.428, 0.375], atol=0.01).all()
    assert np.isclose(accuracy[:, 0], [0.572, 0.625], atol=0.01).all()

    # The first index selected in each cluster should belong to a
    # classifier that predicts the class label 0, i.e. index 0 or 2.
    assert technique.indices[0, 0] in (0, 2)
    assert technique.indices[1, 0] in (0, 2)
def test_estimate_competence():
    """``estimate_competence`` must return the row of the predicted cluster.

    ``fit_predict`` is mocked to return ``return_cluster_index_ex2`` for
    fitting. ``predict`` is then forced to answer cluster 0 and cluster 1
    in turn, and the competences are compared with the matching row of
    ``accuracy_cluster``.
    """
    sample = np.atleast_2d([1, 1])
    technique = DESClustering(create_pool_classifiers(),
                              k=2,
                              pct_accuracy=0.5,
                              pct_diversity=0.33)
    technique.roc_algorithm.fit_predict = MagicMock(
        return_value=return_cluster_index_ex2)
    technique.DFP_mask = np.ones(technique.n_classifiers)
    technique.fit(X_dsel_ex1, y_dsel_ex1)

    for cluster_id in (0, 1):
        # Pretend the query falls into ``cluster_id``.
        technique.roc_algorithm.predict = MagicMock(return_value=cluster_id)
        estimated = technique.estimate_competence(sample)
        assert np.array_equal(estimated,
                              technique.accuracy_cluster[cluster_id, :])
def test_fit_clusters_less_diverse():
    """Check accuracies and selected indices with ``more_diverse=False``.

    The clustering ``predict`` is mocked to return
    ``return_cluster_index_ex1``. After ``fit`` the test expects, in
    ``accuracy_cluster_``:

    * row 0: column 1 equal to 0.0, columns 0 and 2 equal to 1.0;
    * row 1: column 1 equal to 1.0, columns 0 and 2 equal to 0.0;

    and every entry of ``indices_[0, :]`` to be one of (1, 3, 4, 5).
    """
    clustering_test = DESClustering(create_pool_classifiers() * 2,
                                    clustering=KMeans(n_clusters=2),
                                    pct_accuracy=0.5,
                                    pct_diversity=0.33,
                                    more_diverse=False)
    clustering_test.clustering.predict = MagicMock(
        return_value=return_cluster_index_ex1)
    clustering_test.fit(X_dsel_ex1, y_dsel_ex1)

    accuracy = clustering_test.accuracy_cluster_
    # Compare element-wise *before* reducing with ``all``. ``arr.all()``
    # yields a bool, so ``arr.all() == 1.0`` only checks for non-zero
    # entries and ``arr.all() == 0.0`` passes as soon as one entry is zero.
    assert accuracy[0, 1] == 0.0
    assert (accuracy[0, [0, 2]] == 1.0).all()
    assert accuracy[1, 1] == 1.0
    assert (accuracy[1, [0, 2]] == 0.0).all()

    for idx in clustering_test.indices_[0, :]:
        assert idx in (1, 3, 4, 5)
def test_fit_clusters_less_diverse(example_estimate_competence,
                                   create_pool_classifiers):
    """Check scores and selected indices with ``more_diverse=False``.

    The clustering ``predict`` is mocked to return the labels ``y``, so
    each sample's cluster index equals its label. After ``fit`` the test
    expects, in ``performance_cluster_``:

    * row 0: column 1 equal to 0.0, columns 0 and 2 equal to 1.0;
    * row 1: column 1 equal to 1.0, columns 0 and 2 equal to 0.0;

    and every entry of ``indices_[0, :]`` to be one of (1, 3, 4, 5).
    """
    clustering_test = DESClustering(create_pool_classifiers * 2,
                                    clustering=KMeans(n_clusters=2),
                                    pct_accuracy=0.5,
                                    pct_diversity=0.33,
                                    more_diverse=False)
    X, y = example_estimate_competence[0:2]
    clustering_test.clustering.predict = MagicMock(return_value=y)
    clustering_test.fit(X, y)

    performance = clustering_test.performance_cluster_
    # Compare element-wise *before* reducing with ``all``. ``arr.all()``
    # yields a bool, so ``arr.all() == 1.0`` only checks for non-zero
    # entries and ``arr.all() == 0.0`` passes as soon as one entry is zero.
    assert performance[0, 1] == 0.0
    assert (performance[0, [0, 2]] == 1.0).all()
    assert performance[1, 1] == 1.0
    assert (performance[1, [0, 2]] == 0.0).all()

    for idx in clustering_test.indices_[0, :]:
        assert idx in (1, 3, 4, 5)
def test_fit_homogeneous_clusters(create_pool_classifiers,
                                  example_estimate_competence):
    """Check accuracies and selected indices with homogeneous clusters.

    The clustering ``predict`` is mocked to return the labels ``y``, so
    each sample's cluster index equals its label. After ``fit`` the test
    expects, in ``accuracy_cluster_``:

    * row 0: column 1 equal to 0.0, columns 0 and 2 equal to 1.0;
    * row 1: column 1 equal to 1.0, columns 0 and 2 equal to 0.0;

    and every entry of ``indices_[0, :]`` to be one of (0, 2, 3, 5).
    """
    clustering_test = DESClustering(create_pool_classifiers * 2,
                                    clustering=KMeans(n_clusters=2),
                                    pct_accuracy=0.5,
                                    pct_diversity=0.33)
    X, y = example_estimate_competence[0:2]
    clustering_test.clustering.predict = MagicMock(return_value=y)
    clustering_test.fit(X, y)

    accuracy = clustering_test.accuracy_cluster_
    # Compare element-wise *before* reducing with ``all``. ``arr.all()``
    # yields a bool, so ``arr.all() == 1.0`` only checks for non-zero
    # entries and ``arr.all() == 0.0`` passes as soon as one entry is zero.
    assert accuracy[0, 1] == 0.0
    assert (accuracy[0, [0, 2]] == 1.0).all()
    assert accuracy[1, 1] == 1.0
    assert (accuracy[1, [0, 2]] == 0.0).all()

    for idx in clustering_test.indices_[0, :]:
        assert idx in (0, 2, 3, 5)
def test_fit_homogeneous_clusters():
    """Check accuracies and selected indices with homogeneous clusters.

    The clustering ``predict`` is mocked to return
    ``return_cluster_index_ex1``. After ``fit`` the test expects, in
    ``accuracy_cluster_``:

    * row 0: column 1 equal to 0.0, columns 0 and 2 equal to 1.0;
    * row 1: column 1 equal to 1.0, columns 0 and 2 equal to 0.0;

    and every entry of ``indices_[0, :]`` to be one of (0, 2, 3, 5).
    """
    clustering_test = DESClustering(create_pool_classifiers() * 2,
                                    clustering=KMeans(n_clusters=2),
                                    pct_accuracy=0.5,
                                    pct_diversity=0.33)
    clustering_test.clustering.predict = MagicMock(
        return_value=return_cluster_index_ex1)
    clustering_test.fit(X_dsel_ex1, y_dsel_ex1)

    accuracy = clustering_test.accuracy_cluster_
    # Compare element-wise *before* reducing with ``all``. ``arr.all()``
    # yields a bool, so ``arr.all() == 1.0`` only checks for non-zero
    # entries and ``arr.all() == 0.0`` passes as soon as one entry is zero.
    assert accuracy[0, 1] == 0.0
    assert (accuracy[0, [0, 2]] == 1.0).all()
    assert accuracy[1, 1] == 1.0
    assert (accuracy[1, [0, 2]] == 0.0).all()

    for idx in clustering_test.indices_[0, :]:
        assert idx in (0, 2, 3, 5)
def test_fit_heterogeneous_clusters():
    """Check accuracies and first selected index with mixed clusters.

    The clustering ``predict`` is mocked to return
    ``return_cluster_index_ex2`` before ``fit``. Columns 1 and 0 of
    ``accuracy_cluster_`` are compared with reference values (tolerance
    0.01) and the first selected index of each cluster is checked.
    """
    technique = DESClustering(create_pool_classifiers(),
                              clustering=KMeans(n_clusters=2),
                              pct_accuracy=0.5,
                              pct_diversity=0.33)
    technique.clustering.predict = MagicMock(
        return_value=return_cluster_index_ex2)
    technique.fit(X_dsel_ex1, y_dsel_ex1)

    accuracy = technique.accuracy_cluster_
    assert np.isclose(accuracy[:, 1], [0.428, 0.375], atol=0.01).all()
    assert np.isclose(accuracy[:, 0], [0.572, 0.625], atol=0.01).all()

    # The first index selected in each cluster should belong to a
    # classifier that predicts the label 0, i.e. index 0 or index 2.
    assert technique.indices_[0, 0] in (0, 2)
    assert technique.indices_[1, 0] in (0, 2)
def test_fit_clusters_less_diverse():
    """Check accuracies and selected indices with ``more_diverse=False``.

    The clustering ``fit_predict`` is mocked to return
    ``return_cluster_index_ex1``. After ``fit`` the test expects, in
    ``accuracy_cluster``:

    * row 0: column 1 equal to 0.0, columns 0 and 2 equal to 1.0;
    * row 1: column 1 equal to 1.0, columns 0 and 2 equal to 0.0;

    and every entry of ``indices[0, :]`` to be one of (1, 3, 5, 4).
    """
    clustering_test = DESClustering(create_pool_classifiers() * 2,
                                    k=2,
                                    pct_accuracy=1.0,
                                    pct_diversity=0.60,
                                    more_diverse=False)
    clustering_test.roc_algorithm.fit_predict = MagicMock(
        return_value=return_cluster_index_ex1)
    clustering_test.DFP_mask = np.ones(clustering_test.n_classifiers)
    clustering_test.fit(X_dsel_ex1, y_dsel_ex1)

    accuracy = clustering_test.accuracy_cluster
    # Compare element-wise *before* reducing with ``all``. ``arr.all()``
    # yields a bool, so ``arr.all() == 1.0`` only checks for non-zero
    # entries and ``arr.all() == 0.0`` passes as soon as one entry is zero.
    assert accuracy[0, 1] == 0.0
    assert (accuracy[0, [0, 2]] == 1.0).all()
    assert accuracy[1, 1] == 1.0
    assert (accuracy[1, [0, 2]] == 0.0).all()

    assert np.isin(clustering_test.indices[0, :],
                   np.array([1, 3, 5, 4])).all()
def test_not_clustering_algorithm(create_X_y):
    """``fit`` must raise ``ValueError`` when ``clustering`` is a Perceptron."""
    features, labels = create_X_y
    technique = DESClustering(clustering=Perceptron())

    with pytest.raises(ValueError):
        technique.fit(features, labels)
def test_J_higher_than_N(create_X_y):
    """``pct_accuracy=0.3`` with ``pct_diversity=0.5`` must raise ValueError.

    Both the construction and the ``fit`` call sit inside the
    ``pytest.raises`` block, so an error from either one satisfies the test.
    """
    features, labels = create_X_y

    with pytest.raises(ValueError):
        technique = DESClustering(pct_accuracy=0.3, pct_diversity=0.5)
        technique.fit(features, labels)
def test_J_N_values(create_X_y):
    """``pct_accuracy=0.5`` with ``pct_diversity=0`` must raise ValueError.

    Both the construction and the ``fit`` call sit inside the
    ``pytest.raises`` block, so an error from either one satisfies the test.
    """
    features, labels = create_X_y

    with pytest.raises(ValueError):
        technique = DESClustering(pct_accuracy=0.5, pct_diversity=0)
        technique.fit(features, labels)
def test_input_diversity_parameter(create_X_y):
    """``metric_diversity='abc'`` must raise ``ValueError``.

    Both the construction and the ``fit`` call sit inside the
    ``pytest.raises`` block, so an error from either one satisfies the test.
    """
    features, labels = create_X_y

    with pytest.raises(ValueError):
        technique = DESClustering(metric_diversity='abc')
        technique.fit(features, labels)
def test_not_clustering_algorithm():
    """``fit`` must raise ``ValueError`` when ``clustering`` is a Perceptron."""
    technique = DESClustering(create_pool_classifiers(),
                              clustering=Perceptron())

    with pytest.raises(ValueError):
        technique.fit(X_dsel_ex1, y_dsel_ex1)
def test_input_diversity_parameter():
    """``metric='abc'`` must raise ``ValueError``.

    Both the construction and the ``fit`` call sit inside the
    ``pytest.raises`` block, so an error from either one satisfies the test.
    """
    with pytest.raises(ValueError):
        technique = DESClustering(create_pool_classifiers() * 10,
                                  metric='abc')
        technique.fit(X_dsel_ex1, y_dsel_ex1)
def test_J_higher_than_N():
    """``pct_accuracy=0.3`` with ``pct_diversity=0.5`` must raise ValueError.

    Both the construction and the ``fit`` call sit inside the
    ``pytest.raises`` block, so an error from either one satisfies the test.
    """
    with pytest.raises(ValueError):
        technique = DESClustering(create_pool_classifiers() * 100,
                                  pct_accuracy=0.3,
                                  pct_diversity=0.5)
        technique.fit(X_dsel_ex1, y_dsel_ex1)