def test_estimate_competence_batch(example_estimate_competence,
                                   create_pool_classifiers):
    """Check batch competence estimation with a mocked meta-classifier.

    The neighbour search, the meta-feature extraction and the
    meta-classifier's ``predict_proba`` are all replaced by mocks, so the
    test only verifies how ``estimate_competence_from_proba`` arranges the
    mocked class-1 probabilities (0.8) into the expected (3, 3) matrix.
    """
    _, y, nn, _, dsel_processed, dsel_scores = example_estimate_competence

    query = np.ones((3, 1))
    meta_test = METADES(pool_classifiers=create_pool_classifiers)
    meta_test.n_classifiers_ = 3
    n_meta_features = 21

    # Use an instance rather than the GaussianNB class itself: predict_proba
    # is replaced by a mock below, and assigning it on the class would leak
    # the mock into every other GaussianNB used in the same test session.
    meta_test.meta_classifier_ = GaussianNB()

    # Set the state of the system which is set by the fit method.
    meta_test.DSEL_processed_ = dsel_processed
    meta_test.dsel_scores_ = dsel_scores
    meta_test.DSEL_target_ = y

    meta_test._get_similar_out_profiles = MagicMock(
        return_value=(None, nn[:, 0:meta_test.Kp]))
    # Both mocks return 9 rows -- presumably one per (query, classifier)
    # pair (3 x 3); confirm against estimate_competence_from_proba.
    meta_test.compute_meta_features = MagicMock(
        return_value=np.ones((9, n_meta_features)))
    meta_test.meta_classifier_.predict_proba = MagicMock(
        return_value=np.tile([0.0, 0.8], (9, 1)))

    # Stack per-classifier outputs, then move the classifier axis to
    # position 1 via the (1, 0, 2) transpose.
    probabilities = np.array(
        [clf.predict_proba(query) for clf in meta_test.pool_classifiers]
    ).transpose((1, 0, 2))

    expected = np.ones((3, 3)) * 0.8
    competences = meta_test.estimate_competence_from_proba(
        query, nn, probabilities)
    assert np.array_equal(competences, expected)
def test_meta_classifier_not_none():
    """A user-supplied GaussianNB meta-classifier ends up fitted after fit."""
    features = np.random.rand(100, 2)
    labels = np.random.randint(0, 2, 100)

    meta = METADES(meta_classifier=GaussianNB())
    meta.fit(features, labels)

    fitted_meta_clf = meta.meta_classifier_
    # "classes_" only exists on the estimator once it has been fitted.
    check_is_fitted(fitted_meta_clf, "classes_")
    assert isinstance(fitted_meta_clf, GaussianNB)
def test_compute_meta_features():
    """Compare the meta-features of a one-classifier pool with example 1.

    The fitted state is written by hand from the ``*_ex1`` module-level
    arrays, restricted to the base classifier at index 0, and the result is
    checked against the hand-computed feature groups f1..f5.
    """
    query = np.ones((1, 2))
    pool = create_pool_classifiers()
    meta_test = METADES(pool_classifiers=[pool[0]])
    meta_test.n_classifiers_ = 1
    meta_test.k_ = 7
    meta_test.Kp_ = 5

    # Considering only one classifier in the pool (index = 0)
    meta_test.DSEL_processed_ = dsel_processed_ex1[:, 0].reshape(-1, 1)
    # 15 samples, 1 base classifier, 2 classes
    meta_test.dsel_scores_ = dsel_scores_ex1[:, 0, :].reshape(15, 1, 2)
    meta_test.DSEL_target_ = y_dsel_ex1
    meta_test.n_classes_ = 2

    neighbors = neighbors_ex1[0, :]
    neighbors_op = neighbors_ex1[2, 0:meta_test.Kp]

    # Expected values for each meta feature based on the data of ex1.
    expected_groups = [
        [1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0],  # f1
        [1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0],  # f2
        [4.0 / 7.0],                          # f3
        [0.0, 1.0, 1.0, 1.0, 0.0],            # f4
        [0.5],                                # f5
    ]

    score_shape = (query.shape[0],
                   meta_test.n_classifiers_,
                   meta_test.n_classes_)
    scores = np.empty(score_shape)
    for position, base_clf in enumerate(meta_test.pool_classifiers):
        scores[:, position, :] = base_clf.predict_proba(query)

    meta_features = meta_test.compute_meta_features(scores, neighbors,
                                                    neighbors_op)

    expected = np.concatenate(expected_groups).reshape(1, -1)
    assert np.array_equal(meta_features, expected)
def test_meta_classifier_not_predict_proba(create_pool_classifiers, model):
    """Building and fitting METADES with ``model`` must raise ValueError."""
    X = np.random.rand(10, 2)
    # First five samples labelled 0, remaining five labelled 1.
    y = np.repeat([0.0, 1.0], 5)

    # Construction stays inside the context so an error raised by either
    # the constructor or fit satisfies the test.
    with pytest.raises(ValueError):
        selector = METADES(create_pool_classifiers, model)
        selector.fit(X, y)
def test_parameter_gamma(selection_threshold, create_pool_classifiers):
    """An invalid ``selection_threshold`` raises ValueError or TypeError."""
    X = np.random.rand(10, 2)
    y = np.ones(10)

    accepted_errors = (ValueError, TypeError)
    # Construction stays inside the context so an error raised by either
    # the constructor or fit satisfies the test.
    with pytest.raises(accepted_errors):
        selector = METADES(create_pool_classifiers,
                           selection_threshold=selection_threshold)
        selector.fit(X, y)
def test_select():
    """``select`` on a 1-D competence vector returns a (1, 10) boolean mask."""
    selector = METADES()
    competence_levels = np.asarray(
        [0.8, 0.6, 0.7, 0.2, 0.3, 0.4, 0.6, 0.1, 1.0, 0.98])
    # Written directly as a single-row 2-D array.
    expected_mask = np.asarray(
        [[True, True, True, False, False, False, True, False, True, True]])

    selected_matrix = selector.select(competence_levels)

    assert np.array_equal(selected_matrix, expected_mask)
def test_sample_selection_working():
    """Agreement is 1 for all-ones rows and 1/3 for rows with ten zeros."""
    pool = create_pool_all_agree(0, 10) + create_pool_all_agree(1, 5)
    meta_test = METADES(pool)

    # 5 samples x 15 classifiers; rows 1, 3 and 4 get zeros in columns 5..14.
    hit_matrix = np.ones((5, 15))
    hit_matrix[[1, 3, 4], 5:] = 0
    meta_test.processed_dsel = hit_matrix

    one_third = 1 / 3
    expected = np.asarray([1, one_third, 1, one_third, one_third])

    agreement = meta_test._sample_selection_agreement()
    assert np.array_equal(agreement, expected)
def test_not_predict_proba():
    """A pool of fitted Perceptrons must be rejected with ValueError."""
    features, targets = X_dsel_ex1, y_dsel_ex1

    base_clf = Perceptron()
    base_clf.fit(features, targets)
    pool = [base_clf, base_clf]

    # Construction stays inside the context so an error raised by either
    # the constructor or fit satisfies the test.
    with pytest.raises(ValueError):
        selector = METADES(pool)
        selector.fit(features, targets)
def test_not_predict_proba(create_X_y):
    """A pool of fitted Perceptrons must be rejected with ValueError."""
    features, targets = create_X_y

    base_clf = Perceptron()
    base_clf.fit(features, targets)
    pool = [base_clf, base_clf]

    # Construction stays inside the context so an error raised by either
    # the constructor or fit satisfies the test.
    with pytest.raises(ValueError):
        selector = METADES(pool)
        selector.fit(features, targets)
def test_select_batch():
    """``select`` on ten identical competence rows returns ten equal masks."""
    selector = METADES()
    n_queries = 10

    competence_row = np.array(
        [0.8, 0.6, 0.7, 0.2, 0.3, 0.4, 0.6, 0.1, 1.0, 0.98])
    mask_row = [True, True, True, False, False,
                False, True, False, True, True]

    # Repeat the single row once per query.
    competences = np.tile(competence_row, (n_queries, 1))
    expected = np.tile(mask_row, (n_queries, 1))

    selected_matrix = selector.select(competences)
    assert np.array_equal(selected_matrix, expected)
def test_fitted_meta_classifier():
    """A meta-classifier taken from a fitted METADES is kept by a second one.

    The fitted ``meta_classifier_`` of the first model is passed as the
    ``meta_classifier`` of the second; after fitting both must compare equal.
    """
    features = np.random.rand(100, 2)
    labels = np.random.randint(0, 2, 100)

    first = METADES(meta_classifier=GaussianNB())
    first.fit(features, labels)

    second = METADES(meta_classifier=first.meta_classifier_)
    second.fit(features, labels)

    assert first.meta_classifier_ == second.meta_classifier_
def __init__(
    self,
    name: str,
    model_params: Dict[str, Any],
    classifier_paths: Iterable[Tuple[str, str]],
) -> None:
    """Run the parent initializer, then build the METADES selector.

    All three arguments are forwarded unchanged to the parent class.
    ``model_params`` is additionally expanded as keyword arguments of
    ``METADES``.
    """
    super().__init__(name, model_params, classifier_paths)
    # self.classifiers is read only after the parent initializer has run
    # (presumably that is where it is populated -- confirm in the base class).
    selector = METADES(self.classifiers, **model_params)
    self._selector = selector
def test_meta_no_pool_of_classifiers(knn_methods):
    """METADES built without a pool reaches the recorded reference accuracy.

    One seeded RandomState is shared by the train/test split and by METADES,
    in that order, so the sequence of random draws is fixed.
    """
    rng = np.random.RandomState(123456)
    dataset = load_breast_cancer()

    # split the data into training and test data
    X_train, X_test, y_train, y_test = train_test_split(
        dataset.data, dataset.target, test_size=0.33, random_state=rng)

    # Scale the variables to have 0 mean and unit variance
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    meta_des = METADES(knn_classifier=knn_methods,
                       random_state=rng,
                       DSEL_perc=0.5)
    meta_des.fit(X_train, y_train)

    accuracy = meta_des.score(X_test, y_test)
    assert np.isclose(accuracy, 0.9095744680851063)
def test_meta():
    """METADES with DFP enabled reaches one of two accepted accuracies."""
    pool_classifiers, X_dsel, y_dsel, X_test, y_test = setup_classifiers()

    meta_des = METADES(pool_classifiers, DFP=True)
    meta_des.fit(X_dsel, y_dsel)

    # Score the test set once and reuse the value; the original evaluated
    # the whole test set a second time whenever the first comparison failed.
    accuracy = meta_des.score(X_test, y_test)
    assert (np.isclose(accuracy, 0.9121212121212121)
            or np.isclose(accuracy, 0.8909090909090909))
def test_sample_selection_working():
    """Agreement is 1 for all-ones rows and 1/3 for rows with ten zeros."""
    n_samples, n_classifiers = 5, 15

    meta_test = METADES()
    meta_test.n_classifiers_ = n_classifiers

    # Rows 1, 3 and 4 get zeros in columns 5..14; all other entries stay 1.
    hit_matrix = np.ones((n_samples, n_classifiers))
    hit_matrix[[1, 3, 4], 5:] = 0
    meta_test.DSEL_processed_ = hit_matrix

    one_third = 1 / 3
    expected = np.asarray([1, one_third, 1, one_third, one_third])

    agreement = meta_test._sample_selection_agreement()
    assert np.array_equal(agreement, expected)
def test_sample_selection_working():
    """Agreement is 1 for all-ones rows and 1/3 for rows with ten zeros."""
    pool_classifiers = (create_pool_all_agree(0, 10)
                        + create_pool_all_agree(1, 5))

    meta_test = METADES(pool_classifiers=pool_classifiers)
    meta_test.n_classifiers_ = len(pool_classifiers)

    # 5 samples x 15 classifiers; rows 1, 3 and 4 get zeros in columns 5..14.
    hit_matrix = np.ones((5, 15))
    hit_matrix[[1, 3, 4], 5:] = 0
    meta_test.DSEL_processed_ = hit_matrix

    one_third = 1 / 3
    expected = np.asarray([1, one_third, 1, one_third, one_third])

    agreement = meta_test._sample_selection_agreement()
    assert np.array_equal(agreement, expected)
def test_compute_meta_features():
    """Compare the meta-features of base classifier 0 with example 1.

    The fitted state is written by hand from the ``*_ex1`` module-level
    arrays and the result is compared, as a plain list, with the
    hand-computed feature groups f1..f5.
    """
    query = np.atleast_2d([1, 2])
    pool = create_pool_classifiers()

    meta_test = METADES(pool)
    meta_test.processed_dsel = dsel_processed_ex1
    meta_test.dsel_scores = dsel_scores_ex1
    meta_test.DSEL_target = y_dsel_ex1
    meta_test.n_classes = 3

    neighbors = neighbors_ex1[0, :]
    neighbors_op = neighbors_ex1[2, 0:meta_test.Kp]

    # Expected values for each meta feature based on the data of ex1.
    expected_groups = (
        [1, 0, 1, 1, 1, 0, 0],                # f1
        [1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0],  # f2
        [4.0 / 7.0],                          # f3
        [0, 1, 1, 1, 0],                      # f4
        [0.5],                                # f5
    )
    expected = [value for group in expected_groups for value in group]

    meta_features = meta_test.compute_meta_features(
        query, neighbors, neighbors_op, pool[0], 0)

    assert meta_features == expected
def test_meta(knn_methods):
    """METADES reaches the recorded accuracy for the given kNN backend."""
    pool_classifiers, X_dsel, y_dsel, X_test, y_test = setup_classifiers()

    meta_des = METADES(pool_classifiers, knn_classifier=knn_methods)
    meta_des.fit(X_dsel, y_dsel)

    accuracy = meta_des.score(X_test, y_test)
    assert np.isclose(accuracy, 0.973404255319149)
def test_parameter_gamma(gamma):
    """Constructing METADES with this ``gamma`` raises Value/TypeError."""
    accepted_errors = (ValueError, TypeError)
    with pytest.raises(accepted_errors):
        pool = create_pool_classifiers()
        METADES(pool, gamma=gamma)
def test_parameter_Hc(Hc):
    """Constructing METADES with this ``Hc`` raises Value/TypeError."""
    accepted_errors = (ValueError, TypeError)
    with pytest.raises(accepted_errors):
        pool = create_pool_classifiers()
        METADES(pool, Hc=Hc)
def test_sample_selection():
    """The agreement value for sample 0 of example 1 equals 0.2."""
    pool = create_pool_all_agree(0, 10) + create_pool_all_agree(1, 5)
    meta_test = METADES(pool)
    meta_test.processed_dsel = dsel_processed_ex1

    agreement = meta_test._sample_selection_agreement(0)

    assert agreement == 0.2
def test_meta_classifier_not_predict_proba():
    """A Perceptron passed as meta-classifier raises ValueError."""
    with pytest.raises(ValueError):
        pool = create_pool_classifiers()
        meta_clf = Perceptron()
        METADES(pool, meta_clf)
def test_meta_classifier_is_none():
    """Constructing METADES with ``meta_classifier=None`` emits a warning."""
    with pytest.warns(Warning):
        pool = create_pool_classifiers()
        METADES(pool, meta_classifier=None)
def test_select_no_competent_classifiers():
    """With all competences at zero, ``select`` returns every index."""
    meta_test = METADES(create_pool_classifiers())
    pool_size = meta_test.n_classifiers

    all_zero = np.zeros(pool_size)
    chosen = meta_test.select(all_zero)

    every_index = list(range(pool_size))
    assert chosen == every_index
def test_select():
    """``select`` returns indices 0, 1, 2, 6, 8 and 9 for this vector."""
    meta_test = METADES(create_pool_classifiers())
    competence_levels = np.array(
        [0.8, 0.6, 0.7, 0.2, 0.3, 0.4, 0.6, 0.1, 1.0, 0.98])

    chosen = meta_test.select(competence_levels)

    # Order is irrelevant, so compare as sets.
    expected_indices = {0, 1, 2, 6, 8, 9}
    assert set(chosen) == expected_indices
def test_estimate_competence():
    """Check ``estimate_competence`` with mocked neighbours and predictions.

    The fitted state comes from the ``*_ex1`` module-level arrays. The
    output-profile search and the meta-classifier's ``predict_proba`` are
    mocked, and ``DFP_mask`` is set to [1, 0, 1]; the expected competences
    are [0.8, 0.0, 0.8].
    """
    query = np.atleast_2d([1, 1])
    meta_test = METADES(create_pool_classifiers())

    # Set the state of the system which is set by the fit method.
    meta_test.processed_dsel = dsel_processed_ex1
    meta_test.dsel_scores = dsel_scores_ex1
    meta_test.DSEL_target = y_dsel_ex1
    meta_test.n_classes = 3
    meta_test.neighbors = neighbors_ex1
    meta_test.distances = distances_ex1
    meta_test.DFP_mask = np.array([1, 0, 1])

    similar_profiles = [0, neighbors_ex1[2, 0:meta_test.Kp]]
    meta_test._get_similar_out_profiles = MagicMock(
        return_value=similar_profiles)

    # The instance must exist before its predict_proba can be replaced.
    meta_test.meta_classifier = GaussianNB()
    mocked_proba = np.array([[0.0, 0.8]])
    meta_test.meta_classifier.predict_proba = MagicMock(
        return_value=mocked_proba)

    competences = meta_test.estimate_competence(query)
    assert np.allclose(competences, [0.8, 0.0, 0.8])
def test_meta():
    """METADES with default settings reaches the recorded accuracy."""
    pool_classifiers, X_dsel, y_dsel, X_test, y_test = setup_classifiers()

    meta_des = METADES(pool_classifiers)
    meta_des.fit(X_dsel, y_dsel)

    accuracy = meta_des.score(X_test, y_test)
    assert np.isclose(accuracy, 0.796969696969697)
RF.fit(X_train, y_train)

# Carve the dynamic selection dataset (DSEL) out of the training portion
# only. Re-splitting the full (X, y) here would let held-out test samples
# land in the pool's training set or in DSEL and inflate the accuracies
# printed below.
# NOTE(review): assumes X_test / y_test were split off from X / y before
# this point -- confirm against the lines above this fragment.
X_train, X_dsel, y_train, y_dsel = train_test_split(X_train, y_train,
                                                    test_size=0.50)

# Training a random forest to be used as the pool of classifiers. We set the
# maximum depth of the tree so that it can estimate probabilities
pool_classifiers = RandomForestClassifier(n_estimators=10, max_depth=5)
pool_classifiers.fit(X_train, y_train)

# Initialize a DS technique. Here we specify the size of the region of
# competence (5 neighbors)
# NOTE(review): KNORAU is created without k, so it uses the library default
# rather than the 5 stated above -- confirm this is intended.
knorau = KNORAU(pool_classifiers)
kne = KNORAE(pool_classifiers, k=5)
desp = DESP(pool_classifiers, k=5)
ola = OLA(pool_classifiers, k=5)
mcb = MCB(pool_classifiers, k=5)
meta = METADES(pool_classifiers, k=5)

# Fit the DS techniques
knorau.fit(X_dsel, y_dsel)
kne.fit(X_dsel, y_dsel)
desp.fit(X_dsel, y_dsel)
meta.fit(X_dsel, y_dsel)
ola.fit(X_dsel, y_dsel)
mcb.fit(X_dsel, y_dsel)

# Calculate classification accuracy of each technique
print('Classification accuracy RF: ', RF.score(X_test, y_test))
print('Evaluating DS techniques:')
print('Classification accuracy KNORAU: ', knorau.score(X_test, y_test))
print('Classification accuracy KNORA-Eliminate: ',
      kne.score(X_test, y_test))
# Fragment of a longer if/elif chain that dispatches on the method name `m`.
# Each branch trains a model on Feature_train / Label_train and stores its
# predictions for Feature_test in Label_predict.
elif m == 'AdaBoost-DT':
    # AdaBoost (SAMME) over depth-2 decision trees.
    bdt = AdaBoostClassifier(DecisionTreeClassifier(max_depth=2, min_samples_split=20, min_samples_leaf=5), algorithm='SAMME', n_estimators=200, learning_rate=0.8)
    bdt.fit(Feature_train, Label_train.ravel())
    Label_predict = bdt.predict(Feature_test)
elif m == 'SMOTE-AdaBoost-DT':
    # Same booster as above, trained on the SMOTE-resampled training data.
    sm = SMOTE()
    # NOTE(review): newer imbalanced-learn releases name this method
    # fit_resample -- confirm against the installed version.
    Feature_train_o, Label_train_o = sm.fit_sample(Feature_train, Label_train.ravel())
    bdt = AdaBoostClassifier(DecisionTreeClassifier(max_depth=2, min_samples_split=20, min_samples_leaf=5), algorithm='SAMME', n_estimators=200, learning_rate=0.8)
    bdt.fit(Feature_train_o, Label_train_o)
    Label_predict = bdt.predict(Feature_test)
elif m == 'META-DES':
    # A 10-tree random forest serves as the pool of base classifiers.
    # NOTE(review): the selector is fitted on the same Feature_train the
    # pool was trained on; no separate selection set is held out here.
    pool_classifiers = RandomForestClassifier(n_estimators=10)
    pool_classifiers.fit(Feature_train, Label_train.ravel())
    metades = METADES(pool_classifiers)
    metades.fit(Feature_train, Label_train.ravel())
    Label_predict = metades.predict(Feature_test)
elif m == 'MCB':
    # Same pool construction as the META-DES branch, with MCB as selector.
    pool_classifiers = RandomForestClassifier(n_estimators=10)
    pool_classifiers.fit(Feature_train, Label_train.ravel())
    mcb = MCB(pool_classifiers)
    mcb.fit(Feature_train, Label_train.ravel())
    Label_predict = mcb.predict(Feature_test)
elif m == 'DES-MI':
    # Same pool construction as the META-DES branch, with DESMI as selector.
    pool_classifiers = RandomForestClassifier(n_estimators=10)
    pool_classifiers.fit(Feature_train, Label_train.ravel())
    dmi = DESMI(pool_classifiers)
    dmi.fit(Feature_train, Label_train.ravel())
    Label_predict = dmi.predict(Feature_test)
elif m == 'One_vs_Rest-SMOTE-XGBoost':
def test_estimate_competence(example_estimate_competence,
                             create_pool_classifiers):
    """Check single-query competence estimation with a mocked meta-classifier.

    The mocked ``predict_proba`` returns class-1 probabilities 0.8, 0.0 and
    0.8 for the three rows, which must come back as one row of competences.
    """
    _, y, nn, _, dsel_processed, dsel_scores = example_estimate_competence
    query = np.ones((1, 2))

    meta_test = METADES(create_pool_classifiers)
    meta_test.n_classifiers_ = 3
    meta_test.k_ = 7
    meta_test.Kp_ = 5
    meta_test.n_classes_ = 2

    # Set the state of the system which is set by the fit method.
    meta_test.DSEL_processed_ = dsel_processed
    meta_test.dsel_scores_ = dsel_scores
    meta_test.DSEL_target_ = y

    similar_profiles = (None, nn[0, 0:meta_test.Kp])
    meta_test._get_similar_out_profiles = MagicMock(
        return_value=similar_profiles)

    # The instance must exist before its predict_proba can be replaced.
    meta_test.meta_classifier_ = GaussianNB()
    mocked_proba = np.array([[0.2, 0.8], [1.0, 0.0], [0.2, 0.8]])
    meta_test.meta_classifier_.predict_proba = MagicMock(
        return_value=mocked_proba)

    # Stack per-classifier outputs, then move the classifier axis to
    # position 1 via the (1, 0, 2) transpose.
    per_classifier = [clf.predict_proba(query)
                      for clf in meta_test.pool_classifiers]
    probabilities = np.array(per_classifier).transpose((1, 0, 2))

    expected = np.array([[0.8, 0.0, 0.8]])
    competences = meta_test.estimate_competence_from_proba(
        query, nn[0, :], probabilities)
    assert np.array_equal(competences, expected)