def test_parameter_gamma(selection_threshold, create_pool_classifiers):
    """Expect METADES to reject the supplied ``selection_threshold``.

    Both construction and fitting happen inside the ``pytest.raises``
    context, so a ``ValueError`` or ``TypeError`` raised by either step
    satisfies the test.
    """
    features = np.random.rand(10, 2)
    labels = np.ones(10)
    accepted_errors = (ValueError, TypeError)

    with pytest.raises(accepted_errors):
        METADES(
            create_pool_classifiers,
            selection_threshold=selection_threshold,
        ).fit(features, labels)
def test_meta_classifier_not_predict_proba(create_pool_classifiers, model):
    """Expect a ``ValueError`` when METADES is built with ``model`` and fitted.

    ``model`` is forwarded as the second positional argument of METADES
    (presumably the meta-classifier, going by the test name -- confirm
    against the METADES signature).
    """
    features = np.random.rand(10, 2)
    # Two-class target: the first five samples are 0, the remaining five 1.
    labels = np.ones(10)
    labels[0:5] = 0

    with pytest.raises(ValueError):
        METADES(create_pool_classifiers, model).fit(features, labels)
def test_meta_classifier_not_none():
    """Check that a user-supplied ``GaussianNB`` meta-classifier is fitted.

    After ``fit`` the ``meta_classifier_`` attribute must expose ``classes_``
    and must be a ``GaussianNB`` instance.
    """
    features = np.random.rand(100, 2)
    labels = np.random.randint(0, 2, 100)

    estimator = METADES(meta_classifier=GaussianNB())
    estimator.fit(features, labels)

    fitted_meta = estimator.meta_classifier_
    check_is_fitted(fitted_meta, "classes_")
    assert isinstance(fitted_meta, GaussianNB)
def test_meta():
    """Check the test accuracy of META-DES with ``DFP=True``.

    The pool and the DSEL/test splits come from ``setup_classifiers()``.
    Either of two reference accuracies is accepted.
    """
    pool_classifiers, X_dsel, y_dsel, X_test, y_test = setup_classifiers()

    meta_des = METADES(pool_classifiers, DFP=True)
    meta_des.fit(X_dsel, y_dsel)

    # Score exactly once so both reference values are compared against the
    # same evaluation instead of re-running prediction for the second check.
    score = meta_des.score(X_test, y_test)

    # NOTE(review): two values are accepted, presumably because the result
    # differs between environments -- confirm which one is still reachable.
    accepted_scores = (0.9121212121212121, 0.8909090909090909)
    assert any(np.isclose(score, reference) for reference in accepted_scores)
def test_not_predict_proba(create_X_y):
    """Expect a ``ValueError`` when the pool is made of fitted Perceptrons.

    The same fitted ``Perceptron`` instance is used twice to form the pool
    handed to METADES.
    """
    features, labels = create_X_y

    base = Perceptron()
    base.fit(features, labels)
    pool = [base, base]

    with pytest.raises(ValueError):
        METADES(pool).fit(features, labels)
def test_not_predict_proba():
    """Expect a ``ValueError`` when the pool is made of fitted Perceptrons.

    Uses the module-level ``X_dsel_ex1`` / ``y_dsel_ex1`` data both to train
    the Perceptron and to fit METADES.
    """
    features, labels = X_dsel_ex1, y_dsel_ex1

    base = Perceptron()
    base.fit(features, labels)
    pool = [base, base]

    with pytest.raises(ValueError):
        METADES(pool).fit(features, labels)
def test_fitted_meta_classifier():
    """Check that an already fitted meta-classifier survives a second fit.

    The meta-classifier produced by a first METADES is handed to a second
    one; after fitting, both must compare equal on ``meta_classifier_``.
    """
    features = np.random.rand(100, 2)
    labels = np.random.randint(0, 2, 100)

    first = METADES(meta_classifier=GaussianNB())
    first.fit(features, labels)

    second = METADES(meta_classifier=first.meta_classifier_)
    second.fit(features, labels)

    # NOTE(review): ``==`` falls back to identity unless the estimator class
    # defines ``__eq__`` -- confirm that identity is what is intended here.
    assert first.meta_classifier_ == second.meta_classifier_
def test_meta_no_pool_of_classifiers(knn_methods):
    """Check META-DES accuracy when no pool of classifiers is supplied.

    The breast cancer data is split 67/33, standardized with statistics
    learned on the training part, and METADES is fitted with
    ``knn_classifier=knn_methods`` and ``DSEL_perc=0.5``. One
    ``RandomState`` instance drives both the split and the estimator.
    """
    rng = np.random.RandomState(123456)
    dataset = load_breast_cancer()

    # Hold out a third of the samples for testing.
    X_train, X_test, y_train, y_test = train_test_split(
        dataset.data,
        dataset.target,
        test_size=0.33,
        random_state=rng,
    )

    # Zero mean / unit variance, using training statistics only.
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    meta_des = METADES(
        knn_classifier=knn_methods,
        random_state=rng,
        DSEL_perc=0.5,
    )
    meta_des.fit(X_train, y_train)

    accuracy = meta_des.score(X_test, y_test)
    assert np.isclose(accuracy, 0.9095744680851063)
def test_meta(knn_methods):
    """Check META-DES test accuracy with ``knn_classifier=knn_methods``.

    The pool and the DSEL/test splits come from ``setup_classifiers()``.
    """
    pool, dsel_X, dsel_y, test_X, test_y = setup_classifiers()

    meta_des = METADES(pool, knn_classifier=knn_methods)
    meta_des.fit(dsel_X, dsel_y)

    accuracy = meta_des.score(test_X, test_y)
    assert np.isclose(accuracy, 0.973404255319149)
def test_meta():
    """Check META-DES test accuracy with no extra constructor arguments.

    The pool and the DSEL/test splits come from ``setup_classifiers()``.
    """
    pool, dsel_X, dsel_y, test_X, test_y = setup_classifiers()

    meta_des = METADES(pool)
    meta_des.fit(dsel_X, dsel_y)

    accuracy = meta_des.score(test_X, test_y)
    assert np.isclose(accuracy, 0.796969696969697)
# can estimate probabilities
pool_classifiers = RandomForestClassifier(n_estimators=10, max_depth=5)
pool_classifiers.fit(X_train, y_train)

# Build the DS techniques on top of the random-forest pool. The region of
# competence is set to 5 neighbors wherever ``k`` is given; KNORAU is built
# without an explicit ``k``.
knorau = KNORAU(pool_classifiers)
kne = KNORAE(pool_classifiers, k=5)
desp = DESP(pool_classifiers, k=5)
ola = OLA(pool_classifiers, k=5)
mcb = MCB(pool_classifiers, k=5)
meta = METADES(pool_classifiers, k=5)

# Fit every technique on the dynamic selection set.
for ds_model in (knorau, kne, desp, meta, ola, mcb):
    ds_model.fit(X_dsel, y_dsel)

# Report the classification accuracy of each technique on the test set.
print('Classification accuracy RF: ', RF.score(X_test, y_test))
print('Evaluating DS techniques:')
ds_report = (
    ('Classification accuracy KNORAU: ', knorau),
    ('Classification accuracy KNORA-Eliminate: ', kne),
    ('Classification accuracy DESP: ', desp),
    ('Classification accuracy OLA: ', ola),
    ('Classification accuracy MCB: ', mcb),
    ('Classification accuracy META-DES: ', meta),
)
for ds_label, ds_model in ds_report:
    print(ds_label, ds_model.score(X_test, y_test))
def test_parameter_Hc(Hc, create_pool_classifiers):
    """Expect METADES to reject the supplied ``Hc`` value.

    Both construction and fitting happen inside the ``pytest.raises``
    context, so a ``ValueError`` or ``TypeError`` raised by either step
    satisfies the test.
    """
    features = np.random.rand(10, 2)
    labels = np.ones(10)
    accepted_errors = (ValueError, TypeError)

    with pytest.raises(accepted_errors):
        METADES(create_pool_classifiers, Hc=Hc).fit(features, labels)
def main():
    """Compare several dynamic selection (DS) techniques on one dataset.

    Loads the scikit-learn breast cancer data, standardizes it, trains a
    bagged pool of 100 Perceptrons, fits KNORA-U, KNORA-E, DES-P, OLA, MCB,
    A Priori and META-DES on a held-out DSEL split, and prints the test
    accuracy of every technique next to the plain Bagging baseline.
    """
    ###############################################################################
    # Preparing the dataset
    # ---------------------
    # In this part we load the breast cancer dataset from scikit-learn and
    # preprocess it in order to pass to the DS models. An important point here is
    # to normalize the data so that it has zero mean and unit variance, which is
    # a common requirement for many machine learning algorithms.
    # This step can be easily done using the StandardScaler class.

    rng = np.random.RandomState(123)
    data = load_breast_cancer()
    X = data.data
    y = data.target

    # split the data into training and test data
    X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                        test_size=0.33,
                                                        random_state=rng)

    # Scale the variables to have 0 mean and unit variance
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    # Split the data into training and DSEL for DS techniques
    X_train, X_dsel, y_train, y_dsel = train_test_split(X_train, y_train,
                                                        test_size=0.5,
                                                        random_state=rng)

    # Train a pool of 100 base classifiers
    pool_classifiers = BaggingClassifier(Perceptron(max_iter=10),
                                         n_estimators=100,
                                         random_state=rng)
    pool_classifiers.fit(X_train, y_train)

    # Initialize the DS techniques
    knorau = KNORAU(pool_classifiers)
    kne = KNORAE(pool_classifiers)
    desp = DESP(pool_classifiers)
    ola = OLA(pool_classifiers)
    mcb = MCB(pool_classifiers)

    ###############################################################################
    # Calibrating base classifiers
    # -----------------------------
    # Some dynamic selection techniques require that the base classifiers
    # estimate probabilities in order to estimate their competence level. Since
    # the Perceptron model is not a probabilistic classifier (it does not
    # implement the predict_proba method), it needs to be calibrated for
    # probability estimation before being used by such DS techniques. This step
    # can be conducted using the CalibratedClassifierCV class from scikit-learn.
    # Note that in this example we pass a prefitted pool of classifiers to the
    # calibration method in order to use exactly the same pool used in the other
    # DS methods.
    calibrated_pool = []
    for clf in pool_classifiers:
        # The estimator is passed positionally: scikit-learn renamed the
        # keyword from ``base_estimator`` to ``estimator``, and the first
        # positional slot is the estimator under both names.
        calibrated = CalibratedClassifierCV(clf, cv='prefit')
        calibrated.fit(X_dsel, y_dsel)
        calibrated_pool.append(calibrated)

    apriori = APriori(calibrated_pool)
    meta = METADES(calibrated_pool)

    knorau.fit(X_dsel, y_dsel)
    kne.fit(X_dsel, y_dsel)
    desp.fit(X_dsel, y_dsel)
    ola.fit(X_dsel, y_dsel)
    mcb.fit(X_dsel, y_dsel)
    apriori.fit(X_dsel, y_dsel)
    meta.fit(X_dsel, y_dsel)

    ###############################################################################
    # Evaluating the methods
    # -----------------------
    # Let's now evaluate the methods on the test set. We also use the performance
    # of Bagging (pool of classifiers without any selection) as a baseline
    # comparison. We can see that the majority of DS methods achieve higher
    # classification accuracy.

    print('Evaluating DS techniques:')
    print('Classification accuracy KNORA-Union: ',
          knorau.score(X_test, y_test))
    print('Classification accuracy KNORA-Eliminate: ',
          kne.score(X_test, y_test))
    print('Classification accuracy DESP: ', desp.score(X_test, y_test))
    print('Classification accuracy OLA: ', ola.score(X_test, y_test))
    print('Classification accuracy A priori: ',
          apriori.score(X_test, y_test))
    print('Classification accuracy MCB: ', mcb.score(X_test, y_test))
    print('Classification accuracy META-DES: ', meta.score(X_test, y_test))
    print('Classification accuracy Bagging: ',
          pool_classifiers.score(X_test, y_test))
def test_meta(knne, expected):
    """Check META-DES test accuracy with ``DFP=True`` for a ``knne`` setting.

    The pool and the DSEL/test splits come from ``setup_classifiers()``;
    ``expected`` is the reference accuracy for the given ``knne`` value.
    """
    pool, dsel_X, dsel_y, test_X, test_y = setup_classifiers()

    meta_des = METADES(pool, DFP=True, knne=knne)
    meta_des.fit(dsel_X, dsel_y)

    accuracy = meta_des.score(test_X, test_y)
    assert np.isclose(accuracy, expected)
pool_classifiers.fit(X_train, y_train)

# DS techniques without DFP
apriori = APriori(pool_classifiers)
aposteriori = APosteriori(pool_classifiers)
ola = OLA(pool_classifiers)
lca = LCA(pool_classifiers)
desp = DESP(pool_classifiers)
meta = METADES(pool_classifiers)

# Fit every technique on the dynamic selection set.
for ds_model in (apriori, aposteriori, ola, lca, desp, meta):
    ds_model.fit(X_dsel, y_dsel)

# Report the classification accuracy of each technique on the test set.
print('Evaluating DS techniques:')
ds_report = (
    ('Classification accuracy of OLA: ', ola),
    ('Classification accuracy of LCA: ', lca),
    ('Classification accuracy of A priori: ', apriori),
    ('Classification accuracy of A posteriori: ', aposteriori),
    ('Classification accuracy of DES-P: ', desp),
    ('Classification accuracy of META-DES: ', meta),
)
for ds_label, ds_model in ds_report:
    print(ds_label, ds_model.score(X_test, y_test))

# Testing fire: the same techniques rebuilt with DFP=True
fire_apriori = APriori(pool_classifiers, DFP=True)
fire_aposteriori = APosteriori(pool_classifiers, DFP=True)
fire_ola = OLA(pool_classifiers, DFP=True)
fire_lca = LCA(pool_classifiers, DFP=True)
bdt = AdaBoostClassifier(DecisionTreeClassifier(max_depth=2, min_samples_split=20, min_samples_leaf=5), algorithm='SAMME', n_estimators=200, learning_rate=0.8) bdt.fit(Feature_train, Label_train.ravel()) Label_predict = bdt.predict(Feature_test) elif m == 'SMOTE-AdaBoost-DT': sm = SMOTE() Feature_train_o, Label_train_o = sm.fit_sample(Feature_train, Label_train.ravel()) bdt = AdaBoostClassifier(DecisionTreeClassifier(max_depth=2, min_samples_split=20, min_samples_leaf=5), algorithm='SAMME', n_estimators=200, learning_rate=0.8) bdt.fit(Feature_train_o, Label_train_o) Label_predict = bdt.predict(Feature_test) elif m == 'META-DES': pool_classifiers = RandomForestClassifier(n_estimators=10) pool_classifiers.fit(Feature_train, Label_train.ravel()) metades = METADES(pool_classifiers) metades.fit(Feature_train, Label_train.ravel()) Label_predict = metades.predict(Feature_test) elif m == 'MCB': pool_classifiers = RandomForestClassifier(n_estimators=10) pool_classifiers.fit(Feature_train, Label_train.ravel()) mcb = MCB(pool_classifiers) mcb.fit(Feature_train, Label_train.ravel()) Label_predict = mcb.predict(Feature_test) elif m == 'DES-MI': pool_classifiers = RandomForestClassifier(n_estimators=10) pool_classifiers.fit(Feature_train, Label_train.ravel()) dmi = DESMI(pool_classifiers) dmi.fit(Feature_train, Label_train.ravel()) Label_predict = dmi.predict(Feature_test) elif m == 'One_vs_Rest-SMOTE-XGBoost': sm = SMOTE()
def test_meta_classifier_not_predict_proba(create_pool_classifiers):
    """Expect a ``ValueError`` when METADES is built with a ``Perceptron``.

    The ``Perceptron`` is forwarded as the second positional argument of
    METADES (presumably the meta-classifier, going by the test name --
    confirm against the METADES signature).
    """
    features = np.random.rand(10, 2)
    labels = np.ones(10)

    with pytest.raises(ValueError):
        METADES(create_pool_classifiers, Perceptron()).fit(features, labels)