def test_apriori():
    pool_classifiers, X_dsel, y_dsel, X_test, y_test = setup_classifiers()
    rng = np.random.RandomState(123456)
    apriori = APriori(pool_classifiers, random_state=rng)
    apriori.fit(X_dsel, y_dsel)
    assert np.isclose(apriori.score(X_test, y_test), 0.6878787878787879)
def test_apriori():
    pool_classifiers, X_dsel, y_dsel, X_test, y_test = setup_classifiers()
    rng = np.random.RandomState(123456)
    apriori = APriori(pool_classifiers, rng=rng, DFP=True)
    apriori.fit(X_dsel, y_dsel)
    assert np.isclose(apriori.score(X_test, y_test), 0.87272727272727268)
def test_apriori(knn_methods):
    pool_classifiers, X_dsel, y_dsel, X_test, y_test = setup_classifiers()
    rng = np.random.RandomState(123456)
    apriori = APriori(pool_classifiers, random_state=rng,
                      knn_classifier=knn_methods)
    apriori.fit(X_dsel, y_dsel)
    assert np.isclose(apriori.score(X_test, y_test), 0.97872340425531912)
def test_fit(create_pool_classifiers, create_X_y):
    X, y = create_X_y
    a_priori_test = APriori(create_pool_classifiers)
    a_priori_test.fit(X, y)
    expected = np.array([[0.5, 0.5], [1.0, 0.0], [0.33, 0.67]])
    expected = np.tile(expected, (15, 1, 1))
    assert np.array_equal(a_priori_test.dsel_scores_, expected)
def fit(self, x_sel, y_sel, P, k):
    """Instantiate and fit the chosen DS technique.

    :param x_sel: training data from the validation window
    :param y_sel: labels of the validation window
    :param P: pool of classifiers
    :param k: neighborhood size
    """
    # Choose the classifier selection technique
    if self.TYPE == 'knorae':
        DS = KNORAE(P, k)
    elif self.TYPE == 'knorau':
        DS = KNORAU(P, k)
    elif self.TYPE == 'ola':
        DS = OLA(P, k)
    elif self.TYPE == 'lca':
        DS = LCA(P, k)
    elif self.TYPE == 'posteriori':
        DS = APosteriori(P, k)
    elif self.TYPE == 'priori':
        DS = APriori(P, k)

    # Fit the chosen DS technique to find the competent classifiers
    self.DS = copy.deepcopy(DS)
    self.DS.fit(x_sel, y_sel)
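A note on the dispatch above: the same mapping can be kept in a lookup table, which avoids the silent fall-through when self.TYPE is misspelled (as written, DS would be unbound and the deepcopy would raise a NameError). A hedged sketch of that alternative; the _DS_TECHNIQUES name and the explicit ValueError are illustrative additions, not part of the original code:

# Hypothetical lookup-table version of the if/elif chain above.
_DS_TECHNIQUES = {
    'knorae': KNORAE,
    'knorau': KNORAU,
    'ola': OLA,
    'lca': LCA,
    'posteriori': APosteriori,
    'priori': APriori,
}

def fit(self, x_sel, y_sel, P, k):
    try:
        ds_class = _DS_TECHNIQUES[self.TYPE]
    except KeyError:
        raise ValueError('Unknown DS technique: %s' % self.TYPE)
    # Fit the chosen technique on the validation window
    self.DS = ds_class(P, k)
    self.DS.fit(x_sel, y_sel)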
def test_not_predict_proba(create_X_y):
    X, y = create_X_y
    clf1 = Perceptron()
    clf1.fit(X, y)
    with pytest.raises(ValueError):
        APriori([clf1, clf1]).fit(X, y)
def test_not_predict_proba():
    X = X_dsel_ex1
    y = y_dsel_ex1
    clf1 = Perceptron()
    clf1.fit(X, y)
    with pytest.raises(ValueError):
        APriori([clf1, clf1])
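As the calibration example further below does, a Perceptron can be wrapped in scikit-learn's CalibratedClassifierCV so that it gains a predict_proba method and is accepted by APriori. A minimal sketch, assuming X and y hold enough samples per class for the default cross-validated calibration:

from sklearn.calibration import CalibratedClassifierCV
from sklearn.linear_model import Perceptron

# Calibration adds the predict_proba method that APriori requires.
calibrated = CalibratedClassifierCV(Perceptron(max_iter=100))
calibrated.fit(X, y)
APriori([calibrated, calibrated]).fit(X, y)  # no ValueError now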
def test_estimate_competence_all_ones(index, expected):
    query = np.array([1, 1])
    a_priori_test = APriori(create_pool_classifiers())
    a_priori_test.processed_dsel = dsel_processed_ex1
    a_priori_test.dsel_scores = dsel_scores_all_ones
    a_priori_test.DSEL_target = y_dsel_ex1
    a_priori_test.n_classes = 2
    a_priori_test.neighbors = neighbors_ex1[index, :]
    a_priori_test.distances = distances_all_ones[index, :]
    a_priori_test.DFP_mask = [1, 1, 1]
    competences = a_priori_test.estimate_competence(query.reshape(1, -1))
    assert np.isclose(competences, expected).all()
def test_estimate_competence_kuncheva_ex():
    query = np.array([1, 1])
    a_priori_test = APriori([create_base_classifier(return_value=1)],
                            k=k_ex_kuncheva)
    a_priori_test.processed_dsel = dsel_processed_kuncheva
    a_priori_test.dsel_scores = dsel_scores_ex_kuncheva
    a_priori_test.DSEL_target = y_dsel_ex_kuncheva_independent
    a_priori_test.n_classes = n_classes_ex_kuncheva
    a_priori_test.neighbors = neighbors_ex_kuncheva
    a_priori_test.distances = distances_ex_kuncheva
    a_priori_test.DFP_mask = [1]
    competences = a_priori_test.estimate_competence(query.reshape(1, -1))
    assert np.isclose(competences, 0.70, atol=0.01)
def test_estimate_competence_batch():
    query = np.ones((3, 2))
    expected = np.array([[0.333333, 0.50000, 0.40000],
                         [0.666666, 0.50000, 0.60000],
                         [0.000000, 0.50000, 0.20000]])
    # Using 3 neighbors to facilitate the calculations
    a_priori_test = APriori(create_pool_classifiers(), 3)
    a_priori_test.processed_dsel = dsel_processed_ex1
    a_priori_test.dsel_scores = dsel_scores_ex1
    a_priori_test.DSEL_target = y_dsel_ex1
    a_priori_test.n_classes = 2
    a_priori_test.neighbors = neighbors_ex1[:, 0:3]
    a_priori_test.distances = distances_all_ones[:, 0:3]
    a_priori_test.DFP_mask = np.ones((3, 3))
    competences = a_priori_test.estimate_competence(query)
    assert np.allclose(competences, expected, atol=0.01)
def test_estimate_competence_kuncheva_ex(example_kuncheva):
    a_priori_test = APriori(k=example_kuncheva['k'])
    test_example = example_kuncheva
    a_priori_test.DSEL_processed_ = test_example['dsel_processed']
    a_priori_test.dsel_scores_ = test_example['dsel_scores']
    a_priori_test.DSEL_target_ = test_example['y_independent']
    a_priori_test.n_classes_ = test_example['n_classes']

    neighbors = test_example['neighbors'].reshape(1, -1)
    distances = test_example['distances'].reshape(1, -1)

    competences = a_priori_test.estimate_competence(neighbors, distances)
    assert np.isclose(competences, 0.70, atol=0.01)
def test_estimate_competence_all_ones(index, expected, example_all_ones):
    X, y, neighbors, distances, dsel_processed, dsel_scores = example_all_ones
    a_priori_test = APriori()
    a_priori_test.DSEL_processed_ = dsel_processed
    a_priori_test.dsel_scores_ = dsel_scores
    a_priori_test.DSEL_target_ = y
    a_priori_test.n_classes_ = 2
    neighbors = neighbors[index, :].reshape(1, -1)
    distances = distances[index, :].reshape(1, -1)
    competences = a_priori_test.estimate_competence(neighbors, distances)
    assert np.isclose(competences, expected).all()
def test_estimate_competence_batch(example_estimate_competence):
    _, y, nn, _, dsel_processed, dsel_scores = example_estimate_competence
    expected = np.array([[0.333333, 0.50000, 0.40000],
                         [0.666666, 0.50000, 0.60000],
                         [0.000000, 0.50000, 0.20000]])
    # Using 3 neighbors to facilitate the calculations
    a_priori_test = APriori(k=3)
    a_priori_test.DSEL_processed_ = dsel_processed
    a_priori_test.dsel_scores_ = dsel_scores
    a_priori_test.DSEL_target_ = y
    a_priori_test.n_classes_ = 2

    nn = nn[:, 0:3]
    distances = np.ones((3, 3))
    competences = a_priori_test.estimate_competence(nn, distances)
    assert np.allclose(competences, expected, atol=0.01)
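The expected values in these competence tests follow the classical A Priori rule: for each base classifier, average the probability it assigns to the true class of every neighbor, weighted by the inverse of the neighbor's distance to the query. A minimal NumPy sketch of that computation for a single classifier; the helper name and the epsilon guard against zero distances are illustrative, not part of DESlib's API:

import numpy as np

def a_priori_competence(probas, true_labels, distances):
    # probas: (K, n_classes) probabilities the classifier outputs
    #         for the K nearest neighbors of the query.
    # true_labels: (K,) integer class index of each neighbor.
    # distances: (K,) distance from the query to each neighbor.

    # Probability assigned to the *true* class of each neighbor.
    p_correct = probas[np.arange(probas.shape[0]), true_labels]

    # Inverse-distance weighting: closer neighbors count more.
    weights = 1.0 / (distances + 1e-12)

    return np.sum(p_correct * weights) / np.sum(weights)

With all distances set to one, as in the batch test above, the weights cancel and the estimate reduces to the plain average of the correct-class probabilities over the K neighbors.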
                                                    test_size=0.33,
                                                    random_state=rng)

# Split the data into training and DSEL for DS techniques
X_train, X_dsel, y_train, y_dsel = train_test_split(X_train, y_train,
                                                    test_size=0.5,
                                                    random_state=rng)

# Considering a pool composed of 10 base classifiers
pool_classifiers = RandomForestClassifier(n_estimators=10, random_state=rng,
                                          max_depth=10)
pool_classifiers.fit(X_train, y_train)

# DS techniques without DFP
apriori = APriori(pool_classifiers)
aposteriori = APosteriori(pool_classifiers)
ola = OLA(pool_classifiers)
lca = LCA(pool_classifiers)
desp = DESP(pool_classifiers)
meta = METADES(pool_classifiers)

apriori.fit(X_dsel, y_dsel)
aposteriori.fit(X_dsel, y_dsel)
ola.fit(X_dsel, y_dsel)
lca.fit(X_dsel, y_dsel)
desp.fit(X_dsel, y_dsel)
meta.fit(X_dsel, y_dsel)

print('Evaluating DS techniques:')
print('Classification accuracy of OLA: ', ola.score(X_test, y_test))
def test_check_estimator():
    check_estimator(APriori(selection_method='best'))
# Some dynamic selection techniques require that the base classifiers estimate
# probabilities in order to estimate their competence level. Since the
# Perceptron model is not a probabilistic classifier (it does not implement
# the predict_proba method), it needs to be calibrated for probability
# estimation before being used by such DS techniques. This step can be
# conducted using the CalibratedClassifierCV class from scikit-learn. Note
# that in this example we pass a prefitted pool of classifiers to the
# calibration method in order to use exactly the same pool used by the other
# DS methods.
calibrated_pool = []
for clf in pool_classifiers:
    calibrated = CalibratedClassifierCV(base_estimator=clf, cv='prefit')
    calibrated.fit(X_dsel, y_dsel)
    calibrated_pool.append(calibrated)

apriori = APriori(calibrated_pool, random_state=rng)
meta = METADES(calibrated_pool)

knorau.fit(X_dsel, y_dsel)
kne.fit(X_dsel, y_dsel)
desp.fit(X_dsel, y_dsel)
ola.fit(X_dsel, y_dsel)
mcb.fit(X_dsel, y_dsel)
apriori.fit(X_dsel, y_dsel)
meta.fit(X_dsel, y_dsel)

###############################################################################
# Evaluating the methods
# -----------------------
# Let's now evaluate the methods on the test set. We also use the performance
# of Bagging (pool of classifiers without any selection) as a baseline
def main():
    ###########################################################################
    # Preparing the dataset
    # ---------------------
    # In this part we load the breast cancer dataset from scikit-learn and
    # preprocess it in order to pass it to the DS models. An important point
    # here is to normalize the data so that it has zero mean and unit
    # variance, which is a common requirement for many machine learning
    # algorithms. This step can be easily done using the StandardScaler class.
    rng = np.random.RandomState(123)
    data = load_breast_cancer()
    X = data.data
    y = data.target

    # Split the data into training and test data
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33,
                                                        random_state=rng)

    # Scale the variables to have zero mean and unit variance
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    # Split the data into training and DSEL for DS techniques
    X_train, X_dsel, y_train, y_dsel = train_test_split(X_train, y_train,
                                                        test_size=0.5,
                                                        random_state=rng)

    # Train a pool of 100 base classifiers
    pool_classifiers = BaggingClassifier(Perceptron(max_iter=10),
                                         n_estimators=100, random_state=rng)
    pool_classifiers.fit(X_train, y_train)

    # Initialize the DS techniques
    knorau = KNORAU(pool_classifiers)
    kne = KNORAE(pool_classifiers)
    desp = DESP(pool_classifiers)
    ola = OLA(pool_classifiers)
    mcb = MCB(pool_classifiers)

    ###########################################################################
    # Calibrating base classifiers
    # -----------------------------
    # Some dynamic selection techniques require that the base classifiers
    # estimate probabilities in order to estimate their competence level.
    # Since the Perceptron model is not a probabilistic classifier (it does
    # not implement the predict_proba method), it needs to be calibrated for
    # probability estimation before being used by such DS techniques. This
    # step can be conducted using the CalibratedClassifierCV class from
    # scikit-learn. Note that in this example we pass a prefitted pool of
    # classifiers to the calibration method in order to use exactly the same
    # pool used by the other DS methods.
    calibrated_pool = []
    for clf in pool_classifiers:
        calibrated = CalibratedClassifierCV(base_estimator=clf, cv='prefit')
        calibrated.fit(X_dsel, y_dsel)
        calibrated_pool.append(calibrated)

    apriori = APriori(calibrated_pool)
    meta = METADES(calibrated_pool)

    knorau.fit(X_dsel, y_dsel)
    kne.fit(X_dsel, y_dsel)
    desp.fit(X_dsel, y_dsel)
    ola.fit(X_dsel, y_dsel)
    mcb.fit(X_dsel, y_dsel)
    apriori.fit(X_dsel, y_dsel)
    meta.fit(X_dsel, y_dsel)

    ###########################################################################
    # Evaluating the methods
    # -----------------------
    # Let's now evaluate the methods on the test set. We also use the
    # performance of Bagging (pool of classifiers without any selection) as a
    # baseline comparison. We can see that the majority of DS methods achieve
    # higher classification accuracy.
print('Evaluating DS techniques:')
print('Classification accuracy KNORA-Union: ', knorau.score(X_test, y_test))
print('Classification accuracy KNORA-Eliminate: ', kne.score(X_test, y_test))
print('Classification accuracy DESP: ', desp.score(X_test, y_test))
print('Classification accuracy OLA: ', ola.score(X_test, y_test))
print('Classification accuracy A priori: ', apriori.score(X_test, y_test))
print('Classification accuracy MCB: ', mcb.score(X_test, y_test))
print('Classification accuracy META-DES: ', meta.score(X_test, y_test))
print('Classification accuracy Bagging: ',
      pool_classifiers.score(X_test, y_test))
def test_fit():
    a_priori_test = APriori(create_pool_classifiers())
    a_priori_test.fit(X_dsel_ex1, y_dsel_ex1)
    expected = np.array([[0.5, 0.5], [1.0, 0.0], [0.33, 0.67]])
    expected = np.tile(expected, (15, 1, 1))
    assert np.array_equal(a_priori_test.dsel_scores, expected)
def _generate_local_pool(self, query):
    """ Local pool generation.

    This procedure populates the "pool_classifiers" based on the query
    sample's neighborhood. Thus, for each query sample, a different pool is
    created.

    In each iteration, the training samples near the query sample are
    singled out and a subpool is generated using the Self-Generating
    Hyperplanes (SGH) method. Then, the DCS technique selects the best
    classifier in the generated subpool and it is added to the local pool.

    In the following iteration, the neighborhood is increased and another
    SGH-generated subpool is obtained over the new neighborhood, and again
    the DCS technique singles out the best in it, which is then added to the
    local pool. This process is repeated until the pool reaches
    "n_classifiers".

    Parameters
    ----------
    query : array of shape = [n_features]
            The test sample.

    Returns
    -------
    self

    References
    ----------
    M. A. Souza, G. D. Cavalcanti, R. M. Cruz, R. Sabourin, On the
    characterization of the oracle for dynamic classifier selection, in:
    International Joint Conference on Neural Networks, IEEE, 2017,
    pp. 332-339.
    """
    n_samples, _ = self.DSEL_data.shape

    self.pool_classifiers = []

    n_err = 0
    max_err = 2 * self.n_classifiers

    curr_k = self.k

    # Classifier count
    n = 0

    while n < self.n_classifiers and n_err < max_err:
        subpool = SGH()

        included_samples = np.zeros((n_samples), int)

        if self.knne:
            idx_neighb = np.array([], dtype=int)

            # Obtain neighbors of each class individually
            for j in np.arange(0, self.n_classes):
                # Obtain neighbors from the classes in the RoC
                if np.any(self.classes[j] == self.DSEL_target[
                        self.neighbors[0][np.arange(0, curr_k)]]):
                    nc = np.where(self.classes[j] == self.DSEL_target[
                        self.neighbors[0]])
                    idx_nc = self.neighbors[0][nc]
                    idx_nc = idx_nc[np.arange(
                        0, np.minimum(curr_k, len(idx_nc)))]
                    idx_neighb = np.concatenate((idx_neighb, idx_nc),
                                                axis=0)
        else:
            idx_neighb = np.asarray(self.neighbors)[0][np.arange(0, curr_k)]

        # Indicate participating instances in the training of the subpool
        included_samples[idx_neighb] = 1
        curr_classes = np.unique(self.DSEL_target[idx_neighb])

        # If there is more than one class in the local region
        if len(curr_classes) > 1:
            # Obtain SGH pool
            subpool.fit(self.DSEL_data, self.DSEL_target, included_samples)

            # Adjust chosen DCS technique parameters
            if self.ds_tech == 'ola':
                ds = OLA(subpool, k=len(idx_neighb))  # change for self.k
            elif self.ds_tech == 'lca':
                ds = LCA(subpool, k=len(idx_neighb))
            elif self.ds_tech == 'mcb':
                ds = MCB(subpool, k=len(idx_neighb))
            elif self.ds_tech == 'mla':
                ds = MLA(subpool, k=len(idx_neighb))
            elif self.ds_tech == 'a_priori':
                ds = APriori(subpool, k=len(idx_neighb))
            elif self.ds_tech == 'a_posteriori':
                ds = APosteriori(subpool, k=len(idx_neighb))

            # Fit DS technique
            ds.fit(self.DSEL_data, self.DSEL_target)

            # True/False vector of selected neighbors
            neighb = np.in1d(self.neighbors, idx_neighb)

            # Set distances and neighbors of the query sample
            # (already calculated)
            ds.distances = np.asarray([self.distances[0][neighb]])
            ds.neighbors = np.asarray([self.neighbors[0][neighb]])

            ds.DFP_mask = np.ones(ds.n_classifiers)

            # Estimate competence
            comp = ds.estimate_competence(query, ds._predict_base(query))

            # Select the best classifier in the subpool
            sel_c = ds.select(comp)

            # Add to the local pool
            self.pool_classifiers.append(copy.deepcopy(subpool[sel_c[0]]))
            n += 1

        # else: fewer than 2 classes in the neighborhood; skip this
        # iteration and try a larger neighborhood.

        # Increase neighborhood size
        curr_k += 2
        n_err += 1

    return self
# Considering a pool composed of 100 base classifiers

# Calibrating Perceptrons to estimate probabilities
model = CalibratedClassifierCV(Perceptron(max_iter=100))

# Train a pool of 100 classifiers
pool_classifiers = BaggingClassifier(model, n_estimators=100)
pool_classifiers.fit(X_train, y_train)

# Initialize the DS techniques
knorau = KNORAU(pool_classifiers)
kne = KNORAE(pool_classifiers)
desp = DESP(pool_classifiers)
ola = OLA(pool_classifiers)
mcb = MCB(pool_classifiers)
apriori = APriori(pool_classifiers)
meta = METADES(pool_classifiers)

# Fit the DES techniques
knorau.fit(X_dsel, y_dsel)
kne.fit(X_dsel, y_dsel)
desp.fit(X_dsel, y_dsel)

# Fit the DCS techniques
ola.fit(X_dsel, y_dsel)
mcb.fit(X_dsel, y_dsel)
apriori.fit(X_dsel, y_dsel)
meta.fit(X_dsel, y_dsel)

# Calculate the classification accuracy of each technique
print('Evaluating DS techniques:')
def test_fit():
    a_priori_test = APriori(create_pool_classifiers())
    a_priori_test.fit(X_dsel_ex1, y_dsel_ex1)
    assert np.isclose(a_priori_test.dsel_scores,
                      [0.5, 0.5, 1.0, 0.0, 0.33, 0.67]).all()