def faiss_KNORAE_knn(XTrain, YTrain, k, XTest, YTest):
    """Fit a FAISS-backed KNORA-E model and print its run time and score.

    Parameters
    ----------
    XTrain, YTrain
        Samples and labels passed to ``KNORAE.fit``.
    k
        Forwarded to ``KNORAE`` as its ``k`` argument.
    XTest, YTest
        Samples and labels passed to ``KNORAE.score``.

    The timed region covers model construction, fitting and scoring.
    Nothing is returned; both results are printed.
    """
    # time.clock() was removed in Python 3.8; perf_counter() is the
    # documented replacement for measuring elapsed intervals.
    start = time.perf_counter()
    knorae_sk = KNORAE(k=k, knn_classifier='faiss')
    knorae_sk.fit(XTrain, YTrain)
    score = knorae_sk.score(XTest, YTest)
    elapsed = time.perf_counter() - start
    print("faiss_knn_knorae run_time: {}".format(elapsed))
    print("faiss_knn_knorae score: {}".format(score))
def test_kne_proba():
    """KNORA-E with DFP enabled must reproduce the stored probabilities."""
    pool_classifiers, X_dsel, y_dsel, X_test, y_test = setup_classifiers()

    model = KNORAE(pool_classifiers, DFP=True)
    model.fit(X_dsel, y_dsel)
    predicted = model.predict_proba(X_test)

    # Reference values are read from the path below, relative to the
    # current working directory.
    reference_path = 'deslib/tests/expected_values/kne_proba_DFP.npy'
    reference = np.load(reference_path)
    assert np.allclose(predicted, reference)
def test_kne_proba(knn_methods):
    """KNORA-E probabilities must match the stored reference values.

    ``knn_methods`` is forwarded to ``KNORAE`` as ``knn_classifier``.
    """
    pool_classifiers, X_dsel, y_dsel, X_test, y_test = setup_classifiers()

    model = KNORAE(pool_classifiers, knn_classifier=knn_methods)
    model.fit(X_dsel, y_dsel)
    predicted = model.predict_proba(X_test)

    reference_path = 'deslib/tests/expected_values/kne_proba_integration.npy'
    reference = np.load(reference_path)
    assert np.allclose(predicted, reference)
def test_estimate_competence(index, expected):
    """Competences for one example row must match ``expected`` within 0.01.

    ``index`` selects the row of ``neighbors_ex1`` / ``distances_ex1`` that
    is written onto the estimator before calling ``estimate_competence``.
    """
    sample = np.atleast_2d([1, 1])

    estimator = KNORAE(create_pool_classifiers())
    estimator.fit(X_dsel_ex1, y_dsel_ex1)

    # State that estimate_competence is expected to read is set directly
    # on the instance rather than being computed from the query.
    estimator.DFP_mask = np.ones(estimator.n_classifiers)
    estimator.neighbors = neighbors_ex1[index, :]
    estimator.distances = distances_ex1[index, :]

    result = estimator.estimate_competence(sample)
    matches = np.isclose(result, expected, atol=0.01)
    assert matches.all()
def run_knorae(pool_classifiers, X_DSEL, y_DSEL, X_test, y_test, knn_type):
    """Fit KNORA-E and time how long scoring the test set takes.

    Parameters
    ----------
    pool_classifiers
        Passed to ``KNORAE`` as ``pool_classifiers``.
    X_DSEL, y_DSEL
        Samples and labels passed to ``KNORAE.fit``.
    X_test, y_test
        Samples and labels passed to ``KNORAE.score``.
    knn_type
        Passed to ``KNORAE`` as ``knn_classifier``.

    Returns
    -------
    tuple
        ``(score, elapsed)`` where ``elapsed`` is the duration in seconds of
        the ``score`` call only; fitting is outside the timed region.
    """
    knorae = KNORAE(pool_classifiers=pool_classifiers,
                    knn_classifier=knn_type)
    knorae.fit(X_DSEL, y_DSEL)

    # time.clock() was removed in Python 3.8; perf_counter() is the
    # documented replacement for measuring elapsed intervals.
    start = time.perf_counter()
    score = knorae.score(X_test, y_test)
    elapsed = time.perf_counter() - start
    return score, elapsed
def test_select(index, expected):
    """``select`` must return ``expected`` for one example neighborhood.

    ``index`` picks the row of ``neighbors_ex1`` used as the region of
    competence; it is reshaped to a single-row 2-D array before use.
    """
    sample = np.atleast_2d([1, 1])

    estimator = KNORAE(create_pool_classifiers())
    estimator.fit(X_dsel_ex1, y_dsel_ex1)

    region = neighbors_ex1[index, :].reshape(1, -1)
    scores = estimator.estimate_competence(sample, region)
    chosen = estimator.select(scores)

    assert np.array_equal(chosen, expected)
def test_estimate_competence_batch():
    """Competences for a batch of three queries must match the fixed table."""
    batch = np.ones((3, 2))
    reference = np.array([
        [1.0, 0.0, 1.0],
        [2.0, 0.0, 2.0],
        [0.0, 3.0, 0.0],
    ])

    estimator = KNORAE(create_pool_classifiers())
    estimator.fit(X_dsel_ex1, y_dsel_ex1)

    # The whole example neighborhood/distance tables are passed at once.
    result = estimator.estimate_competence(batch, neighbors_ex1,
                                           distances_ex1)
    assert np.allclose(result, reference)
def test_select(index, expected):
    """``select`` must equal ``expected`` for one example neighborhood.

    ``index`` selects the row of ``neighbors_ex1`` / ``distances_ex1`` that
    is written onto the estimator before competences are estimated.
    """
    sample = np.atleast_2d([1, 1])

    estimator = KNORAE(create_pool_classifiers())
    estimator.fit(X_dsel_ex1, y_dsel_ex1)

    # Neighborhood state is injected directly on the instance.
    estimator.DFP_mask = np.ones(estimator.n_classifiers)
    estimator.neighbors = neighbors_ex1[index, :]
    estimator.distances = distances_ex1[index, :]

    scores = estimator.estimate_competence(sample)
    chosen = estimator.select(scores)
    assert chosen == expected
def test_estimate_competence_batch(example_estimate_competence,
                                   create_pool_classifiers):
    """Batch competences from the fixture data must match the fixed table.

    Only the first four items of ``example_estimate_competence`` are used:
    the fit data, the neighbors and the distances.
    """
    X, y, neighbors, distances, _, _ = example_estimate_competence
    reference = np.array([
        [1.0, 0.0, 1.0],
        [2.0, 0.0, 2.0],
        [0.0, 3.0, 0.0],
    ])

    estimator = KNORAE(create_pool_classifiers)
    estimator.fit(X, y)

    result = estimator.estimate_competence(neighbors, distances=distances)
    assert np.allclose(result, reference)
def test_select(index, expected, create_pool_classifiers,
                example_estimate_competence):
    """``select`` must return ``expected`` for one fixture neighborhood.

    ``index`` picks a single row of the fixture's neighbors and distances;
    each row is reshaped to a one-row 2-D array before use.
    """
    X, y, all_neighbors, all_distances, _, _ = example_estimate_competence

    estimator = KNORAE(create_pool_classifiers)
    estimator.fit(X, y)

    region = all_neighbors[index, :].reshape(1, -1)
    region_dists = all_distances[index, :].reshape(1, -1)

    scores = estimator.estimate_competence(region, distances=region_dists)
    chosen = estimator.select(scores)
    assert np.array_equal(chosen, expected)
def test_select_none_competent():
    """With the ``create_pool_all_agree(2, 100)`` pool, ``select`` must
    return the index of every classifier in the pool."""
    sample = np.atleast_2d([1, 1])

    estimator = KNORAE(create_pool_all_agree(2, 100))
    estimator.fit(X_dsel_ex1, y_dsel_ex1)

    # Neighborhood state for the first example row is injected directly.
    estimator.neighbors = neighbors_ex1[0, :]
    estimator.distances = distances_ex1[0, :]
    estimator.DFP_mask = np.ones(estimator.n_classifiers)

    scores = estimator.estimate_competence(sample)
    chosen = estimator.select(scores)

    every_index = list(range(estimator.n_classifiers))
    assert chosen == every_index
def DES(self, x_train, y_train, X_test, Y_test, dsel):
    """Fit KNORA-E and KNORA-U on top of a bagged Perceptron pool.

    The pool is trained on all of ``x_train`` / ``y_train``; both dynamic
    selection models are then fitted on the subset indexed by ``dsel``.

    Returns a tuple made of the two ``fit`` return values followed by the
    items of ``self.calc_metrics(X_test, Y_test)``.
    """
    # self.pool_size is the second positional argument of BaggingClassifier.
    base_learner = linear_model.Perceptron(max_iter=5)
    pool_classifiers = BaggingClassifier(base_learner, self.pool_size)
    pool_classifiers.fit(x_train, y_train)

    # Build the two dynamic selection models over the same pool.
    knorae = KNORAE(pool_classifiers)
    knorau = KNORAU(pool_classifiers)

    # NOTE(review): these hold whatever fit() returns, which is presumably
    # the fitted estimator rather than an accuracy score — confirm intent.
    knorae_fit = knorae.fit(x_train[dsel], y_train[dsel])
    knorau_fit = knorau.fit(x_train[dsel], y_train[dsel])

    # print (knorae.score(X_test, Y_test), knorau.score(X_test, Y_test))
    leading = (knorae_fit, knorau_fit)
    return leading + self.calc_metrics(X_test, Y_test)
extra_clf = ExtraTreesClassifier(n_estimators=500, max_leaf_nodes=16,
                                 n_jobs=-1, random_state=42)
svm_clf = SVC(probability=True, kernel="linear", C=float("inf"))

# Fit the individual classifiers.
rnd_clf.fit(X_train, y_train)
extra_clf.fit(X_train, y_train)
svm_clf.fit(X_train, y_train)

# Every entry must be a (name, estimator) pair; the 'ex' entry previously
# had no estimator attached.
hard_voting_clf = VotingClassifier(
    estimators=[('rf', rnd_clf), ('ex', extra_clf), ('svc', svm_clf)],
    voting='hard')  # hard voting
soft_voting_clf = VotingClassifier(
    estimators=[('rf', rnd_clf), ('ex', extra_clf), ('svc', svm_clf)],
    voting='soft')  # soft voting

# Show each classifier's accuracy score. Every model is (re)fitted here.
for clf in (rnd_clf, extra_clf, svm_clf, hard_voting_clf, soft_voting_clf):
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    print(clf.__class__.__name__, accuracy_score(y_test, y_pred))

# Exercise: 9
knorae = KNORAE(rnd_clf)
knorae.fit(X_dsel, y_dsel)
# Score KNORA-E on its own predictions; y_pred previously still held the
# output of the last classifier in the loop above.
y_pred = knorae.predict(X_test)
print(knorae.__class__.__name__, accuracy_score(y_test, y_pred))
def test_kne(knn_methods):
    """KNORA-E accuracy on the test split must match the pinned value.

    ``knn_methods`` is forwarded to ``KNORAE`` as ``knn_classifier``.
    """
    pool_classifiers, X_dsel, y_dsel, X_test, y_test = setup_classifiers()

    model = KNORAE(pool_classifiers, knn_classifier=knn_methods)
    model.fit(X_dsel, y_dsel)

    accuracy = model.score(X_test, y_test)
    assert np.isclose(accuracy, 0.973404255319148)
# A random forest serves as the pool of classifiers. The tree depth is
# capped at 5 (the original author notes this is so that the trees can
# estimate probabilities).
pool_classifiers = RandomForestClassifier(n_estimators=10, max_depth=5)
pool_classifiers.fit(X_train, y_train)

# Build the DS techniques. Where given, k=5 sets the size of the region of
# competence (5 neighbors).
knorau = KNORAU(pool_classifiers)
kne = KNORAE(pool_classifiers, k=5)
desp = DESP(pool_classifiers, k=5)
ola = OLA(pool_classifiers, k=5)
mcb = MCB(pool_classifiers, k=5)
meta = METADES(pool_classifiers, k=5)

# Fit every technique on the DSEL data.
for technique in (knorau, kne, desp, meta, ola, mcb):
    technique.fit(X_dsel, y_dsel)

# Report the classification accuracy of each technique.
# NOTE(review): `RF` is not defined in this fragment; it presumably refers
# to the forest stored in `pool_classifiers` — confirm against the full file.
print('Classification accuracy RF: ', RF.score(X_test, y_test))
print('Evaluating DS techniques:')
for label, technique in (
        ('Classification accuracy KNORAU: ', knorau),
        ('Classification accuracy KNORA-Eliminate: ', kne),
        ('Classification accuracy DESP: ', desp),
        ('Classification accuracy OLA: ', ola),
        ('Classification accuracy MCB: ', mcb),
        ('Classification accuracy META-DES: ', meta),
):
    print(label, technique.score(X_test, y_test))
def main():
    """Compare dynamic selection techniques on the breast cancer dataset.

    Loads the scikit-learn breast cancer data, standardizes it, trains a
    bagged pool of 100 Perceptrons, fits KNORA-U, KNORA-E, DESP, OLA, MCB,
    A Priori and META-DES on a held-out DSEL split, and prints the test
    accuracy of each one next to the plain Bagging baseline.
    """
    ###########################################################################
    # Preparing the dataset
    # ---------------------
    # The data is normalized to zero mean and unit variance with
    # StandardScaler, a common requirement for many learning algorithms.
    #
    # The same RandomState object is handed to both splits and to the
    # bagging pool, so reordering these calls may change the results.
    rng = np.random.RandomState(123)
    dataset = load_breast_cancer()
    X = dataset.data
    y = dataset.target

    # Hold out a third of the data for testing.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.33, random_state=rng)

    # The scaler is fitted on the training part only and reused for the test.
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    # Halve the training data: one part trains the pool, the other is DSEL.
    X_train, X_dsel, y_train, y_dsel = train_test_split(
        X_train, y_train, test_size=0.5, random_state=rng)

    # Train a pool of 100 base classifiers.
    pool_classifiers = BaggingClassifier(Perceptron(max_iter=10),
                                         n_estimators=100,
                                         random_state=rng)
    pool_classifiers.fit(X_train, y_train)

    # DS techniques that work directly on the uncalibrated pool.
    knorau = KNORAU(pool_classifiers)
    kne = KNORAE(pool_classifiers)
    desp = DESP(pool_classifiers)
    ola = OLA(pool_classifiers)
    mcb = MCB(pool_classifiers)

    ###########################################################################
    # Calibrating base classifiers
    # ----------------------------
    # Some dynamic selection techniques require the base classifiers to
    # estimate probabilities in order to compute their competence level.
    # The Perceptron is not a probabilistic classifier (it does not
    # implement predict_proba), so each one is calibrated with
    # scikit-learn's CalibratedClassifierCV before being used by those
    # techniques. cv='prefit' reuses the already-fitted pool members, so
    # exactly the same pool is used as in the other DS methods.
    calibrated_pool = []
    for base_clf in pool_classifiers:
        wrapped = CalibratedClassifierCV(base_estimator=base_clf, cv='prefit')
        wrapped.fit(X_dsel, y_dsel)
        calibrated_pool.append(wrapped)

    apriori = APriori(calibrated_pool)
    meta = METADES(calibrated_pool)

    # Fit every technique on the DSEL data.
    for technique in (knorau, kne, desp, ola, mcb, apriori, meta):
        technique.fit(X_dsel, y_dsel)

    ###########################################################################
    # Evaluating the methods
    # ----------------------
    # Each method is scored on the test set; Bagging (the pool without any
    # selection) is printed last as the baseline for comparison.
    print('Evaluating DS techniques:')
    report = (
        ('Classification accuracy KNORA-Union: ', knorau),
        ('Classification accuracy KNORA-Eliminate: ', kne),
        ('Classification accuracy DESP: ', desp),
        ('Classification accuracy OLA: ', ola),
        ('Classification accuracy A priori: ', apriori),
        ('Classification accuracy MCB: ', mcb),
        ('Classification accuracy META-DES: ', meta),
        ('Classification accuracy Bagging: ', pool_classifiers),
    )
    for label, model in report:
        print(label, model.score(X_test, y_test))
def test_kne():
    """KNORA-E with DFP enabled must score 0.9 on the test split."""
    pool_classifiers, X_dsel, y_dsel, X_test, y_test = setup_classifiers()

    model = KNORAE(pool_classifiers, DFP=True)
    model.fit(X_dsel, y_dsel)

    accuracy = model.score(X_test, y_test)
    assert np.isclose(accuracy, 0.9)
threshold = 0.4 #20 repetitions for rep in range(1,6): skf = StratifiedKFold(n_splits=4) for train_index, test_index in skf.split(X, y): X_train, X_test = X[train_index], X[test_index] y_train, y_test = y[train_index], y[test_index] neigh = KNeighborsClassifier(n_neighbors=k_neigh) neigh.fit(X_train, y_train) X_train, X_dsel, y_train, y_dsel = train_test_split(X_train, y_train, test_size=0.66) pool_classifiers = es.fit(X_train, y_train).estimators_ knorau = KNORAE(pool_classifiers) knorau.fit(X_dsel, y_dsel) y_pred = [] for instance in X_test: #use_des = select_classifier(threshold, k_neigh, X_dsel, y_dsel, instance) use_des = True if(use_des): result = knorau.predict([instance]) y_pred.append(result[0]) else: result = neigh.predict([instance]) y_pred.append(result[0]) for name, metric in zip(['accuracy','roc_auc','gmean','f1'], [accuracy_score, roc_auc_score, gmean, f1_score]):