def test_select_none_competent():
    knora_e_test = KNORAE()
    competences = np.zeros(100)
    selected = knora_e_test.select(competences)
    expected = np.atleast_2d([True] * 100)

    assert np.array_equal(expected, selected)
def faiss_KNORAE_knn(XTrain, YTrain, k, XTest, YTest):
    # time.clock() was removed in Python 3.8; perf_counter() is the replacement
    start = time.perf_counter()
    knorae_sk = KNORAE(k=k, knn_classifier='faiss')
    knorae_sk.fit(XTrain, YTrain)
    score = knorae_sk.score(XTest, YTest)
    print("faiss_knn_knorae run_time: {}".format(time.perf_counter() - start))
    print("faiss_knn_knorae score: {}".format(score))
def test_kne_proba():
    pool_classifiers, X_dsel, y_dsel, X_test, y_test = setup_classifiers()

    kne = KNORAE(pool_classifiers, DFP=True)
    kne.fit(X_dsel, y_dsel)
    probas = kne.predict_proba(X_test)
    expected = np.load('deslib/tests/expected_values/kne_proba_DFP.npy')

    assert np.allclose(probas, expected)
def test_kne_proba(knn_methods):
    pool_classifiers, X_dsel, y_dsel, X_test, y_test = setup_classifiers()

    kne = KNORAE(pool_classifiers, knn_classifier=knn_methods)
    kne.fit(X_dsel, y_dsel)
    probas = kne.predict_proba(X_test)
    expected = np.load(
        'deslib/tests/expected_values/kne_proba_integration.npy')

    assert np.allclose(probas, expected)
def run_knorae(pool_classifiers, X_DSEL, y_DSEL, X_test, y_test, knn_type):
    knorae = KNORAE(pool_classifiers=pool_classifiers, knn_classifier=knn_type)
    knorae.fit(X_DSEL, y_DSEL)

    # time.clock() was removed in Python 3.8; perf_counter() is the replacement
    start = time.perf_counter()
    score = knorae.score(X_test, y_test)
    end = time.perf_counter() - start
    return score, end
def test_estimate_competence_batch():
    query = np.ones((3, 2))
    expected = np.array([[1.0, 0.0, 1.0],
                         [2.0, 0.0, 2.0],
                         [0.0, 3.0, 0.0]])

    knora_e_test = KNORAE(create_pool_classifiers())
    knora_e_test.fit(X_dsel_ex1, y_dsel_ex1)
    neighbors = neighbors_ex1
    distances = distances_ex1
    competences = knora_e_test.estimate_competence(query, neighbors, distances)

    assert np.allclose(competences, expected)
def test_estimate_competence_batch(example_estimate_competence,
                                   create_pool_classifiers):
    X, y, neighbors, distances, _, _ = example_estimate_competence
    expected = np.array([[1.0, 0.0, 1.0],
                         [2.0, 0.0, 2.0],
                         [0.0, 3.0, 0.0]])

    knora_e_test = KNORAE(create_pool_classifiers)
    knora_e_test.fit(X, y)
    competences = knora_e_test.estimate_competence(neighbors,
                                                   distances=distances)

    assert np.allclose(competences, expected)
def DES(self, x_train, y_train, X_test, Y_test, dsel):
    pool_classifiers = BaggingClassifier(linear_model.Perceptron(max_iter=5),
                                         self.pool_size)
    pool_classifiers.fit(x_train, y_train)

    # Initialize the DES models
    knorae = KNORAE(pool_classifiers)
    knorau = KNORAU(pool_classifiers)

    # Preprocess the Dynamic Selection dataset (DSEL); fit() returns the
    # estimator itself, so the scores are computed separately on the test set
    knorae.fit(x_train[dsel], y_train[dsel])
    knorau.fit(x_train[dsel], y_train[dsel])

    # Predict new examples and score them:
    score1 = knorae.score(X_test, Y_test)
    score2 = knorau.score(X_test, Y_test)
    return (score1, score2) + self.calc_metrics(X_test, Y_test)
def test_select(index, expected):
    query = np.atleast_2d([1, 1])

    knora_e_test = KNORAE(create_pool_classifiers())
    knora_e_test.fit(X_dsel_ex1, y_dsel_ex1)
    knora_e_test.DFP_mask = np.ones(knora_e_test.n_classifiers)
    knora_e_test.neighbors = neighbors_ex1[index, :]
    knora_e_test.distances = distances_ex1[index, :]

    competences = knora_e_test.estimate_competence(query)
    selected = knora_e_test.select(competences)

    assert selected == expected
def escolher_modelo(nome, x_sel, y_sel, P, k):
    '''
    Method that instantiates and fits the chosen DS technique.
    :param x_sel: training data from the validation window
    :param y_sel: labels of the validation window
    :param P: pool of classifiers
    :param k: neighborhood size
    '''

    # choosing the dynamic classifier selection technique
    if nome == 'OLA':
        DS = OLA(P, k)
        number_model = 0
    elif nome == 'LCA':
        DS = LCA(P, k)
        number_model = 1
    elif nome == 'KNORAE':
        DS = KNORAE(P, k)
        number_model = 2
    elif nome == 'KNORAU':
        DS = KNORAU(P, k)
        number_model = 3

    # fitting the chosen DS technique to find the competent classifiers
    DS.fit(x_sel, y_sel)

    # returning the DS technique
    return DS, number_model
def fit(self, x_sel, y_sel, P, k):
    '''
    Method that instantiates and fits the chosen DS technique.
    :param x_sel: training data from the validation window
    :param y_sel: labels of the validation window
    :param P: pool of classifiers
    :param k: neighborhood size
    '''

    # choosing the dynamic classifier selection technique
    if self.TYPE == 'knorae':
        DS = KNORAE(P, k)
    elif self.TYPE == 'knorau':
        DS = KNORAU(P, k)
    elif self.TYPE == 'ola':
        DS = OLA(P, k)
    elif self.TYPE == 'lca':
        DS = LCA(P, k)
    elif self.TYPE == 'posteriori':
        DS = APosteriori(P, k)
    elif self.TYPE == 'priori':
        DS = APriori(P, k)

    # fitting the chosen DS technique to find the competent classifiers
    self.DS = copy.deepcopy(DS)
    self.DS.fit(x_sel, y_sel)
def test_grid_search():
    # This tests if the estimator can be cloned and used in a grid search
    pool_classifiers, X_dsel, y_dsel, X_test, y_test = setup_classifiers()

    kne = KNORAE(pool_classifiers)
    params = {'k': [1, 3, 5, 7]}
    grid = GridSearchCV(kne, params)

    grid.fit(X_dsel, y_dsel)
    grid.best_estimator_.score(X_test, y_test)
def test_estimate_competence(index, expected):
    query = np.atleast_2d([1, 1])

    knora_e_test = KNORAE(create_pool_classifiers())
    knora_e_test.fit(X_dsel_ex1, y_dsel_ex1)
    knora_e_test.DFP_mask = np.ones(knora_e_test.n_classifiers)
    knora_e_test.neighbors = neighbors_ex1[index, :]
    knora_e_test.distances = distances_ex1[index, :]

    competences = knora_e_test.estimate_competence(query)

    assert np.isclose(competences, expected, atol=0.01).all()
def test_select_none_competent():
    query = np.atleast_2d([1, 1])

    knora_e_test = KNORAE(create_pool_all_agree(2, 100))
    knora_e_test.fit(X_dsel_ex1, y_dsel_ex1)
    knora_e_test.neighbors = neighbors_ex1[0, :]
    knora_e_test.distances = distances_ex1[0, :]
    knora_e_test.DFP_mask = np.ones(knora_e_test.n_classifiers)

    competences = knora_e_test.estimate_competence(query)
    indices = knora_e_test.select(competences)

    assert indices == list(range(knora_e_test.n_classifiers))
def train(train_index, test_index):
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = Y[train_index], Y[test_index]

    # train_perc = 0.7
    # split_point = int(train_perc * len(train_index))
    # valid_index = train_index[split_point:]
    # train_index = train_index[:split_point]
    # X_train, X_valid, X_test = X[train_index], X[valid_index], X[test_index]
    # y_train, y_valid, y_test = Y[train_index], Y[valid_index], Y[test_index]
    # print("TRAIN:", train_index, "VALID:", valid_index, "TEST:", test_index)

    X_train, X_valid, y_train, y_valid = train_test_split(
        X_train, y_train, test_size=0.3, random_state=seed)

    pool_classifiers.fit(X_train, y_train)

    validation_data, validation_labels = get_validation_data(
        X_valid, y_valid, 0.5, hardness=hardness)

    dynamic_selection_algorithm = None
    try:
        if args.dynamic_selection == True and args.dynamic_algorithm is None:
            raise ValueError(
                'Dynamic selection requires you provide an algorithm.')
        elif args.dynamic_selection == True and args.dynamic_algorithm is not None:
            if args.dynamic_algorithm == 'ola':
                dynamic_selection_algorithm = OLA(pool_classifiers,
                                                  random_state=seed)
            elif args.dynamic_algorithm == 'lca':
                dynamic_selection_algorithm = LCA(pool_classifiers,
                                                  random_state=seed)
            elif args.dynamic_algorithm == 'mcb':
                dynamic_selection_algorithm = MCB(pool_classifiers,
                                                  random_state=seed)
            elif args.dynamic_algorithm == 'knorau':
                dynamic_selection_algorithm = KNORAU(pool_classifiers,
                                                     random_state=seed)
            elif args.dynamic_algorithm == 'kne':
                dynamic_selection_algorithm = KNORAE(pool_classifiers,
                                                     random_state=seed)

            dynamic_selection_algorithm.fit(validation_data, validation_labels)
            preds = dynamic_selection_algorithm.predict(X_test)
        else:
            # Static combination by voting
            preds = voting(X_test, pool_classifiers)
    except Exception as error:
        raise error

    acc = get_accuracy_score(y_test, preds)
    g1 = get_g1_score(y_test, preds, average='macro')
    f1 = get_f1_score(y_test, preds)
    roc = roc_auc_score(y_test, preds, average='macro')

    return dict(f1=f1, g1=g1, acc=acc, roc=roc)
def test_select(index, expected):
    query = np.atleast_2d([1, 1])

    knora_e_test = KNORAE(create_pool_classifiers())
    knora_e_test.fit(X_dsel_ex1, y_dsel_ex1)
    neighbors = neighbors_ex1[index, :].reshape(1, -1)

    competences = knora_e_test.estimate_competence(query, neighbors)
    selected = knora_e_test.select(competences)

    assert np.array_equal(selected, expected)
def test_select(index, expected, create_pool_classifiers,
                example_estimate_competence):
    X, y, neighbors, distances, _, _ = example_estimate_competence

    knora_e_test = KNORAE(create_pool_classifiers)
    knora_e_test.fit(X, y)
    neighbors = neighbors[index, :].reshape(1, -1)
    distances = distances[index, :].reshape(1, -1)

    competences = knora_e_test.estimate_competence(neighbors,
                                                   distances=distances)
    selected = knora_e_test.select(competences)

    assert np.array_equal(selected, expected)
def ensemble_model(self, ensemble=None):
    if ensemble is not None:
        self.ensemble = ensemble

    if self.moo_ is None:
        self.moo_ = monise(weightedScalar=self.scalarization,
                           singleScalar=self.scalarization,
                           nodeTimeLimit=2, targetSize=150,
                           targetGap=0, nodeGap=0.01, norm=False)
        self.moo_.optimize()
        self.solutions_ = []
        for solution in self.moo_.solutionsList:
            self.solutions_.append(solution.x)

    if self.solutions_ is None:
        self.solutions_ = []
        for solution in self.moo_.solutionsList:
            self.solutions_.append(solution.x)

    if self.ensemble in ['voting', 'voting hard']:
        models_t = [("Model " + str(i), self.solutions_[i])
                    for i in range(len(self.solutions_))]
        ensemble_model = SimpleVoting(estimators=models_t)

    if self.ensemble == 'voting soft':
        models_t = [("Model " + str(i), self.solutions_[i])
                    for i in range(len(self.solutions_))]
        ensemble_model = SimpleVoting(estimators=models_t, voting='soft')

    if self.ensemble == 'knorau':
        ensemble_model = KNORAU(self.solutions_)
        ensemble_model.fit(self.X_val, self.y_val)

    if self.ensemble == 'knorae':
        ensemble_model = KNORAE(self.solutions_)
        ensemble_model.fit(self.X_val, self.y_val)

    return ensemble_model
                                                    random_state=rng)

# Training a random forest to be used as the pool of classifiers.
# We set the maximum depth of the tree so that it
# can estimate probabilities
pool_classifiers = RandomForestClassifier(n_estimators=100, max_depth=5,
                                          random_state=rng)
pool_classifiers.fit(X_train, y_train)

stacked = StackedClassifier(pool_classifiers, LogisticRegression())
stacked.fit(X_dsel, y_dsel)

# Initialize a DS technique. Here we specify the size of
# the region of competence (5 neighbors)
knorau = KNORAU(pool_classifiers, random_state=rng)
kne = KNORAE(pool_classifiers, k=5, random_state=rng)
desp = DESP(pool_classifiers, k=5, random_state=rng)
ola = OLA(pool_classifiers, k=5, random_state=rng)
mcb = MCB(pool_classifiers, k=5, random_state=rng)
meta = METADES(pool_classifiers, k=5, random_state=rng)

# Fit the DS techniques
knorau.fit(X_dsel, y_dsel)
kne.fit(X_dsel, y_dsel)
desp.fit(X_dsel, y_dsel)
meta.fit(X_dsel, y_dsel)
ola.fit(X_dsel, y_dsel)
mcb.fit(X_dsel, y_dsel)

###############################################################################
# Plotting the results
extra_clf = ExtraTreesClassifier(n_estimators=500, max_leaf_nodes=16,
                                 n_jobs=-1, random_state=42)
svm_clf = SVC(probability=True, kernel="linear", C=float("inf"))

# fit and predict
rnd_clf.fit(X_train, y_train)
extra_clf.fit(X_train, y_train)
svm_clf.fit(X_train, y_train)

hard_voting_clf = VotingClassifier(
    estimators=[('rf', rnd_clf), ('ex', extra_clf), ('svc', svm_clf)],
    voting='hard')  # hard voting
soft_voting_clf = VotingClassifier(
    estimators=[('rf', rnd_clf), ('ex', extra_clf), ('svc', svm_clf)],
    voting='soft')  # soft voting

# show each classifier's accuracy score
for clf in (rnd_clf, extra_clf, svm_clf, hard_voting_clf, soft_voting_clf):
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    print(clf.__class__.__name__, accuracy_score(y_test, y_pred))

# Exercise: 9
knorae = KNORAE(rnd_clf)
knorae.fit(X_dsel, y_dsel)
y_pred = knorae.predict(X_test)
print(knorae.__class__.__name__, accuracy_score(y_test, y_pred))
def test_kne(knn_methods):
    pool_classifiers, X_dsel, y_dsel, X_test, y_test = setup_classifiers()

    kne = KNORAE(pool_classifiers, knn_classifier=knn_methods)
    kne.fit(X_dsel, y_dsel)

    assert np.isclose(kne.score(X_test, y_test), 0.973404255319148)
    model_xgboost = get_xgboost_classifier(n_classes).fit(X_train, y_train)
    model_rf = RandomForestClassifier(n_estimators=200).fit(X_train, y_train)

    pool_classifiers = [
        model_nb, model_knn, model_lr, model_xgboost, model_rf
    ]
# If pool_classifiers = None, the default is RandomForestClassifier(n_estimators=200)
elif n_classifiers == 0:
    pool_classifiers = None

# ------------------- KNORAE TRAINING ---------------------------------------
train_time_kne_start = time.time()
kne = KNORAE(pool_classifiers)
kne.fit(X_dsel, y_dsel)
train_time_kne_end = time.time()

# ------------------- META-DES TRAINING -------------------------------------
train_time_mdes_start = time.time()
meta = METADES(pool_classifiers)
meta.fit(X_dsel, y_dsel)
train_time_mdes_end = time.time()

# ------------------- KNORAU TRAINING ---------------------------------------
train_time_knu_start = time.time()
knu = KNORAU(pool_classifiers)
knu.fit(X_dsel, y_dsel)
train_time_knu_end = time.time()
from unittest import TestCase

from sklearn.ensemble import RandomForestClassifier

from app.keras import logistic_regression


class TestLogisticRegression(TestCase):

    def test_log_reg(self):
        # Arrange
        # Act
        logistic_regression.main()

        from deslib.des.knora_e import KNORAE

        # Train a pool of 10 classifiers
        pool_classifiers = RandomForestClassifier(n_estimators=10)
        pool_classifiers.fit(X_train, y_train)

        # Initialize the DES model
        knorae = KNORAE(pool_classifiers)

        # Preprocess the Dynamic Selection dataset (DSEL)
        knorae.fit(X_dsel, y_dsel)

        # Predict new examples:
        knorae.predict(X_test)
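# The snippet above references X_train, y_train, X_dsel, y_dsel and X_test
# without defining them. A minimal sketch of how those splits could be
# produced; the dataset choice and split sizes here are illustrative
# assumptions, not part of the original snippet:
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split

data = load_breast_cancer()
X, y = data.data, data.target

# Hold out a test set, then split the remainder into training data and DSEL
X_rest, X_test, y_rest, y_test = train_test_split(X, y, test_size=0.25)
X_train, X_dsel, y_train, y_dsel = train_test_split(X_rest, y_rest,
                                                    test_size=0.5)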
y = data.target

# split the data into training and test data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25)

RF = RandomForestClassifier()
RF.fit(X_train, y_train)

X_train, X_dsel, y_train, y_dsel = train_test_split(X, y, test_size=0.50)

# Training a random forest to be used as the pool of classifiers. We set the
# maximum depth of the tree so that it can estimate probabilities
pool_classifiers = RandomForestClassifier(n_estimators=10, max_depth=5)
pool_classifiers.fit(X_train, y_train)

# Initialize a DS technique. Here we specify the size of the region of
# competence (5 neighbors)
knorau = KNORAU(pool_classifiers)
kne = KNORAE(pool_classifiers, k=5)
desp = DESP(pool_classifiers, k=5)
ola = OLA(pool_classifiers, k=5)
mcb = MCB(pool_classifiers, k=5)
meta = METADES(pool_classifiers, k=5)

# Fit the DS techniques
knorau.fit(X_dsel, y_dsel)
kne.fit(X_dsel, y_dsel)
desp.fit(X_dsel, y_dsel)
meta.fit(X_dsel, y_dsel)
ola.fit(X_dsel, y_dsel)
mcb.fit(X_dsel, y_dsel)

# Calculate classification accuracy of each technique
print('Classification accuracy RF: ', RF.score(X_test, y_test))
def test_predict_proba(create_X_y):
    X, y = create_X_y

    clf1 = Perceptron()
    clf1.fit(X, y)
    KNORAE([clf1, clf1]).fit(X, y)
def test_predict_proba():
    X = X_dsel_ex1
    y = y_dsel_ex1

    clf1 = Perceptron()
    clf1.fit(X, y)
    KNORAE([clf1, clf1])
def test_check_estimator():
    check_estimator(KNORAE())
                                                    random_state=rng)

pool_classifiers = BaggingClassifier(base_estimator=DecisionTreeClassifier(),
                                     n_estimators=100,
                                     random_state=rng)
pool_classifiers.fit(X_train, y_train)

# Setting up static methods.
stacked = StackedClassifier(pool_classifiers)
static_selection = StaticSelection(pool_classifiers)
single_best = SingleBest(pool_classifiers)

# Initialize a DS technique. Here we specify the size of
# the region of competence (5 neighbors)
knorau = KNORAU(pool_classifiers, random_state=rng)
kne = KNORAE(pool_classifiers, random_state=rng)
desp = DESP(pool_classifiers, random_state=rng)
ola = OLA(pool_classifiers, random_state=rng)
mcb = MCB(pool_classifiers, random_state=rng)
knop = KNOP(pool_classifiers, random_state=rng)
meta = METADES(pool_classifiers, random_state=rng)

names = ['Single Best', 'Static Selection', 'Stacked',
         'KNORA-U', 'KNORA-E', 'DES-P', 'OLA', 'MCB', 'KNOP', 'META-DES']

methods = [single_best, static_selection, stacked,
           knorau, kne, desp, ola, mcb, knop, meta]
def test_kne():
    pool_classifiers, X_dsel, y_dsel, X_test, y_test = setup_classifiers()

    kne = KNORAE(pool_classifiers, DFP=True)
    kne.fit(X_dsel, y_dsel)

    assert np.isclose(kne.score(X_test, y_test), 0.9)
    plot_classifier_decision(ax, clf, X_train)
    ax.set_xlim((0, 1))
    ax.set_ylim((0, 1))

plt.show()
plt.tight_layout()

###############################################################################
# Comparison with Dynamic Selection techniques
# --------------------------------------------
#
# We will now consider four DS methods: k-Nearest Oracle-Eliminate (KNORA-E),
# Dynamic Ensemble Selection performance (DES-P), Overall Local Accuracy (OLA)
# and Rank. Let's train the classifiers and plot their decision boundaries:
knora_e = KNORAE(pool_classifiers).fit(X_train, y_train)
desp = DESP(pool_classifiers).fit(X_train, y_train)
ola = OLA(pool_classifiers).fit(X_train, y_train)
rank = Rank(pool_classifiers).fit(X_train, y_train)

# Plotting the Decision Border of the DS methods.
fig2, sub = plt.subplots(2, 2, figsize=(15, 10))
plt.subplots_adjust(wspace=0.4, hspace=0.4)
titles = ['KNORA-Eliminate', 'DES-P',
          'Overall Local Accuracy (OLA)', 'Modified Rank']

classifiers = [knora_e, desp, ola, rank]

for clf, ax, title in zip(classifiers, sub.flatten(), titles):
    plot_classifier_decision(ax, clf, X_train, mode='filled', alpha=0.4)
    plot_dataset(X_test, y_test, ax=ax)