def predict(self, X):
    """Hard decision.

    Validates ``X``, builds the dynamic selection model named by
    ``self.desMethod`` on top of ``self.ensemble_``, fits it on the stored
    ``previous_X`` / ``previous_y`` data (randomly oversampled first when
    ``self.oversampled`` is truthy) and returns its predictions for ``X``.

    The selection model is re-created and re-fitted on every call.

    Raises
    ------
    ValueError
        If ``X`` does not have the same number of columns as ``self.X_``.
    """
    # The estimator must have been fitted before it can predict.
    check_is_fitted(self, "classes_")

    # Input validation.
    X = check_array(X)
    if X.shape[1] != self.X_.shape[1]:
        raise ValueError("number of features does not match")

    dsel_X, dsel_y = self.previous_X, self.previous_y
    if self.oversampled:
        sampler = RandomOverSampler(random_state=42)
        dsel_X, dsel_y = sampler.fit_resample(dsel_X, dsel_y)

    method = self.desMethod
    if method == "KNORAU":
        selector_cls = KNORAU
    elif method == "LCA":
        selector_cls = LCA
    elif method == "Rank":
        selector_cls = Rank
    else:
        # "KNORAE" and every unrecognised name both resolve to KNORAE.
        selector_cls = KNORAE

    selector = selector_cls(self.ensemble_, random_state=42)
    selector.fit(dsel_X, dsel_y)
    return selector.predict(X)
def test_kne_proba(knn_methods):
    """Compare KNORAE soft-voting probabilities with the stored reference.

    ``knn_methods`` is forwarded as the ``knn_classifier`` argument of
    ``KNORAE``; the reference array is read from
    ``deslib/tests/expected_values/kne_proba_integration.npy``.
    """
    pool, dsel_X, dsel_y, test_X, _ = setup_classifiers()

    model = KNORAE(pool, knn_classifier=knn_methods, voting='soft')
    model.fit(dsel_X, dsel_y)

    predicted = model.predict_proba(test_X)
    reference = np.load(
        'deslib/tests/expected_values/kne_proba_integration.npy')
    assert np.allclose(predicted, reference)
def test_knorae_subspaces():
    """Check KNORAE accuracy on a bagging pool built with ``max_features=0.5``.

    Predictions are taken as the argmax of ``predict_proba`` and compared
    with ``y_test`` through ``accuracy_score``.
    """
    # NOTE(review): another ``test_knorae_subspaces`` is defined later in this
    # source; if both live in the same module the later one shadows this one
    # and this test is never collected -- confirm and rename if so.
    rng = np.random.RandomState(123456)
    X_dsel, X_test, X_train, y_dsel, y_test, y_train = load_dataset(None, rng)

    pool = BaggingClassifier(LogisticRegression(),
                             max_features=0.5,
                             random_state=rng).fit(X_train, y_train)

    knorae = KNORAE(pool)
    knorae.fit(X_dsel, y_dsel)

    # argmax yields column indices; comparing them with y_test directly
    # assumes the labels are 0..n_classes-1 -- TODO confirm for load_dataset.
    y_pred = knorae.predict_proba(X_test).argmax(axis=1)

    # accuracy_score is documented as (y_true, y_pred); pass them in that order.
    assert np.isclose(accuracy_score(y_test, y_pred), 0.9787234042553191)
def test_knorae_subspaces():
    """Check ``KNORAE.score`` on a bagging pool built with ``max_features=0.5``."""
    random_state = np.random.RandomState(123456)

    # Load the DSEL, test and training partitions.
    (X_dsel, X_test, X_train,
     y_dsel, y_test, y_train) = load_dataset(None, random_state)

    bagging = BaggingClassifier(LogisticRegression(),
                                max_features=0.5,
                                random_state=random_state)
    pool = bagging.fit(X_train, y_train)

    selector = KNORAE(pool)
    selector.fit(X_dsel, y_dsel)

    accuracy = selector.score(X_test, y_test)
    assert np.isclose(accuracy, 0.9787234042553191)
def test_kne(knn_methods, voting):
    """Check the KNORAE test-set score for a given kNN backend and voting mode.

    ``knn_methods`` and ``voting`` are forwarded to ``KNORAE`` as
    ``knn_classifier`` and ``voting`` respectively.
    """
    pool, dsel_X, dsel_y, test_X, test_y = setup_classifiers()

    model = KNORAE(pool, knn_classifier=knn_methods, voting=voting)
    model.fit(dsel_X, dsel_y)

    accuracy = model.score(test_X, test_y)
    assert np.isclose(accuracy, 0.9787234042553191)
# DCS techniques
ola = OLA(pool_classifiers)
mcb = MCB(pool_classifiers)

##############################################################################
# Adding stacked classifier as baseline comparison. Stacked classifier can
# be found in the static module. In this experiment we consider two types
# of stacking: one using logistic regression as meta-classifier
# (default configuration) and the other using a Decision Tree.
stacked_lr = StackedClassifier(pool_classifiers, random_state=rng)
stacked_dt = StackedClassifier(pool_classifiers,
                               random_state=rng,
                               meta_classifier=DecisionTreeClassifier())

# Fitting the DS techniques (same order as they are listed here)
for ds_model in (knorau, kne, desp, metades, ola, mcb):
    ds_model.fit(X_dsel, y_dsel)

# Fitting the stacking models
for stacked_model in (stacked_lr, stacked_dt):
    stacked_model.fit(X_dsel, y_dsel)

# Calculate classification accuracy of each technique
print('Evaluating DS techniques:')
for label, model in (
        ('Classification accuracy of Majority voting the pool: ',
         model_voting),
        ('Classification accuracy of KNORA-U: ', knorau),
        ('Classification accuracy of KNORA-E: ', kne),
):
    print(label, model.score(X_test, y_test))
def predict(self, X):
    """Predict labels for ``X`` with a dynamic selection model.

    After validating ``X``, the stored ``previous_X`` / ``previous_y`` data
    is optionally oversampled with the technique named by
    ``self.oversampler`` and used to fit the selection model named by
    ``self.desMethod`` on top of ``self.ensemble_``. The selection model is
    re-created and re-fitted on every call.

    When the ensemble holds fewer than two members, its first member
    predicts directly and no oversampling or selection model is used.

    Raises
    ------
    ValueError
        If ``X`` does not have the same number of columns as ``self.X_``.
    """
    # The estimator must have been fitted before it can predict.
    check_is_fitted(self, "classes_")

    # Input validation.
    X = check_array(X)
    if X.shape[1] != self.X_.shape[1]:
        raise ValueError("number of features does not match")

    # Nothing to select from: skip the (unused) oversampling and DES fit.
    if len(self.ensemble_) < 2:
        return self.ensemble_[0].predict(X)

    X_dsel = self.previous_X
    y_dsel = self.previous_y

    # The neighbour count has to stay below the size of the *smallest*
    # class, whichever label that is, and is capped at 5.
    _, counts = np.unique(y_dsel, return_counts=True)
    k_neighbors = int(min(5, counts.min() - 1))

    # Oversampling is only attempted when at least one neighbour is available.
    if k_neighbors > 0:
        oversampler = self.oversampler
        if oversampler == "SMOTE":
            smote = SMOTE(random_state=42, k_neighbors=k_neighbors)
            X_dsel, y_dsel = smote.fit_resample(X_dsel, y_dsel)
        elif oversampler == "svmSMOTE":
            try:
                svm_smote = SVMSMOTE(random_state=42,
                                     k_neighbors=k_neighbors)
                X_dsel, y_dsel = svm_smote.fit_resample(X_dsel, y_dsel)
            except ValueError:
                # Best effort: keep the un-resampled data on failure.
                pass
        elif oversampler == "borderline1":
            borderline = BorderlineSMOTE(random_state=42,
                                         k_neighbors=k_neighbors,
                                         kind='borderline-1')
            X_dsel, y_dsel = borderline.fit_resample(X_dsel, y_dsel)
        elif oversampler == "borderline2":
            borderline = BorderlineSMOTE(random_state=42,
                                         k_neighbors=k_neighbors,
                                         kind='borderline-2')
            X_dsel, y_dsel = borderline.fit_resample(X_dsel, y_dsel)
        elif oversampler == "ADASYN":
            try:
                adasyn = ADASYN(random_state=42, n_neighbors=k_neighbors)
                X_dsel, y_dsel = adasyn.fit_resample(X_dsel, y_dsel)
            except (RuntimeError, ValueError):
                # Best effort: keep the un-resampled data on failure.
                pass
        elif oversampler == "SLS":
            sls = Safe_Level_SMOTE(n_neighbors=k_neighbors)
            X_dsel, y_dsel = sls.sample(X_dsel, y_dsel)

    # Unrecognised method names fall back to KNORAE.
    des_classes = {
        "KNORAE": KNORAE,
        "KNORAU": KNORAU,
        "KNN": DESKNN,
        "Clustering": DESClustering,
    }
    des_cls = des_classes.get(self.desMethod, KNORAE)
    des = des_cls(self.ensemble_, random_state=42)

    des.fit(X_dsel, y_dsel)
    return des.predict(X)