def predict(self, X): """Hard decision.""" # print("PREDICT") # Check is fit had been called check_is_fitted(self, "classes_") # Input validation X = check_array(X) if X.shape[1] != self.X_.shape[1]: raise ValueError("number of features does not match") X_dsel = self.previous_X y_dsel = self.previous_y if self.oversampled: ros = RandomOverSampler(random_state=42) X_dsel, y_dsel = ros.fit_resample(X_dsel, y_dsel) if self.desMethod == "KNORAE": des = KNORAE(self.ensemble_, random_state=42) elif self.desMethod == "KNORAU": des = KNORAU(self.ensemble_, random_state=42) elif self.desMethod == "LCA": des = LCA(self.ensemble_, random_state=42) elif self.desMethod == "Rank": des = Rank(self.ensemble_, random_state=42) else: des = KNORAE(self.ensemble_, random_state=42) des.fit(X_dsel, y_dsel) prediction = des.predict(X) return prediction
def initialize_ds(pool_classifiers, X, y, k=5):
    knorau = KNORAU(pool_classifiers, k=k)
    kne = KNORAE(pool_classifiers, k=k)
    desknn = DESKNN(pool_classifiers, k=k)
    ola = OLA(pool_classifiers, k=k)
    lca = LCA(pool_classifiers, k=k)
    mla = MLA(pool_classifiers, k=k)
    mcb = MCB(pool_classifiers, k=k)
    rank = Rank(pool_classifiers, k=k)
    knop = KNOP(pool_classifiers, k=k)
    meta = METADES(pool_classifiers, k=k)

    list_ds = [knorau, kne, ola, lca, mla, desknn, mcb, rank, knop, meta]
    names = [
        'KNORA-U', 'KNORA-E', 'OLA', 'LCA', 'MLA', 'DESKNN', 'MCB', 'RANK',
        'KNOP', 'META-DES'
    ]

    # fit the ds techniques
    for ds in list_ds:
        ds.fit(X, y)

    return list_ds, names
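# A minimal usage sketch for initialize_ds (the dataset, pool, and split
# below are illustrative assumptions, not part of the original code):
from sklearn.datasets import make_classification
from sklearn.ensemble import BaggingClassifier
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier

X, y = make_classification(n_samples=1000, random_state=42)
X_train, X_rest, y_train, y_rest = train_test_split(X, y, test_size=0.5,
                                                    random_state=42)
X_dsel, X_test, y_dsel, y_test = train_test_split(X_rest, y_rest,
                                                  test_size=0.5,
                                                  random_state=42)

pool = BaggingClassifier(DecisionTreeClassifier(), n_estimators=10,
                         random_state=42).fit(X_train, y_train)

# Fit every DS technique on the DSEL partition and score on held-out data.
list_ds, names = initialize_ds(pool, X_dsel, y_dsel, k=7)
for ds, name in zip(list_ds, names):
    print(f"{name}: {ds.score(X_test, y_test):.3f}")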
def predict(self, X): """Hard decision.""" # Input validation X = check_array(X) if X.shape[1] != self.X_.shape[1]: raise ValueError("number of features does not match") if self.des == "KNORAU": des = KNORAU(pool_classifiers=self.estimators_, random_state=42) des.fit(self.X_, self.y_) prediction = des.predict(X) elif self.des == "DESIRE": des = DESIRE(ensemble=self.estimators_, random_state=42, mode="whole", w=self.w) des.fit(self.X_, self.y_) prediction = des.predict(X) elif self.des == "DESIREC": des = DESIRE(ensemble=self.estimators_, random_state=42, mode="correct", w=self.w) des.fit(self.X_, self.y_) prediction = des.predict(X) elif self.des == "DESIREW": des = DESIRE(ensemble=self.estimators_, random_state=42, mode="wrong", w=self.w) des.fit(self.X_, self.y_) prediction = des.predict(X) else: esm = self.ensemble_support_matrix(X) average_support = np.mean(esm, axis=0) prediction = np.argmax(average_support, axis=1) return prediction
def test_knorau(knn_methods):
    pool_classifiers, X_dsel, y_dsel, X_test, y_test = setup_classifiers()

    knorau = KNORAU(pool_classifiers, knn_classifier=knn_methods)
    knorau.fit(X_dsel, y_dsel)
    assert np.isclose(knorau.score(X_test, y_test), 0.9787234042553191)
model_tree = DecisionTreeClassifier(random_state=rng).fit(X_train, y_train)
model_knn = KNeighborsClassifier(n_neighbors=1).fit(X_train, y_train)

pool_classifiers = [
    model_perceptron, model_svc, model_bayes, model_tree, model_knn
]
voting_classifiers = [("perceptron", model_perceptron), ("svc", model_svc),
                      ("bayes", model_bayes), ("tree", model_tree),
                      ("knn", model_knn)]
model_voting = VotingClassifier(estimators=voting_classifiers).fit(
    X_train, y_train)

# Initializing the techniques
knorau = KNORAU(pool_classifiers)
kne = KNORAE(pool_classifiers)
desp = DESP(pool_classifiers)
metades = METADES(pool_classifiers, mode='hybrid')
# DCS techniques
ola = OLA(pool_classifiers)
mcb = MCB(pool_classifiers)

##############################################################################
# Adding a stacked classifier as a baseline comparison. The stacked
# classifier can be found in the static module. In this experiment we
# consider two types of stacking: one using logistic regression as the
# meta-classifier (default configuration) and the other using a decision
# tree.
stacked_lr = StackedClassifier(pool_classifiers, random_state=rng)
stacked_dt = StackedClassifier(pool_classifiers, random_state=rng,
                               meta_classifier=DecisionTreeClassifier())
random_state=rng)
pool_classifiers.fit(X_train, y_train)

###############################################################################
# Setting DS method to use the switch mechanism
# ----------------------------------------------
# In order to activate the functionality to switch between DS and KNN
# according to the instance hardness level, we need to set the DS techniques
# to use this information. This is done by setting the hyperparameter
# `with_IH` to True. In this example we consider four different values for
# the threshold.

mcb = MCB(pool_classifiers, with_IH=True, random_state=rng)
ola = OLA(pool_classifiers, with_IH=True, random_state=rng)
rank = Rank(pool_classifiers, with_IH=True, random_state=rng)
des_p = DESP(pool_classifiers, with_IH=True, random_state=rng)
kne = KNORAE(pool_classifiers, with_IH=True, random_state=rng)
knu = KNORAU(pool_classifiers, with_IH=True, random_state=rng)
list_ih_values = [0.0, 1. / 7., 2. / 7., 3. / 7.]

list_ds_methods = [
    method.fit(X_train, y_train)
    for method in [mcb, ola, rank, des_p, kne, knu]
]
names = ['MCB', 'OLA', 'Mod. Rank', 'DES-P', 'KNORA-E', 'KNORA-U']

# Plot accuracy x IH
fig, ax = plt.subplots()
for ds_method, name in zip(list_ds_methods, names):
    accuracy = []
    for idx_ih, ih_rate in enumerate(list_ih_values):
        ds_method.IH_rate = ih_rate
        accuracy.append(ds_method.score(X_test, y_test))
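# The excerpt ends mid-loop; a plausible completion (a sketch mirroring the
# DESlib gallery examples, not the original code) plots accuracy against
# the IH threshold:
    ax.plot(list_ih_values, accuracy, label=name)

ax.set_xlabel('IH threshold')
ax.set_ylabel('Accuracy on the test set')
ax.legend()
plt.show()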
                                                   test_size=0.50,
                                                   random_state=rng)

pool_classifiers = BaggingClassifier(base_estimator=DecisionTreeClassifier(),
                                     n_estimators=100, random_state=rng)
pool_classifiers.fit(X_train, y_train)

# Setting up static methods.
stacked = StackedClassifier(pool_classifiers)
static_selection = StaticSelection(pool_classifiers)
single_best = SingleBest(pool_classifiers)

# Initialize the DS techniques.
knorau = KNORAU(pool_classifiers, random_state=rng)
kne = KNORAE(pool_classifiers, random_state=rng)
desp = DESP(pool_classifiers, random_state=rng)
ola = OLA(pool_classifiers, random_state=rng)
mcb = MCB(pool_classifiers, random_state=rng)
knop = KNOP(pool_classifiers, random_state=rng)
meta = METADES(pool_classifiers, random_state=rng)

names = [
    'Single Best', 'Static Selection', 'Stacked', 'KNORA-U', 'KNORA-E',
    'DES-P', 'OLA', 'MCB', 'KNOP', 'META-DES'
]
methods = [
    single_best, static_selection, stacked, knorau, kne, desp, ola, mcb,
    knop, meta
]
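# A plausible continuation (a sketch, not the original code): fit each
# method and report accuracy on the held-out data. Static and dynamic
# methods share the same fit/score interface in DESlib.
for method, name in zip(methods, names):
    method.fit(X_train, y_train)
    print(f'{name}: {method.score(X_test, y_test):.3f}')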
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=rng)

# Normalizing the dataset to have zero mean and unit variance.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

pool_classifiers = BaggingClassifier(Perceptron(max_iter=100),
                                     random_state=rng)
pool_classifiers.fit(X_train, y_train)

# Initializing the DS techniques
mcb = MCB(pool_classifiers)
ola = OLA(pool_classifiers)
des_p = DESP(pool_classifiers)
knu = KNORAU(pool_classifiers)
lca = LCA(pool_classifiers)
kne = KNORAE(pool_classifiers)
rank = Rank(pool_classifiers)
list_ds_methods = [mcb, ola, des_p, knu, lca, kne, rank]
names = ['MCB', 'OLA', 'DES-P', 'KNORA-U', 'LCA', 'KNORA-E', 'Rank']

k_value_list = range(3, 16)

###############################################################################
# Plot accuracy x region of competence size.
# -------------------------------------------
# We can see that this parameter can have a huge influence on the performance
# of certain DS techniques. The main exceptions are KNORA-E and Rank, which
# have a built-in mechanism to automatically adjust the region of competence
# size during the competence level estimation.
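# A plausible continuation (a sketch mirroring the DESlib gallery examples,
# not the original code): refit each method for every k and plot accuracy.
# Assumes matplotlib.pyplot is imported as plt, as in the earlier snippets.
fig, ax = plt.subplots()
for ds_method, name in zip(list_ds_methods, names):
    accuracy = []
    for k in k_value_list:
        ds_method.k = k
        ds_method.fit(X_train, y_train)
        accuracy.append(ds_method.score(X_test, y_test))
    ax.plot(k_value_list, accuracy, label=name)

ax.set_xlabel('Region of competence size (k)')
ax.set_ylabel('Accuracy')
ax.legend()
plt.show()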
def predict(self, X):
    # Check if fit had been called
    check_is_fitted(self, "classes_")

    # Input validation
    X = check_array(X)
    if X.shape[1] != self.X_.shape[1]:
        raise ValueError("number of features does not match")

    X_dsel = self.previous_X
    y_dsel = self.previous_y

    # Cap the oversampler neighborhood by the size of the smallest class.
    unique, counts = np.unique(y_dsel, return_counts=True)
    k_neighbors = min(5, counts.min() - 1)

    if self.oversampler == "SMOTE" and k_neighbors > 0:
        smote = SMOTE(random_state=42, k_neighbors=k_neighbors)
        X_dsel, y_dsel = smote.fit_resample(X_dsel, y_dsel)
    elif self.oversampler == "svmSMOTE" and k_neighbors > 0:
        try:
            svmSmote = SVMSMOTE(random_state=42, k_neighbors=k_neighbors)
            X_dsel, y_dsel = svmSmote.fit_resample(X_dsel, y_dsel)
        except ValueError:
            pass
    elif self.oversampler == "borderline1" and k_neighbors > 0:
        borderlineSmote1 = BorderlineSMOTE(random_state=42,
                                           k_neighbors=k_neighbors,
                                           kind='borderline-1')
        X_dsel, y_dsel = borderlineSmote1.fit_resample(X_dsel, y_dsel)
    elif self.oversampler == "borderline2" and k_neighbors > 0:
        borderlineSmote2 = BorderlineSMOTE(random_state=42,
                                           k_neighbors=k_neighbors,
                                           kind='borderline-2')
        X_dsel, y_dsel = borderlineSmote2.fit_resample(X_dsel, y_dsel)
    elif self.oversampler == "ADASYN" and k_neighbors > 0:
        try:
            adasyn = ADASYN(random_state=42, n_neighbors=k_neighbors)
            X_dsel, y_dsel = adasyn.fit_resample(X_dsel, y_dsel)
        except (RuntimeError, ValueError):
            pass
    elif self.oversampler == "SLS" and k_neighbors > 0:
        sls = Safe_Level_SMOTE(n_neighbors=k_neighbors)
        X_dsel, y_dsel = sls.sample(X_dsel, y_dsel)

    # Select the DES method; unknown names fall back to KNORA-E.
    if self.desMethod == "KNORAE":
        des = KNORAE(self.ensemble_, random_state=42)
    elif self.desMethod == "KNORAU":
        des = KNORAU(self.ensemble_, random_state=42)
    elif self.desMethod == "KNN":
        des = DESKNN(self.ensemble_, random_state=42)
    elif self.desMethod == "Clustering":
        des = DESClustering(self.ensemble_, random_state=42)
    else:
        des = KNORAE(self.ensemble_, random_state=42)

    # With fewer than two ensemble members there is nothing to select from.
    if len(self.ensemble_) < 2:
        prediction = self.ensemble_[0].predict(X)
    else:
        des.fit(X_dsel, y_dsel)
        prediction = des.predict(X)

    return prediction