from sklearn.metrics import accuracy_score from scipy.stats import rankdata from scipy.stats import ttest_ind from tabulate import tabulate from copy import deepcopy import matplotlib.pyplot as plt import matplotlib.ticker as mticker from sklearn.preprocessing import normalize import os state = 2404 clf_pool = { "My DES_KNN": DES_KNN(random_state=state), "DES_KNN": DESKNN(random_state=state), "KNORA-U": KNORAU(), "KNORA-E": KNORAE(), "ADABoost": AdaBoostClassifier(), } def test(clf_pool, data, method=None): dataset = "./datasets/" + data dataset = np.genfromtxt("%s.csv" % (dataset), delimiter=",") X = dataset[:, :-1] y = dataset[:, -1].astype(int) n_splits = 5 n_repeats = 3 rskf = RepeatedStratifiedKFold(n_splits=n_splits, n_repeats=n_repeats,
X_test = scaler.transform(X_test) # Split the data into training and DSEL for DS techniques X_train, X_dsel, y_train, y_dsel = train_test_split(X_train, y_train, test_size=0.5, random_state=rng) # Train a pool of 100 base classifiers pool_classifiers = BaggingClassifier(Perceptron(max_iter=10), n_estimators=100, random_state=rng) pool_classifiers.fit(X_train, y_train) # Initialize the DS techniques knorau = KNORAU(pool_classifiers) kne = KNORAE(pool_classifiers) desp = DESP(pool_classifiers) ola = OLA(pool_classifiers) mcb = MCB(pool_classifiers, random_state=rng) ############################################################################### # Calibrating base classifiers # ----------------------------- # Some dynamic selection techniques require that the base classifiers estimate # probabilities in order to estimate their competence level. Since the Perceptron # model is not a probabilistic classifier (it does not implement the # predict_proba method), it needs to be calibrated for # probability estimation before being used by such DS techniques. This step can # be conducted using the CalibratedClassifierCV class from scikit-learn. Note # that in this example we pass a prefitted pool of classifiers to the
random_state=rng) # Training a random forest to be used as the pool of classifiers. # We set the maximum depth of the tree so that it # can estimate probabilities pool_classifiers = RandomForestClassifier(n_estimators=100, max_depth=5, random_state=rng) pool_classifiers.fit(X_train, y_train) stacked = StackedClassifier(pool_classifiers, LogisticRegression()) stacked.fit(X_dsel, y_dsel) # Initialize a DS technique. Here we specify the size of # the region of competence (5 neighbors) knorau = KNORAU(pool_classifiers, random_state=rng) kne = KNORAE(pool_classifiers, k=5, random_state=rng) desp = DESP(pool_classifiers, k=5, random_state=rng) ola = OLA(pool_classifiers, k=5, random_state=rng) mcb = MCB(pool_classifiers, k=5, random_state=rng) meta = METADES(pool_classifiers, k=5, random_state=rng) # Fit the DS techniques knorau.fit(X_dsel, y_dsel) kne.fit(X_dsel, y_dsel) desp.fit(X_dsel, y_dsel) meta.fit(X_dsel, y_dsel) ola.fit(X_dsel, y_dsel) mcb.fit(X_dsel, y_dsel) ###############################################################################
def test_knorau():
    """Check the KNORA-U score with DFP=True against a pinned accuracy.

    The pool and the DSEL/test splits come from ``setup_classifiers()``.
    """
    pool, dsel_features, dsel_labels, test_features, test_labels = (
        setup_classifiers()
    )

    selector = KNORAU(pool, DFP=True)
    selector.fit(dsel_features, dsel_labels)

    # Keep the argument order (observed, expected): np.isclose applies its
    # relative tolerance to the second argument.
    observed_accuracy = selector.score(test_features, test_labels)
    expected_accuracy = 0.90606060606060601
    assert np.isclose(observed_accuracy, expected_accuracy)
def test_knorau():
    """Check the KNORA-U score (default arguments) against a pinned accuracy.

    The pool and the DSEL/test splits come from ``setup_classifiers()``.
    """
    pool, dsel_features, dsel_labels, test_features, test_labels = (
        setup_classifiers()
    )

    selector = KNORAU(pool)
    selector.fit(dsel_features, dsel_labels)

    # Keep the argument order (observed, expected): np.isclose applies its
    # relative tolerance to the second argument.
    observed_accuracy = selector.score(test_features, test_labels)
    expected_accuracy = 0.97340425531914898
    assert np.isclose(observed_accuracy, expected_accuracy)
def test_label_encoder_integration_sklearn_ensembles():
    """Check the KNORA-U score when the setup is built with string labels.

    ``setup_classifiers`` is called with ``encode_labels=['no', 'yes']``; the
    resulting score is compared against a pinned accuracy.
    """
    fixtures = setup_classifiers(encode_labels=['no', 'yes'])
    pool, dsel_features, dsel_labels, test_features, test_labels = fixtures

    selector = KNORAU(pool)
    selector.fit(dsel_features, dsel_labels)

    # Keep the argument order (observed, expected): np.isclose applies its
    # relative tolerance to the second argument.
    observed_accuracy = selector.score(test_features, test_labels)
    expected_accuracy = 0.97340425531914898
    assert np.isclose(observed_accuracy, expected_accuracy)