def predict(self, X):
        """Return hard class predictions for X via dynamic ensemble selection.

        The selection set is taken from ``self.previous_X`` /
        ``self.previous_y`` and, when ``self.oversampled`` is truthy, balanced
        with ``RandomOverSampler`` first. The DES technique named by
        ``self.desMethod`` ("KNORAE", "KNORAU", "LCA" or "Rank") is then fitted
        on that set; any other value falls back to KNORAE.

        Raises
        ------
        ValueError
            If X does not have the same number of columns as ``self.X_``.
        """
        # Refuse to predict before the estimator has been fitted.
        check_is_fitted(self, "classes_")

        # Validate the input and compare its width with the fitted data.
        X = check_array(X)
        if X.shape[1] != self.X_.shape[1]:
            raise ValueError("number of features does not match")

        dsel_samples, dsel_labels = self.previous_X, self.previous_y

        if self.oversampled:
            sampler = RandomOverSampler(random_state=42)
            dsel_samples, dsel_labels = sampler.fit_resample(
                dsel_samples, dsel_labels)

        # Pick the selector class; KNORAE doubles as the default.
        method = self.desMethod
        if method == "KNORAU":
            selector_cls = KNORAU
        elif method == "LCA":
            selector_cls = LCA
        elif method == "Rank":
            selector_cls = Rank
        else:
            selector_cls = KNORAE

        selector = selector_cls(self.ensemble_, random_state=42)
        selector.fit(dsel_samples, dsel_labels)
        return selector.predict(X)
Exemple #2
0
def test_kne_proba(knn_methods):
    """Check KNORA-E soft-voting probabilities against the stored reference.

    ``knn_methods`` is forwarded as the ``knn_classifier`` argument of KNORAE.
    """
    reference_file = 'deslib/tests/expected_values/kne_proba_integration.npy'
    pool_classifiers, X_dsel, y_dsel, X_test, y_test = setup_classifiers()

    model = KNORAE(pool_classifiers, voting='soft', knn_classifier=knn_methods)
    model.fit(X_dsel, y_dsel)

    # Compare the predicted probabilities with the saved expected array.
    assert np.allclose(model.predict_proba(X_test), np.load(reference_file))
Exemple #3
0
def test_knorae_subspaces():
    """Check KNORA-E accuracy with a bagging pool built using max_features=0.5.

    Predictions are derived from ``predict_proba`` rather than ``predict``.
    """
    rng = np.random.RandomState(123456)
    X_dsel, X_test, X_train, y_dsel, y_test, y_train = load_dataset(None, rng)
    pool = BaggingClassifier(LogisticRegression(),
                             max_features=0.5,
                             random_state=rng).fit(X_train, y_train)

    knorae = KNORAE(pool)
    knorae.fit(X_dsel, y_dsel)
    # argmax yields column indices of the probability matrix.
    # NOTE(review): this equals the class label only if labels are 0..n-1;
    # confirm against load_dataset.
    y_pred = knorae.predict_proba(X_test).argmax(axis=1)
    # accuracy_score expects (y_true, y_pred); the value itself is symmetric.
    assert np.isclose(accuracy_score(y_test, y_pred), 0.9787234042553191)
Exemple #4
0
def test_knorae_subspaces():
    """Check KNORA-E ``score`` with a bagging pool built using max_features=0.5."""
    expected_accuracy = 0.9787234042553191
    rng = np.random.RandomState(123456)
    X_dsel, X_test, X_train, y_dsel, y_test, y_train = load_dataset(None, rng)

    # Train the pool of classifiers on the training split.
    bagging = BaggingClassifier(LogisticRegression(),
                                max_features=0.5,
                                random_state=rng)
    pool = bagging.fit(X_train, y_train)

    # Fit the selector on the DSEL split and score it on the test split.
    selector = KNORAE(pool)
    selector.fit(X_dsel, y_dsel)
    accuracy = selector.score(X_test, y_test)
    assert np.isclose(accuracy, expected_accuracy)
Exemple #5
0
def test_grid_search():
    """Smoke test: KNORAE can be cloned and tuned through GridSearchCV."""
    pool_classifiers, X_dsel, y_dsel, X_test, y_test = setup_classifiers()

    # Search over the neighbourhood size k.
    search = GridSearchCV(KNORAE(pool_classifiers), {'k': [1, 3, 5, 7]})
    search.fit(X_dsel, y_dsel)

    # No accuracy is asserted; the call only has to complete without raising.
    search.best_estimator_.score(X_test, y_test)
Exemple #6
0
    def predict(self, X):
        """
        Predict classes for X.

        The combination rule is chosen by ``self.des``:

        * ``"None"`` - average the ensemble support matrix and take the argmax.
        * ``"KNORAU1"`` / ``"KNORAE1"`` - KNORA-U / KNORA-E over
          ``self.ensemble_``.
        * ``"KNORAU2"`` / ``"KNORAE2"`` - KNORA-U / KNORA-E over
          ``self.ensemble_base_``.

        The DES variants are fitted on ``self.dsel_X_`` / ``self.dsel_y_`` on
        every call.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            The input samples to classify.

        Returns
        -------
        y : array-like, shape (n_samples, )
            The predicted classes.

        Raises
        ------
        ValueError
            If X has a different number of features than ``self.X_``, or if
            ``self.des`` is not one of the supported values.
        """

        # Check that fit has been called
        check_is_fitted(self, "classes_")
        X = check_array(X)
        if X.shape[1] != self.X_.shape[1]:
            raise ValueError("number of features does not match")

        if self.des == "None":
            # Plain support accumulation: mean over axis 0 of the support
            # matrix, then the best-supported column for each sample.
            esm = self.ensemble_support_matrix(X)
            average_support = np.mean(esm, axis=0)
            prediction = np.argmax(average_support, axis=1)
        else:
            # NOTE(review): KNORAU is called with ``ensemble=`` while KNORAE
            # is called with ``pool_classifiers=``; confirm both keywords
            # match the signatures of the imported classes.
            if self.des == "KNORAU1":
                des = KNORAU(ensemble=self.ensemble_)
            elif self.des == "KNORAU2":
                des = KNORAU(ensemble=self.ensemble_base_)
            elif self.des == "KNORAE1":
                des = KNORAE(pool_classifiers=self.ensemble_, random_state=42)
            elif self.des == "KNORAE2":
                des = KNORAE(pool_classifiers=self.ensemble_base_,
                             random_state=42)
            else:
                # Previously an unknown value fell through every branch and
                # failed later with an UnboundLocalError on ``prediction``.
                raise ValueError(
                    "unsupported des value: {!r}".format(self.des))
            des.fit(self.dsel_X_, self.dsel_y_)
            prediction = des.predict(X)

        # Map the predictions onto the stored class labels
        return self.classes_[prediction]
Exemple #7
0
def initialize_ds(pool_classifiers, X, y, k=5):
    """Build and fit one instance of each dynamic selection technique.

    Parameters
    ----------
    pool_classifiers : pool passed as first argument to every technique.
    X, y : data every technique is fitted on.
    k : value forwarded as the ``k`` keyword of every technique.

    Returns
    -------
    (list_ds, names) : the fitted techniques and their display names, in
        matching order.
    """
    # (display name, class) pairs, listed in the order they are returned.
    techniques = [
        ('KNORA-U', KNORAU),
        ('KNORA-E', KNORAE),
        ('OLA', OLA),
        ('LCA', LCA),
        ('MLA', MLA),
        ('DESKNN', DESKNN),
        ('MCB', MCB),
        ('RANK', Rank),
        ('KNOP', KNOP),
        ('META-DES', METADES),
    ]
    names = [label for label, _ in techniques]
    list_ds = [technique(pool_classifiers, k=k) for _, technique in techniques]

    # Fit every technique on the same data.
    for ds in list_ds:
        ds.fit(X, y)

    return list_ds, names
Exemple #8
0
def test_kne(knn_methods, voting):
    """Check KNORA-E accuracy for the given kNN backend and voting scheme."""
    pool_classifiers, X_dsel, y_dsel, X_test, y_test = setup_classifiers()

    model = KNORAE(pool_classifiers, voting=voting, knn_classifier=knn_methods)
    model.fit(X_dsel, y_dsel)
    accuracy = model.score(X_test, y_test)
    assert np.isclose(accuracy, 0.9787234042553191)
# 1-nearest-neighbour base model. X_train / y_train and the other model_*
# classifiers are presumably created earlier in the script (not visible here).
model_knn = KNeighborsClassifier(n_neighbors=1).fit(X_train, y_train)

# Heterogeneous pool handed to every DS technique below.
pool_classifiers = [
    model_perceptron, model_svc, model_bayes, model_tree, model_knn
]

# The same five models as (name, estimator) pairs for the VotingClassifier.
voting_classifiers = [("perceptron", model_perceptron), ("svc", model_svc),
                      ("bayes", model_bayes), ("tree", model_tree),
                      ("knn", model_knn)]

# Voting ensemble over the same five models, fitted on the training data.
model_voting = VotingClassifier(estimators=voting_classifiers).fit(
    X_train, y_train)

# Initializing the techniques
knorau = KNORAU(pool_classifiers)
kne = KNORAE(pool_classifiers)
desp = DESP(pool_classifiers)
metades = METADES(pool_classifiers, mode='hybrid')
# DCS techniques
ola = OLA(pool_classifiers)
mcb = MCB(pool_classifiers)

##############################################################################
# Adding stacked classifier as baseline comparison. Stacked classifier can
# be found in the static module. In this experiment we consider two types
# of stacking: one using logistic regression as meta-classifier
# (default configuration) and the other using a Decision Tree.
stacked_lr = StackedClassifier(pool_classifiers, random_state=rng)
stacked_dt = StackedClassifier(pool_classifiers,
                               random_state=rng,
                               meta_classifier=DecisionTreeClassifier())
Exemple #10
0
# Pool of classifiers: bagging over decision trees, fitted on the training
# data. X_train / y_train / rng are presumably defined earlier in the script.
pool_classifiers = BaggingClassifier(DecisionTreeClassifier(random_state=rng),
                                     random_state=rng)
pool_classifiers.fit(X_train, y_train)

###############################################################################
# Setting DS method to use the switch mechanism
# ----------------------------------------------
# In order to activate the functionality to switch between DS and KNN according
# to the instance hardness level we need to set the DS techniques to use this
# information. This is done by setting the hyperparameter `with_IH` to True.
# In this example we consider four different values for the threshold.
mcb = MCB(pool_classifiers, with_IH=True, random_state=rng)
ola = OLA(pool_classifiers, with_IH=True, random_state=rng)
rank = Rank(pool_classifiers, with_IH=True, random_state=rng)
des_p = DESP(pool_classifiers, with_IH=True, random_state=rng)
kne = KNORAE(pool_classifiers, with_IH=True, random_state=rng)
knu = KNORAU(pool_classifiers, with_IH=True, random_state=rng)
# Candidate threshold values: 0, 1/7, 2/7 and 3/7.
list_ih_values = [0.0, 1. / 7., 2. / 7., 3. / 7.]

# Fit every technique on the training data; the list keeps whatever fit()
# returns (presumably the fitted estimator itself).
list_ds_methods = [
    method.fit(X_train, y_train)
    for method in [mcb, ola, rank, des_p, kne, knu]
]
# Display names, in the same order as the methods above.
names = ['MCB', 'OLA', 'Mod. Rank', 'DES-P', 'KNORA-E', 'KNORA-U']

# Plot accuracy x IH
fig, ax = plt.subplots()
for ds_method, name in zip(list_ds_methods, names):
    accuracy = []
    for idx_ih, ih_rate in enumerate([0.0, 0.14, 0.28, 0.42]):
        ds_method.IH_rate = ih_rate
Exemple #11
0
# Normalizing the dataset to have 0 mean and unit variance.
# The scaler is fitted on the training data only and then applied to the test
# data.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Pool of classifiers: bagging over Perceptrons, fitted on the training data.
pool_classifiers = BaggingClassifier(Perceptron(max_iter=100),
                                     random_state=rng)
pool_classifiers.fit(X_train, y_train)

# Initializing the DS techniques with their default hyperparameters
mcb = MCB(pool_classifiers)
ola = OLA(pool_classifiers)
des_p = DESP(pool_classifiers)
knu = KNORAU(pool_classifiers)
lca = LCA(pool_classifiers)
kne = KNORAE(pool_classifiers)
rank = Rank(pool_classifiers)
# Methods and their display names, kept in matching order.
list_ds_methods = [mcb, ola, des_p, knu, lca, kne, rank]
names = ['MCB', 'OLA', 'DES-P', 'KNORA-U', 'LCA', 'KNORA-E', 'Rank']

# Values 3..15; presumably the region of competence sizes (k) to evaluate.
k_value_list = range(3, 16)

###############################################################################
# Plot accuracy x region of competence size.
# -------------------------------------------
# We can see that this parameter can have a huge influence on the performance
# of certain DS techniques. The main exceptions are KNORA-E and Rank,
# which have a built-in mechanism to automatically adjust the region
# of competence size during the competence level estimation.

fig, ax = plt.subplots()
Exemple #12
0
    def predict(self, X):
        """Predict labels for X with DES over an optionally oversampled DSEL.

        The selection set is ``self.previous_X`` / ``self.previous_y``. When
        ``self.oversampler`` names a supported technique ("SMOTE", "svmSMOTE",
        "borderline1", "borderline2", "ADASYN" or "SLS") the set is resampled
        first. The DES technique named by ``self.desMethod`` ("KNORAE",
        "KNORAU", "KNN" or "Clustering"; anything else falls back to KNORAE)
        is then fitted on it. With fewer than two ensemble members the single
        classifier predicts directly.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            The input samples to classify.

        Returns
        -------
        The predictions returned by the DES technique, or by the single
        ensemble member.

        Raises
        ------
        ValueError
            If X has a different number of features than ``self.X_``.
        """
        # Check that fit has been called
        check_is_fitted(self, "classes_")

        # Input validation
        X = check_array(X)
        if X.shape[1] != self.X_.shape[1]:
            raise ValueError("number of features does not match")

        # Nothing to select from: skip oversampling and DES altogether.
        if len(self.ensemble_) < 2:
            return self.ensemble_[0].predict(X)

        X_dsel = self.previous_X
        y_dsel = self.previous_y

        # The neighbour count is capped by the smallest class (minus the
        # sample itself), whichever label that class has.
        _, counts = np.unique(y_dsel, return_counts=True)
        k_neighbors = min(5, int(counts.min()) - 1)

        # Oversampling is skipped when the smallest class has a single sample.
        if k_neighbors > 0:
            if self.oversampler == "SMOTE":
                smote = SMOTE(random_state=42, k_neighbors=k_neighbors)
                X_dsel, y_dsel = smote.fit_resample(X_dsel, y_dsel)
            elif self.oversampler == "svmSMOTE":
                try:
                    svm_smote = SVMSMOTE(random_state=42,
                                         k_neighbors=k_neighbors)
                    X_dsel, y_dsel = svm_smote.fit_resample(X_dsel, y_dsel)
                except ValueError:
                    # Best effort: keep the un-resampled DSEL on failure.
                    pass
            elif self.oversampler == "borderline1":
                borderline = BorderlineSMOTE(random_state=42,
                                             k_neighbors=k_neighbors,
                                             kind='borderline-1')
                X_dsel, y_dsel = borderline.fit_resample(X_dsel, y_dsel)
            elif self.oversampler == "borderline2":
                borderline = BorderlineSMOTE(random_state=42,
                                             k_neighbors=k_neighbors,
                                             kind='borderline-2')
                X_dsel, y_dsel = borderline.fit_resample(X_dsel, y_dsel)
            elif self.oversampler == "ADASYN":
                try:
                    adasyn = ADASYN(random_state=42, n_neighbors=k_neighbors)
                    X_dsel, y_dsel = adasyn.fit_resample(X_dsel, y_dsel)
                except (RuntimeError, ValueError):
                    # Best effort: keep the un-resampled DSEL on failure.
                    pass
            elif self.oversampler == "SLS":
                sls = Safe_Level_SMOTE(n_neighbors=k_neighbors)
                X_dsel, y_dsel = sls.sample(X_dsel, y_dsel)

        if self.desMethod == "KNORAU":
            des = KNORAU(self.ensemble_, random_state=42)
        elif self.desMethod == "KNN":
            des = DESKNN(self.ensemble_, random_state=42)
        elif self.desMethod == "Clustering":
            des = DESClustering(self.ensemble_, random_state=42)
        else:
            # "KNORAE" and any unrecognised value both use KNORA-E.
            des = KNORAE(self.ensemble_, random_state=42)

        des.fit(X_dsel, y_dsel)
        return des.predict(X)
Exemple #13
0
# Setting up the random state to have consistent results
rng = np.random.RandomState(42)

# Generate a classification dataset
X, y = make_classification(n_samples=1000, random_state=rng)
# Split the data into training and test data (33% held out for testing)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33,
                                                    random_state=rng)

# Split the training data in half: one part stays as training data, the other
# becomes the DSEL used by the DS techniques.
# NOTE(review): X_train / y_train are not used again in the lines shown here.
X_train, X_dsel, y_train, y_dsel = train_test_split(X_train, y_train,
                                                    test_size=0.5,
                                                    random_state=rng)

# Initialize the DS techniques. DS methods can be initialized without
# specifying a single input parameter. In this example, we just pass the random
# state in order to always have the same result.
kne = KNORAE(random_state=rng)
meta = METADES(random_state=rng)

# Fitting the DES techniques on the DSEL data
kne.fit(X_dsel, y_dsel)
meta.fit(X_dsel, y_dsel)

# Calculate classification accuracy of each technique on the test data
print('Evaluating DS techniques:')
print('Classification accuracy KNORA-Eliminate: ',
      kne.score(X_test, y_test))
print('Classification accuracy META-DES: ', meta.score(X_test, y_test))