Ejemplo n.º 1
0
    def predict(self, X):
        """Return hard class predictions for ``X``.

        A dynamic selection model is built on every call: the selection set
        is read from ``self.previous_X`` / ``self.previous_y``, randomly
        oversampled when ``self.oversampled`` is truthy, and the technique
        named by ``self.desMethod`` is fitted on it before predicting.

        Raises
        ------
        ValueError
            If ``X`` has a different number of features than ``self.X_``.
        """
        # Check that fit has been called.
        check_is_fitted(self, "classes_")

        # Validate the input and its feature count.
        X = check_array(X)
        if X.shape[1] != self.X_.shape[1]:
            raise ValueError("number of features does not match")

        dsel_X, dsel_y = self.previous_X, self.previous_y
        if self.oversampled:
            sampler = RandomOverSampler(random_state=42)
            dsel_X, dsel_y = sampler.fit_resample(dsel_X, dsel_y)

        # Unrecognised method names fall back to KNORA-E.
        technique_by_name = {
            "KNORAE": KNORAE,
            "KNORAU": KNORAU,
            "LCA": LCA,
            "Rank": Rank,
        }
        technique = technique_by_name.get(self.desMethod, KNORAE)

        selector = technique(self.ensemble_, random_state=42)
        selector.fit(dsel_X, dsel_y)
        return selector.predict(X)
Ejemplo n.º 2
0
def initialize_ds(pool_classifiers, X, y, k=5):
    """Create and fit one instance of each dynamic selection technique.

    Parameters
    ----------
    pool_classifiers
        Pool passed as the first argument to every technique.
    X, y
        Data every technique is fitted on.
    k : int, default=5
        Forwarded as the ``k`` keyword to every technique.

    Returns
    -------
    tuple
        ``(list_ds, names)``: the fitted techniques and their display
        labels, aligned by index.
    """
    labelled_techniques = (
        ('KNORA-U', KNORAU),
        ('KNORA-E', KNORAE),
        ('OLA', OLA),
        ('LCA', LCA),
        ('MLA', MLA),
        ('DESKNN', DESKNN),
        ('MCB', MCB),
        ('RANK', Rank),
        ('KNOP', KNOP),
        ('META-DES', METADES),
    )

    names = [label for label, _ in labelled_techniques]
    list_ds = [
        technique(pool_classifiers, k=k)
        for _, technique in labelled_techniques
    ]

    # Fit only after every technique has been constructed.
    for selector in list_ds:
        selector.fit(X, y)

    return list_ds, names
Ejemplo n.º 3
0
    def predict(self, X):
        """Return hard class predictions for ``X``.

        ``self.des`` selects the strategy. ``"KNORAU"`` and the DESIRE
        variants (``"DESIRE"``, ``"DESIREC"``, ``"DESIREW"``) fit a dynamic
        selection model on ``self.X_`` / ``self.y_`` and predict with it.
        Any other value averages the ensemble support matrix over its first
        axis and returns the arg-max along axis 1.

        Raises
        ------
        ValueError
            If ``X`` has a different number of features than ``self.X_``.
        """
        # Validate the input and its feature count.
        X = check_array(X)
        if X.shape[1] != self.X_.shape[1]:
            raise ValueError("number of features does not match")

        # The DESIRE variants differ only in the ``mode`` they are built with.
        desire_modes = {
            "DESIRE": "whole",
            "DESIREC": "correct",
            "DESIREW": "wrong",
        }

        if self.des == "KNORAU":
            selector = KNORAU(pool_classifiers=self.estimators_,
                              random_state=42)
        elif self.des in desire_modes:
            selector = DESIRE(ensemble=self.estimators_,
                              random_state=42,
                              mode=desire_modes[self.des],
                              w=self.w)
        else:
            # No dynamic selection requested: combine the averaged supports.
            supports = self.ensemble_support_matrix(X)
            return np.argmax(np.mean(supports, axis=0), axis=1)

        selector.fit(self.X_, self.y_)
        return selector.predict(X)
Ejemplo n.º 4
0
def test_knorau(knn_methods):
    """Check KNORA-U's test accuracy for the given ``knn_classifier``."""
    expected_accuracy = 0.9787234042553191

    pool, dsel_X, dsel_y, test_X, test_y = setup_classifiers()

    technique = KNORAU(pool, knn_classifier=knn_methods)
    technique.fit(dsel_X, dsel_y)

    observed_accuracy = technique.score(test_X, test_y)
    assert np.isclose(observed_accuracy, expected_accuracy)
Ejemplo n.º 5
0
# Two more base models, each fitted on the training split.
model_tree = DecisionTreeClassifier(random_state=rng).fit(X_train, y_train)
model_knn = KNeighborsClassifier(n_neighbors=1).fit(X_train, y_train)

# Pool of five base models handed to every technique below. model_perceptron,
# model_svc and model_bayes are created outside this excerpt.
pool_classifiers = [
    model_perceptron, model_svc, model_bayes, model_tree, model_knn
]

# The same five models as (name, estimator) pairs, the form passed to
# VotingClassifier's `estimators` argument.
voting_classifiers = [("perceptron", model_perceptron), ("svc", model_svc),
                      ("bayes", model_bayes), ("tree", model_tree),
                      ("knn", model_knn)]

# NOTE(review): presumably a static voting baseline for the comparison;
# confirm against the part of the script that reports the results.
model_voting = VotingClassifier(estimators=voting_classifiers).fit(
    X_train, y_train)

# Initializing the techniques. They are only constructed here; no fit call
# appears in this excerpt.
knorau = KNORAU(pool_classifiers)
kne = KNORAE(pool_classifiers)
desp = DESP(pool_classifiers)
metades = METADES(pool_classifiers, mode='hybrid')
# DCS techniques
ola = OLA(pool_classifiers)
mcb = MCB(pool_classifiers)

##############################################################################
# Adding stacked classifier as baseline comparison. Stacked classifier can
# be found in the static module. In this experiment we consider two types
# of stacking: one using logistic regression as meta-classifier
# (default configuration) and the other using a Decision Tree.
stacked_lr = StackedClassifier(pool_classifiers, random_state=rng)
stacked_dt = StackedClassifier(pool_classifiers,
                               random_state=rng,
Ejemplo n.º 6
0
                                     random_state=rng)
# Fit the pool on the training split (its construction is only partly
# visible in this excerpt).
pool_classifiers.fit(X_train, y_train)

###############################################################################
# Setting DS method to use the switch mechanism
# ----------------------------------------------
# In order to activate the functionality to switch between DS and KNN according
# to the instance hardness level we need to set the DS techniques to use this
# information. This is done by setting the hyperparameter `with_IH` to True.
# In this example we consider four different values for the threshold
mcb = MCB(pool_classifiers, with_IH=True, random_state=rng)
ola = OLA(pool_classifiers, with_IH=True, random_state=rng)
rank = Rank(pool_classifiers, with_IH=True, random_state=rng)
des_p = DESP(pool_classifiers, with_IH=True, random_state=rng)
kne = KNORAE(pool_classifiers, with_IH=True, random_state=rng)
knu = KNORAU(pool_classifiers, with_IH=True, random_state=rng)
# NOTE(review): list_ih_values is not read anywhere in this excerpt; the loop
# below iterates a separate literal list of two-decimal values instead.
# Confirm whether the two lists are meant to agree.
list_ih_values = [0.0, 1. / 7., 2. / 7., 3. / 7.]

# Fit every technique on the training split. The list stores whatever fit()
# returns, and score() is later called on those stored objects.
list_ds_methods = [
    method.fit(X_train, y_train)
    for method in [mcb, ola, rank, des_p, kne, knu]
]
# Display labels, in the same order as the techniques above.
names = ['MCB', 'OLA', 'Mod. Rank', 'DES-P', 'KNORA-E', 'KNORA-U']

# Plot accuracy x IH
fig, ax = plt.subplots()
for ds_method, name in zip(list_ds_methods, names):
    accuracy = []
    # idx_ih is not used in the visible part of this loop.
    for idx_ih, ih_rate in enumerate([0.0, 0.14, 0.28, 0.42]):
        # Override the threshold on the already-fitted technique, then
        # record its score on the test split.
        ds_method.IH_rate = ih_rate
        accuracy.append(ds_method.score(X_test, y_test))
Ejemplo n.º 7
0
                                                    test_size=0.50,
                                                    random_state=rng)

# Pool of 100 bagged decision trees, fitted on the training split.
# The base estimator is passed positionally: the keyword was renamed from
# `base_estimator` to `estimator` in newer scikit-learn releases, while the
# first positional slot is the base estimator under either name.
pool_classifiers = BaggingClassifier(DecisionTreeClassifier(),
                                     n_estimators=100,
                                     random_state=rng)
pool_classifiers.fit(X_train, y_train)

# Setting up static methods.
stacked = StackedClassifier(pool_classifiers)
static_selection = StaticSelection(pool_classifiers)
single_best = SingleBest(pool_classifiers)

# Initialize the DS techniques. No `k` is passed, so each technique uses its
# own default region-of-competence size.
knorau = KNORAU(pool_classifiers, random_state=rng)
kne = KNORAE(pool_classifiers, random_state=rng)
desp = DESP(pool_classifiers, random_state=rng)
ola = OLA(pool_classifiers, random_state=rng)
mcb = MCB(pool_classifiers, random_state=rng)
knop = KNOP(pool_classifiers, random_state=rng)
meta = METADES(pool_classifiers, random_state=rng)

# Display labels for the methods being compared.
names = [
    'Single Best', 'Static Selection', 'Stacked', 'KNORA-U', 'KNORA-E',
    'DES-P', 'OLA', 'MCB', 'KNOP', 'META-DES'
]

methods = [
    single_best, static_selection, stacked, knorau, kne, desp, ola, mcb, knop,
    meta
Ejemplo n.º 8
0
# Split the data into training and test parts (X, y and rng are defined
# outside this excerpt).
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=rng)

# Normalizing the dataset to have 0 mean and unit variance.
# The scaler is fitted on the training split only and then applied to the
# test split.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Pool of bagged Perceptrons, fitted on the scaled training split.
pool_classifiers = BaggingClassifier(Perceptron(max_iter=100),
                                     random_state=rng)
pool_classifiers.fit(X_train, y_train)

# Initializing the DS techniques. Only the pool is passed; no other
# hyperparameter is set here and no fit call appears in this excerpt.
mcb = MCB(pool_classifiers)
ola = OLA(pool_classifiers)
des_p = DESP(pool_classifiers)
knu = KNORAU(pool_classifiers)
lca = LCA(pool_classifiers)
kne = KNORAE(pool_classifiers)
rank = Rank(pool_classifiers)
# Techniques and their display labels, aligned by index.
list_ds_methods = [mcb, ola, des_p, knu, lca, kne, rank]
names = ['MCB', 'OLA', 'DES-P', 'KNORA-U', 'LCA', 'KNORA-E', 'Rank']

# Values 3..15. Presumably the region-of-competence sizes swept for the plot
# described below; the code that reads this list is outside this excerpt.
k_value_list = range(3, 16)

###############################################################################
# Plot accuracy x region of competence size.
# -------------------------------------------
# We can see that this parameter can have a huge influence on the performance
# of certain DS techniques. The main exceptions are KNORA-E and Rank,
# which have a built-in mechanism to automatically adjust the region
# of competence size during the competence level estimation.
Ejemplo n.º 9
0
    def predict(self, X):
        """Return hard class predictions for ``X``.

        The selection set (``self.previous_X`` / ``self.previous_y``) is
        optionally oversampled with the technique named by
        ``self.oversampler``. The dynamic selection method named by
        ``self.desMethod`` (KNORA-E for any unrecognised name) is then fitted
        on it and used to predict. When the ensemble has fewer than two
        members, its first member predicts directly.

        Raises
        ------
        ValueError
            If ``X`` has a different number of features than ``self.X_``.
        """
        # Check that fit has been called
        check_is_fitted(self, "classes_")

        # Input validation
        X = check_array(X)
        if X.shape[1] != self.X_.shape[1]:
            raise ValueError("number of features does not match")

        X_dsel = self.previous_X
        y_dsel = self.previous_y

        # The neighbourhood requested from the samplers must fit inside the
        # smallest class, whichever label that class has. Cap it at
        # (smallest class size - 1), never above 5.
        _, counts = np.unique(y_dsel, return_counts=True)
        k_neighbors = min(5, int(counts.min()) - 1)

        # With k_neighbors <= 0 none of the branches below runs and the
        # selection set is used as-is.
        if self.oversampler == "SMOTE" and k_neighbors > 0:
            smote = SMOTE(random_state=42, k_neighbors=k_neighbors)
            X_dsel, y_dsel = smote.fit_resample(X_dsel, y_dsel)
        elif self.oversampler == "svmSMOTE" and k_neighbors > 0:
            try:
                svmSmote = SVMSMOTE(random_state=42, k_neighbors=k_neighbors)
                X_dsel, y_dsel = svmSmote.fit_resample(X_dsel, y_dsel)
            except ValueError:
                # Best effort: keep the unresampled selection set.
                pass
        elif self.oversampler == "borderline1" and k_neighbors > 0:
            borderlineSmote1 = BorderlineSMOTE(random_state=42,
                                               k_neighbors=k_neighbors,
                                               kind='borderline-1')
            X_dsel, y_dsel = borderlineSmote1.fit_resample(X_dsel, y_dsel)
        elif self.oversampler == "borderline2" and k_neighbors > 0:
            borderlineSmote2 = BorderlineSMOTE(random_state=42,
                                               k_neighbors=k_neighbors,
                                               kind='borderline-2')
            X_dsel, y_dsel = borderlineSmote2.fit_resample(X_dsel, y_dsel)
        elif self.oversampler == "ADASYN" and k_neighbors > 0:
            try:
                adasyn = ADASYN(random_state=42, n_neighbors=k_neighbors)
                X_dsel, y_dsel = adasyn.fit_resample(X_dsel, y_dsel)
            except (RuntimeError, ValueError):
                # Best effort: keep the unresampled selection set.
                pass
        elif self.oversampler == "SLS" and k_neighbors > 0:
            sls = Safe_Level_SMOTE(n_neighbors=k_neighbors)
            X_dsel, y_dsel = sls.sample(X_dsel, y_dsel)

        if self.desMethod == "KNORAE":
            des = KNORAE(self.ensemble_, random_state=42)
        elif self.desMethod == "KNORAU":
            des = KNORAU(self.ensemble_, random_state=42)
        elif self.desMethod == "KNN":
            des = DESKNN(self.ensemble_, random_state=42)
        elif self.desMethod == "Clustering":
            des = DESClustering(self.ensemble_, random_state=42)
        else:
            # Unrecognised method names fall back to KNORA-E.
            des = KNORAE(self.ensemble_, random_state=42)

        if len(self.ensemble_) < 2:
            # The selection method is bypassed for an ensemble this small;
            # the first member answers directly.
            prediction = self.ensemble_[0].predict(X)
        else:
            des.fit(X_dsel, y_dsel)
            prediction = des.predict(X)

        return prediction