Example #1
def test_select_none_competent():
    des_p_test = DESP(create_pool_classifiers())
    des_p_test.n_classes = 2
    competences = np.ones(des_p_test.n_classifiers) * 0.49
    indices = des_p_test.select(competences)
    expected = np.array([[True, True, True]])
    assert np.array_equal(expected, indices)
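The all-True expectation follows from the DES-P selection rule: a classifier is kept only if its competence beats a random classifier, i.e. exceeds 1/n_classes, and when no classifier passes, the whole pool is used as a fallback. A minimal NumPy sketch of that rule (an illustration, not DESlib's actual implementation):

import numpy as np

def desp_select(competences, n_classes):
    # Keep classifiers whose competence beats random guessing (1 / n_classes)
    competences = np.atleast_2d(competences)
    selected = competences > 1.0 / n_classes
    # Fallback: if no classifier passes for a sample, select the whole pool
    none_selected = ~np.any(selected, axis=1)
    selected[none_selected, :] = True
    return selected

# Two classes -> threshold 0.5, so competences of 0.49 trigger the fallback:
print(desp_select(np.array([0.49, 0.49, 0.49]), n_classes=2))
# [[ True  True  True]]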
Example #2
def test_desp_proba(knn_methods):
    pool_classifiers, X_dsel, y_dsel, X_test, y_test = setup_classifiers()
    desp = DESP(pool_classifiers, knn_classifier=knn_methods)
    desp.fit(X_dsel, y_dsel)
    probas = desp.predict_proba(X_test)
    expected = np.load(
        'deslib/tests/expected_values/desp_proba_integration.npy')
    assert np.allclose(probas, expected)
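The expected .npy fixture loaded above has to come from somewhere. A hedged sketch of how such a regression fixture could be (re)generated, reusing desp and X_test from the test and the path it reads; run once against a trusted version of the library, then commit the file:

import numpy as np

# Hypothetical regeneration step for the fixture used by test_desp_proba.
probas = desp.predict_proba(X_test)
np.save('deslib/tests/expected_values/desp_proba_integration.npy', probas)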
Example #3
def test_desp_proba():
    pool_classifiers, X_dsel, y_dsel, X_test, y_test = setup_classifiers()

    desp = DESP(pool_classifiers, DFP=True)
    desp.fit(X_dsel, y_dsel)
    probas = desp.predict_proba(X_test)
    expected = np.load('deslib/tests/expected_values/desp_proba_DFP.npy')
    assert np.allclose(probas, expected)
Example #4
def test_select_two_classes():
    des_p_test = DESP()
    des_p_test.n_classes_ = 2
    expected = np.array([[True, False, True], [True, False, True],
                         [False, True, False]])

    competences = np.array([[0.51, 0.0, 0.51], [0.51, 0.0, 0.51],
                            [0.49, 1.0, 0.49]])

    selected = des_p_test.select(competences)

    assert np.array_equal(selected, expected)
Example #5
def test_select_three_classes():
    des_p_test = DESP()
    des_p_test.n_classes_ = 3
    expected = np.array([[True, False, True], [True, False, True],
                         [False, True, False]])

    competences = np.array([[0.34, 0.32, 1.0], [0.50, 0.30, 1.01],
                            [0.25, 1.0, 0.25]])

    selected = des_p_test.select(competences)

    assert np.array_equal(selected, expected)
Example #6
File: test_desp.py Project: trasse/DESlib
def test_estimate_competence_batch():
    query = np.ones((3, 2))
    expected = np.array([[0.57142857, 0.4285714, 0.57142857],
                         [0.71428571, 0.2857142, 0.71428571],
                         [0.2857142, 0.71428571, 0.2857142]])

    des_p_test = DESP(create_pool_classifiers())
    des_p_test.fit(X_dsel_ex1, y_dsel_ex1)
    neighbors = neighbors_ex1
    distances = distances_ex1
    competences = des_p_test.estimate_competence(query, neighbors, distances)
    assert np.allclose(competences, expected, atol=0.01)
Example #7
def test_estimate_competence_batch(example_estimate_competence,
                                   create_pool_classifiers):
    X, y, neighbors, distances, dsel_processed, _ = example_estimate_competence

    expected = np.array([[0.57142857, 0.4285714, 0.57142857],
                         [0.71428571, 0.2857142, 0.71428571],
                         [0.2857142, 0.71428571, 0.2857142]])

    des_p_test = DESP(create_pool_classifiers)
    des_p_test.fit(X, y)
    competences = des_p_test.estimate_competence(neighbors, distances)
    assert np.allclose(competences, expected, atol=0.01)
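The values being checked are consistent with DES-P estimating competence as each classifier's accuracy over the k nearest neighbors in DSEL (e.g. 4/7 ≈ 0.571 with k=7). A minimal sketch of that computation, assuming dsel_processed[i, j] is a hit/miss flag for classifier j on DSEL sample i, as in the fixture above:

import numpy as np

def local_accuracy(dsel_processed, neighbors):
    # Mean hit rate of each classifier over each sample's k nearest neighbors;
    # neighbors has shape (n_samples, k), the result (n_samples, n_classifiers).
    return dsel_processed[neighbors, :].mean(axis=1)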
Example #8
def test_select_ten_classes(index):
    query = np.atleast_2d([1, 1])

    des_p_test = DESP(create_pool_classifiers())
    des_p_test.fit(X_dsel_ex1, y_dsel_ex1)

    des_p_test.n_classes = 10
    des_p_test.DFP_mask = np.ones(des_p_test.n_classifiers)
    des_p_test.neighbors = neighbors_ex1[index, :]
    des_p_test.distances = distances_ex1[index, :]

    competences = des_p_test.estimate_competence(query)
    selected = des_p_test.select(competences)

    assert selected == list(range(des_p_test.n_classifiers))
Example #9
def test_estimate_competence(index, expected):
    query = np.atleast_2d([1, 1])

    des_p_test = DESP(create_pool_classifiers())
    des_p_test.fit(X_dsel_ex1, y_dsel_ex1)
    des_p_test.DFP_mask = np.ones(des_p_test.n_classifiers)
    des_p_test.neighbors = neighbors_ex1[index, :]
    des_p_test.distances = distances_ex1[index, :]
    competences = des_p_test.estimate_competence(query)
    assert np.isclose(competences, expected, atol=0.01).all()
Example #10
def test_select_three_classes(index, expected):
    query = np.atleast_2d([1, 1])

    des_p_test = DESP(create_pool_classifiers())
    des_p_test.fit(X_dsel_ex1, y_dsel_ex1)

    des_p_test.n_classes = 3
    des_p_test.neighbors = neighbors_ex1[index, :]
    des_p_test.distances = distances_ex1[index, :]

    competences = des_p_test.estimate_competence(query)
    selected = des_p_test.select(competences)

    assert np.array_equal(selected, expected)
Example #11
File: test_desp.py Project: trasse/DESlib
def test_select_two_classes(index, expected):
    query = np.atleast_2d([1, 1])

    des_p_test = DESP(create_pool_classifiers())
    des_p_test.fit(X_dsel_ex1, y_dsel_ex1)

    neighbors = neighbors_ex1[index, :].reshape(1, -1)
    distances = distances_ex1[index, :].reshape(1, -1)

    competences = des_p_test.estimate_competence(query, neighbors, distances)
    selected = des_p_test.select(competences)

    assert np.array_equal(selected, expected)
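Examples #9, #10, #11 (and #19 below) take injected (index, expected) or (knne, expected) arguments, so upstream they are almost certainly driven by a pytest.mark.parametrize decorator that the scraper dropped. A sketch of what it plausibly looks like; the concrete values below are made up for illustration, not taken from DESlib:

import pytest

@pytest.mark.parametrize('index, expected', [
    (0, [0, 2]),   # hypothetical: classifiers selected for query 0
    (1, [0, 2]),
    (2, [1]),
])
def test_select_two_classes(index, expected):
    ...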
Example #12
def test_desp():
    pool_classifiers, X_dsel, y_dsel, X_test, y_test = setup_classifiers()

    desp = DESP(pool_classifiers, DFP=True, with_IH=True, IH_rate=0.1)
    desp.fit(X_dsel, y_dsel)
    assert np.isclose(desp.score(X_test, y_test), 0.906060606060606)
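DFP enables Dynamic Frienemy Pruning of the pool, while with_IH switches easy queries away from dynamic selection: when the hardness of the query's region of competence falls below IH_rate, a plain KNN is used instead of DES-P. A small sketch of how one might probe that knob, reusing the names from the test above:

# Sweep the instance-hardness threshold; rate=0.0 should behave like pure DES-P.
for rate in (0.0, 0.1, 0.3):
    clf = DESP(pool_classifiers, with_IH=True, IH_rate=rate)
    clf.fit(X_dsel, y_dsel)
    print('IH_rate=%.1f accuracy=%.4f' % (rate, clf.score(X_test, y_test)))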
Example #13
File: test_desp.py Project: trasse/DESlib
def test_predict_proba():
    X = X_dsel_ex1
    y = y_dsel_ex1
    clf1 = Perceptron()
    clf1.fit(X, y)
    DESP([clf1, clf1]).fit(X, y)
Example #14
X_train, X_dsel, y_train, y_dsel = train_test_split(X_train,
                                                    y_train,
                                                    test_size=0.50,
                                                    random_state=rng)

# Train the pool after the split so that DSEL stays out of its training data
RF = RandomForestClassifier(random_state=rng, n_estimators=10)
RF.fit(X_train, y_train)

stacked = StackedClassifier(RF, LogisticRegression())
stacked.fit(X_dsel, y_dsel)

# Initialize the DS techniques. Here we specify the size of
# the region of competence (5 neighbors)
knorau = KNORAU(RF, k=5, random_state=rng)
kne = KNORAE(RF, k=5, random_state=rng)
desp = DESP(RF, k=5, random_state=rng)
ola = OLA(RF, k=5, random_state=rng)
mcb = MCB(RF, k=5, random_state=rng)
meta = METADES(RF, k=5, random_state=rng)

# Fit the DS techniques
knorau.fit(X_dsel, y_dsel)
kne.fit(X_dsel, y_dsel)
desp.fit(X_dsel, y_dsel)
meta.fit(X_dsel, y_dsel)
ola.fit(X_dsel, y_dsel)
mcb.fit(X_dsel, y_dsel)

###############################################################################
# Plotting the results
# -----------------------
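The script is cut off right at the plotting section. A minimal stand-in for what a results plot could look like, assuming the fitted models above and a held-out X_test, y_test (this is a sketch, not the original plotting code):

import matplotlib.pyplot as plt

names = ['Stacked', 'KNORA-U', 'KNORA-E', 'DES-P', 'OLA', 'MCB', 'META-DES']
models = [stacked, knorau, kne, desp, ola, mcb, meta]
scores = [m.score(X_test, y_test) for m in models]

plt.bar(names, scores)
plt.ylabel('Accuracy on the test set')
plt.xticks(rotation=45, ha='right')
plt.tight_layout()
plt.show()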
Example #15
def test_desp():
    pool_classifiers, X_dsel, y_dsel, X_test, y_test = setup_classifiers()

    desp = DESP(pool_classifiers, DFP=True)
    desp.fit(X_dsel, y_dsel)
    assert np.isclose(desp.score(X_test, y_test), 0.896969696969697)
Example #16
def main():
    ###############################################################################
    # Preparing the dataset
    # ---------------------
    # In this part we load the breast cancer dataset from scikit-learn and
    # preprocess it before passing it to the DS models. An important point here
    # is to normalize the data so that it has zero mean and unit variance, a
    # common requirement for many machine learning algorithms.
    # This step can easily be done using the StandardScaler class.

    rng = np.random.RandomState(123)
    data = load_breast_cancer()
    X = data.data
    y = data.target
    # split the data into training and test data
    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=0.33,
                                                        random_state=rng)

    # Scale the variables to have 0 mean and unit variance
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    # Split the data into training and DSEL for DS techniques
    X_train, X_dsel, y_train, y_dsel = train_test_split(X_train,
                                                        y_train,
                                                        test_size=0.5,
                                                        random_state=rng)

    # Train a pool of 100 base classifiers
    pool_classifiers = BaggingClassifier(Perceptron(max_iter=10),
                                         n_estimators=100,
                                         random_state=rng)
    pool_classifiers.fit(X_train, y_train)

    # Initialize the DS techniques
    knorau = KNORAU(pool_classifiers)
    kne = KNORAE(pool_classifiers)
    desp = DESP(pool_classifiers)
    ola = OLA(pool_classifiers)
    mcb = MCB(pool_classifiers)

    ###############################################################################
    # Calibrating base classifiers
    # -----------------------------
    # Some dynamic selection techniques require the base classifiers to estimate
    # probabilities in order to compute their competence level. Since the
    # Perceptron model is not a probabilistic classifier (it does not implement
    # the predict_proba method), it needs to be calibrated for probability
    # estimation before being used by such DS techniques. This step can be
    # conducted using the CalibratedClassifierCV class from scikit-learn. Note
    # that in this example we pass a prefitted pool of classifiers to the
    # calibration method in order to use exactly the same pool as the other
    # DS methods.
    calibrated_pool = []
    for clf in pool_classifiers:
        calibrated = CalibratedClassifierCV(base_estimator=clf, cv='prefit')
        calibrated.fit(X_dsel, y_dsel)
        calibrated_pool.append(calibrated)

    apriori = APriori(calibrated_pool)
    meta = METADES(calibrated_pool)

    knorau.fit(X_dsel, y_dsel)
    kne.fit(X_dsel, y_dsel)
    desp.fit(X_dsel, y_dsel)
    ola.fit(X_dsel, y_dsel)
    mcb.fit(X_dsel, y_dsel)
    apriori.fit(X_dsel, y_dsel)
    meta.fit(X_dsel, y_dsel)

    ###############################################################################
    # Evaluating the methods
    # -----------------------
    # Let's now evaluate the methods on the test set. We also use the
    # performance of Bagging (the pool of classifiers without any selection)
    # as a baseline for comparison. We can see that the majority of DS methods
    # achieve higher classification accuracy.

    print('Evaluating DS techniques:')
    print('Classification accuracy KNORA-Union: ',
          knorau.score(X_test, y_test))
    print('Classification accuracy KNORA-Eliminate: ',
          kne.score(X_test, y_test))
    print('Classification accuracy DESP: ', desp.score(X_test, y_test))
    print('Classification accuracy OLA: ', ola.score(X_test, y_test))
    print('Classification accuracy A priori: ', apriori.score(X_test, y_test))
    print('Classification accuracy MCB: ', mcb.score(X_test, y_test))
    print('Classification accuracy META-DES: ', meta.score(X_test, y_test))
    print('Classification accuracy Bagging: ',
          pool_classifiers.score(X_test, y_test))
Example #17
def test_predict_proba(create_X_y):
    X, y = create_X_y
    clf1 = Perceptron()
    clf1.fit(X, y)
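    # DES-P estimates competence from hard predictions, so fitting a pool of
    # Perceptrons (which lack predict_proba) should complete without raising;
    # that is presumably what this test checks.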
    DESP([clf1, clf1]).fit(X, y)
Example #18
X_train, X_dsel, y_train, y_dsel = train_test_split(X_train,
                                                    y_train,
                                                    test_size=0.5,
                                                    random_state=rng)
# Considering a pool composed of 10 base classifiers
pool_classifiers = RandomForestClassifier(n_estimators=10,
                                          random_state=rng,
                                          max_depth=10)
pool_classifiers.fit(X_train, y_train)

# DS techniques without DFP
apriori = APriori(pool_classifiers)
aposteriori = APosteriori(pool_classifiers)
ola = OLA(pool_classifiers)
lca = LCA(pool_classifiers)
desp = DESP(pool_classifiers)
meta = METADES(pool_classifiers)

apriori.fit(X_dsel, y_dsel)
aposteriori.fit(X_dsel, y_dsel)
ola.fit(X_dsel, y_dsel)
lca.fit(X_dsel, y_dsel)
desp.fit(X_dsel, y_dsel)
meta.fit(X_dsel, y_dsel)

print('Evaluating DS techniques:')
print('Classification accuracy of OLA: ', ola.score(X_test, y_test))
print('Classification accuracy of LCA: ', lca.score(X_test, y_test))
print('Classification accuracy of A priori: ', apriori.score(X_test, y_test))
print('Classification accuracy of A posteriori: ',
      aposteriori.score(X_test, y_test))
Example #19
def test_desp(knne, expected):
    pool_classifiers, X_dsel, y_dsel, X_test, y_test = setup_classifiers()

    desp = DESP(pool_classifiers, DFP=True, knne=knne)
    desp.fit(X_dsel, y_dsel)
    assert np.isclose(desp.score(X_test, y_test), expected)
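The knne flag toggles the neighborhood definition used to build the region of competence: with knne=True, DESlib uses K-Nearest Neighbors Equality, drawing the same number of neighbors from each class, which is why the parametrized expected score differs between the two settings. A hedged sketch of the two configurations side by side, reusing the names from the test above:

# Same pool and pruning, two neighborhood definitions.
desp_knn = DESP(pool_classifiers, DFP=True, knne=False).fit(X_dsel, y_dsel)
desp_knne = DESP(pool_classifiers, DFP=True, knne=True).fit(X_dsel, y_dsel)
print(desp_knn.score(X_test, y_test), desp_knne.score(X_test, y_test))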
Example #20
# Training a random forest to be used as the pool of classifiers.
# We set the maximum depth of the tree so that it
# can estimate probabilities
pool_classifiers = RandomForestClassifier(n_estimators=100, max_depth=5,
                                          random_state=rng)
pool_classifiers.fit(X_train, y_train)

stacked = StackedClassifier(pool_classifiers, LogisticRegression())
stacked.fit(X_dsel, y_dsel)

# Initialize the DS techniques. Here we specify the size of
# the region of competence (5 neighbors)
knorau = KNORAU(pool_classifiers, k=5, random_state=rng)
kne = KNORAE(pool_classifiers, k=5, random_state=rng)
desp = DESP(pool_classifiers, k=5, random_state=rng)
ola = OLA(pool_classifiers, k=5, random_state=rng)
mcb = MCB(pool_classifiers, k=5, random_state=rng)
meta = METADES(pool_classifiers, k=5, random_state=rng)

# Fit the DS techniques
knorau.fit(X_dsel, y_dsel)
kne.fit(X_dsel, y_dsel)
desp.fit(X_dsel, y_dsel)
meta.fit(X_dsel, y_dsel)
ola.fit(X_dsel, y_dsel)
mcb.fit(X_dsel, y_dsel)

###############################################################################
# Plotting the results
# -----------------------
Example #21
def test_check_estimator():
    check_estimator(DESP())
Example #22
def test_desp():
    pool_classifiers, X_dsel, y_dsel, X_test, y_test = setup_classifiers()

    desp = DESP(pool_classifiers)
    desp.fit(X_dsel, y_dsel)
    assert np.isclose(desp.score(X_test, y_test), 0.6954545454545454)
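test_check_estimator runs scikit-learn's estimator-API conformance suite against DESP. Note that it is called with no pool, which only works because DESlib generates a default pool internally during fit when pool_classifiers is None. A hedged usage sketch of that default-pool path:

from sklearn.datasets import make_classification

X, y = make_classification(random_state=0)
model = DESP()  # no pool given; one is generated internally at fit time
model.fit(X, y)
print(model.score(X, y))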
Example #23
    # split the data into training and test data
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25)
    RF = RandomForestClassifier()
    RF.fit(X_train, y_train)

    X_train, X_dsel, y_train, y_dsel = train_test_split(X_train, y_train,
                                                        test_size=0.50)

    # Training a random forest to be used as the pool of classifiers. We set
    # the maximum depth of the trees so that they can estimate probabilities.
    pool_classifiers = RandomForestClassifier(n_estimators=10, max_depth=5)
    pool_classifiers.fit(X_train, y_train)

    # Initialize the DS techniques. Here we specify the size of
    # the region of competence (5 neighbors)
    knorau = KNORAU(pool_classifiers, k=5)
    kne = KNORAE(pool_classifiers, k=5)
    desp = DESP(pool_classifiers, k=5)
    ola = OLA(pool_classifiers, k=5)
    mcb = MCB(pool_classifiers, k=5)
    meta = METADES(pool_classifiers, k=5)

    # Fit the DS techniques
    knorau.fit(X_dsel, y_dsel)
    kne.fit(X_dsel, y_dsel)
    desp.fit(X_dsel, y_dsel)
    meta.fit(X_dsel, y_dsel)
    ola.fit(X_dsel, y_dsel)
    mcb.fit(X_dsel, y_dsel)

    # Calculate classification accuracy of each technique
    print('Classification accuracy RF: ', RF.score(X_test, y_test))
    print('Evaluating DS techniques:')
Example #24
    ax.set_xlim((0, 1))
    ax.set_ylim((0, 1))

plt.tight_layout()
plt.show()

###############################################################################
# Comparison with Dynamic Selection techniques
# --------------------------------------------
#
# We will now consider four DS methods: k-Nearest Oracle-Eliminate (KNORA-E),
# Dynamic Ensemble Selection Performance (DES-P), Overall Local Accuracy (OLA)
# and Rank. Let's train the classifiers and plot their decision boundaries:

knora_e = KNORAE(pool_classifiers).fit(X_train, y_train)
desp = DESP(pool_classifiers).fit(X_train, y_train)
ola = OLA(pool_classifiers).fit(X_train, y_train)
rank = Rank(pool_classifiers).fit(X_train, y_train)

# Plotting the Decision Border of the DS methods.
fig2, sub = plt.subplots(2, 2, figsize=(15, 10))
plt.subplots_adjust(wspace=0.4, hspace=0.4)
titles = [
    'KNORA-Eliminate', 'DES-P', 'Overall Local Accuracy (OLA)', 'Modified Rank'
]

classifiers = [knora_e, desp, ola, rank]
for clf, ax, title in zip(classifiers, sub.flatten(), titles):
    plot_classifier_decision(ax, clf, X_train, mode='filled', alpha=0.4)
    plot_dataset(X_test, y_test, ax=ax)
    ax.set_xlim(np.min(X[:, 0]), np.max(X[:, 0]))
Example #25
def test_select_none_competent():
    des_p_test = DESP(create_pool_classifiers())
    des_p_test.n_classes = 2
    competences = np.ones(des_p_test.n_classifiers) * 0.49
    indices = des_p_test.select(competences)
    assert indices == list(range(des_p_test.n_classifiers))
Example #26
def test_desp(knn_methods):
    pool_classifiers, X_dsel, y_dsel, X_test, y_test = setup_classifiers()

    desp = DESP(pool_classifiers, knn_classifier=knn_methods)
    desp.fit(X_dsel, y_dsel)
    assert np.isclose(desp.score(X_test, y_test), 0.97340425531914898)
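The knn_methods fixture above parametrizes the estimator used to define the region of competence. In DESlib this is exposed through the knn_classifier argument, which accepts 'knn' (the default, scikit-learn's nearest-neighbor search) or 'faiss' for the faiss-based search when that package is installed. A short sketch of the two configurations:

# Same model, two region-of-competence backends ('faiss' requires the faiss package).
desp_sklearn = DESP(pool_classifiers, knn_classifier='knn')
desp_faiss = DESP(pool_classifiers, knn_classifier='faiss')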