Ejemplo n.º 1
0
def test_des_clustering():
    """Fit DESClustering with a seeded RNG and check its test-set score.

    The pool and data come from ``setup_classifiers``; the score is compared
    with a stored reference value.
    """
    expected_score = 0.97872340425531912
    pool_classifiers, X_dsel, y_dsel, X_test, y_test = setup_classifiers()

    model = DESClustering(pool_classifiers,
                          rng=np.random.RandomState(123456))
    model.fit(X_dsel, y_dsel)

    score = model.score(X_test, y_test)
    assert np.isclose(score, expected_score)
Ejemplo n.º 2
0
def test_diversity_metric_ratio():
    """``metric='ratio'`` must leave ``diversity_func_`` as ``ratio_errors``."""
    pool = create_pool_classifiers() * 10
    model = DESClustering(pool, metric='ratio')

    # Cluster preprocessing is irrelevant to this check, so it is stubbed.
    model._preprocess_clusters = MagicMock(return_value=1)
    model.fit(X_dsel_ex1, y_dsel_ex1)

    assert model.diversity_func_ == ratio_errors
Ejemplo n.º 3
0
def test_classify_instance():
    """With ``select`` stubbed, ``classify_instance`` must return label 0."""
    pool = create_pool_classifiers() * 4
    sample = np.atleast_2d([1, -1])

    model = DESClustering(pool, k=2)
    # Fix the selected ensemble so the outcome does not depend on clustering.
    model.select = MagicMock(return_value=[0, 1, 2, 3, 5, 6, 7, 9])

    assert model.classify_instance(sample) == 0
Ejemplo n.º 4
0
def test_fit_heterogeneous_clusters(example_estimate_competence,
                                    create_pool_classifiers):
    """Fit with a mocked cluster assignment and check scores and selection.

    The cluster index of every sample is forced to
    ``return_cluster_index_ex2`` by mocking the clustering ``predict``.
    """
    X, y = example_estimate_competence[0:2]
    model = DESClustering(create_pool_classifiers,
                          clustering=KMeans(n_clusters=2),
                          pct_accuracy=0.5,
                          pct_diversity=0.33)
    model.clustering.predict = MagicMock(
        return_value=return_cluster_index_ex2)

    model.fit(X, y)

    performance = model.performance_cluster_
    assert np.allclose(performance[:, 1], [0.428, 0.375], atol=0.01)
    assert np.allclose(performance[:, 0], [0.572, 0.625], atol=0.01)

    # The first classifier chosen for each cluster must be index 0 or 2
    # (per the original note: a classifier that predicts the label 0).
    for cluster_id in (0, 1):
        assert model.indices_[cluster_id, 0] in (0, 2)
Ejemplo n.º 5
0
def test_classify_with_ds_single_sample():
    """``classify_with_ds`` on a single 1-D query with a stubbed selection."""
    model = DESClustering(create_pool_classifiers())
    # Selection is fixed to base classifiers 0 and 2.
    model.select = MagicMock(return_value=np.array([[0, 2]]))

    sample = np.ones(2)
    base_predictions = np.array([0, 1, 0])

    outcome = model.classify_with_ds(sample, base_predictions)
    assert np.allclose(outcome, 0)
Ejemplo n.º 6
0
def test_classify_with_ds_diff_sizes():
    """A 10-row query with a 5-row prediction matrix must raise ValueError."""
    model = DESClustering(create_pool_classifiers())

    samples = np.ones((10, 2))
    base_predictions = np.ones((5, 3))

    with pytest.raises(ValueError):
        model.classify_with_ds(samples, base_predictions)
Ejemplo n.º 7
0
def test_classify_with_ds_diff_sizes():
    """A 10-row query with a 5-row prediction matrix must raise ValueError."""
    model = DESClustering()

    samples = np.ones((10, 2))
    base_predictions = np.ones((5, 3))

    with pytest.raises(ValueError):
        model.classify_with_ds(samples, base_predictions)
Ejemplo n.º 8
0
def test_diversity_metric_DF(create_X_y):
    """``metric='DF'`` must resolve to ``negative_double_fault`` after fit."""
    features, labels = create_X_y
    model = DESClustering(metric='DF')

    # Cluster preprocessing is irrelevant to this check, so it is stubbed.
    model._preprocess_clusters = MagicMock(return_value=1)
    model.fit(features, labels)

    assert model.diversity_func_ == negative_double_fault
Ejemplo n.º 9
0
def test_des_clustering_proba():
    """Compare ``predict_proba`` output with the stored reference array."""
    pool_classifiers, X_dsel, y_dsel, X_test, y_test = setup_classifiers()

    model = DESClustering(pool_classifiers,
                          rng=np.random.RandomState(123456))
    model.fit(X_dsel, y_dsel)
    predicted = model.predict_proba(X_test)

    # The path is relative, so it resolves against the current working
    # directory of the test run.
    reference = np.load(
        'deslib/tests/expected_values/des_clustering_proba_integration.npy')
    assert np.allclose(predicted, reference)
Ejemplo n.º 10
0
def test_proba_with_ds_diff_sizes():
    """Inputs with mismatched sample counts must raise ValueError.

    The query has 10 rows while predictions and probabilities have 5.
    """
    model = DESClustering()

    samples = np.ones((10, 2))
    base_predictions = np.ones((5, 3))
    base_probabilities = np.ones((5, 3, 2))

    with pytest.raises(ValueError):
        model.predict_proba_with_ds(samples, base_predictions,
                                    base_probabilities)
Ejemplo n.º 11
0
def test_des_clustering():
    """Fit DESClustering with a seeded 5-cluster KMeans and check the score."""
    from sklearn.cluster import KMeans

    expected_score = 0.97872340425531912
    pool_classifiers, X_dsel, y_dsel, X_test, y_test = setup_classifiers()

    # Seed the clustering so the reference score is reproducible.
    kmeans = KMeans(n_clusters=5,
                    random_state=np.random.RandomState(123456))
    model = DESClustering(pool_classifiers, clustering=kmeans)
    model.fit(X_dsel, y_dsel)

    score = model.score(X_test, y_test)
    assert np.isclose(score, expected_score)
Ejemplo n.º 12
0
def test_select():
    """``select`` must return the ``indices`` row of the predicted cluster."""
    sample = np.atleast_2d([1, -1])
    model = DESClustering(create_pool_classifiers() * 2, k=2)
    model.indices = np.array([[0, 2], [1, 4]])

    # Mock the cluster prediction to each cluster id in turn.
    for cluster_id, expected in ((0, [0, 2]), (1, [1, 4])):
        model.roc_algorithm.predict = MagicMock(return_value=[cluster_id])
        assert np.array_equal(model.select(sample), expected)
Ejemplo n.º 13
0
def test_diversity_metric_ratio(create_X_y):
    """``metric_diversity='ratio'`` must resolve to ``ratio_errors``."""
    features, labels = create_X_y
    model = DESClustering(metric_diversity='ratio')

    # Cluster preprocessing is irrelevant to this check, so it is stubbed.
    model._preprocess_clusters = MagicMock(return_value=1)
    model.fit(features, labels)

    assert model.diversity_func_ == ratio_errors
Ejemplo n.º 14
0
def test_classify_instance():
    """``classify_instance`` with explicit base predictions must return 0."""
    sample = np.atleast_2d([1, -1])
    model = DESClustering(create_pool_classifiers() * 4, k=2)
    # Fix the selected ensemble so the outcome does not depend on clustering.
    model.select = MagicMock(return_value=[0, 1, 2, 3, 5, 6, 7, 9])

    # One prediction per base classifier for the single query sample.
    base_predictions = np.array(
        [clf.predict(sample)[0] for clf in model.pool_classifiers])

    assert model.classify_instance(sample, base_predictions) == 0
Ejemplo n.º 15
0
def test_des_clustering_proba():
    """Compare ``predict_proba`` (seeded KMeans) with the stored reference."""
    from sklearn.cluster import KMeans

    pool_classifiers, X_dsel, y_dsel, X_test, y_test = setup_classifiers()

    # Seed the clustering so the stored probabilities are reproducible.
    kmeans = KMeans(n_clusters=5,
                    random_state=np.random.RandomState(123456))
    model = DESClustering(pool_classifiers, clustering=kmeans)
    model.fit(X_dsel, y_dsel)
    predicted = model.predict_proba(X_test)

    # The path is relative, so it resolves against the current working
    # directory of the test run.
    reference_path = (
        'deslib/tests/expected_values/des_clustering_proba_integration.npy')
    assert np.allclose(predicted, np.load(reference_path))
Ejemplo n.º 16
0
def test_classify_instance(create_pool_classifiers):
    """``classify_with_ds`` with a stubbed selection must return label 0."""
    sample = np.ones((1, 2))
    model = DESClustering(create_pool_classifiers * 4,
                          clustering=KMeans(n_clusters=2))
    # Fix the selected ensemble so the outcome does not depend on clustering.
    model.select = MagicMock(return_value=[0, 1, 2, 3, 5, 6, 7, 9])

    # One prediction per base classifier for the single query sample.
    base_predictions = np.array(
        [clf.predict(sample)[0] for clf in model.pool_classifiers])

    assert model.classify_with_ds(sample, base_predictions) == 0
Ejemplo n.º 17
0
def test_fit_homogeneous_clusters():
    """Fit with a mocked cluster assignment and check per-cluster accuracy.

    The cluster index of each sample is forced to
    ``return_cluster_index_ex1`` through the mocked ``fit_predict``.
    Base classifier 1 is expected to score 0.0 in cluster 0 and 1.0 in
    cluster 1, with classifiers 0 and 2 scoring the opposite.
    """
    clustering_test = DESClustering(create_pool_classifiers() * 2, k=2,
                                    pct_accuracy=0.5, pct_diversity=0.33)
    clustering_test.roc_algorithm.fit_predict = MagicMock(
        return_value=return_cluster_index_ex1)

    clustering_test.DFP_mask = np.ones(clustering_test.n_classifiers)
    clustering_test.fit(X_dsel_ex1, y_dsel_ex1)

    accuracy = clustering_test.accuracy_cluster
    # Compare element-wise BEFORE reducing. `arr.all() == value` only compares
    # a boolean with `value`, so it would accept scores such as [0.3, 0.5]
    # for 1.0 or [0.0, 0.7] for 0.0.
    assert accuracy[0, 1] == 0.0
    assert (accuracy[0, [0, 2]] == 1.0).all()
    assert accuracy[1, 1] == 1.0
    assert (accuracy[1, [0, 2]] == 0.0).all()

    # Every classifier selected for cluster 0 must come from this set.
    for idx in clustering_test.indices[0, :]:
        assert idx in (0, 2, 3, 5)
Ejemplo n.º 18
0
def test_fit_heterogeneous_clusters():
    """Fit with a mocked cluster assignment and check scores and selection.

    The cluster index of every sample is forced to
    ``return_cluster_index_ex2`` through the mocked ``fit_predict``.
    """
    model = DESClustering(create_pool_classifiers(), k=2,
                          pct_accuracy=0.5, pct_diversity=0.33)
    model.roc_algorithm.fit_predict = MagicMock(
        return_value=return_cluster_index_ex2)

    model.DFP_mask = np.ones(model.n_classifiers)
    model.fit(X_dsel_ex1, y_dsel_ex1)

    accuracy = model.accuracy_cluster
    assert np.allclose(accuracy[:, 1], [0.428, 0.375], atol=0.01)
    assert np.allclose(accuracy[:, 0], [0.572, 0.625], atol=0.01)

    # The first classifier chosen for each cluster must be index 0 or 2
    # (per the original note: a classifier that predicts the class label 0).
    for cluster_id in (0, 1):
        assert model.indices[cluster_id, 0] in (0, 2)
Ejemplo n.º 19
0
def test_fit_clusters_less_diverse(example_estimate_competence,
                                   create_pool_classifiers):
    """Fit with ``more_diverse=False`` and check scores and selection.

    The clustering ``predict`` is mocked to return the labels ``y``, so each
    sample's cluster index equals its class label. Base classifier 1 is
    expected to score 0.0 in cluster 0 and 1.0 in cluster 1, with
    classifiers 0 and 2 scoring the opposite.
    """
    clustering_test = DESClustering(create_pool_classifiers * 2,
                                    clustering=KMeans(n_clusters=2),
                                    pct_accuracy=0.5, pct_diversity=0.33,
                                    more_diverse=False)
    X, y = example_estimate_competence[0:2]

    clustering_test.clustering.predict = MagicMock(return_value=y)
    clustering_test.fit(X, y)

    performance = clustering_test.performance_cluster_
    # Compare element-wise BEFORE reducing. `arr.all() == value` only compares
    # a boolean with `value`, so it would accept scores such as [0.3, 0.5]
    # for 1.0 or [0.0, 0.7] for 0.0.
    assert performance[0, 1] == 0.0
    assert (performance[0, [0, 2]] == 1.0).all()
    assert performance[1, 1] == 1.0
    assert (performance[1, [0, 2]] == 0.0).all()

    # Every classifier selected for cluster 0 must come from this set.
    for idx in clustering_test.indices_[0, :]:
        assert idx in (1, 3, 4, 5)
Ejemplo n.º 20
0
def test_fit_clusters_less_diverse():
    """Fit with ``more_diverse=False`` and check scores and selection.

    The cluster index of each sample is forced to
    ``return_cluster_index_ex1`` through the mocked clustering ``predict``.
    Base classifier 1 is expected to score 0.0 in cluster 0 and 1.0 in
    cluster 1, with classifiers 0 and 2 scoring the opposite.
    """
    clustering_test = DESClustering(create_pool_classifiers() * 2,
                                    clustering=KMeans(n_clusters=2),
                                    pct_accuracy=0.5,
                                    pct_diversity=0.33,
                                    more_diverse=False)

    clustering_test.clustering.predict = MagicMock(
        return_value=return_cluster_index_ex1)
    clustering_test.fit(X_dsel_ex1, y_dsel_ex1)

    accuracy = clustering_test.accuracy_cluster_
    # Compare element-wise BEFORE reducing. `arr.all() == value` only compares
    # a boolean with `value`, so it would accept scores such as [0.3, 0.5]
    # for 1.0 or [0.0, 0.7] for 0.0.
    assert accuracy[0, 1] == 0.0
    assert (accuracy[0, [0, 2]] == 1.0).all()
    assert accuracy[1, 1] == 1.0
    assert (accuracy[1, [0, 2]] == 0.0).all()

    # Every classifier selected for cluster 0 must come from this set.
    for idx in clustering_test.indices_[0, :]:
        assert idx in (1, 3, 4, 5)
Ejemplo n.º 21
0
def test_fit_homogeneous_clusters(create_pool_classifiers,
                                  example_estimate_competence):
    """Fit with cluster indices equal to the labels and check the scores.

    The clustering ``predict`` is mocked to return the labels ``y``, so each
    sample's cluster index equals its class label. Base classifier 1 is
    expected to score 0.0 in cluster 0 and 1.0 in cluster 1, with
    classifiers 0 and 2 scoring the opposite.
    """
    clustering_test = DESClustering(create_pool_classifiers * 2,
                                    clustering=KMeans(n_clusters=2),
                                    pct_accuracy=0.5,
                                    pct_diversity=0.33)

    X, y = example_estimate_competence[0:2]
    clustering_test.clustering.predict = MagicMock(return_value=y)

    clustering_test.fit(X, y)

    accuracy = clustering_test.accuracy_cluster_
    # Compare element-wise BEFORE reducing. `arr.all() == value` only compares
    # a boolean with `value`, so it would accept scores such as [0.3, 0.5]
    # for 1.0 or [0.0, 0.7] for 0.0.
    assert accuracy[0, 1] == 0.0
    assert (accuracy[0, [0, 2]] == 1.0).all()
    assert accuracy[1, 1] == 1.0
    assert (accuracy[1, [0, 2]] == 0.0).all()

    # Every classifier selected for cluster 0 must come from this set.
    for idx in clustering_test.indices_[0, :]:
        assert idx in (0, 2, 3, 5)
Ejemplo n.º 22
0
def test_fit_homogeneous_clusters():
    """Fit with a mocked cluster assignment and check per-cluster accuracy.

    The cluster index of each sample is forced to
    ``return_cluster_index_ex1`` through the mocked clustering ``predict``.
    Base classifier 1 is expected to score 0.0 in cluster 0 and 1.0 in
    cluster 1, with classifiers 0 and 2 scoring the opposite.
    """
    clustering_test = DESClustering(create_pool_classifiers() * 2,
                                    clustering=KMeans(n_clusters=2),
                                    pct_accuracy=0.5,
                                    pct_diversity=0.33)

    clustering_test.clustering.predict = MagicMock(
        return_value=return_cluster_index_ex1)

    clustering_test.fit(X_dsel_ex1, y_dsel_ex1)

    accuracy = clustering_test.accuracy_cluster_
    # Compare element-wise BEFORE reducing. `arr.all() == value` only compares
    # a boolean with `value`, so it would accept scores such as [0.3, 0.5]
    # for 1.0 or [0.0, 0.7] for 0.0.
    assert accuracy[0, 1] == 0.0
    assert (accuracy[0, [0, 2]] == 1.0).all()
    assert accuracy[1, 1] == 1.0
    assert (accuracy[1, [0, 2]] == 0.0).all()

    # Every classifier selected for cluster 0 must come from this set.
    for idx in clustering_test.indices_[0, :]:
        assert idx in (0, 2, 3, 5)
Ejemplo n.º 23
0
def test_fit_heterogeneous_clusters():
    """Fit with a mocked cluster assignment and check scores and selection.

    The cluster index of every sample is forced to
    ``return_cluster_index_ex2`` by mocking the clustering ``predict``.
    """
    model = DESClustering(create_pool_classifiers(),
                          clustering=KMeans(n_clusters=2),
                          pct_accuracy=0.5,
                          pct_diversity=0.33)
    model.clustering.predict = MagicMock(
        return_value=return_cluster_index_ex2)

    model.fit(X_dsel_ex1, y_dsel_ex1)

    accuracy = model.accuracy_cluster_
    assert np.allclose(accuracy[:, 1], [0.428, 0.375], atol=0.01)
    assert np.allclose(accuracy[:, 0], [0.572, 0.625], atol=0.01)

    # The first classifier chosen for each cluster must be index 0 or 2
    # (per the original note: a classifier that predicts the label 0).
    for cluster_id in (0, 1):
        assert model.indices_[cluster_id, 0] in (0, 2)
Ejemplo n.º 24
0
def test_fit_clusters_less_diverse():
    """Fit with ``more_diverse=False`` and check scores and selection.

    The cluster index of each sample is forced to
    ``return_cluster_index_ex1`` through the mocked ``fit_predict``.
    Base classifier 1 is expected to score 0.0 in cluster 0 and 1.0 in
    cluster 1, with classifiers 0 and 2 scoring the opposite.
    """
    clustering_test = DESClustering(create_pool_classifiers() * 2,
                                    k=2,
                                    pct_accuracy=1.0,
                                    pct_diversity=0.60,
                                    more_diverse=False)

    clustering_test.roc_algorithm.fit_predict = MagicMock(
        return_value=return_cluster_index_ex1)

    clustering_test.DFP_mask = np.ones(clustering_test.n_classifiers)
    clustering_test.fit(X_dsel_ex1, y_dsel_ex1)

    accuracy = clustering_test.accuracy_cluster
    # Compare element-wise BEFORE reducing. `arr.all() == value` only compares
    # a boolean with `value`, so it would accept scores such as [0.3, 0.5]
    # for 1.0 or [0.0, 0.7] for 0.0.
    assert accuracy[0, 1] == 0.0
    assert (accuracy[0, [0, 2]] == 1.0).all()
    assert accuracy[1, 1] == 1.0
    assert (accuracy[1, [0, 2]] == 0.0).all()

    # Every classifier selected for cluster 0 must come from this set.
    assert np.isin(clustering_test.indices[0, :], np.array([1, 3, 5, 4])).all()
Ejemplo n.º 25
0
def test_estimate_competence(create_pool_classifiers,
                             example_estimate_competence):
    """Competences must equal the ``performance_cluster_`` row of the cluster.

    The cluster predicted for the query is mocked to 0 and then to 1, and the
    returned competences are compared with the matching row.
    """
    X, y = example_estimate_competence[0:2]
    sample = np.atleast_2d([1, 1])
    model = DESClustering(create_pool_classifiers * 2,
                          clustering=KMeans(n_clusters=2),
                          pct_accuracy=0.5,
                          pct_diversity=0.33)

    # Cluster assignment used while fitting is mocked on the estimator
    # handed to the constructor.
    model.clustering.predict = MagicMock(
        return_value=return_cluster_index_ex2)
    model.fit(X, y)

    # After fitting, the mock is installed on ``clustering_`` to route the
    # query to each cluster in turn.
    for cluster_id in (0, 1):
        model.clustering_.predict = MagicMock(return_value=cluster_id)
        competences = model.estimate_competence(sample)
        assert np.array_equal(competences,
                              model.performance_cluster_[cluster_id, :])
Ejemplo n.º 26
0
def test_select():
    """``select`` must return the ``indices_`` row of the predicted cluster."""
    sample = np.atleast_2d([1, -1])
    model = DESClustering()

    # Attributes that ``fit`` would normally create are set by hand.
    model.clustering_ = KMeans()
    model.indices_ = np.array([[0, 2], [1, 4]])

    # Mock the cluster prediction to each cluster id in turn.
    for cluster_id, expected in ((0, [[0, 2]]), (1, [[1, 4]])):
        model.clustering_.predict = MagicMock(return_value=[cluster_id])
        assert np.array_equal(model.select(sample), expected)
Ejemplo n.º 27
0
def test_estimate_competence():
    """Competences must equal the ``accuracy_cluster`` row of the cluster.

    The cluster predicted for the query is mocked to 0 and then to 1, and the
    returned competences are compared with the matching row.
    """
    sample = np.atleast_2d([1, 1])
    model = DESClustering(create_pool_classifiers(), k=2,
                          pct_accuracy=0.5, pct_diversity=0.33)
    # Cluster assignment used while fitting is mocked.
    model.roc_algorithm.fit_predict = MagicMock(
        return_value=return_cluster_index_ex2)

    model.DFP_mask = np.ones(model.n_classifiers)
    model.fit(X_dsel_ex1, y_dsel_ex1)

    # Route the query to each cluster in turn through the mocked ``predict``.
    for cluster_id in (0, 1):
        model.roc_algorithm.predict = MagicMock(return_value=cluster_id)
        competences = model.estimate_competence(sample)
        assert np.array_equal(competences,
                              model.accuracy_cluster[cluster_id, :])
Ejemplo n.º 28
0
def test_diversity_metric_ratio():
    """``metric='ratio'`` must set ``diversity_func`` to ``ratio_errors``."""
    pool = create_pool_classifiers() * 10
    model = DESClustering(pool, metric='ratio')
    assert model.diversity_func == ratio_errors
Ejemplo n.º 29
0
def test_diversity_metric_DF():
    """``metric='DF'`` must set ``diversity_func`` to ``negative_double_fault``."""
    pool = create_pool_classifiers() * 10
    model = DESClustering(pool, metric='DF')
    assert model.diversity_func == negative_double_fault
Ejemplo n.º 30
0
def test_diversity_metric_Q():
    """``metric='Q'`` must set ``diversity_func`` to ``Q_statistic``."""
    pool = create_pool_classifiers() * 10
    model = DESClustering(pool, metric='Q')
    assert model.diversity_func == Q_statistic