예제 #1
0
def test_shc_validation():
    """Check that SHC rejects invalid settings with ``ValueError``.

    Every configuration is both constructed and fitted inside its own
    ``pytest.raises`` context, so the error may originate from either the
    constructor or ``fit_predict``.
    """
    X, _ = generate_data(supervised=False, affinity=False)

    invalid_configs = (
        {"n_clusters": len(X) + 1},    # one more than len(X)
        {"n_clusters": -1},            # negative cluster count
        {"scoring_data": "affinity"},  # scoring_data with no scoring given
    )
    for bad_kwargs in invalid_configs:
        with pytest.raises(ValueError):
            ScipyHierarchicalClustering(**bad_kwargs).fit_predict(X)
예제 #2
0
def test_shc_custom_affinity():
    """Fit SHC with a callable affinity and check the cluster sizes.

    The generated data is expected to split into four clusters of 25
    members each when ``n_clusters=4``.
    """
    data, _ = generate_data(supervised=False, affinity=False)
    model = ScipyHierarchicalClustering(n_clusters=4,
                                        affinity=euclidean_distances)
    cluster_sizes = np.bincount(model.fit_predict(data))
    assert_array_equal([25] * 4, cluster_sizes)
예제 #3
0
def test_shc_precomputed_distance():
    """Fit SHC with ``affinity="precomputed"`` and check the cluster sizes.

    The data is requested from ``generate_data`` with ``affinity=True`` and
    handed to the clusterer unchanged; four clusters of 25 are expected.
    """
    precomputed, _ = generate_data(supervised=False, affinity=True)
    model = ScipyHierarchicalClustering(n_clusters=4,
                                        affinity="precomputed")
    cluster_sizes = np.bincount(model.fit_predict(precomputed))
    assert_array_equal([25] * 4, cluster_sizes)
예제 #4
0
def test_shc_precomputed_distance():
    """Cluster precomputed input with SHC and verify four groups of 25."""
    matrix, _ = generate_data(supervised=False, affinity=True)

    shc = ScipyHierarchicalClustering(n_clusters=4, affinity="precomputed")
    predicted = shc.fit_predict(matrix)

    # np.bincount yields the number of members per integer label.
    assert_array_equal([25] * 4, np.bincount(predicted))
예제 #5
0
def test_shc_custom_affinity():
    """Cluster with a user-supplied affinity callable; expect 4 x 25."""
    features, _ = generate_data(supervised=False, affinity=False)

    shc = ScipyHierarchicalClustering(n_clusters=4,
                                      affinity=euclidean_distances)
    predicted = shc.fit_predict(features)

    # np.bincount yields the number of members per integer label.
    assert_array_equal([25] * 4, np.bincount(predicted))
예제 #6
0
def test_shc_unsupervised_scoring_data_raw():
    """Check SHC with a silhouette scorer and ``scoring_data="raw"``.

    No ``n_clusters`` is given; the clusterer is configured with a
    euclidean silhouette scorer and is expected to return four clusters
    of 25 members each.
    """
    data, _ = generate_data(supervised=False, affinity=False)
    euclidean_silhouette = partial(silhouette_score, metric="euclidean")

    model = ScipyHierarchicalClustering(scoring=euclidean_silhouette,
                                        scoring_data="raw",
                                        affinity=euclidean_distances)
    cluster_sizes = np.bincount(model.fit_predict(data))
    assert_array_equal([25] * 4, cluster_sizes)
예제 #7
0
def test_shc_unsupervised_scoring_data_affinity():
    """Check SHC with a silhouette scorer and ``scoring_data="affinity"``.

    Two input styles are exercised with the same precomputed-metric
    silhouette scorer; both are expected to yield four clusters of 25.
    """
    # Each case: (``affinity`` flag for generate_data, SHC ``affinity`` arg).
    cases = (
        (False, euclidean_distances),  # passing feature matrix
        (True, "precomputed"),         # passing affinity matrix
    )
    for as_affinity, affinity_arg in cases:
        data, _ = generate_data(supervised=False, affinity=as_affinity)
        precomputed_silhouette = partial(silhouette_score,
                                         metric="precomputed")
        model = ScipyHierarchicalClustering(scoring=precomputed_silhouette,
                                            scoring_data="affinity",
                                            affinity=affinity_arg)
        cluster_sizes = np.bincount(model.fit_predict(data))
        assert_array_equal([25] * 4, cluster_sizes)
예제 #8
0
def test_shc_unsupervised_scoring_data_None():
    """Check SHC with a scorer when ``scoring_data`` is left unset.

    The scorer takes only the predicted labels and always returns
    ``-inf``; the test expects a single cluster holding all 100 samples.
    """
    def _constant_worst_score(labels_pred):
        # Same score regardless of the labelling it is given.
        return -np.inf

    data, _ = generate_data(supervised=False, affinity=False)
    model = ScipyHierarchicalClustering(scoring=_constant_worst_score,
                                        affinity=euclidean_distances)
    cluster_sizes = np.bincount(model.fit_predict(data))
    assert_array_equal([100], cluster_sizes)
예제 #9
0
def test_shc_threshold():
    """Set ``threshold`` on a fitted SHC and check the cluster sizes.

    After fitting with ``n_clusters=4``, the threshold is set from the
    fitted ``linkage_`` array and ``labels_`` is expected to contain four
    clusters of 25 members each.
    """
    precomputed, _ = generate_data(supervised=False, affinity=True)
    model = ScipyHierarchicalClustering(n_clusters=4,
                                        scoring_data="affinity",
                                        affinity="precomputed")
    model.fit_predict(precomputed)

    # Row -4, column 2 of linkage_ is reused as the new threshold
    # (presumably a merge distance in SciPy's linkage layout -- confirm).
    cut = model.linkage_[-4, 2]
    model.set_params(threshold=cut)

    assert_array_equal([25] * 4, np.bincount(model.labels_))
예제 #10
0
def test_shc_n_clusters():
    """Check that ``labels_`` follows ``n_clusters`` changed after fitting.

    The clusterer is fitted once with four clusters; ``n_clusters`` is then
    raised to ten through ``set_params`` without calling fit again.
    """
    precomputed, _ = generate_data(supervised=False, affinity=True)
    model = ScipyHierarchicalClustering(n_clusters=4,
                                        affinity="precomputed")

    fitted_labels = model.fit_predict(precomputed)
    assert_equal(len(np.unique(fitted_labels)), 4)

    model.set_params(n_clusters=10)
    assert_equal(len(np.unique(model.labels_)), 10)
예제 #11
0
def test_shc_n_clusters():
    """Refit-free change of ``n_clusters``: expect 4, then 10 labels."""
    matrix, _ = generate_data(supervised=False, affinity=True)
    shc = ScipyHierarchicalClustering(n_clusters=4, affinity="precomputed")

    # Initial fit with four clusters.
    distinct = np.unique(shc.fit_predict(matrix))
    assert_equal(len(distinct), 4)

    # Changing the parameter alone must be reflected in labels_.
    shc.set_params(n_clusters=10)
    distinct = np.unique(shc.labels_)
    assert_equal(len(distinct), 10)
예제 #12
0
def test_shc_threshold():
    """Check how ``threshold`` interacts with ``n_clusters`` in SHC.

    First, with ``n_clusters`` set, a new threshold must leave the labels
    unchanged. Then, with ``n_clusters`` cleared, the number of distinct
    labels is expected to follow the threshold taken from ``linkage_``.
    """
    precomputed, _ = generate_data(supervised=False, affinity=True)
    model = ScipyHierarchicalClustering(n_clusters=2,
                                        affinity="precomputed")

    # n_clusters takes precedence: setting threshold changes nothing.
    before = model.fit_predict(precomputed)
    model.set_params(threshold=model.linkage_[-4, 2])
    after = model.labels_
    assert_array_equal(before, after)
    assert_equal(len(np.unique(before)), 2)

    # Clear n_clusters so that the threshold drives the labelling.
    five_cut = model.linkage_[-5, 2]
    model.set_params(n_clusters=None, threshold=five_cut)
    assert_equal(len(np.unique(model.labels_)), 5)

    four_cut = model.linkage_[-4, 2]
    model.set_params(threshold=four_cut)
    assert_equal(len(np.unique(model.labels_)), 4)
예제 #13
0
def test_shc_threshold():
    """Verify threshold handling with and without ``n_clusters`` set."""
    matrix, _ = generate_data(supervised=False, affinity=True)
    shc = ScipyHierarchicalClustering(n_clusters=2, affinity="precomputed")

    # Phase 1: n_clusters is set, so a threshold change must be ignored.
    initial = shc.fit_predict(matrix)
    shc.set_params(threshold=shc.linkage_[-4, 2])
    assert_array_equal(initial, shc.labels_)
    assert_equal(len(np.unique(initial)), 2)

    # Phase 2: drop n_clusters; labels_ should now track the threshold.
    shc.set_params(n_clusters=None, threshold=shc.linkage_[-5, 2])
    n_found = len(np.unique(shc.labels_))
    assert_equal(n_found, 5)

    shc.set_params(threshold=shc.linkage_[-4, 2])
    n_found = len(np.unique(shc.labels_))
    assert_equal(n_found, 4)
예제 #14
0
def test_shc_default_euclidean():
    """Fit SHC with only ``n_clusters`` given and check the cluster sizes.

    All other parameters are left at their defaults; four clusters of 25
    members each are expected.
    """
    data, _ = generate_data(supervised=False, affinity=False)
    model = ScipyHierarchicalClustering(n_clusters=4)
    cluster_sizes = np.bincount(model.fit_predict(data))
    assert_array_equal([25] * 4, cluster_sizes)