Exemple #1
0
def test_transform():
    """Check transform() on the fitted centers: zero diagonal, positive elsewhere."""
    model = KMeans(n_clusters=n_clusters)
    model.fit(X)
    # Row r holds the transformed representation of fitted center r
    # (presumably its distance to every center -- confirm against the
    # estimator's transform contract).
    transformed = model.transform(model.cluster_centers_)

    for row in range(n_clusters):
        # A center compared with itself must map to exactly 0.
        assert_equal(transformed[row, row], 0)
        for col in range(n_clusters):
            if col == row:
                continue
            # Any other center must map to a strictly positive value.
            assert_greater(transformed[row, col], 0)
Exemple #2
0
def test_n_init():
    """Check that the mean fitted score does not increase as n_init grows."""
    repeats = 5
    init_counts = [1, 5, 10]
    # One row per n_init value, one column per random seed.
    per_run = np.zeros((len(init_counts), repeats))
    for row, init_count in enumerate(init_counts):
        for seed in range(repeats):
            estimator = KMeans(
                n_clusters=n_clusters, n_init=init_count, random_state=seed
            )
            fitted = estimator.fit(X)
            # NOTE(review): relies on the fitted object exposing `score_`;
            # presumably a cost where lower is better -- confirm.
            per_run[row, seed] = fitted.score_

    # Average over the seeds so one unlucky run does not decide the outcome.
    averaged = per_run.mean(axis=1)
    template = "Score %r should be decreasing when n_init is increasing."
    failure_msg = template % list(averaged)
    # Compare each mean score with the one for the next larger n_init.
    for current, following in zip(averaged[:-1], averaged[1:]):
        assert_true(current >= following, failure_msg)
Exemple #3
0
def test_predict():
    """Check predict() and fit_predict() agree with the fitted labels."""
    model = KMeans(n_clusters=n_clusters, random_state=42)
    model.fit(X)

    # The i-th fitted center must be assigned label i.
    center_labels = model.predict(model.cluster_centers_)
    expected_labels = np.arange(n_clusters)
    assert_array_equal(center_labels, expected_labels)

    # Predicting on the training data must reproduce the stored labels.
    train_labels = model.predict(X)
    assert_array_equal(train_labels, model.labels_)

    # fit_predict() runs on the same data; its return value must match the
    # labels_ attribute as it stands after that call.
    refit_labels = model.fit_predict(X)
    assert_array_equal(refit_labels, model.labels_)
Exemple #4
0
def test_k_means_random_init():
    """Fit a seeded KMeans on X and run the shared fitted-model checks."""
    # NOTE(review): no `init` argument is passed, so this exercises the
    # estimator's default initialisation -- confirm that default is the
    # random strategy the test name implies.
    params = dict(n_clusters=n_clusters, random_state=42)
    model = KMeans(**params)
    model.fit(X)
    _check_fitted_model(model)
Exemple #5
0
def test_score():
    """Compare score(X) of a max_iter=1, n_init=1 fit with a max_iter=10 fit."""
    # Baseline: one initialisation, max_iter=1.
    short_run = KMeans(n_clusters=n_clusters, n_init=1, max_iter=1, random_state=42)
    short_fitted = short_run.fit(X)
    s1 = short_fitted.score(X)

    # Comparison fit: max_iter=10, n_init left at the estimator's default.
    long_run = KMeans(n_clusters=n_clusters, max_iter=10, random_state=42)
    long_fitted = long_run.fit(X)
    s2 = long_fitted.score(X)

    # NOTE(review): the "+ 1" gives the longer run one unit of slack instead
    # of requiring s2 > s1 outright -- confirm the tolerance is intentional.
    assert_greater(s2 + 1, s1)