Exemple #1
0
def test_ward_clustering():
    """
    Check that we obtain the correct number of clusters with Ward clustering.
    """
    np.random.seed(0)
    mask = np.ones([10, 10], dtype=np.bool)
    X = np.random.randn(100, 50)
    connectivity = grid_to_graph(*mask.shape)
    clustering = Ward(n_clusters=10, connectivity=connectivity)
    clustering.fit(X)
    assert(np.size(np.unique(clustering.labels_)) == 10)
Exemple #2
0
from scikits.learn.cluster import Ward

ward = Ward(n_clusters=15)

n_samples = np.logspace(.5, 3, 9)
n_features = np.logspace(1, 3.5, 7)
N_samples, N_features = np.meshgrid(n_samples, n_features)
scikits_time = np.zeros(N_samples.shape)
scipy_time = np.zeros(N_samples.shape)

for i, n in enumerate(n_samples):
    for j, p in enumerate(n_features):
        X = np.random.normal(size=(n, p))
        t0 = time.time()
        ward.fit(X)
        scikits_time[j, i] = time.time() - t0
        t0 = time.time()
        hierarchy.ward(X)
        scipy_time[j, i] = time.time() - t0

ratio = scikits_time / scipy_time

pl.clf()
pl.imshow(np.log(ratio), aspect='auto', origin="lower")
pl.colorbar()
pl.contour(ratio, levels=[
    1,
], colors='k')
pl.yticks(range(len(n_features)), n_features.astype(np.int))
pl.ylabel('N features')
from scikits.learn.cluster import Ward

ward = Ward(n_clusters=15)

n_samples = np.logspace(.5, 3, 9)
n_features = np.logspace(1, 3.5, 7)
N_samples, N_features = np.meshgrid(n_samples,
                                    n_features)
scikits_time = np.zeros(N_samples.shape)
scipy_time = np.zeros(N_samples.shape)

for i, n in enumerate(n_samples):
    for j, p in enumerate(n_features):
        X = np.random.normal(size=(n, p))
        t0 = time.time()
        ward.fit(X)
        scikits_time[j, i] = time.time() - t0
        t0 = time.time()
        hierarchy.ward(X)
        scipy_time[j, i] = time.time() - t0

ratio = scikits_time/scipy_time

pl.clf()
pl.imshow(np.log(ratio), aspect='auto', origin="lower")
pl.colorbar()
pl.contour(ratio, levels=[1, ], colors='k')
pl.yticks(range(len(n_features)), n_features.astype(np.int))
pl.ylabel('N features')
pl.xticks(range(len(n_samples)), n_samples.astype(np.int))
pl.xlabel('N samples')