Ejemplo n.º 1
0
def test_ward_clustering():
    """
    Check that we obtain the correct number of clusters with Ward clustering.
    """
    np.random.seed(0)
    mask = np.ones([10, 10], dtype=np.bool)
    X = np.random.randn(100, 50)
    connectivity = grid_to_graph(*mask.shape)
    clustering = Ward(n_clusters=10, connectivity=connectivity)
    clustering.fit(X)
    assert(np.size(np.unique(clustering.labels_)) == 10)
Ejemplo n.º 2
0
"""
Bench the scikit's ward implement compared to scipy's
"""

import time

import numpy as np
from scipy.cluster import hierarchy
import pylab as pl

from scikits.learn.cluster import Ward

ward = Ward(n_clusters=15)

n_samples = np.logspace(.5, 3, 9)
n_features = np.logspace(1, 3.5, 7)
N_samples, N_features = np.meshgrid(n_samples, n_features)
scikits_time = np.zeros(N_samples.shape)
scipy_time = np.zeros(N_samples.shape)

for i, n in enumerate(n_samples):
    for j, p in enumerate(n_features):
        X = np.random.normal(size=(n, p))
        t0 = time.time()
        ward.fit(X)
        scikits_time[j, i] = time.time() - t0
        t0 = time.time()
        hierarchy.ward(X)
        scipy_time[j, i] = time.time() - t0

ratio = scikits_time / scipy_time
###############################################################################
# Generate data (swiss roll dataset)
n_samples = 5000
noise = 0.05
X = swiss_roll(n_samples, noise)

###############################################################################
# Define the structure A of the data. Here a 10 nearest neighbors
connectivity = kneighbors_graph(X, n_neighbors=10)

###############################################################################
# Compute clustering
print "Compute structured hierarchical clustering..."
st = time.time()
ward = Ward(n_clusters=10).fit(X, connectivity=connectivity)
label = ward.labels_
print "Elapsed time: ", time.time() - st
print "Number of points: ", label.size
print "Number of clusters: ", np.unique(label).size

###############################################################################
# Plot result
fig = pl.figure()
ax = p3.Axes3D(fig)
ax.view_init(7, -80)
for l in np.unique(label):
    ax.plot3D(X[label == l, 0],
              X[label == l, 1],
              X[label == l, 2],
              'o',
Ejemplo n.º 4
0
"""
Bench the scikit's ward implement compared to scipy's
"""

import time

import numpy as np
from scipy.cluster import hierarchy
import pylab as pl

from scikits.learn.cluster import Ward

ward = Ward(n_clusters=15)

n_samples = np.logspace(.5, 3, 9)
n_features = np.logspace(1, 3.5, 7)
N_samples, N_features = np.meshgrid(n_samples,
                                    n_features)
scikits_time = np.zeros(N_samples.shape)
scipy_time = np.zeros(N_samples.shape)

for i, n in enumerate(n_samples):
    for j, p in enumerate(n_features):
        X = np.random.normal(size=(n, p))
        t0 = time.time()
        ward.fit(X)
        scikits_time[j, i] = time.time() - t0
        t0 = time.time()
        hierarchy.ward(X)
        scipy_time[j, i] = time.time() - t0
Ejemplo n.º 5
0
import pylab as pl
import mpl_toolkits.mplot3d.axes3d as p3
from scikits.learn.cluster import Ward
from scikits.learn.datasets.samples_generator import swiss_roll

###############################################################################
# Generate data (swiss roll dataset)
n_samples = 1000
noise = 0.05
X = swiss_roll(n_samples, noise)

###############################################################################
# Compute clustering
print "Compute unstructured hierarchical clustering..."
st = time.time()
ward = Ward(n_clusters=5).fit(X)
label = ward.labels_
print "Elapsed time: ", time.time() - st
print "Number of points: ", label.size
print "Number of clusters: ", np.unique(label).size

###############################################################################
# Plot result
fig = pl.figure()
ax = p3.Axes3D(fig)
ax.view_init(7, -80)
for l in np.unique(label):
    ax.plot3D(X[label == l, 0],
              X[label == l, 1],
              X[label == l, 2],
              'o',