def test_ward_clustering(): """ Check that we obtain the correct number of clusters with Ward clustering. """ np.random.seed(0) mask = np.ones([10, 10], dtype=np.bool) X = np.random.randn(100, 50) connectivity = grid_to_graph(*mask.shape) clustering = Ward(n_clusters=10, connectivity=connectivity) clustering.fit(X) assert(np.size(np.unique(clustering.labels_)) == 10)
from scikits.learn.cluster import Ward ward = Ward(n_clusters=15) n_samples = np.logspace(.5, 3, 9) n_features = np.logspace(1, 3.5, 7) N_samples, N_features = np.meshgrid(n_samples, n_features) scikits_time = np.zeros(N_samples.shape) scipy_time = np.zeros(N_samples.shape) for i, n in enumerate(n_samples): for j, p in enumerate(n_features): X = np.random.normal(size=(n, p)) t0 = time.time() ward.fit(X) scikits_time[j, i] = time.time() - t0 t0 = time.time() hierarchy.ward(X) scipy_time[j, i] = time.time() - t0 ratio = scikits_time / scipy_time pl.clf() pl.imshow(np.log(ratio), aspect='auto', origin="lower") pl.colorbar() pl.contour(ratio, levels=[ 1, ], colors='k') pl.yticks(range(len(n_features)), n_features.astype(np.int)) pl.ylabel('N features')
from scikits.learn.cluster import Ward ward = Ward(n_clusters=15) n_samples = np.logspace(.5, 3, 9) n_features = np.logspace(1, 3.5, 7) N_samples, N_features = np.meshgrid(n_samples, n_features) scikits_time = np.zeros(N_samples.shape) scipy_time = np.zeros(N_samples.shape) for i, n in enumerate(n_samples): for j, p in enumerate(n_features): X = np.random.normal(size=(n, p)) t0 = time.time() ward.fit(X) scikits_time[j, i] = time.time() - t0 t0 = time.time() hierarchy.ward(X) scipy_time[j, i] = time.time() - t0 ratio = scikits_time/scipy_time pl.clf() pl.imshow(np.log(ratio), aspect='auto', origin="lower") pl.colorbar() pl.contour(ratio, levels=[1, ], colors='k') pl.yticks(range(len(n_features)), n_features.astype(np.int)) pl.ylabel('N features') pl.xticks(range(len(n_samples)), n_samples.astype(np.int)) pl.xlabel('N samples')