Finds core samples of high density and expands clusters from them. """ print __doc__ import numpy as np from scipy.spatial import distance from scikits.learn.cluster import DBSCAN from scikits.learn import metrics from scikits.learn.datasets.samples_generator import make_blobs ############################################################################## # Generate sample data centers = [[1, 1], [-1, -1], [1, -1]] X, labels_true = make_blobs(n_samples=750, centers=centers, cluster_std=0.4) ############################################################################## # Compute similarities D = distance.squareform(distance.pdist(X)) S = 1 - (D / np.max(D)) ############################################################################## # Compute DBSCAN db = DBSCAN().fit(S, eps=0.95, min_samples=10) core_samples = db.core_sample_indices_ labels = db.labels_ # Number of clusters in labels, ignoring noise if present. n_clusters_ = len(set(labels)) - (1 if -1 in labels else 0)
Reference: K. Funkunaga and L.D. Hosteler, "The Estimation of the Gradient of a Density Function, with Applications in Pattern Recognition" """ print __doc__ import numpy as np from scikits.learn.cluster import MeanShift, estimate_bandwidth from scikits.learn.datasets.samples_generator import make_blobs ################################################################################ # Generate sample data centers = [[1, 1], [-1, -1], [1, -1]] X, _ = make_blobs(n_samples=750, centers=centers, cluster_std=0.6) ################################################################################ # Compute clustering with MeanShift bandwidth = estimate_bandwidth(X, quantile=0.3) ms = MeanShift(bandwidth=bandwidth) ms.fit(X) labels = ms.labels_ cluster_centers = ms.cluster_centers_ labels_unique = np.unique(labels) n_clusters_ = len(labels_unique) print "number of estimated clusters : %d" % n_clusters_ ################################################################################
Reference: Brendan J. Frey and Delbert Dueck, "Clustering by Passing Messages Between Data Points", Science Feb. 2007 """ print __doc__ import numpy as np from scikits.learn.cluster import AffinityPropagation from scikits.learn import metrics from scikits.learn.datasets.samples_generator import make_blobs ############################################################################## # Generate sample data centers = [[1, 1], [-1, -1], [1, -1]] X, labels_true = make_blobs(n_samples=300, centers=centers, cluster_std=0.5) ############################################################################## # Compute similarities X_norms = np.sum(X**2, axis=1) S = -X_norms[:, np.newaxis] - X_norms[np.newaxis, :] + 2 * np.dot(X, X.T) p = 10 * np.median(S) ############################################################################## # Compute Affinity Propagation af = AffinityPropagation().fit(S, p) cluster_centers_indices = af.cluster_centers_indices_ labels = af.labels_ n_clusters_ = len(cluster_centers_indices)