def compute_log_inertia(X, n_clusters, T, bb_min, bb_max, random_state=0):
    """Compute the log inertia of X and of uniform reference draws X_t.

    K-means is run once on X and once on each of T reference data sets
    X_t drawn uniformly inside the bounding box ``[bb_min, bb_max]``.

    Parameters
    ----------
    X: array-like, shape (n_samples, n_features)
        List of n_features-dimensional data points. Each row corresponds
        to a single data point.

    n_clusters: int
        The desired number of clusters.

    T: int
        Number of draws of X_t. With ``T=0`` there is nothing to average,
        so the returned mean and standard deviation are NaN.

    bb_min: array-like, shape (n_features,)
        Inferior corner of the bounding box of X.

    bb_max: array-like, shape (n_features,)
        Superior corner of the bounding box of X.

    random_state: int, defaults to 0.
        Seed of the ``np.random.RandomState`` used to draw the X_t.

    Returns
    -------
    log_inertia: float
        Log of the inertia of the K-means applied to X.

    mean_log_inertia_rand: float
        Mean of the log of the inertia of the K-means applied to the
        different X_t.

    std_log_inertia_rand: float
        Standard deviation of the log of the inertia of the K-means
        applied to the different X_t.

    Raises
    ------
    ValueError
        If X is not two-dimensional.
    """
    # The docstring promises array-like inputs, so coerce before touching
    # ``.shape`` or doing arithmetic on the bounding-box corners.
    X = np.asarray(X)
    if X.ndim != 2:
        raise ValueError(
            "X must be 2-dimensional (n_samples, n_features), "
            "got %d dimension(s)" % X.ndim)
    bb_min = np.asarray(bb_min)
    bb_max = np.asarray(bb_max)
    rng = np.random.RandomState(random_state)

    # Compute inertia for real data
    _, _, inertia = kmeans(X, n_clusters=n_clusters)

    # Compute the random inertia. The box extent does not change between
    # draws, so it is computed once outside the loop.
    bb_extent = bb_max - bb_min
    rand_inertia = np.empty(T)
    for t in range(T):
        # Rescale unit-cube samples to the bounding box of X.
        X_t = rng.uniform(size=X.shape) * bb_extent + bb_min
        _, _, rand_inertia[t] = kmeans(X_t, n_clusters=n_clusters)

    log_rand_inertia = np.log(rand_inertia)
    return (np.log(inertia),
            np.mean(log_rand_inertia),
            np.std(log_rand_inertia))
def spectral_clustering(X, n_clusters=2):
    """Cluster X by running K-means on a spectral embedding.

    The affinity matrix of X is turned into the unnormalized graph
    Laplacian ``D - W`` (``D`` being the diagonal matrix of the row sums
    of ``W``). The first ``n_clusters`` eigenvectors returned by
    ``scipy.linalg.eigh`` form the embedding that is handed to K-means.

    Parameters
    ----------
    X: array-like, shape (n_samples, n_features)
        List of n_features-dimensional data points. Each row corresponds
        to a single data point.

    n_clusters: int, defaults to 2
        The number of clusters to form.

    Returns
    -------
    labels: array-like, shape (n_samples,)
        The estimated labels
    """
    # Q10: Complete the spectral clustering here.
    affinity = compute_affinity_matrix(X)

    # Unnormalized Laplacian: degree matrix minus affinity matrix.
    degree = affinity.sum(1)
    laplacian = np.diag(degree) - affinity

    # eigh returns eigenvalues in ascending order, so the leading columns
    # are the eigenvectors of the smallest eigenvalues.
    _, eigenvectors = scipy.linalg.eigh(laplacian)
    embedding = eigenvectors[:, :n_clusters]

    labels, _, _ = kmeans(embedding, n_clusters=n_clusters)
    return labels
"""Example of how to use clustering to compress images.""" import numpy as np from scipy import ndimage import matplotlib.pyplot as plt from kmeans_sol import kmeans img = ndimage.imread('china.jpg') plt.imshow(img) n_rows, n_cols, n_colors = img.shape X = img.reshape(-1, n_colors).astype(np.float) n_clusters = 64 labels, centers, _ = kmeans(X, n_clusters=n_clusters, n_iter=500) X_quant = np.empty(X.shape) for label in range(n_clusters): X_quant[labels == label, :] = centers[label] img_quant = X_quant.reshape(img.shape).astype(np.uint8) plt.figure() plt.imshow(img_quant) plt.show()
factor=.5, noise=.05, shuffle=True, random_state=random_state) } # Q9 - Q11 : Analysis of datasets plt.figure(figsize=(12, 8)) for i, (_, data) in enumerate(datasets.items()): X, y = data n_clusters = np.max(y) + 1 # K-Means t0 = time.time() labels_kmeans, _, _ = kmeans(X, n_clusters=n_clusters) time_kmeans = time.time() - t0 # Spectral t0 = time.time() labels_spectral = spectral_clustering(X, n_clusters=n_clusters) time_spectral = time.time() - t0 for j, (labels, t) in enumerate( zip((labels_kmeans, labels_spectral), (time_kmeans, time_spectral))): ax = plt.subplot(2, 3, 3 * j + i + 1) for k in range(n_clusters): ax.scatter(X[labels == k, 0], X[labels == k, 1], color=color[k])
n_clusters_max, T, random_state=0) for k, value in enumerate(delta): if value > 0: break return clusters_range[k] if __name__ == '__main__': # Parameters random_state = 0 n_samples, n_clusters_max = 1000, 10 color = 'rgbcmyk' n_clusters = 5 X, labels = make_blobs(n_samples=n_samples, random_state=random_state, centers=n_clusters) plot_result(*compute_gap(X, n_clusters_max)) plt.figure() n_clusters_opt = optimal_n_clusters_search(X, n_clusters_max) labels, _, _ = kmeans(X, n_clusters_opt) for k in range(n_clusters): plt.scatter(X[labels == k, 0], X[labels == k, 1], color=color[k]) plt.axis("equal") plt.show()