Beispiel #1
0
def compute_log_inertia(X, n_clusters, T, bb_min, bb_max, random_state=0):
    """Compute the log inertia of X and of uniform reference draws X_t.

    K-means is run once on ``X`` and once on each of ``T`` reference data
    sets ``X_t`` drawn uniformly inside the box ``[bb_min, bb_max]``, each
    with the same shape as ``X``.

    Parameters
    ----------
    X: array-like, shape (n_samples, n_features)
        List of n_features-dimensional data points. Each row corresponds
        to a single data point.

    n_clusters: int
        The desired number of clusters.

    T: int
        Number of draws of X_t.

    bb_min: array-like, shape (n_features,)
        Inferior corner of the bounding box of X.

    bb_max: array-like, shape (n_features,)
        Superior corner of the bounding box of X.

    random_state: int, defaults to 0.
        Seed handed to ``np.random.RandomState`` to draw the X_t.

    Returns
    -------
    log_inertia: float
        Log of the inertia of the K-means applied to X.

    mean_log_inertia_rand: float
        Mean of the log of the inertia of the K-means applied to the different
        X_t.

    std_log_inertia_rand: float
        Standard deviation of the log of the inertia of the K-means applied to
        the different X_t.

    Raises
    ------
    ValueError
        If X is not two-dimensional.
    """
    # Accept any array-like (e.g. nested lists), as the docstring promises.
    X = np.asarray(X)
    if X.ndim != 2:
        raise ValueError(
            "X must be 2-dimensional (n_samples, n_features), "
            "got %d dimension(s)" % X.ndim)
    bb_min = np.asarray(bb_min)
    bb_max = np.asarray(bb_max)
    rng = np.random.RandomState(random_state)

    # Compute inertia for real data
    _, _, inertia = kmeans(X, n_clusters=n_clusters)

    # Compute the random inertia; the box extent does not change between
    # draws, so it is computed once.
    bb_extent = bb_max - bb_min
    rand_inertia = np.empty(T)
    for t in range(T):
        # Rescale draws from [0, 1) to the bounding box of X.
        X_t = rng.uniform(size=X.shape) * bb_extent + bb_min
        _, _, rand_inertia[t] = kmeans(X_t, n_clusters=n_clusters)
    rand_inertia = np.log(rand_inertia)

    return np.log(inertia), np.mean(rand_inertia), np.std(rand_inertia)
Beispiel #2
0
def spectral_clustering(X, n_clusters=2):
    """Cluster X by running K-means on a spectral embedding.

    The embedding is made of the first ``n_clusters`` eigenvectors of the
    unnormalized graph Laplacian built from the affinity matrix of ``X``.

    Parameters
    ----------
    X: array-like, shape (n_samples, n_features)
        List of n_features-dimensional data points. Each row corresponds
        to a single data point.

    n_clusters: int, defaults to 2
        The number of clusters to form.

    Returns
    -------
    labels: array-like, shape (n_samples,)
        The estimated labels
    """
    affinity = compute_affinity_matrix(X)

    # Unnormalized graph Laplacian: degree matrix minus affinity matrix.
    degrees = affinity.sum(axis=1)
    laplacian = np.diag(degrees) - affinity

    # Keep the leading columns of the eigenvector matrix as the embedding.
    _, eigenvectors = scipy.linalg.eigh(laplacian)
    embedding = eigenvectors[:, :n_clusters]

    labels, _, _ = kmeans(embedding, n_clusters=n_clusters)
    return labels
"""Example of how to use clustering to compress images."""

import numpy as np
from scipy import ndimage
import matplotlib.pyplot as plt

from kmeans_sol import kmeans

# ``scipy.ndimage.imread`` no longer exists in current SciPy releases; its
# deprecation notice pointed to ``matplotlib.pyplot.imread`` instead.
img = plt.imread('china.jpg')
plt.imshow(img)
n_rows, n_cols, n_colors = img.shape

# One row per pixel, one column per colour channel. The ``np.float`` alias
# was removed from NumPy; the builtin ``float`` is what it referred to.
X = img.reshape(-1, n_colors).astype(float)
n_clusters = 64

labels, centers, _ = kmeans(X, n_clusters=n_clusters, n_iter=500)

# Replace every pixel by the center of the cluster it was assigned to.
X_quant = np.empty(X.shape)
for label in range(n_clusters):
    X_quant[labels == label, :] = centers[label]

# Back to the original image layout and to 8-bit channels for display.
img_quant = X_quant.reshape(img.shape).astype(np.uint8)

plt.figure()
plt.imshow(img_quant)
plt.show()
Beispiel #4
0
                     factor=.5,
                     noise=.05,
                     shuffle=True,
                     random_state=random_state)
    }

    # Q9 - Q11 : Analysis of datasets
    plt.figure(figsize=(12, 8))

    for i, (_, data) in enumerate(datasets.items()):
        X, y = data
        n_clusters = np.max(y) + 1

        # K-Means
        t0 = time.time()
        labels_kmeans, _, _ = kmeans(X, n_clusters=n_clusters)
        time_kmeans = time.time() - t0

        # Spectral
        t0 = time.time()
        labels_spectral = spectral_clustering(X, n_clusters=n_clusters)
        time_spectral = time.time() - t0

        for j, (labels, t) in enumerate(
                zip((labels_kmeans, labels_spectral),
                    (time_kmeans, time_spectral))):
            ax = plt.subplot(2, 3, 3 * j + i + 1)
            for k in range(n_clusters):
                ax.scatter(X[labels == k, 0],
                           X[labels == k, 1],
                           color=color[k])
Beispiel #5
0
                                           n_clusters_max,
                                           T,
                                           random_state=0)
    for k, value in enumerate(delta):
        if value > 0:
            break

    return clusters_range[k]


if __name__ == '__main__':
    # Parameters
    random_state = 0
    n_samples, n_clusters_max = 1000, 10
    color = 'rgbcmyk'

    # Synthetic data with a known number of blobs.
    n_clusters = 5
    X, labels = make_blobs(n_samples=n_samples,
                           random_state=random_state,
                           centers=n_clusters)

    plot_result(*compute_gap(X, n_clusters_max))

    # Cluster with the estimated number of clusters and plot the result.
    plt.figure()
    n_clusters_opt = optimal_n_clusters_search(X, n_clusters_max)
    labels, _, _ = kmeans(X, n_clusters=n_clusters_opt)
    # Iterate over the clusters K-means actually produced, not over the
    # ground-truth count, so no estimated cluster is left out of the plot.
    for k in range(n_clusters_opt):
        mask = labels == k
        # Cycle through the palette: n_clusters_opt may exceed its length.
        plt.scatter(X[mask, 0], X[mask, 1], color=color[k % len(color)])
    plt.axis("equal")
    plt.show()