def fit(self, samples):
        """
        Apply Spectral Clustering algorithm as described in Ng et al. 2002
        Affinity matrix calculated with scaling parameter as described in Zelnik-Manor et al. 2005
        Clustering algorithm used: custom simple KMeans
        :param samples: data samples to cluster
        :return: labels associated with cluster
        """
        self.samples = samples
        self.samples_len = len(samples)

        # Compute affinity matrix (A)
        affinity = self._affinity_matrix()

        # D^(-1/2), where D is the diagonal degree matrix whose entries are
        # the sums of A's rows (A is symmetric, so summing along axis=0 is
        # equivalent)
        d = np.diag(np.power(np.sum(affinity, axis=0), -1 / 2))

        # Compute the normalized matrix L = D^(-1/2) . A . D^(-1/2) (Ng et al. 2002)
        laplacian = d @ affinity @ d

        # The k largest eigenvectors of L stacked as columns of a matrix (X).
        # L is symmetric, so eigsh (which='LA') gives the algebraically
        # largest eigenvalues and a real-valued result, unlike eigs.
        _, eig_vecs = sp.sparse.linalg.eigsh(laplacian, k=self.k, which='LA')

        # Normalize each row of X to unit length, Y_i = X_i / (sum_j X_ij^2)^(1/2),
        # giving the final data sample representation (Y)
        normalized_eig_vecs = eig_vecs / np.linalg.norm(
            eig_vecs, axis=1, keepdims=True)

        # Fit a KMeans algorithm to Y and receive cluster labels
        kmeans = KMeans(k=self.k)

        return kmeans.fit(normalized_eig_vecs)
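
The `_affinity_matrix` helper called above is not shown in this snippet. A minimal sketch of the locally scaled affinity the docstring refers to might look like the following; the function name and the `n_neighbors` default are assumptions (Zelnik-Manor and Perona use the distance to the 7th nearest neighbor as each point's scale sigma_i):

import numpy as np

def local_scaling_affinity(samples, n_neighbors=7):
    # Pairwise Euclidean distances between all samples
    dists = np.linalg.norm(samples[:, None, :] - samples[None, :, :], axis=2)
    # sigma_i = distance to the n_neighbors-th nearest neighbor
    # (column 0 of each sorted row is the point's zero self-distance)
    sigmas = np.sort(dists, axis=1)[:, n_neighbors]
    # A_ij = exp(-d(i, j)^2 / (sigma_i * sigma_j)), with A_ii = 0
    affinity = np.exp(-dists ** 2 / (sigmas[:, None] * sigmas[None, :]))
    np.fill_diagonal(affinity, 0.0)
    return affinity
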
import numpy
import pylab
from unsupervised.kmeans import KMeans

if __name__ == "__main__":
    numpy.random.seed(1)
    X = numpy.vstack((numpy.random.randn(10000, 2)*0.3,
                      numpy.random.randn(10000, 2)*0.3 + numpy.ones(2)))

    estimator = KMeans(2, 200, 10)
    estimator.fit(X)
    print(estimator.C_)
    print(estimator.v)
    Y = estimator.predict(X)
    print(Y)

    pylab.plot(X[:, 0], X[:, 1], "o")
    pylab.plot([estimator.C_[0, 0]], [estimator.C_[0, 1]], "o")
    pylab.plot([estimator.C_[1, 0]], [estimator.C_[1, 1]], "o")
    pylab.show()
    def normalize_data(data):
        # Clip to +/- 3 standard deviations and rescale to [0.1, 0.9]
        # (the head of this helper was truncated; pstd = 3 * data.std()
        # is an assumed, conventional choice for this normalization)
        pstd = 3.0 * data.std()
        data = numpy.fmax(numpy.fmin(data, pstd), -pstd) / pstd
        data = (data + 1) * 0.4 + 0.1
        return data
    images = normalize_data(images)

    patch_width = 8
    n_filters = 25

    n_samples, n_rows, n_cols = images.shape
    n_features = n_rows * n_cols
    patches = [extract_patches_2d(images[i], (patch_width, patch_width),
                                  max_patches=1000, random_state=i)
               for i in range(n_samples)]
    patches = numpy.array(patches).reshape(-1, patch_width * patch_width)
    print("Dataset consists of %d samples" % n_samples)

    estimator = KMeans(n_filters=n_filters, batch_size=1000, n_iterations=200)
    estimator.fit(patches)
    print(estimator.predict(patches))

    pylab.figure()
    rows = max(int(numpy.sqrt(n_filters)), 2)
    cols = max(int(numpy.sqrt(n_filters)), 2)
    for i in range(estimator.C_.shape[0]):
        pylab.subplot(rows, cols, i + 1)
        pylab.imshow(estimator.C_[i].reshape(patch_width, patch_width),
                     cmap=pylab.cm.gray, interpolation="nearest")
        pylab.xticks(())
        pylab.yticks(())
    pylab.show()
import numpy
import pylab

from unsupervised.kmeans import KMeans
from tools import load_mnist, scale_features, test_classifier


if __name__ == "__main__":
    numpy.random.seed(0)

    train_images, T = load_mnist("training", 60000)
    test_images, T2 = load_mnist("testing", 10000)
    print "Dataset loaded"

    train_cluster = train_images[:10000]
    train_classifier = train_images
    label_classifier = T
    n_filters = 196
    estimator = KMeans(n_filters=n_filters, batch_size=1000, n_iterations=10)
    estimator.fit(train_cluster)
    X = estimator.predict(train_classifier)
    X2 = estimator.predict(test_images)
    X_mean = X.mean(axis=0)
    X_std = X.std(axis=0) + 1e-8
    X = scale_features(X, X_mean, X_std)
    X2 = scale_features(X2, X_mean, X_std)
    print "Transformed datasets"

    test_classifier(X, label_classifier, X2, T2)

    pylab.figure()
    pylab.subplots_adjust(wspace=0.0, hspace=0.0)
    n_cells = numpy.min((int(numpy.sqrt(n_filters)), 10))
    for i in range(n_cells**2):
        # (loop body reconstructed by analogy with the patch example above;
        # 28x28 is the assumed MNIST image shape)
        pylab.subplot(n_cells, n_cells, i + 1)
        pylab.imshow(estimator.C_[i].reshape(28, 28),
                     cmap=pylab.cm.gray, interpolation="nearest")
        pylab.xticks(())
        pylab.yticks(())
    pylab.show()
Example #5
import numpy as np
import matplotlib.pyplot as plt

from unsupervised.kmeans import KMeans

n_samples = 100
n_features = 2

X = np.random.rand(n_samples, n_features)

inertias = []

for k in range(1, 10):  # could iterate up to len(X)

    kmeans = KMeans(k=k)
    kmeans.fit(X)

    print(kmeans.inertia_)
    inertias.append(kmeans.inertia_)

plt.figure(figsize=(10, 20))
plt.plot(range(1, len(inertias) + 1), inertias)
plt.show()
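
The plot above is read with the elbow method: a good k is the point where the inertia curve stops dropping sharply. A small, hedged sketch for picking that point programmatically (the 20% threshold is an arbitrary assumption, not part of the original example):

# Relative improvement in inertia gained by adding one more cluster
drops = -np.diff(inertias) / np.array(inertias[:-1])
# First k whose next cluster improves inertia by less than 20%
# (argmax returns 0, i.e. k=1, if no such point exists)
elbow_k = int(np.argmax(drops < 0.2)) + 1
print("suggested k:", elbow_k)
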
Example #6
import argparse

import numpy as np
from sklearn.datasets import make_blobs

from unsupervised.kmeans import KMeans

if __name__ == "__main__":
    # (head of this truncated example reconstructed: the imports, the parser,
    # and a --max_iter option are implied by the references below; the
    # defaults and description are assumptions)
    parser = argparse.ArgumentParser(description='KMeans clustering demo.')
    parser.add_argument('--max_iter', type=int, help='Maximum number of iterations.', default=100)
    parser.add_argument('--center', type=int, help='Number of data centers.', default=3)
    parser.add_argument('--random_state', type=int, help='Random state for data generation.', default=42)
    parser.add_argument('--n_samples', type=int, help='Number of data points.', default=5000)
    args = parser.parse_args()

    # Setting parameters
    max_iterations = args.max_iter
    n_centers = args.center
    n_samples = args.n_samples
    random_state = args.random_state

    # Create the clusters
    X, y = make_blobs(n_samples=n_samples, centers=n_centers, n_features=2, random_state=random_state, cluster_std=1.5)

    # Clustering
    kmeans = KMeans(k=n_centers, iterations=max_iterations, random_state=random_state, track_history=True)
    kmeans.fit(X)

    # Extract centroids
    centroids = kmeans.history_centroids

    # Create decision boundary data
    h = .1
    y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
    x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
    area_data = np.c_[xx.ravel(), yy.ravel()]

    # Prepare predictions
    predicted_labels = []
    predicted_area = []