Beispiel #1
0
def visualize_clusters(data, target, problem, k):
    """Plot k-means clusters of ``data`` in a 2-D PCA projection.

    ``data`` is reduced to two components with PCA and a ``KMeans`` model
    with ``k`` clusters is fitted on the reduced samples. The model's
    prediction for every point of a regular mesh covering the reduced data
    is drawn as a coloured background; the reduced samples are drawn on top
    as black dots and the cluster centres as white crosses. The figure is
    displayed with ``pl.show()``.

    Args:
        data: Sample matrix passed to ``PCA.fit_transform``.
        target: Not used by this function; kept so existing callers that
            pass it keep working.
        problem: Dataset name; concatenated into the plot title, so it
            must be a ``str``.
        k: Number of clusters passed to ``KMeans``.
    """
    reduced_data = PCA(n_components=2).fit_transform(data)
    kmeans = KMeans(init='k-means++', n_clusters=k, n_init=10)
    kmeans.fit(reduced_data)

    # Step size of the mesh. Decrease to increase the quality of the VQ.
    h = .02

    # Plot the decision boundary. For that, we will assign a color to each
    # point in the mesh [x_min, x_max] x [y_min, y_max]; the mesh extends
    # one unit beyond the data on every side.
    x_min, x_max = reduced_data[:, 0].min() - 1, reduced_data[:, 0].max() + 1
    y_min, y_max = reduced_data[:, 1].min() - 1, reduced_data[:, 1].max() + 1
    xx, yy = np.meshgrid(np.arange(x_min, x_max, h),
                         np.arange(y_min, y_max, h))

    # Obtain labels for each point in mesh. Use last trained model.
    Z = kmeans.predict(np.c_[xx.ravel(), yy.ravel()])

    # Put the result into a color plot
    Z = Z.reshape(xx.shape)
    pl.figure(1)
    pl.clf()
    pl.imshow(Z,
              interpolation='nearest',
              extent=(xx.min(), xx.max(), yy.min(), yy.max()),
              cmap=pl.cm.Paired,
              aspect='auto',
              origin='lower')

    # Reduced samples as small black dots on top of the cluster regions.
    pl.plot(reduced_data[:, 0], reduced_data[:, 1], 'k.', markersize=2)
    # Plot the centroids as a white X
    centroids = kmeans.cluster_centers_
    pl.scatter(centroids[:, 0],
               centroids[:, 1],
               marker='x',
               s=169,
               linewidths=3,
               color='w',
               zorder=10)
    pl.title('K-means clustering on the ' + problem +
             ' dataset (PCA-reduced data)\n'
             'Centroids are marked with white cross')
    pl.xlim(x_min, x_max)
    pl.ylim(y_min, y_max)
    # Hide tick marks on both axes.
    pl.xticks(())
    pl.yticks(())
    pl.show()
Beispiel #2
0
def cluster(x, y, n):
    """Fit k-means on ``x``, then show the labelled points and the centres.

    A scatter of ``x[:, 0]`` against ``y[:, 0]``, coloured by the predicted
    cluster of each row of ``x``, is shown first. The cluster centres are
    then printed and their first two coordinates are scattered and shown
    with a second ``plt.show()`` call.

    Args:
        x: 2-D array the ``KMeans`` model is fitted on.
        y: 2-D array whose first column supplies the vertical coordinate
            of the first scatter plot.
        n: Number of clusters.
    """
    model = KMeans(n_clusters=n)
    model.fit(x)
    assignments = model.predict(x)

    # NOTE(review): the vertical axis is taken from ``y`` rather than from
    # a second column of ``x`` -- confirm this is intended.
    plt.scatter(x[:, 0], y[:, 0], s=50, c=assignments, cmap='viridis')
    plt.show()

    centers = model.cluster_centers_
    print(centers)
    plt.scatter(centers[:, 0], centers[:, 1], s=200, c='black', alpha=0.5)
    plt.show()
Beispiel #3
0
# Show the reference image, then the scrambled image that is re-ordered below.
y = plt.imread('test.jpg')
imgplot = plt.imshow(y)
plt.show()

X = plt.imread('test_scrambled.jpg')
imgplot = plt.imshow(X)
plt.show()

# One row per pixel; assumes the image has 3 colour channels -- TODO confirm.
new_X = X.reshape((-1, 3))

print(X.shape)
print(new_X.shape)

# Cluster the pixel colours; random_state is fixed so the labels are repeatable.
kmeans = KMeans(n_clusters=10, random_state=0).fit(new_X)

print(kmeans.labels_.shape)

# Order the pixel rows by cluster label so pixels of the same cluster are
# contiguous, then fold the rows back into the original image dimensions.
pixel_order = kmeans.labels_.argsort()
sorted_X = new_X[pixel_order]
new_kmeans = sorted_X.reshape((X.shape[0], X.shape[1], 3))
plt.imshow(new_kmeans)

plt.show()
Beispiel #4
0
import numpy as np
from scipy.cluster.hierarchy import dendrogram, linkage, leaves_list, optimal_leaf_ordering
import matplotlib.pyplot as plt
import pandas as pd
plt.style.use('ggplot')
from sklearn.cluster import KMeans
from scipy.cluster.vq import kmeans,vq

import sys  # provides sys.argv, read on the next statement

# Load the tab-separated table named by the first command-line argument,
# using its first column as the index, and keep the "CFU" and "poly" columns.
df = pd.read_table(sys.argv[1], sep="\t", header=0, index_col=0).loc[:, ["CFU", "poly"]]
array = df.values
col_names = df.columns.values.tolist()
#print(df)

# Ward linkage of the rows of the table.
Z = linkage(array, 'ward')

# NOTE(review): KMeans is fitted on the linkage matrix Z, not on the original
# observations in `array`, and the scatter below plots Z's first two columns.
# Confirm this is intended rather than clustering `array` itself.
kmeans = KMeans(n_clusters=4)
kmeans.fit(Z)
y_means = kmeans.predict(Z)

# Scatter coloured by predicted cluster, with the cluster centres overlaid,
# written to kmeans.png instead of being shown on screen.
fig, ax = plt.subplots()
plt.scatter(Z[:, 0], Z[:, 1], c=y_means, s=50, cmap="viridis")
centers = kmeans.cluster_centers_
plt.scatter(centers[:, 0], centers[:, 1], c="black", s=200, alpha=0.5)
fig.savefig("kmeans.png")
plt.close(fig)



#kmeans = scipy.cluster.vq.kmeans(Z, 2)
#centroids, _ = kmeans(Z, 2)
#idx, _ = vq(Z, centroids)
#plot(data[idx==0,0], data[idx==0,1], "ob",
       # data[idx==1,0],data[idx==1,1], "or")
Beispiel #5
0
        random_state=np.random.randint(1)).fit_transform(features_data)
    kmeans = KMeans(init='k-means++', n_clusters=num_clusters, n_init=10)
    kmeans.fit(reduced_data)

    # Step size of the mesh. Decrease to increase the quality of the VQ.
    h = .005  # point in the mesh [x_min, x_max]x[y_min, y_max].

    # execute: python3 cluster-features-alberto.py seed file n_clusters
    # Plot the decision boundary. For that, we will assign a color to each
    x_min, x_max = reduced_data[:, 0].min() - 1, reduced_data[:, 0].max() + 1
    y_min, y_max = reduced_data[:, 1].min() - 1, reduced_data[:, 1].max() + 1
    xx, yy = np.meshgrid(np.arange(x_min, x_max, h),
                         np.arange(y_min, y_max, h))

    # Obtain labels for each point in mesh. Use last trained model.
    Z = kmeans.predict(np.c_[xx.ravel(), yy.ravel()])

    # Put the result into a color plot
    Z = Z.reshape(xx.shape)
    fig, ax = plt.subplots()
    plt.imshow(Z,
               interpolation='nearest',
               extent=(xx.min(), xx.max(), yy.min(), yy.max()),
               cmap=plt.cm.Paired,
               aspect='auto',
               origin='lower')

    # Plot the centroids as a white x.
    centroids = kmeans.cluster_centers_
    plt.scatter(centroids[:, 0],
                centroids[:, 1],
Beispiel #6
0
labels = kmeans.labels_

# ---------------------- Revision example ----------------------
# Synthetic data: 300 samples with 3 features around 4 blob centres;
# random_state is fixed so the samples are the same on every run.
X, y_true = make_blobs(
    n_samples=300,
    n_features=3,
    centers=4,
    cluster_std=0.70,
    random_state=0,
)
x_ax, y_ax, z_ax = X[:, 0], X[:, 1], X[:, 2]

# K-means with 4 clusters on the synthetic samples.
kmeans = KMeans(n_clusters=4)
kmeans.fit(X)
y_kmeans = kmeans.predict(X)
centers = kmeans.cluster_centers_

# Two 3-D panels side by side: the raw samples on the left; on the right the
# samples coloured by predicted cluster with the cluster centres in black.
fig = plt.figure()
ax = fig.add_subplot(121, projection='3d')
ax2 = fig.add_subplot(122, projection='3d')
ax.scatter(x_ax, y_ax, z_ax, s=150)
ax2.scatter(x_ax, y_ax, z_ax, c=y_kmeans, s=100, cmap='viridis')
ax2.scatter(centers[:, 0], centers[:, 1], centers[:, 2], c='black', s=200)
for axes in (ax, ax2):
    axes.set_xlabel('X Label')
    axes.set_ylabel('Y Label')
    axes.set_zlabel('Z Label')
plt.show()
Beispiel #7
0
print(data.shape)
data.head()  # result is discarded; only displays in an interactive session

# Pair up columns V1 and V2 into a two-column feature matrix.
f1 = data['V1'].values
f2 = data['V2'].values
X = np.array(list(zip(f1, f2)))

# Number of clusters
kmeans = KMeans(n_clusters=3)
# Fitting the input data
kmeans = kmeans.fit(X)
# Getting the cluster labels
labels = kmeans.predict(X)
# Centroid values
centroids = kmeans.cluster_centers_
print(centroids)  # From scikit-learn

# X (and therefore each centroid) has exactly two columns, so the points are
# drawn on 2-D axes; indexing a third column would raise IndexError.
fig = plt.figure()
ax = fig.add_subplot(111)
ax.scatter(X[:, 0], X[:, 1], c='y', s=100)
ax.scatter(centroids[:, 0], centroids[:, 1], marker='*', c='#050505', s=1000)
#====================================================================
# example 2
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from sklearn.cluster import KMeans
from sklearn.datasets import make_blobs
Beispiel #8
0
# print(centroids)
# vq (vector quantisation) assigns each observation to one of the centroids.
result, _ = vq(whiten, centroids)
print(result)

# scikit-learn: open-source machine-learning module that exposes many
# learning algorithms through a common interface.
import numpy as np
from sklearn.cluster import KMeans
list1 = [88.0, 74.0, 96.0, 85.0]
list2 = [92.0, 99.0, 95.0, 94.0]
list3 = [91.0, 87.0, 99.0, 95.0]
list4 = [78.0, 99.0, 97.0, 81.0]
list5 = [88.0, 78.0, 98.0, 84.0]
list6 = [100.0, 95.0, 100.0, 92.0]
X = np.array([list1, list2, list3, list4, list5, list6])
kmeans = KMeans(n_clusters=2)
kmeans.fit(X)  # fit: cluster the data set into the configured number of groups
pred = kmeans.predict(X)  # predict: the cluster each row was assigned to
print(pred)

# Classification: training set and test set.
from sklearn import datasets
from sklearn import svm
digits = datasets.load_digits()
clf = svm.SVC(gamma=0.001, C=100.)
# Train on every sample except the last, then classify the held-out last one.
clf.fit(digits.data[:-1], digits.target[:-1])
result = clf.predict(digits.data[-1:])
print(result)

# Cluster 10 Dow Jones component stocks by the pattern of day-over-day closing-price rises and falls over the past year
import requests
import re
import json
Beispiel #9
0
def sklearnCluster(numpyArray, num):
    """Return the k-means cluster predicted for each row of ``numpyArray``.

    A ``KMeans`` model with ``num`` clusters is fitted on ``numpyArray`` and
    then asked to predict the cluster of those same rows.

    Args:
        numpyArray: Sample matrix to cluster.
        num: Number of clusters.

    Returns:
        The array produced by ``KMeans.predict`` for ``numpyArray``.
    """
    model = KMeans(n_clusters=num)
    model.fit(numpyArray)
    return model.predict(numpyArray)