def visualize_clusters(data, target, problem, k):
    '''
    pca = PCA(n_components=2).fit(data)
    pca_2d = pca.transform(data)
    # now visualize classified data in new projected space
    pl.figure('Reference Plot ' + problem)
    pl.scatter(pca_2d[:, 0], pca_2d[:, 1], c=['black'])
    kmeans = KMeans(n_clusters=3)
    kmeans.fit(data)
    pl.figure('K-means with 2 clusters ' + problem)
    pl.scatter(pca_2d[:, 0], pca_2d[:, 1], c=['navy', 'darkorange', 'green'], alpha=0.4)
    pl.legend()
    pl.show()
    '''
    # Project the data to 2-D with PCA so the clusters can be drawn.
    reduced_data = PCA(n_components=2).fit_transform(data)
    kmeans = KMeans(init='k-means++', n_clusters=k, n_init=10)
    kmeans.fit(reduced_data)

    # Step size of the mesh. Decrease to increase the quality of the VQ.
    h = .02  # point in the mesh [x_min, x_max]x[y_min, y_max].

    # Plot the decision boundary. For that, we will assign a color to each
    # point in the mesh.
    x_min, x_max = reduced_data[:, 0].min() - 1, reduced_data[:, 0].max() + 1
    y_min, y_max = reduced_data[:, 1].min() - 1, reduced_data[:, 1].max() + 1
    xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))

    # Obtain labels for each point in the mesh. Use the last trained model.
    Z = kmeans.predict(np.c_[xx.ravel(), yy.ravel()])

    # Put the result into a color plot.
    Z = Z.reshape(xx.shape)
    pl.figure(1)
    pl.clf()
    pl.imshow(Z, interpolation='nearest',
              extent=(xx.min(), xx.max(), yy.min(), yy.max()),
              cmap=pl.cm.Paired, aspect='auto', origin='lower')
    pl.plot(reduced_data[:, 0], reduced_data[:, 1], 'k.', markersize=2)

    # Plot the centroids as a white X.
    centroids = kmeans.cluster_centers_
    pl.scatter(centroids[:, 0], centroids[:, 1], marker='x', s=169,
               linewidths=3, color='w', zorder=10)
    pl.title('K-means clustering on the ' + problem + ' dataset (PCA-reduced data)\n'
             'Centroids are marked with white cross')
    pl.xlim(x_min, x_max)
    pl.ylim(y_min, y_max)
    pl.xticks(())
    pl.yticks(())
    pl.show()
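# Minimal usage sketch for visualize_clusters (a hypothetical driver, not part
# of the original script): it pulls in the names the function body relies on
# and feeds it sklearn's bundled iris data purely for illustration.
import numpy as np
import matplotlib.pylab as pl
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
from sklearn.datasets import load_iris

iris = load_iris()
visualize_clusters(iris.data, iris.target, 'iris', k=3)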
def cluster(x, y, n):
    # Fit k-means with n clusters and label every point.
    kmeans = KMeans(n_clusters=n)
    kmeans.fit(x)
    y_kmeans = kmeans.predict(x)

    # Plot the points colored by cluster assignment, with the cluster
    # centers overlaid on the same axes.
    plt.scatter(x[:, 0], x[:, 1], c=y_kmeans, s=50, cmap='viridis')
    centers = kmeans.cluster_centers_
    print(centers)
    plt.scatter(centers[:, 0], centers[:, 1], c='black', s=200, alpha=0.5)
    plt.show()
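# A quick way to exercise cluster(); a sketch, not from the original.
# Synthetic 2-D blobs stand in for real data, and the y argument is unused
# by the function, so it is passed only to match the signature.
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
from sklearn.datasets import make_blobs

X_demo, y_demo = make_blobs(n_samples=300, centers=3, random_state=42)
cluster(X_demo, y_demo, 3)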
# Show the original image and its scrambled version.
y = plt.imread('test.jpg')
plt.imshow(y)
plt.show()

X = plt.imread('test_scrambled.jpg')
plt.imshow(X)
plt.show()

# Flatten the scrambled image into an (n_pixels, 3) array of RGB values.
new_X = X.reshape((-1, 3))
print(X.shape)
print(new_X.shape)

# Cluster the pixels by color.
kmeans = KMeans(n_clusters=10, random_state=0).fit(new_X)
print(kmeans.labels_.shape)
print(kmeans.predict([[100, 100, 100]]))  # which cluster a mid-gray pixel falls into
# kmeans.cluster_centers_

# Reorder the pixels by cluster label, then reshape back to the image dimensions.
order = kmeans.labels_.argsort()
sorted_X = new_X[order]
sorted_img = sorted_X.reshape((X.shape[0], X.shape[1], 3))
plt.imshow(sorted_img)
plt.show()
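# Related sketch (an addition, not in the original): the same fit can also
# posterize the image by replacing every pixel with its cluster's mean color.
quantized = kmeans.cluster_centers_[kmeans.labels_].astype(X.dtype)
plt.imshow(quantized.reshape(X.shape))
plt.show()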
import sys

import numpy as np
from scipy.cluster.hierarchy import dendrogram, linkage, leaves_list, optimal_leaf_ordering
import matplotlib.pyplot as plt
import pandas as pd
plt.style.use('ggplot')
from sklearn.cluster import KMeans
from scipy.cluster.vq import kmeans, vq

# Load the tab-separated table and keep only the CFU and poly columns.
df = pd.read_table(sys.argv[1], sep="\t", header=0, index_col=0).loc[:, ("CFU", "poly")]
array = df.values
col_names = df.columns.values.tolist()
# print(df)

# Ward-linkage hierarchical clustering, then k-means on the linkage matrix.
Z = linkage(array, 'ward')
kmeans = KMeans(n_clusters=4)
kmeans.fit(Z)
y_means = kmeans.predict(Z)

fig, ax = plt.subplots()
plt.scatter(Z[:, 0], Z[:, 1], c=y_means, s=50, cmap="viridis")
centers = kmeans.cluster_centers_
plt.scatter(centers[:, 0], centers[:, 1], c="black", s=200, alpha=0.5)
fig.savefig("kmeans.png")
plt.close(fig)

# kmeans = scipy.cluster.vq.kmeans(Z, 2)
# centroids, _ = kmeans(Z, 2)
# idx, _ = vq(Z, centroids)
# plot(data[idx==0,0], data[idx==0,1], "ob",
#      data[idx==1,0], data[idx==1,1], "or")
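# The dendrogram import above is never used; here is a minimal sketch (an
# addition, not in the original script) of rendering the same Ward linkage:
fig2, ax2 = plt.subplots()
dendrogram(Z, ax=ax2, no_labels=True)
fig2.savefig("dendrogram.png")
plt.close(fig2)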
# execute: python3 cluster-features-alberto.py seed file n_clusters

# Reduce the features to 2-D for plotting. The start of this call is cut off
# in the source; the assignment below assumes it mirrors the earlier snippets.
# Note that np.random.randint(1) can only return 0, so this seed is fixed.
reduced_data = PCA(n_components=2,
                   random_state=np.random.randint(1)).fit_transform(features_data)
kmeans = KMeans(init='k-means++', n_clusters=num_clusters, n_init=10)
kmeans.fit(reduced_data)

# Step size of the mesh. Decrease to increase the quality of the VQ.
h = .005  # point in the mesh [x_min, x_max]x[y_min, y_max].

# Plot the decision boundary. For that, we will assign a color to each
# point in the mesh.
x_min, x_max = reduced_data[:, 0].min() - 1, reduced_data[:, 0].max() + 1
y_min, y_max = reduced_data[:, 1].min() - 1, reduced_data[:, 1].max() + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))

# Obtain labels for each point in the mesh. Use the last trained model.
Z = kmeans.predict(np.c_[xx.ravel(), yy.ravel()])

# Put the result into a color plot.
Z = Z.reshape(xx.shape)
fig, ax = plt.subplots()
plt.imshow(Z, interpolation='nearest',
           extent=(xx.min(), xx.max(), yy.min(), yy.max()),
           cmap=plt.cm.Paired, aspect='auto', origin='lower')

# Plot the centroids as a white x (this call is truncated in the source; the
# arguments follow the matching snippet above).
centroids = kmeans.cluster_centers_
plt.scatter(centroids[:, 0], centroids[:, 1], marker='x', s=169,
            linewidths=3, color='w', zorder=10)
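# Optional sanity check on num_clusters (an addition, not in the original):
# the mean silhouette score of the fitted labels.
from sklearn.metrics import silhouette_score
print(silhouette_score(reduced_data, kmeans.labels_))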
labels = kmeans.labels_

############## REVISION EXAMPLE ########################
#-------------------- Generating Synthetic Data -------------#
X, y_true = make_blobs(n_samples=300, n_features=3, centers=4,
                       cluster_std=0.70, random_state=0)
x_ax = X[:, 0]
y_ax = X[:, 1]
z_ax = X[:, 2]

#-------------------------- KMEANS ---------------------------#
kmeans = KMeans(n_clusters=4)
kmeans.fit(X)
y_kmeans = kmeans.predict(X)
centers = kmeans.cluster_centers_

#-------------------------- Plotting -------------------------#
fig = plt.figure()
ax = fig.add_subplot(121, projection='3d')
ax.scatter(x_ax, y_ax, z_ax, s=150)
ax.set_xlabel('X Label')
ax.set_ylabel('Y Label')
ax.set_zlabel('Z Label')

ax2 = fig.add_subplot(122, projection='3d')
ax2.scatter(x_ax, y_ax, z_ax, c=y_kmeans, s=100, cmap='viridis')
ax2.set_xlabel('X Label')
ax2.set_ylabel('Y Label')
ax2.set_zlabel('Z Label')
ax2.scatter(centers[:, 0], centers[:, 1], centers[:, 2], c='black', s=200)
plt.show()
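# Since the blobs come with ground-truth labels, a quick agreement check is
# possible (an addition, not in the original): the adjusted Rand index.
from sklearn.metrics import adjusted_rand_score
print(adjusted_rand_score(y_true, y_kmeans))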
print(data.shape)
data.head()

# Getting the values and plotting them.
f1 = data['V1'].values
f2 = data['V2'].values
X = np.array(list(zip(f1, f2)))
# plt.scatter(f1, f2, c='black', s=7)

# ====
# Number of clusters
kmeans = KMeans(n_clusters=3)
# Fitting the input data
kmeans = kmeans.fit(X)
# Getting the cluster labels
labels = kmeans.predict(X)
# Centroid values (from scikit-learn)
centroids = kmeans.cluster_centers_
print(centroids)

# X has only the two features V1 and V2, so plot in 2-D.
fig, ax = plt.subplots()
ax.scatter(X[:, 0], X[:, 1], c='y', s=100)
ax.scatter(centroids[:, 0], centroids[:, 1], marker='*', c='#050505', s=1000)

#====================================================================
# example 2
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from sklearn.cluster import KMeans
from sklearn.datasets import make_blobs
# print(centroids)
result, _ = vq(whiten, centroids)  # vq (vector quantization) assigns each observation to its nearest centroid
print(result)

# scikit-learn: an open-source machine learning module that provides
# interfaces to a wide range of machine learning algorithms.
import numpy as np
from sklearn.cluster import KMeans

list1 = [88.0, 74.0, 96.0, 85.0]
list2 = [92.0, 99.0, 95.0, 94.0]
list3 = [91.0, 87.0, 99.0, 95.0]
list4 = [78.0, 99.0, 97.0, 81.0]
list5 = [88.0, 78.0, 98.0, 84.0]
list6 = [100.0, 95.0, 100.0, 92.0]
X = np.array([list1, list2, list3, list4, list5, list6])

kmeans = KMeans(n_clusters=2).fit(X)  # fit: cluster the data into the chosen number of groups
pred = kmeans.predict(X)  # predict: report which cluster each sample belongs to
print(pred)

# Classification: training set and test set.
from sklearn import datasets
from sklearn import svm

clf = svm.SVC(gamma=0.001, C=100.)
digits = datasets.load_digits()
clf.fit(digits.data[:-1], digits.target[:-1])
result = clf.predict([digits.data[-1]])
print(result)

# Cluster 10 Dow Jones component stocks by the pattern of their day-over-day
# closing-price moves over the past year.
import requests
import re
import json
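# Side sketch on the digits classification above (an addition, not from the
# original, and not a continuation of the truncated stock-clustering example):
# the idiomatic split uses train_test_split rather than slicing off one sample.
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    digits.data, digits.target, test_size=0.25, random_state=0)
clf2 = svm.SVC(gamma=0.001, C=100.).fit(X_train, y_train)
print(clf2.score(X_test, y_test))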
def sklearnCluster(numpyArray, num):
    # Fit k-means with num clusters and return each row's cluster label.
    kmeans = KMeans(n_clusters=num).fit(numpyArray)
    cluster = kmeans.predict(numpyArray)
    return cluster
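# Minimal usage sketch for sklearnCluster (an addition, not in the source):
import numpy as np
from sklearn.cluster import KMeans

points = np.array([[1.0, 2.0], [1.5, 1.8], [8.0, 8.0], [8.2, 7.7]])
print(sklearnCluster(points, 2))  # e.g. [0 0 1 1]; cluster ids may swap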