def kmeans_classification_builder(centroid_func, x_train, x_test, y_train, y_test,
                                  *, n_cluster=10):
    """Fit a KMeansClassifier with a given centroid function and print test accuracy.

    A preview grid of up to 25 training samples is written to
    ``plots/digits.png``; the classifier is then fitted on the training split,
    used to predict the test split, and the fraction of matching labels is
    printed.

    Args:
        centroid_func: Forwarded unchanged as the third positional argument
            of ``KMeansClassifier.fit``.
        x_train: Training samples. Every previewed sample is reshaped to
            8x8, so it must hold exactly 64 values.
        x_test: Test samples handed to ``predict``.
        y_train: Training labels handed to ``fit``.
        y_test: Test labels, compared element-wise with the predictions.
        n_cluster: Cluster count passed to the classifier. Defaults to 10,
            the value that was previously hard-coded.

    Raises:
        AssertionError: If the predictions and ``y_test`` differ in shape.
    """
    import os  # function-scope import so the block does not rely on file-level imports

    # --- preview of the training data -----------------------------------
    max_tiles = 25
    grid = int(np.ceil(np.sqrt(max_tiles)))
    # Tiles sit on a 10-pixel pitch and each image fills 8x8 of it, which
    # leaves a 2-pixel gap between neighbours.
    canvas = np.zeros((10 * grid, 10 * grid))
    # Never index past the end of x_train: with fewer than `max_tiles`
    # samples the remaining tiles simply stay blank.
    n_shown = min(max_tiles, len(x_train))
    for idx in range(n_shown):
        row, col = divmod(idx, grid)
        canvas[10 * row:10 * row + 8, 10 * col:10 * col + 8] = \
            x_train[idx].reshape([8, 8])

    # imsave does not create missing directories.
    os.makedirs('plots', exist_ok=True)
    plt.imsave('plots/digits.png', canvas, cmap='Greys')

    # --- fit and evaluate -------------------------------------------------
    classifier = KMeansClassifier(n_cluster=n_cluster, max_iter=100, e=1e-6)
    classifier.fit(x_train, y_train, centroid_func)
    y_hat_test = classifier.predict(x_test)

    assert y_hat_test.shape == y_test.shape, \
        'y_hat_test and y_test should have same shape'

    accuracy = np.mean(y_hat_test == y_test)
    print(
        '[*] Prediction accuracy of K-means classifier with {} cluster is {}'.
        format(n_cluster, accuracy))
def kmeans_classification(n_cluster=30):
    """Compare a K-means classifier with two baselines on the digits data.

    Steps performed:

    1. Load the data via ``load_digits()``, which is expected to return
       ``(x_train, x_test, y_train, y_test)``.
    2. Save a preview grid of up to 25 training samples to
       ``plots/digits.png``.
    3. Fit ``KMeansClassifier``, ``LogisticRegression`` and
       ``KNeighborsClassifier`` on the training split and print the test
       accuracy of each.
    4. Save the K-means predictions, the test labels and the classifier's
       ``centroids`` / ``centroid_labels`` to
       ``results/k_means_classification.npz``.

    Args:
        n_cluster: Cluster count for the K-means classifier. Defaults to 30,
            the value that was previously hard-coded.

    Raises:
        AssertionError: If the K-means predictions and ``y_test`` differ in
            shape.
    """
    import os  # function-scope import so the block does not rely on file-level imports

    x_train, x_test, y_train, y_test = load_digits()

    # --- preview of the training data -----------------------------------
    max_tiles = 25
    grid = int(np.ceil(np.sqrt(max_tiles)))
    # Tiles sit on a 10-pixel pitch and each image fills 8x8 of it, which
    # leaves a 2-pixel gap between neighbours.
    canvas = np.zeros((10 * grid, 10 * grid))
    # Never index past the end of x_train: with fewer than `max_tiles`
    # samples the remaining tiles simply stay blank.
    n_shown = min(max_tiles, len(x_train))
    for idx in range(n_shown):
        row, col = divmod(idx, grid)
        canvas[10 * row:10 * row + 8, 10 * col:10 * col + 8] = \
            x_train[idx].reshape([8, 8])

    # imsave does not create missing directories.
    os.makedirs('plots', exist_ok=True)
    plt.imsave('plots/digits.png', canvas, cmap='Greys')

    # --- K-means classifier -----------------------------------------------
    classifier = KMeansClassifier(n_cluster=n_cluster, max_iter=100, e=1e-6)
    classifier.fit(x_train, y_train)
    kmeans_pred = classifier.predict(x_test)

    assert kmeans_pred.shape == y_test.shape, \
        'y_hat_test and y_test should have same shape'

    print('Prediction accuracy of K-means classifier with {} cluster is {}'.
          format(n_cluster, np.mean(kmeans_pred == y_test)))

    # --- baselines ----------------------------------------------------------
    # Each model keeps its own prediction variable so the K-means result
    # is still available when the archive is written below.
    linear_classifier = LogisticRegression()
    linear_classifier.fit(x_train, y_train)
    linear_pred = linear_classifier.predict(x_test)
    print('Accuracy of logistic regression classifier is {}'.format(
        np.mean(linear_pred == y_test)))

    knn_classifier = KNeighborsClassifier()
    knn_classifier.fit(x_train, y_train)
    knn_pred = knn_classifier.predict(x_test)
    print('Accuracy of Nearest Neighbour classifier is {}'.format(
        np.mean(knn_pred == y_test)))

    # --- persist the K-means result -----------------------------------------
    # The archive describes the K-means model (centroids, centroid labels),
    # so `y_hat_test` must hold the K-means predictions. Reusing a single
    # variable for all three models previously stored the nearest-neighbour
    # predictions here instead.
    os.makedirs('results', exist_ok=True)
    np.savez('results/k_means_classification.npz',
             y_hat_test=kmeans_pred,
             y_test=y_test,
             centroids=classifier.centroids,
             centroid_labels=classifier.centroid_labels)
# Cluster data_X into k groups, print the fitted state, and draw the predicted
# assignment next to the reference labels.
k = 3
print(data_X)

clf = KMeansClassifier(k)
clf.fit(data_X)
# Fitted state is read straight from the classifier's underscore attributes.
cents, labels, sse = clf._centroids, clf._labels, clf._sse

# Palette indexed by integer cluster id / label.
colors = [
    'red', 'purple', 'darkgreen', 'darkgray', 'darksalmon',
    'darkred', 'olive', 'yellow', 'yellowgreen', 'silver',
    'cyan', 'pink', 'orangered', 'orange', 'navy',
    'magenta', 'lightgoldenrodyellow', 'lavenderblush', 'honeydew',
    'mediumseagreen',
]

print(cents)
pred = clf.predict(data_X)
print(pred)
print("The labels is:", labels)
print(traininglabel)

ARI = ARIcaculate(traininglabel, pred)

# One colour per sample: predicted cluster and reference label respectively.
colorlist = [colors[int(pred[i])] for i in range(len(data_X))]
colorlistreal = [colors[int(traininglabel[i])] for i in range(len(data_X))]

drawcluter_result(data_X, colorlist, sse, ARI)
drawcluter_real(data_X, colorlistreal)

plt.title("SSE={:.2f},ARI={:.4f}".format(sse, ARI))
plt.axis([-7, 7, -7, 7])
outname = "./result/k_clusters" + str(k) + ".png"