def kmeans_classification():
    print("[+] K-Means Classification")
    x_train, x_test, y_train, y_test = load_digits()

    print("[+] K-Means Vanilla")
    kmeans_classification_builder(k_vanilla, x_train, x_test, y_train, y_test)
    print()

    print("[+] K-Means Plus Plus")
    kmeans_classification_builder(k_plus, x_train, x_test, y_train, y_test)

    # Baseline: logistic regression
    linear_classifier = LogisticRegression()
    linear_classifier.fit(x_train, y_train)
    y_hat_test = linear_classifier.predict(x_test)
    print('[*] Accuracy of logistic regression classifier is {}'
          .format(np.mean(y_hat_test == y_test)))

    # Baseline: k-nearest neighbours
    KNNClassifier = KNeighborsClassifier()
    KNNClassifier.fit(x_train, y_train)
    y_hat_test = KNNClassifier.predict(x_test)
    print('[*] Accuracy of Nearest Neighbour classifier is {}'
          .format(np.mean(y_hat_test == y_test)))
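
# `k_plus` above presumably refers to k-means++ seeding. The helper below is a
# minimal, self-contained sketch of that rule (not this repo's implementation):
# the first centre is chosen uniformly at random, and every subsequent centre
# is drawn with probability proportional to its squared distance from the
# nearest centre already picked (the classic D^2 weighting).
def kmeans_plus_plus_init_sketch(x, n_cluster, rng=np.random):
    centers = [x[rng.randint(len(x))]]  # first centre: uniform over the data
    for _ in range(n_cluster - 1):
        # squared distance from every point to its nearest chosen centre
        d2 = np.min([np.sum((x - c) ** 2, axis=1) for c in centers], axis=0)
        # D^2 weighting: far-away points are more likely to seed a new centre
        centers.append(x[rng.choice(len(x), p=d2 / d2.sum())])
    return np.array(centers)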
def kmeans_classification():
    x_train, x_test, y_train, y_test = load_digits()

    # plot the first N training digits on an l x l grid
    # (each 8x8 digit sits on a 10x10 tile)
    N = 25
    l = int(np.ceil(np.sqrt(N)))
    im = np.zeros((10 * l, 10 * l))
    for m in range(l):
        for n in range(l):
            if m * l + n < N:
                im[10 * m:10 * m + 8, 10 * n:10 * n + 8] = \
                    x_train[m * l + n].reshape([8, 8])
    plt.imsave('plots/digits.png', im, cmap='Greys')

    n_cluster = 30
    classifier = KMeansClassifier(n_cluster=n_cluster, max_iter=100, e=1e-6)
    classifier.fit(x_train, y_train)
    y_hat_test = classifier.predict(x_test)

    assert y_hat_test.shape == y_test.shape, \
        'y_hat_test and y_test should have same shape'

    print('Prediction accuracy of K-means classifier with {} clusters is {}'
          .format(n_cluster, np.mean(y_hat_test == y_test)))

    # Baseline: logistic regression (stored in its own variable so the
    # K-means predictions saved below stay intact)
    linear_classifier = LogisticRegression()
    linear_classifier.fit(x_train, y_train)
    y_hat_lr = linear_classifier.predict(x_test)
    print('Accuracy of logistic regression classifier is {}'.format(
        np.mean(y_hat_lr == y_test)))

    # Baseline: k-nearest neighbours
    KNNClassifier = KNeighborsClassifier()
    KNNClassifier.fit(x_train, y_train)
    y_hat_knn = KNNClassifier.predict(x_test)
    print('Accuracy of Nearest Neighbour classifier is {}'.format(
        np.mean(y_hat_knn == y_test)))

    np.savez('results/k_means_classification.npz',
             y_hat_test=y_hat_test,
             y_test=y_test,
             centroids=classifier.centroids,
             centroid_labels=classifier.centroid_labels)
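
# A minimal sketch of how a KMeansClassifier of this kind typically turns
# clustering into classification (assumed behaviour, not the graded code):
# each centroid is labelled by a majority vote over the training points
# assigned to it, and predict() returns the label of the nearest centroid.
# Assumes integer class labels, as in the digits dataset.
def assign_centroid_labels_sketch(x_train, y_train, centroids):
    # squared distances (N points x K centroids), then nearest-centroid index
    d2 = np.sum((x_train[:, None, :] - centroids[None, :, :]) ** 2, axis=2)
    membership = np.argmin(d2, axis=1)
    labels = np.zeros(len(centroids), dtype=int)
    for k in range(len(centroids)):
        members = y_train[membership == k]
        # majority vote; fall back to label 0 for an empty cluster
        labels[k] = np.bincount(members).argmax() if len(members) else 0
    return labels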
    # fig.ax.scatter(gmm.means[:, 0], gmm.means[:, 1], c='red')
    for component in range(n_cluster):
        a, b, angle = compute_elipse_params(gmm.variances[component])
        e = Ellipse(xy=gmm.means[component], width=a * 5, height=b * 5,
                    angle=angle, alpha=gmm.pi_k[component])
        fig.ax.add_artist(e)
    fig.savefig('plots/gmm_toy_dataset_{}.png'.format(i))

################################################################################
# GMM on digits dataset
# We fit a Gaussian mixture to the digits dataset and generate samples from
# the fitted distribution.
# Complete the implementation of the sample function for the GMM class in
# gmm.py
################################################################################

x_train, x_test, y_train, y_test = load_digits()
print('x_test:', x_test.shape)

for i in init:
    n_cluster = 30
    gmm = GMM(n_cluster=n_cluster, max_iter=1000, init=i, e=1e-10)
    iterations = gmm.fit(x_train)
    ll = gmm.compute_log_likelihood(x_train)
    print('GMM for digits dataset with {} init converged in {} iterations. '
          'Final log-likelihood of data: {}'.format(i, iterations, ll))

    # plot cluster means
    means = gmm.means
    from matplotlib import pyplot as plt
    l = int(np.ceil(np.sqrt(n_cluster)))
    im = np.zeros((10 * l, 10 * l))
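
# The banner above asks for a sample() implementation in gmm.py. Below is a
# minimal sketch of the standard ancestral-sampling recipe (assumed attribute
# names pi_k / means / variances, matching their use earlier in this script):
# draw a component index according to the mixing weights, then draw from that
# component's Gaussian.
def gmm_sample_sketch(gmm, n_samples, rng=np.random):
    # choose a mixture component for each sample, weighted by pi_k
    components = rng.choice(len(gmm.pi_k), size=n_samples, p=gmm.pi_k)
    # draw from the chosen component's multivariate normal
    return np.array([rng.multivariate_normal(gmm.means[k], gmm.variances[k])
                     for k in components])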