def kmeans_toy():
    """Run k-means on the 4-cluster toy dataset, validate the fit, and save plots/results.

    Side effects: writes three PNGs under plots/ and one .npz under results/.
    """
    data, true_labels = toy_dataset(4)

    # Scatter the raw data twice on the same axes: first color-coded by the
    # true label, then plain (the plain layer overplots the colored one).
    fig = Figure()
    fig.ax.scatter(data[:, 0], data[:, 1], c=true_labels)
    fig.savefig('plots/toy_dataset_real_labels.png')
    fig.ax.scatter(data[:, 0], data[:, 1])
    fig.savefig('plots/toy_dataset.png')

    n_cluster = 4
    k_means = KMeans(n_cluster=n_cluster, max_iter=100, e=1e-8)
    centroids, membership, steps = k_means.fit(data)

    # Sanity-check the fit output shapes and the reported step count.
    assert centroids.shape == (n_cluster, 2), \
        ('centroids for toy dataset should be numpy array of size {} X 2'
         .format(n_cluster))
    assert membership.shape == (50 * n_cluster,), \
        'membership for toy dataset should be a vector of size 200'
    assert type(steps) == int and steps > 0, \
        'Number of updates for toy datasets should be integer and positive'

    print('[success] : kmeans clustering done on toy dataset')
    print('Toy dataset K means clustering converged in {} steps'.format(steps))

    # Predicted clustering, with the learned centroids highlighted in red.
    fig = Figure()
    fig.ax.scatter(data[:, 0], data[:, 1], c=membership)
    fig.ax.scatter(centroids[:, 0], centroids[:, 1], c='red')
    fig.savefig('plots/toy_dataset_predicted_labels.png')

    np.savez(
        'results/k_means_toy.npz',
        centroids=centroids,
        step=steps,
        membership=membership,
        y=true_labels,
    )
def kmeans_builder(centroid_func):
    """Cluster a 9-cluster toy dataset with KMeans, validate the fit, and save plots.

    NOTE(review): this definition is shadowed by a later duplicate
    `kmeans_builder` in this file; only the last definition takes effect.
    Consider deleting one of the two copies.

    Args:
        centroid_func: callable passed to KMeans.fit to pick initial centroids.

    Side effects: writes three PNGs under plots/.
    """
    samples_per_cluster = 50
    n_cluster = 9
    x, y = toy_dataset(n_cluster, samples_per_cluster)

    # Raw-data scatter plots: color-coded by true label, then plain
    # (the plain layer overplots the colored one on the shared axes).
    fig = Figure()
    fig.ax.scatter(x[:, 0], x[:, 1], c=y)
    fig.savefig('plots/toy_dataset_real_labels.png')
    fig.ax.scatter(x[:, 0], x[:, 1])
    fig.savefig('plots/toy_dataset.png')

    k_means = KMeans(n_cluster=n_cluster, max_iter=100, e=1e-8)
    centroids, membership, i = k_means.fit(x, centroid_func)

    assert centroids.shape == (n_cluster, 2), \
        ('centroids for toy dataset should be numpy array of size {} X 2'
         .format(n_cluster))
    # Fix: the failure message previously formatted len(membership) — the
    # ACTUAL size — so on failure it reported the wrong value as the
    # expectation. Report the expected size instead.
    assert membership.shape == (samples_per_cluster * n_cluster,), \
        'membership for toy dataset should be a vector of size {}'.format(
            samples_per_cluster * n_cluster)
    assert type(i) == int and i > 0, \
        'Number of updates for toy datasets should be integer and positive'

    print('[success] : kmeans clustering done on toy dataset')
    print('Toy dataset K means clustering converged in {} steps'.format(i))

    # Predicted clustering, with the learned centroids highlighted in red.
    fig = Figure()
    fig.ax.scatter(x[:, 0], x[:, 1], c=membership)
    fig.ax.scatter(centroids[:, 0], centroids[:, 1], c='red')
    fig.savefig('plots/toy_dataset_predicted_labels.png')
def kmeans_builder(centroid_func):
    """Cluster a 9-cluster toy dataset with KMeans, validate the fit, and save plots.

    NOTE(review): this is the second of two duplicate `kmeans_builder`
    definitions in this file; this one shadows the earlier copy. Consider
    deleting one of the two.

    Args:
        centroid_func: callable passed to KMeans.fit to pick initial centroids.

    Side effects: writes three PNGs under plots/.
    """
    samples_per_cluster = 50
    n_cluster = 9
    x, y = toy_dataset(n_cluster, samples_per_cluster)
    # print("x: ", x)
    # print("y: ", y)

    # plot the scatter plot with color coded by cluster index
    fig = Figure()
    fig.ax.scatter(x[:, 0], x[:, 1], c=y)
    fig.savefig('plots/toy_dataset_real_labels.png')
    fig.ax.scatter(x[:, 0], x[:, 1])
    fig.savefig('plots/toy_dataset.png')

    # create a class kmeans
    k_means = KMeans(n_cluster=n_cluster, max_iter=100, e=1e-8)
    # fit the kmeans to data x using centroid_func to initialize
    centroids, membership, i = k_means.fit(x, centroid_func)

    assert centroids.shape == (n_cluster, 2), \
        ('centroids for toy dataset should be numpy array of size {} X 2'
         .format(n_cluster))
    # Fix: the failure message previously formatted len(membership) — the
    # ACTUAL size — so on failure it reported the wrong value as the
    # expectation. Report the expected size instead.
    assert membership.shape == (samples_per_cluster * n_cluster,), \
        'membership for toy dataset should be a vector of size {}'.format(
            samples_per_cluster * n_cluster)
    assert type(i) == int and i > 0, \
        'Number of updates for toy datasets should be integer and positive'

    print('[success] : kmeans clustering done on toy dataset')
    print('Toy dataset K means clustering converged in {} steps'.format(i))

    # plot toy dataset labelling using OUR method
    fig = Figure()
    fig.ax.scatter(x[:, 0], x[:, 1], c=membership)
    fig.ax.scatter(centroids[:, 0], centroids[:, 1], c='red')
    # plt.show()
    fig.savefig('plots/toy_dataset_predicted_labels.png')
print('GMM for toy dataset with {} init converged in {} iteration. Final log-likelihood of data: {}'.format( i, iterations, ll)) np.savez('results/gmm_toy_{}.npz'.format(i), iterations=iterations, variances=gmm.variances, pi_k=gmm.pi_k, means=gmm.means, log_likelihood=ll, x=x, y=y) # plot fig = Figure() fig.ax.scatter(x[:, 0], x[:, 1], c=y) # fig.ax.scatter(gmm.means[:, 0], gmm.means[:, 1], c='red') for component in range(n_cluster): a, b, angle = compute_elipse_params(gmm.variances[component]) e = Ellipse(xy=gmm.means[component], width=a * 5, height=b * 5, angle=angle, alpha=gmm.pi_k[component]) fig.ax.add_artist(e) fig.savefig('plots/gmm_toy_dataset_{}.png'.format(i)) ################################################################################ # GMM on digits dataset # We fit a gaussian distribution on digits dataset and show generate samples from the distribution # Complete implementation of sample function for GMM class in gmm.py ################################################################################ x_train, x_test, y_train, y_test = load_digits() print('x_test:',x_test.shape) for i in init: n_cluster = 30 gmm = GMM(n_cluster=n_cluster, max_iter=1000, init=i, e=1e-10) iterations = gmm.fit(x_train)