def kmeans_toy():
    """Run KMeans on a 4-cluster toy dataset, plot the results, and save outputs.

    Side effects: writes three PNGs under plots/ and one .npz archive under
    results/ (centroids, step count, predicted membership, true labels).
    """
    x, y = toy_dataset(4)

    # Scatter plot color-coded by the true cluster labels.
    fig = Figure()
    fig.ax.scatter(x[:, 0], x[:, 1], c=y)
    fig.savefig('plots/toy_dataset_real_labels.png')

    # Use a fresh figure for the unlabeled view. The original reused the same
    # axes, so toy_dataset.png also contained the colored scatter underneath.
    fig = Figure()
    fig.ax.scatter(x[:, 0], x[:, 1])
    fig.savefig('plots/toy_dataset.png')

    n_cluster = 4
    k_means = KMeans(n_cluster=n_cluster, max_iter=100, e=1e-8)
    centroids, membership, i = k_means.fit(x)

    assert centroids.shape == (n_cluster, 2), \
        ('centroids for toy dataset should be numpy array of size {} X 2'
         .format(n_cluster))
    assert membership.shape == (50 * n_cluster,), \
        'membership for toy dataset should be a vector of size 200'
    # isinstance (rather than type(i) == int) also accepts numpy integer
    # types, which fit() may legitimately return.
    assert isinstance(i, (int, np.integer)) and i > 0, \
        'Number of updates for toy datasets should be integer and positive'

    print('[success] : kmeans clustering done on toy dataset')
    print('Toy dataset K means clustering converged in {} steps'.format(i))

    # Plot the predicted memberships with the learnt centroids in red.
    fig = Figure()
    fig.ax.scatter(x[:, 0], x[:, 1], c=membership)
    fig.ax.scatter(centroids[:, 0], centroids[:, 1], c='red')
    fig.savefig('plots/toy_dataset_predicted_labels.png')

    np.savez('results/k_means_toy.npz',
             centroids=centroids,
             step=i,
             membership=membership,
             y=y)
def kmeans_builder(centroid_func):
    """Cluster a 9-cluster toy dataset with KMeans, initializing centroids
    via `centroid_func`, then plot and sanity-check the results.

    Side effects: writes three PNGs under plots/.
    """
    samples_per_cluster = 50
    n_cluster = 9
    x, y = toy_dataset(n_cluster, samples_per_cluster)

    # Scatter plot color-coded by the true cluster index, then the same
    # axes re-plotted without labels for the plain dataset view.
    fig = Figure()
    fig.ax.scatter(x[:, 0], x[:, 1], c=y)
    fig.savefig('plots/toy_dataset_real_labels.png')
    fig.ax.scatter(x[:, 0], x[:, 1])
    fig.savefig('plots/toy_dataset.png')

    # Fit KMeans, delegating centroid initialization to the caller.
    model = KMeans(n_cluster=n_cluster, max_iter=100, e=1e-8)
    centroids, membership, steps = model.fit(x, centroid_func)

    total = samples_per_cluster * n_cluster
    assert centroids.shape == (n_cluster, 2), \
        ('centroids for toy dataset should be numpy array of size {} X 2'
         .format(n_cluster))
    assert membership.shape == (total,), \
        'membership for toy dataset should be a vector of size {}'.format(len(membership))
    assert type(steps) == int and steps > 0, \
        'Number of updates for toy datasets should be integer and positive'

    print('[success] : kmeans clustering done on toy dataset')
    print('Toy dataset K means clustering converged in {} steps'.format(steps))

    # Predicted memberships, with the learnt centroids overlaid in red.
    fig = Figure()
    fig.ax.scatter(x[:, 0], x[:, 1], c=membership)
    fig.ax.scatter(centroids[:, 0], centroids[:, 1], c='red')
    fig.savefig('plots/toy_dataset_predicted_labels.png')
def kmeans_builder(centroid_func):
    """Run KMeans on a 9-cluster toy dataset, using `centroid_func` to pick
    the initial centroids; plot the results and sanity-check the outputs.

    NOTE(review): this duplicates an earlier `kmeans_builder` definition in
    the same file; being last, this one is the definition actually used.

    Side effects: writes three PNGs under plots/.
    """
    samples_per_cluster = 50
    n_cluster = 9
    x, y = toy_dataset(n_cluster, samples_per_cluster)

    # Scatter plot color-coded by the true cluster index.
    fig = Figure()
    fig.ax.scatter(x[:, 0], x[:, 1], c=y)
    fig.savefig('plots/toy_dataset_real_labels.png')

    # Fresh figure for the unlabeled view. Reusing the same axes (as the
    # original did) made toy_dataset.png include the colored scatter too.
    fig = Figure()
    fig.ax.scatter(x[:, 0], x[:, 1])
    fig.savefig('plots/toy_dataset.png')

    # Fit KMeans to x, delegating centroid initialization to centroid_func.
    k_means = KMeans(n_cluster=n_cluster, max_iter=100, e=1e-8)
    centroids, membership, i = k_means.fit(x, centroid_func)

    n_samples = samples_per_cluster * n_cluster
    assert centroids.shape == (n_cluster, 2), \
        ('centroids for toy dataset should be numpy array of size {} X 2'
         .format(n_cluster))
    # Report the EXPECTED size on failure; the original formatted in
    # len(membership) -- the actual size -- making the message useless.
    assert membership.shape == (n_samples,), \
        'membership for toy dataset should be a vector of size {}'.format(n_samples)
    # isinstance also accepts numpy integer types that fit() may return.
    assert isinstance(i, (int, np.integer)) and i > 0, \
        'Number of updates for toy datasets should be integer and positive'

    print('[success] : kmeans clustering done on toy dataset')
    print('Toy dataset K means clustering converged in {} steps'.format(i))

    # Predicted memberships, with the learnt centroids overlaid in red.
    fig = Figure()
    fig.ax.scatter(x[:, 0], x[:, 1], c=membership)
    fig.ax.scatter(centroids[:, 0], centroids[:, 1], c='red')
    fig.savefig('plots/toy_dataset_predicted_labels.png')
lambda1, lambda2 = eig else: lambda2, lambda1 = eig angle = np.arctan(b / (a - c)) / 2 return np.sqrt(1 / lambda1), np.sqrt(1 / lambda2), angle ################################################################################ # GMM on 2D toy dataset # The dataset is generated from N gaussian distributions equally spaced on N radius circle. # Here, N=4 # You should be able to visualize the learnt gaussian distribution in plots folder # Complete implementation of fit function for GMM class in gmm.py ################################################################################ x, y = toy_dataset(4, 100) init = ['k_means', 'random'] for i in init: n_cluster = 4 gmm = GMM(n_cluster=n_cluster, max_iter=1000, init=i, e=1e-6) iterations = gmm.fit(x) ll = gmm.compute_log_likelihood(x) assert gmm.means.shape == ( n_cluster, 2), 'means should be numpy array with {}X2 shape'.format(n_cluster) assert gmm.variances.shape == ( n_cluster, 2, 2), 'variances should be numpy array with {}X2X2 shape'.format(n_cluster) assert gmm.pi_k.shape == (