Example #1
0
def kmeans_toy():
    x, y = toy_dataset(4)
    fig = Figure()
    fig.ax.scatter(x[:, 0], x[:, 1], c=y)
    fig.savefig('plots/toy_dataset_real_labels.png')

    fig.ax.scatter(x[:, 0], x[:, 1])
    fig.savefig('plots/toy_dataset.png')
    n_cluster = 4
    k_means = KMeans(n_cluster=n_cluster, max_iter=100, e=1e-8)
    centroids, membership, i = k_means.fit(x)

    assert centroids.shape == (n_cluster, 2), \
        ('centroids for toy dataset should be numpy array of size {} X 2'
            .format(n_cluster))

    assert membership.shape == (50 * n_cluster,), \
        'membership for toy dataset should be a vector of size 200'

    assert type(i) == int and i > 0,  \
        'Number of updates for toy datasets should be integer and positive'

    print('[success] : kmeans clustering done on toy dataset')
    print('Toy dataset K means clustering converged in {} steps'.format(i))

    fig = Figure()
    fig.ax.scatter(x[:, 0], x[:, 1], c=membership)
    fig.ax.scatter(centroids[:, 0], centroids[:, 1], c='red')
    fig.savefig('plots/toy_dataset_predicted_labels.png')

    np.savez('results/k_means_toy.npz',
             centroids=centroids,
             step=i,
             membership=membership,
             y=y)
def kmeans_builder(centroid_func):
    samples_per_cluster = 50
    n_cluster = 9

    x, y = toy_dataset(n_cluster, samples_per_cluster)
    fig = Figure()
    fig.ax.scatter(x[:, 0], x[:, 1], c=y)
    fig.savefig('plots/toy_dataset_real_labels.png')

    fig.ax.scatter(x[:, 0], x[:, 1])
    fig.savefig('plots/toy_dataset.png')

    k_means = KMeans(n_cluster=n_cluster, max_iter=100, e=1e-8)

    centroids, membership, i = k_means.fit(x, centroid_func)



    assert centroids.shape == (n_cluster, 2), \
        ('centroids for toy dataset should be numpy array of size {} X 2'
            .format(n_cluster))

    assert membership.shape == (samples_per_cluster * n_cluster,), \
        'membership for toy dataset should be a vector of size {}'.format(len(membership))

    assert type(i) == int and i > 0,  \
        'Number of updates for toy datasets should be integer and positive'

    print('[success] : kmeans clustering done on toy dataset')
    print('Toy dataset K means clustering converged in {} steps'.format(i))

    fig = Figure()
    fig.ax.scatter(x[:, 0], x[:, 1], c=membership)
    fig.ax.scatter(centroids[:, 0], centroids[:, 1], c='red')
    fig.savefig('plots/toy_dataset_predicted_labels.png')
Example #3
0
def kmeans_builder(centroid_func):
    samples_per_cluster = 50
    n_cluster = 9

    x, y = toy_dataset(n_cluster, samples_per_cluster)
    # print("x: ", x)
    # print("y: ", y)

    # plot the scatter plot with color coded by cluster index
    fig = Figure()
    fig.ax.scatter(x[:, 0], x[:, 1], c=y)
    fig.savefig('plots/toy_dataset_real_labels.png')

    fig.ax.scatter(x[:, 0], x[:, 1])
    fig.savefig('plots/toy_dataset.png')

    # create a class kmeans
    k_means = KMeans(n_cluster=n_cluster, max_iter=100, e=1e-8)

    # fit the kmeans to data x using centroid_func to initialize
    centroids, membership, i = k_means.fit(x, centroid_func)



    assert centroids.shape == (n_cluster, 2), \
        ('centroids for toy dataset should be numpy array of size {} X 2'
            .format(n_cluster))

    assert membership.shape == (samples_per_cluster * n_cluster,), \
        'membership for toy dataset should be a vector of size {}'.format(len(membership))

    assert type(i) == int and i > 0,  \
        'Number of updates for toy datasets should be integer and positive'

    print('[success] : kmeans clustering done on toy dataset')
    print('Toy dataset K means clustering converged in {} steps'.format(i))

    # plot toy dataset labelling using OUR method
    fig = Figure()
    fig.ax.scatter(x[:, 0], x[:, 1], c=membership)
    fig.ax.scatter(centroids[:, 0], centroids[:, 1], c='red')
    # plt.show()
    fig.savefig('plots/toy_dataset_predicted_labels.png')
Example #4
0
    print('GMM for toy dataset with {} init converged in {} iteration. Final log-likelihood of data: {}'.format(
        i, iterations, ll))

    np.savez('results/gmm_toy_{}.npz'.format(i), iterations=iterations,
             variances=gmm.variances, pi_k=gmm.pi_k, means=gmm.means, log_likelihood=ll, x=x, y=y)

    # plot
    fig = Figure()
    fig.ax.scatter(x[:, 0], x[:, 1], c=y)
    # fig.ax.scatter(gmm.means[:, 0], gmm.means[:, 1], c='red')
    for component in range(n_cluster):
        a, b, angle = compute_elipse_params(gmm.variances[component])
        e = Ellipse(xy=gmm.means[component], width=a * 5, height=b * 5,
                    angle=angle, alpha=gmm.pi_k[component])
        fig.ax.add_artist(e)
    fig.savefig('plots/gmm_toy_dataset_{}.png'.format(i))


################################################################################
# GMM on digits dataset
# We fit a gaussian distribution on digits dataset and show generate samples from the distribution
# Complete implementation of sample function for GMM class in gmm.py
################################################################################

x_train, x_test, y_train, y_test = load_digits()
print('x_test:',x_test.shape)

for i in init:
    n_cluster = 30
    gmm = GMM(n_cluster=n_cluster, max_iter=1000, init=i, e=1e-10)
    iterations = gmm.fit(x_train)