Exemple #1
0
def kmeans_toy():
    """Run K-means on the toy dataset, sanity-check the fit and save outputs.

    Three scatter plots are written under ``plots/`` (points coloured by
    ``y``, points without colouring, points coloured by the predicted
    membership with the centroids in red) and the fit results are stored in
    ``results/k_means_toy.npz``.

    Raises:
        AssertionError: if ``KMeans.fit`` returns centroids or memberships
            with an unexpected shape, or an update count that is not a
            positive integer.
    """
    # Single source of truth for the cluster count: it drives the dataset,
    # the model and the shape checks below.
    n_cluster = 4
    # toy_dataset is called without a sample count; the membership check
    # expects 50 points per cluster (presumably toy_dataset's default —
    # TODO confirm).
    samples_per_cluster = 50
    expected_size = samples_per_cluster * n_cluster

    x, y = toy_dataset(n_cluster)
    fig = Figure()
    fig.ax.scatter(x[:, 0], x[:, 1], c=y)
    fig.savefig('plots/toy_dataset_real_labels.png')

    # NOTE(review): the same figure is reused here, so this scatter is
    # presumably drawn on top of the colour-coded one — confirm that is
    # intended.
    fig.ax.scatter(x[:, 0], x[:, 1])
    fig.savefig('plots/toy_dataset.png')

    k_means = KMeans(n_cluster=n_cluster, max_iter=100, e=1e-8)
    centroids, membership, i = k_means.fit(x)

    assert centroids.shape == (n_cluster, 2), \
        ('centroids for toy dataset should be numpy array of size {} X 2'
            .format(n_cluster))

    # The message reports the expected size so it stays correct if the
    # cluster count above is changed.
    assert membership.shape == (expected_size,), \
        'membership for toy dataset should be a vector of size {}'.format(
            expected_size)

    assert isinstance(i, int) and i > 0,  \
        'Number of updates for toy datasets should be integer and positive'

    print('[success] : kmeans clustering done on toy dataset')
    print('Toy dataset K means clustering converged in {} steps'.format(i))

    fig = Figure()
    fig.ax.scatter(x[:, 0], x[:, 1], c=membership)
    fig.ax.scatter(centroids[:, 0], centroids[:, 1], c='red')
    fig.savefig('plots/toy_dataset_predicted_labels.png')

    np.savez('results/k_means_toy.npz',
             centroids=centroids,
             step=i,
             membership=membership,
             y=y)
def kmeans_builder(centroid_func):
    """Fit K-means on a 9-cluster toy dataset and plot the outcome.

    Args:
        centroid_func: forwarded unchanged as the second argument of
            ``KMeans.fit``.

    Three scatter plots are written under ``plots/``: points coloured by
    ``y``, points without colouring, and points coloured by the predicted
    membership with the centroids in red.

    Raises:
        AssertionError: if ``KMeans.fit`` returns centroids or memberships
            with an unexpected shape, or an update count that is not a
            positive integer.
    """
    samples_per_cluster = 50
    n_cluster = 9
    expected_size = samples_per_cluster * n_cluster

    x, y = toy_dataset(n_cluster, samples_per_cluster)
    fig = Figure()
    fig.ax.scatter(x[:, 0], x[:, 1], c=y)
    fig.savefig('plots/toy_dataset_real_labels.png')

    # NOTE(review): the same figure is reused here, so this scatter is
    # presumably drawn on top of the colour-coded one — confirm that is
    # intended.
    fig.ax.scatter(x[:, 0], x[:, 1])
    fig.savefig('plots/toy_dataset.png')

    k_means = KMeans(n_cluster=n_cluster, max_iter=100, e=1e-8)
    centroids, membership, i = k_means.fit(x, centroid_func)

    assert centroids.shape == (n_cluster, 2), \
        ('centroids for toy dataset should be numpy array of size {} X 2'
            .format(n_cluster))

    # Report the expected size in the message; formatting len(membership)
    # would print the wrong (actual) size as if it were the requirement.
    assert membership.shape == (expected_size,), \
        'membership for toy dataset should be a vector of size {}'.format(
            expected_size)

    assert isinstance(i, int) and i > 0,  \
        'Number of updates for toy datasets should be integer and positive'

    print('[success] : kmeans clustering done on toy dataset')
    print('Toy dataset K means clustering converged in {} steps'.format(i))

    fig = Figure()
    fig.ax.scatter(x[:, 0], x[:, 1], c=membership)
    fig.ax.scatter(centroids[:, 0], centroids[:, 1], c='red')
    fig.savefig('plots/toy_dataset_predicted_labels.png')
Exemple #3
0
def kmeans_builder(centroid_func):
    """Fit K-means on a 9-cluster toy dataset and plot the outcome.

    Args:
        centroid_func: passed to ``KMeans.fit`` to initialise the fit.

    Three scatter plots are written under ``plots/``: points coloured by
    cluster index ``y``, points without colouring, and points coloured by
    the predicted membership with the centroids in red.

    Raises:
        AssertionError: if ``KMeans.fit`` returns centroids or memberships
            with an unexpected shape, or an update count that is not a
            positive integer.
    """
    samples_per_cluster = 50
    n_cluster = 9
    expected_size = samples_per_cluster * n_cluster

    x, y = toy_dataset(n_cluster, samples_per_cluster)

    # plot the scatter plot with color coded by cluster index
    fig = Figure()
    fig.ax.scatter(x[:, 0], x[:, 1], c=y)
    fig.savefig('plots/toy_dataset_real_labels.png')

    # NOTE(review): the same figure is reused here, so this scatter is
    # presumably drawn on top of the colour-coded one — confirm that is
    # intended.
    fig.ax.scatter(x[:, 0], x[:, 1])
    fig.savefig('plots/toy_dataset.png')

    # create the KMeans model
    k_means = KMeans(n_cluster=n_cluster, max_iter=100, e=1e-8)

    # fit the kmeans to data x using centroid_func to initialize
    centroids, membership, i = k_means.fit(x, centroid_func)

    assert centroids.shape == (n_cluster, 2), \
        ('centroids for toy dataset should be numpy array of size {} X 2'
            .format(n_cluster))

    # Report the expected size in the message; formatting len(membership)
    # would print the wrong (actual) size as if it were the requirement.
    assert membership.shape == (expected_size,), \
        'membership for toy dataset should be a vector of size {}'.format(
            expected_size)

    assert isinstance(i, int) and i > 0,  \
        'Number of updates for toy datasets should be integer and positive'

    print('[success] : kmeans clustering done on toy dataset')
    print('Toy dataset K means clustering converged in {} steps'.format(i))

    # plot toy dataset labelling using the fitted memberships and centroids
    fig = Figure()
    fig.ax.scatter(x[:, 0], x[:, 1], c=membership)
    fig.ax.scatter(centroids[:, 0], centroids[:, 1], c='red')
    fig.savefig('plots/toy_dataset_predicted_labels.png')
Exemple #4
0
        lambda1, lambda2 = eig
    else:
        lambda2, lambda1 = eig

    angle = np.arctan(b / (a - c)) / 2
    return np.sqrt(1 / lambda1), np.sqrt(1 / lambda2), angle


################################################################################
# GMM on 2D toy dataset
# The dataset is generated from N Gaussian distributions equally spaced on a circle of radius N.
# Here, N=4.
# You should be able to visualize the learned Gaussian distributions in the plots folder.
# Complete the implementation of the fit function for the GMM class in gmm.py.
################################################################################
# Toy data shared by every GMM run below; the arguments are presumably
# (number of clusters, samples per cluster) — TODO confirm against toy_dataset.
x, y = toy_dataset(4, 100)
# Values passed as `init=` to GMM; the loop below fits one model per entry.
init = ['k_means', 'random']

for i in init:
    n_cluster = 4
    gmm = GMM(n_cluster=n_cluster, max_iter=1000, init=i, e=1e-6)
    iterations = gmm.fit(x)
    ll = gmm.compute_log_likelihood(x)

    assert gmm.means.shape == (
        n_cluster, 2), 'means should be numpy array with {}X2 shape'.format(n_cluster)

    assert gmm.variances.shape == (
        n_cluster, 2, 2), 'variances should be numpy array with {}X2X2 shape'.format(n_cluster)

    assert gmm.pi_k.shape == (