Example #1
def kmeans_classification():
    print("[+] K-Means Classification")

    x_train, x_test, y_train, y_test = load_digits()

    print("[+] K-Means Vanilla")
    kmeans_classification_builder(k_vanilla, x_train, x_test, y_train, y_test)
    print()

    print("[+] K-Means Plus Plus")
    kmeans_classification_builder(k_plus, x_train, x_test, y_train, y_test)

    # Baseline: logistic regression on the raw pixels. (A bare
    # try/except around fit/predict would leave y_hat_test undefined
    # on failure, so the calls run directly.)
    linear_classifier = LogisticRegression()
    linear_classifier.fit(x_train, y_train)
    y_hat_test = linear_classifier.predict(x_test)
    print('[*] Accuracy of logistic regression classifier is {}'
          .format(np.mean(y_hat_test == y_test)))

    KNNClassifier = KNeighborsClassifier()
    KNNClassifier.fit(x_train, y_train)
    y_hat_test = KNNClassifier.predict(x_test)
    print('[*] Accuracy of Nearest Neighbour classifier is {}'
          .format(np.mean(y_hat_test == y_test)))
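
# The two runs above differ only in how centroids are seeded. k_vanilla and
# k_plus are project-local initializers not shown here; the sketch below is
# one common way such functions look (names, signatures, and bodies are
# assumptions, not the repository's actual code).
import numpy as np

def k_vanilla(x, n_cluster, generator=np.random):
    # Plain k-means seeding: n_cluster points drawn uniformly at random.
    idx = generator.choice(len(x), size=n_cluster, replace=False)
    return x[idx]

def k_plus(x, n_cluster, generator=np.random):
    # k-means++ seeding: the first centroid is uniform; each later centroid
    # is drawn with probability proportional to its squared distance from
    # the nearest centroid picked so far.
    centroids = [x[generator.randint(len(x))]]
    for _ in range(n_cluster - 1):
        d2 = ((x[:, None, :] - np.asarray(centroids)[None, :, :]) ** 2
              ).sum(-1).min(axis=1)
        centroids.append(x[generator.choice(len(x), p=d2 / d2.sum())])
    return np.asarray(centroids)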
Example #2
def kmeans_classification():
    x_train, x_test, y_train, y_test = load_digits()

    # Tile the first N training digits (8x8 pixels each) into one image,
    # spacing them on a grid of 10-pixel cells, and save it for inspection.
    N = 25
    l = int(np.ceil(np.sqrt(N)))  # side length of the square grid

    im = np.zeros((10 * l, 10 * l))
    for m in range(l):
        for n in range(l):
            if m * l + n < N:
                im[10 * m:10 * m + 8,
                   10 * n:10 * n + 8] = x_train[m * l + n].reshape([8, 8])
    plt.imsave('plots/digits.png', im, cmap='Greys')

    n_cluster = 30
    classifier = KMeansClassifier(n_cluster=n_cluster, max_iter=100, e=1e-6)

    classifier.fit(x_train, y_train)
    y_hat_test = classifier.predict(x_test)

    assert y_hat_test.shape == y_test.shape, \
        'y_hat_test and y_test should have same shape'

    print('Prediction accuracy of K-means classifier with {} clusters is {}'.
          format(n_cluster, np.mean(y_hat_test == y_test)))

    linear_classifier = LogisticRegression()
    linear_classifier.fit(x_train, y_train)
    y_hat_test = linear_classifier.predict(x_test)
    print('Accuracy of logistic regression classifier is {}'.format(
        np.mean(y_hat_test == y_test)))

    KNNClassifier = KNeighborsClassifier()
    KNNClassifier.fit(x_train, y_train)
    y_hat_test = KNNClassifier.predict(x_test)
    print('Accuracy of Nearest Neighbour classifier is {}'.format(
        np.mean(y_hat_test == y_test)))

    np.savez('results/k_means_classification.npz',
             y_hat_test=y_hat_test,
             y_test=y_test,
             centroids=classifier.centroids,
             centroid_labels=classifier.centroid_labels)
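
# KMeansClassifier is the project's own class; the saved centroids and
# centroid_labels suggest the usual recipe: cluster the training data with
# k-means, label each centroid by majority vote of its members, and classify
# test points by their nearest centroid. A minimal sketch of that idea (the
# internals are assumptions, not the repository's actual implementation):
import numpy as np

def label_centroids(assignments, y, n_cluster):
    # Majority training label among the points assigned to each cluster;
    # clusters that end up empty default to label 0.
    labels = np.zeros(n_cluster, dtype=int)
    for k in range(n_cluster):
        members = y[assignments == k]
        if len(members):
            labels[k] = np.bincount(members).argmax()
    return labels

def predict_nearest_centroid(x, centroids, centroid_labels):
    # Assign each point to its closest centroid and return that
    # centroid's majority label.
    d2 = ((x[:, None, :] - centroids[None, :, :]) ** 2).sum(-1)
    return centroid_labels[np.argmin(d2, axis=1)]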
Example #3
    # fig.ax.scatter(gmm.means[:, 0], gmm.means[:, 1], c='red')
    for component in range(n_cluster):
        a, b, angle = compute_elipse_params(gmm.variances[component])
        e = Ellipse(xy=gmm.means[component], width=a * 5, height=b * 5,
                    angle=angle, alpha=gmm.pi_k[component])
        fig.ax.add_artist(e)
    fig.savefig('plots/gmm_toy_dataset_{}.png'.format(i))
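
# compute_elipse_params (name kept from the source) reduces a 2x2 covariance
# matrix to ellipse axes and a rotation angle. A standard eigendecomposition
# sketch of that computation, offered as an assumption rather than the
# repository's exact code:
def compute_elipse_params(cov):
    # Eigenvalues give the squared semi-axis lengths; the leading eigenvector
    # gives the orientation (in degrees, as matplotlib's Ellipse expects).
    eigvals, eigvecs = np.linalg.eigh(cov)
    order = eigvals.argsort()[::-1]            # largest variance first
    eigvals, eigvecs = eigvals[order], eigvecs[:, order]
    a, b = np.sqrt(eigvals)                    # one-sigma semi-axes
    angle = np.degrees(np.arctan2(eigvecs[1, 0], eigvecs[0, 0]))
    return a, b, angle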


################################################################################
# GMM on digits dataset
# We fit a Gaussian mixture to the digits dataset and generate samples from it.
# Complete the implementation of the sample method of the GMM class in gmm.py.
################################################################################
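
# A minimal sketch of the sample method asked for above, assuming the
# means / variances / pi_k attributes used elsewhere in this script
# (not necessarily the exact code in gmm.py): pick a component by its
# mixing weight, then draw from that component's Gaussian.
def sample(self, N):
    components = np.random.choice(len(self.pi_k), size=N, p=self.pi_k)
    return np.array([
        np.random.multivariate_normal(self.means[k], self.variances[k])
        for k in components
    ])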

x_train, x_test, y_train, y_test = load_digits()
print('x_test:', x_test.shape)

for i in init:
    n_cluster = 30
    gmm = GMM(n_cluster=n_cluster, max_iter=1000, init=i, e=1e-10)
    iterations = gmm.fit(x_train)
    ll = gmm.compute_log_likelihood(x_train)
    print('GMM for digits dataset with {} init converged in {} iterations. Final log-likelihood of data: {}'.format(i, iterations, ll))

    # plot cluster means
    means = gmm.means
    from matplotlib import pyplot as plt
    l = int(np.ceil(np.sqrt(n_cluster)))

    im = np.zeros((10 * l, 10 * l))