def kmeans_classification_builder(centroid_func, x_train, x_test, y_train,
                                  y_test):
    """Train a 10-cluster K-means classifier and print its test accuracy.

    Before training, the first 25 training samples are each reshaped to 8x8
    and tiled into a 5x5 montage that is written to ``plots/digits.png``.

    Args:
        centroid_func: Forwarded unchanged as the third argument of
            ``KMeansClassifier.fit``.
        x_train: Training samples; the first 25 must each reshape to 8x8.
        x_test: Samples handed to ``KMeansClassifier.predict``.
        y_train: Training labels handed to ``fit``.
        y_test: Reference labels compared element-wise with the predictions.

    Raises:
        AssertionError: If the predictions and ``y_test`` differ in shape.
    """
    # Preview montage: every sample fills an 8x8 patch inside a 10x10 cell,
    # which leaves a 2-pixel blank gap between neighbouring digits.
    n_preview = 25
    grid_side = int(np.ceil(np.sqrt(n_preview)))
    cell, patch = 10, 8

    montage = np.zeros((cell * grid_side, cell * grid_side))
    for idx in range(n_preview):
        row, col = divmod(idx, grid_side)
        top, left = cell * row, cell * col
        montage[top:top + patch, left:left + patch] = \
            x_train[idx].reshape([patch, patch])
    plt.imsave('plots/digits.png', montage, cmap='Greys')

    n_cluster = 10
    classifier = KMeansClassifier(n_cluster=n_cluster, max_iter=100, e=1e-6)
    classifier.fit(x_train, y_train, centroid_func)
    y_hat_test = classifier.predict(x_test)

    assert y_hat_test.shape == y_test.shape, \
        'y_hat_test and y_test should have same shape'

    accuracy = np.mean(y_hat_test == y_test)
    message = \
        '[*] Prediction accuracy of K-means classifier with {} cluster is {}'
    print(message.format(n_cluster, accuracy))
Exemple #2
0
def kmeans_classification():
    """Compare a 30-cluster K-means classifier with two baseline classifiers.

    Loads the data via ``load_digits()``, writes a 5x5 montage of the first
    25 training samples to ``plots/digits.png``, then fits and evaluates, in
    order: ``KMeansClassifier``, ``LogisticRegression`` and
    ``KNeighborsClassifier`` (the latter two presumably from scikit-learn --
    confirm against the file's imports). Each accuracy is printed.

    Finally the K-means predictions, the test labels, and the fitted K-means
    centroids and centroid labels are stored in
    ``results/k_means_classification.npz``.

    Raises:
        AssertionError: If the K-means predictions and ``y_test`` differ in
            shape.
    """
    x_train, x_test, y_train, y_test = load_digits()

    # Preview montage: every sample fills an 8x8 patch inside a 10x10 cell,
    # which leaves a 2-pixel blank gap between neighbouring digits.
    n_preview = 25
    grid_side = int(np.ceil(np.sqrt(n_preview)))

    im = np.zeros((10 * grid_side, 10 * grid_side))
    for idx in range(n_preview):
        m, n = divmod(idx, grid_side)
        im[10 * m:10 * m + 8,
           10 * n:10 * n + 8] = x_train[idx].reshape([8, 8])
    plt.imsave('plots/digits.png', im, cmap='Greys')

    n_cluster = 30
    classifier = KMeansClassifier(n_cluster=n_cluster, max_iter=100, e=1e-6)

    classifier.fit(x_train, y_train)
    kmeans_pred = classifier.predict(x_test)

    assert kmeans_pred.shape == y_test.shape, \
        'y_hat_test and y_test should have same shape'

    print('Prediction accuracy of K-means classifier with {} cluster is {}'.
          format(n_cluster, np.mean(kmeans_pred == y_test)))

    # Each baseline keeps its own prediction variable so the K-means
    # predictions are still available when the results file is written.
    linear_classifier = LogisticRegression()
    linear_classifier.fit(x_train, y_train)
    logistic_pred = linear_classifier.predict(x_test)
    print('Accuracy of logistic regression classifier is {}'.format(
        np.mean(logistic_pred == y_test)))

    knn_classifier = KNeighborsClassifier()
    knn_classifier.fit(x_train, y_train)
    knn_pred = knn_classifier.predict(x_test)
    print('Accuracy of Nearest Neighbour classifier is {}'.format(
        np.mean(knn_pred == y_test)))

    # Save the K-means predictions (not the last baseline's) so that
    # y_hat_test matches the centroids/centroid_labels stored beside it.
    np.savez('results/k_means_classification.npz',
             y_hat_test=kmeans_pred,
             y_test=y_test,
             centroids=classifier.centroids,
             centroid_labels=classifier.centroid_labels)
Exemple #3
0
    #k = 4
    k=3
    print(data_X)
    #print(data_X[0])
    clf = KMeansClassifier(k)
    clf.fit(data_X)
    cents = clf._centroids
    labels = clf._labels
    sse = clf._sse
    colors = ['red','purple','darkgreen','darkgray','darksalmon','darkred','olive','yellow','yellowgreen',
'silver','cyan','pink','orangered','orange','navy','magenta','lightgoldenrodyellow',
'lavenderblush','honeydew','mediumseagreen']  

    
    print(cents)
    pred =  clf.predict(data_X)
    print(pred)
    print("The labels is:",labels)
    print(traininglabel)
    colorlist = []
    colorlistreal = []
    ARI = ARIcaculate(traininglabel,pred)
    for i in range(len(data_X)):
       colorlist.append(colors[int(pred[i])])
       colorlistreal.append(colors[int(traininglabel[i])])
    drawcluter_result(data_X,colorlist,sse,ARI)
    drawcluter_real(data_X,colorlistreal)
    #print(sse)
    plt.title("SSE={:.2f},ARI={:.4f}".format(sse,ARI))
    plt.axis([-7,7,-7,7])
    outname = "./result/k_clusters" + str(k) + ".png"