Esempio n. 1
0
def kmeans_clustering(test_data, K=4):
    """Cluster ``test_data`` with gradient-descent k-means.

    K initial centres are drawn at random from the rows of ``test_data``,
    refined with a fixed number of gradient-descent steps on
    ``cost_kmeans`` and each sample is then assigned to its nearest centre
    (Euclidean distance). Labels are renumbered so that label 0 belongs to
    the centre with the smallest first feature (the original comments call
    this "intensity"), label 1 to the next one, and so on.

    Args:
        test_data: 2-D array (num_test x p) with the features to cluster.
        K: number of clusters (4 by default).

    Returns:
        1-D float array of length num_test with labels in ``0..K-1``.
    """
    # Step size and number of gradient-descent updates.
    mu = 0.01
    num_iter = 100

    def fun(w):
        # Cost of the data for a given (flattened) set of centres.
        return cost_kmeans(test_data, w)

    # Initialise the centres with K randomly chosen samples.
    num_samples, num_features = test_data.shape
    idx = np.random.randint(num_samples, size=K)
    w_initial = test_data[idx, :]

    # Flatten the centres to a column vector, the form handed to ngradient.
    w_vector = w_initial.reshape(K * num_features, 1)
    for _ in range(num_iter):
        g = util.ngradient(fun, w_vector)
        # The orientation of ngradient's result is not visible from here;
        # reshaping it to w_vector's shape keeps the update element-wise
        # instead of silently broadcasting into a (K*p) x (K*p) matrix.
        w_vector = w_vector - mu * np.reshape(g, w_vector.shape)

    # Back to one centre per row.
    w_final = w_vector.reshape(K, num_features)

    # Index of the nearest centre for every sample.
    D = scipy.spatial.distance.cdist(test_data, w_final, metric='euclidean')
    min_index = np.argmin(D, axis=1)

    # rank[c] is the position of centre c when centres are sorted by their
    # first feature; mapping min_index through it renumbers the labels.
    sorted_order = np.argsort(w_final[:, 0], axis=0)
    rank = np.empty(len(sorted_order), dtype=int)
    rank[sorted_order] = np.arange(len(sorted_order))
    predicted_labels = rank[min_index].astype(float)
    return predicted_labels
Esempio n. 2
0
def kmeans_clustering(test_data, K=2):
    """Cluster ``test_data`` with gradient-descent k-means.

    The centres are refined with a fixed number of gradient-descent steps
    on ``cost_kmeans`` and each sample is assigned to its nearest centre
    (Euclidean distance). Labels are renumbered so that label 0 belongs to
    the centre with the smallest first feature (the original comments call
    this "intensity"), label 1 to the next one, and so on.

    Args:
        test_data: 2-D array (num_test x p) with the features to cluster.
        K: number of clusters (2 by default).

    Returns:
        1-D float array of length num_test with labels in ``0..K-1``.
    """
    # Diagnostic output retained from the original implementation.
    print(test_data.shape)

    _, num_features = test_data.shape

    def fun(w):
        # Cost of the data for a given (flattened) set of centres.
        return cost_kmeans(test_data, w)

    # Step size and number of gradient-descent updates.
    mu = 0.01
    num_iter = 100

    # NOTE(review): the initial centres come from generate_gaussian_data(2)
    # rather than from test_data, and the argument 2 is independent of K.
    # The reshape below only succeeds when that helper returns exactly
    # K * p values -- confirm against the helper's definition.
    w_initial, _ = generate_gaussian_data(2)
    print(w_initial.shape)

    # Flatten the centres to a column vector, the form handed to ngradient.
    w_vector = w_initial.reshape(K * num_features, 1)

    for _ in range(num_iter):
        g = util.ngradient(fun, w_vector)
        # The orientation of ngradient's result is not visible from here;
        # reshaping it to w_vector's shape keeps the update element-wise
        # instead of silently broadcasting into a (K*p) x (K*p) matrix.
        w_vector = w_vector - mu * np.reshape(g, w_vector.shape)

    # Back to one centre per row.
    w_final = w_vector.reshape(K, num_features)

    # Index of the nearest centre for every sample. (The former
    # np.diagonal(D[:, min_index]) built a num_test x num_test array only
    # to fill a value that was never used, so it has been dropped.)
    D = scipy.spatial.distance.cdist(test_data, w_final, metric='euclidean')
    min_index = np.argmin(D, axis=1)

    # rank[c] is the position of centre c when centres are sorted by their
    # first feature; mapping min_index through it renumbers the labels.
    sorted_order = np.argsort(w_final[:, 0], axis=0)
    rank = np.empty(len(sorted_order), dtype=int)
    rank[sorted_order] = np.arange(len(sorted_order))
    predicted_labels = rank[min_index].astype(float)

    return predicted_labels
Esempio n. 3
0
def kmeans_demo():
    """Animate gradient-descent k-means on two synthetic 2-D point clouds.

    Two clouds of 100 standard-normal points are generated (the second one
    shifted by +5 in both coordinates), two centres are initialised on
    randomly picked samples, and 100 gradient steps are taken on the cost
    callable returned by ``funX``. After every step the centre markers and
    the cost curve are redrawn.

    Returns:
        1-D float array with the cost recorded after each iteration.
    """
    # ---- synthetic data -------------------------------------------------
    n = 100
    cloud_a = np.random.randn(n, 2)
    cloud_b = np.random.randn(n, 2) + 5
    X = np.vstack((cloud_a, cloud_b))
    # Class labels; only the (disabled) util.scatter_data call used them.
    Y = np.concatenate((np.zeros((n, 1)), np.ones((n, 1))), axis=0)
    num_samples, num_features = X.shape

    # ---- settings -------------------------------------------------------
    clusters = 2       # number of centres
    mu = 1             # learning rate
    num_iter = 100     # number of gradient steps

    # Cost callable for this data set.
    fun = funX(X)

    # ---- initial centres ------------------------------------------------
    picked = np.random.randint(num_samples, size=clusters)
    initial_w = X[picked, :]
    print(initial_w)

    # Column-vector form of the centres, as handed to ngradient.
    w_vector = initial_w.reshape(clusters * num_features, 1)

    # Cost history starts as NaN so that unfilled entries are not drawn.
    xx = np.linspace(1, num_iter, num_iter)
    kmeans_cost = np.full(xx.shape, np.nan)

    # ---- figure: data + centres on the left, cost curve on the right ----
    fig = plt.figure(figsize=(14, 6))
    ax1 = fig.add_subplot(121)
    ax1.scatter(X[:n, 0], X[:n, 1], label='X-class0')
    ax1.scatter(X[n:, 0], X[n:, 1], label='X-class1')
    line1 = ax1.plot(initial_w[:, 0], initial_w[:, 1], "or",
                     markersize=5, label='W-vector')[0]
    ax1.grid()

    ax2 = fig.add_subplot(122, xlim=(0, num_iter), ylim=(0, 10))
    text_str = 'k={}, g={:.2f}\ncost={:.2f}'.format(0, 0, 0)
    txt2 = ax2.text(0.3, 0.95, text_str,
                    bbox={'facecolor': 'green', 'alpha': 0.4, 'pad': 10},
                    transform=ax2.transAxes)
    line2 = ax2.plot(xx, kmeans_cost, lw=2)[0]
    ax2.set_xlabel('Iteration')
    ax2.set_ylabel('Cost')
    ax2.grid()

    # ---- optimisation loop ----------------------------------------------
    for k in range(num_iter):
        # One gradient step on the flattened centres.
        g = util.ngradient(fun, w_vector)
        w_vector = w_vector - mu * g

        # Record the cost and refresh the annotation.
        kmeans_cost[k] = fun(w_vector)
        txt2.set_text('k={}, cost={:.2f}'.format(k, kmeans_cost[k]))

        # Redraw the cost curve and the centre markers.
        line2.set_ydata(kmeans_cost)
        centres = w_vector.reshape(clusters, num_features)
        line1.set_data(centres[:, 0], centres[:, 1])
        display(fig)
        clear_output(wait=True)
        plt.pause(.005)

    return kmeans_cost
Esempio n. 4
0
def kmeans_clustering(test_data, K=2):
    """Cluster ``test_data`` with gradient-descent k-means.

    K initial centres are drawn at random from the rows of the first value
    returned by ``seg.normalize_data(test_data)``, refined with a fixed
    number of gradient-descent steps on ``cost_kmeans`` and each sample is
    then assigned to its nearest centre (Euclidean distance). Labels are
    renumbered so that label 0 belongs to the centre with the smallest
    first feature (the original comments call this "intensity"), label 1
    to the next one, and so on.

    Args:
        test_data: 2-D array (num_test x p) with the features to cluster.
        K: number of clusters (2 by default).

    Returns:
        1-D float array of length num_test with labels in ``0..K-1``.
    """
    X_norm, _ = seg.normalize_data(test_data)
    N, M = X_norm.shape

    # NOTE(review): the centres are initialised from X_norm while the cost
    # and the final distances use the raw test_data. Whether the whole
    # pipeline should run on X_norm (or on test_data) cannot be decided
    # from this function alone, so that behaviour is left unchanged.
    def fun(w):
        # Cost of the data for a given (flattened) set of centres.
        return cost_kmeans(test_data, w)

    # Step size and number of gradient-descent updates.
    mu = 0.01
    num_iter = 100

    # Draw exactly K initial centres. A hard-coded count of 4 used to be
    # drawn here, which made the reshape below fail for any K other than 4
    # (including the default K=2).
    idx = np.random.randint(N, size=K)
    w_initial = X_norm[idx, :]

    # Flatten the centres to a column vector, the form handed to ngradient.
    w_vector = w_initial.reshape(K * M, 1)

    for _ in range(num_iter):
        change = mu * util.ngradient(fun, w_vector)
        # Reshape to w_vector's shape so the update stays element-wise
        # whether ngradient returns a 1-D array, a row or a column.
        w_vector = w_vector - np.reshape(change, w_vector.shape)

    # Back to one centre per row.
    w_final = w_vector.reshape(K, M)
    # Diagnostic output retained from the original implementation.
    print(w_final.shape)
    print(w_final)

    # Index of the nearest centre for every sample.
    D = scipy.spatial.distance.cdist(test_data, w_final, metric='euclidean')
    min_index = np.argmin(D, axis=1)

    # Order of the centres by their first feature.
    sorted_order = np.argsort(w_final[:, 0], axis=0)

    # Start from NaN and relabel cluster by cluster; the loop form is kept
    # because the diagnostics below report the intermediate state.
    predicted_labels = np.full(min_index.shape, np.nan)

    for i in np.arange(len(sorted_order)):
        print(sorted_order[i])
        predicted_labels[min_index == sorted_order[i]] = i
        print(np.unique(predicted_labels))

    print("after loop")
    print(predicted_labels.shape)
    print(np.unique(predicted_labels))
    return predicted_labels
Esempio n. 5
0
def kmeans_no_plot(X, labels, num_iter, mu=0.1, clusters=4):
    """Run gradient-descent k-means on ``X`` without any plotting.

    Initial centres are drawn at random from the rows of ``X`` and updated
    for ``num_iter`` gradient-descent steps on the cost callable returned
    by ``funX(X)``. Each sample is then assigned to its nearest centre
    (Euclidean distance) and the labels are renumbered so that label 0
    belongs to the centre with the smallest first feature.

    Args:
        X: 2-D array (N x M) with one sample per row.
        labels: not used by the computation; kept so existing callers
            keep working.
        num_iter: number of gradient-descent steps.
        mu: learning rate applied to every step.
        clusters: number of cluster centres. Defaults to 4, the value that
            used to be hard-coded.

    Returns:
        Tuple ``(kmeans_cost, predicted_labels, w_final)`` where
        ``kmeans_cost`` is a 1-D float array with the cost after each
        step, ``predicted_labels`` is a 1-D float array of length N with
        labels in ``0..clusters-1`` and ``w_final`` is the
        ``clusters x M`` array of final centres.
    """
    N, M = X.shape

    # Cost callable for this data set.
    fun = funX(X)

    # Initialise the centres with randomly chosen samples.
    idx = np.random.randint(N, size=clusters)
    initial_w = X[idx, :]
    print(initial_w)

    # Flatten the centres to a column vector, the form handed to ngradient.
    w_vector = initial_w.reshape(clusters * M, 1)

    # Cost history; NaN marks entries that have not been computed.
    kmeans_cost = np.full(num_iter, np.nan)

    for k in range(num_iter):
        g = util.ngradient(fun, w_vector)
        # The orientation of ngradient's result is not visible from here;
        # reshaping it to w_vector's shape keeps the update element-wise
        # instead of silently broadcasting into a square matrix.
        w_vector = w_vector - mu * np.reshape(g, w_vector.shape)
        kmeans_cost[k] = fun(w_vector)

    # Back to one centre per row.
    w_final = w_vector.reshape(clusters, M)

    # Index of the nearest centre for every sample.
    D = scipy.spatial.distance.cdist(X, w_final, metric='euclidean')
    min_index = np.argmin(D, axis=1)

    # rank[c] is the position of centre c when centres are sorted by their
    # first feature; mapping min_index through it renumbers the labels.
    sorted_order = np.argsort(w_final[:, 0], axis=0)
    rank = np.empty(len(sorted_order), dtype=int)
    rank[sorted_order] = np.arange(len(sorted_order))
    predicted_labels = rank[min_index].astype(float)

    return kmeans_cost, predicted_labels, w_final
Esempio n. 6
0
def kmeans_clustering(test_data, K=4):
    """Cluster ``test_data`` with gradient-descent k-means.

    K initial centres are drawn at random from the samples, refined with a
    fixed number of gradient-descent steps on ``cost_kmeans`` and each
    sample is then assigned to its nearest optimised centre (Euclidean
    distance). Labels are renumbered so that label 0 belongs to the centre
    with the smallest first feature (the original comments call this
    "intensity"), label 1 to the next one, and so on.

    Args:
        test_data: 1-D array of length num_test (treated as a single
            feature column, as before) or 2-D array (num_test x p).
        K: number of clusters (4 by default).

    Returns:
        1-D float array of length num_test with labels in ``0..K-1``.
    """
    import segmentation_util as util
    from scipy import spatial

    # Step size and number of gradient-descent updates.
    mu = 0.01
    num_iter = 100

    # A 1-D input becomes a single-feature column, which is what the former
    # np.array([test_data]).T produced; 2-D input is used as it is instead
    # of being turned into an unusable 3-D array.
    test_data = np.asarray(test_data)
    if test_data.ndim == 1:
        test_data = test_data.reshape(-1, 1)
    num_samples, num_features = test_data.shape

    def fun(w):
        # Cost of the (column-shaped) data for a given set of centres.
        return cost_kmeans(test_data, w)

    # Initialise the centres with K randomly chosen samples. The upper
    # bound is the real sample count; a fixed bound of 100 raised an
    # IndexError for smaller inputs and ignored every later sample.
    idx = np.sort(np.random.randint(0, num_samples, size=K))
    w_initial = test_data[idx, :].astype(float)

    # Flatten the centres to a column vector, the form handed to ngradient.
    w_vector = w_initial.reshape(K * num_features, 1)

    for _ in range(num_iter):
        g = util.ngradient(fun, w_vector)
        # The orientation of ngradient's result is not visible from here;
        # reshaping it to w_vector's shape keeps the update element-wise
        # instead of silently broadcasting into a square matrix.
        w_vector = w_vector - mu * np.reshape(g, w_vector.shape)

    # Back to one centre per row.
    w_final = w_vector.reshape(K, num_features)

    # Assign every sample to its nearest *optimised* centre. Measuring
    # against w_initial, as was done before, discarded the optimisation
    # and disagreed with the w_final-based ordering below.
    D = spatial.distance.cdist(test_data, w_final, metric='euclidean')
    min_index = np.argmin(D, axis=1)

    # rank[c] is the position of centre c when centres are sorted by their
    # first feature; mapping min_index through it renumbers the labels.
    sorted_order = np.argsort(w_final[:, 0], axis=0)
    rank = np.empty(len(sorted_order), dtype=int)
    rank[sorted_order] = np.arange(len(sorted_order))
    predicted_labels = rank[min_index].astype(float)

    return predicted_labels