Esempio n. 1
0
def runKMeans(X, initial_centroids, max_iters, plot_progress=False):
    """Run K-Means on the data matrix X, where each row is one example.

    Args:
        X: 2-D array of examples, one per row.
        initial_centroids: 2-D array holding one starting centroid per row.
        max_iters: number of K-Means iterations to execute.
        plot_progress: when truthy, a new figure with the centroid history
            is drawn and shown on every iteration. Defaults to False, which
            is what the original description of this routine promises.

    Returns:
        A ``(centroids, idx)`` tuple: the centroids produced by the last
        update step and the assignments from the last call to
        ``findClosestCentroids``. With ``max_iters == 0`` the initial
        centroids and an all-zero assignment vector are returned.
    """
    K, n_features = initial_centroids.shape
    centroids = initial_centroids
    # history_centroids[i] stores the centroids in use when iteration i starts.
    history_centroids = np.zeros((max_iters, K, n_features))
    idx = np.zeros(X.shape[0])

    for i in range(max_iters):
        print('K-Means iteration {}/{}'.format(i + 1, max_iters))
        history_centroids[i, :] = centroids

        # Assignment step: attach every example to its closest centroid.
        idx = findClosestCentroids(X, centroids)

        if plot_progress:
            plt.figure()
            plotProgressKMeans(X, history_centroids, idx, K, i)
            plt.show()

        # Update step: recompute the centroids from the new memberships.
        centroids = computeCentroids(X, idx, K)

    return centroids, idx
Esempio n. 2
0
def runkMeans(X, initial_centroids, max_iters, plot_progress=False):
    """Run K-Means on X (one example per row) for max_iters iterations.

    Args:
        X: 2-D data matrix.
        initial_centroids: starting centroids, one per row.
        max_iters: number of iterations to run.
        plot_progress: when truthy, plotProgresskMeans is called on every
            iteration. Defaults to False.

    Returns:
        ``(centroids, idx)``: the centroids after the last update and the
        assignments from the last findClosestCentroids call (an all-zero
        column vector if no iteration ran).
    """
    num_examples, _ = np.shape(X)
    num_centroids = np.size(initial_centroids, 0)
    idx = np.zeros((num_examples, 1))
    centroids = previous_centroids = initial_centroids

    for iteration in range(max_iters):
        print('K-Means iteration #', iteration, ' out of ', max_iters)

        # Assignment step.
        idx = findClosestCentroids(X, centroids)

        if plot_progress:
            plotProgresskMeans(X, centroids, previous_centroids, idx,
                               num_centroids, iteration)

        # Remember the current centroids, then run the update step.
        previous_centroids, centroids = (
            centroids, computeCentroids(X, idx, num_centroids))

    return centroids, idx
Esempio n. 3
0
def runkMeans(X, initial_centroids, max_iters, plot_progress=False):
    """Run K-Means on the data matrix X, where each row is one example.

    Args:
        X: data matrix, one example per row.
        initial_centroids: array of starting centroids, one per row.
        max_iters: number of K-Means iterations to execute.
        plot_progress: when truthy, a figure is opened up front and
            plotProgresskMeans is called after every iteration with the
            full centroid history. Defaults to False, as the description
            of this routine has always stated.

    Returns:
        ``(centroids, idx)``: the most recent centroids and the assignments
        from the last findClosestCentroids call (an empty list when
        ``max_iters`` is 0).
    """
    if plot_progress:
        plt.figure()

    K = initial_centroids.shape[0]
    # Centroid history; the last element is always the current estimate.
    centroids = [initial_centroids]
    idx = []

    for _ in range(max_iters):
        idx = findClosestCentroids(X, centroids[-1])
        centroids.append(computeCentroids(X, idx, K))

        if plot_progress:
            plotProgresskMeans(X, centroids, K, idx)

    return centroids[-1], idx
Esempio n. 4
0
def runkMeans(X, initial_centroids, max_iters, plot_progress=False):
    """Run K-Means on X for max_iters iterations.

    Args:
        X: 2-D data matrix, one example per row.
        initial_centroids: starting centroids, one per row.
        max_iters: number of iterations to run.
        plot_progress: when truthy, plotProgresskMeans is called on every
            iteration with a 1-based iteration number.

    Returns:
        ``(centroids, idx)``: the final centroids and the last assignments.
    """
    num_examples, _ = X.shape
    num_centroids = initial_centroids.shape[0]
    idx = np.zeros((num_examples, 1))
    centroids = previous_centroids = initial_centroids

    for step in range(1, max_iters + 1):
        # Assignment step.
        idx = findClosestCentroids(X, centroids)

        if plot_progress:
            plotProgresskMeans(X, centroids, previous_centroids, idx,
                               num_centroids, step)
            previous_centroids = centroids

        # Update step.
        centroids = computeCentroids(X, idx, num_centroids)

    # Called regardless of plot_progress.
    plt.show()
    return centroids, idx
Esempio n. 5
0
def runKMeans(myX, initial_centroids, K, n_iter):
    """Iterate K-Means n_iter times and record the centroids along the way.

    Args:
        myX: data passed through to findClosestCentroids/computeCentroids.
        initial_centroids: centroids used for the first iteration.
        K: number of centroids, forwarded to computeCentroids.
        n_iter: number of iterations to run.

    Returns:
        ``(idxs, centroid_history)``: the assignments from the last
        iteration (``None`` when ``n_iter`` is 0) and a list with the
        centroids that were in use at the start of each iteration. The
        centroids computed by the final update are not appended.
    """
    centroid_history = []
    current_centroids = initial_centroids
    # Defined up front so that a zero-iteration run returns cleanly.
    idxs = None
    # range works on both Python 2 and Python 3, unlike xrange.
    for _ in range(n_iter):
        centroid_history.append(current_centroids)
        idxs = findClosestCentroids(myX, current_centroids)
        current_centroids = computeCentroids(myX, idxs, K)

    return idxs, centroid_history
Esempio n. 6
0
def runkMeans(X, initial_centroids, max_iters, plot_progress=False):
    """Run K-Means on X (one example per row) for max_iters iterations.

    Args:
        X: 2-D data matrix.
        initial_centroids: sequence of starting centroids.
        max_iters: number of iterations to run.
        plot_progress: when truthy, progress is drawn on a single figure,
            cycling through three colours (the rows of a 3x3 identity
            matrix) from one iteration to the next. Defaults to False.

    Returns:
        ``(centroids, idx)``: the final centroids and the last assignments.
    """
    if plot_progress:
        fig = plt.figure()
        ax = plt.gca()

    num_examples, _ = X.shape
    num_centroids = len(initial_centroids)
    centroids = previous_centroids = initial_centroids
    idx = np.zeros(num_examples)
    palette = np.eye(3)
    palette_rows = itertools.cycle(range(3))

    for iteration in range(max_iters):
        print('K-Means iteration %d/%d...' % (iteration, max_iters))

        # Assignment step.
        idx = findClosestCentroids(X, centroids)

        if plot_progress:
            color = palette[next(palette_rows)]
            plotProgresskMeans(X, np.array(centroids),
                               np.array(previous_centroids), idx,
                               num_centroids, iteration, color, ax)
            previous_centroids = centroids
            show()
            fig.canvas.draw()

        # Update step.
        centroids = computeCentroids(X, idx, num_centroids)

    return centroids, idx
Esempio n. 7
0
def runkMeans(X, initial_centroids, max_iters, plot_progress=False):
    """Run K-Means on X (one example per row) for max_iters iterations.

    Args:
        X: 2-D data matrix.
        initial_centroids: starting centroids, one per row.
        max_iters: number of iterations to run.
        plot_progress: when truthy, the progress is plotted on every
            iteration and the function waits for <Enter> before
            continuing. Defaults to False.

    Returns:
        ``(centroids, idx)``: the final centroids and the last assignments.
    """
    num_examples, _ = X.shape
    num_centroids = initial_centroids.shape[0]
    centroids = previous_centroids = initial_centroids
    idx = np.zeros((num_examples, 1))

    # Interactive mode is switched on so the figure can be updated between
    # iterations; see
    # http://stackoverflow.com/a/4098938/583834
    # http://matplotlib.org/faq/usage_faq.html#what-is-interactive-mode
    if plot_progress:
        plt.close()
        plt.ion()

    progress_template = '\rK-Means iteration {:d}/{:d}...'
    for iteration in range(max_iters):
        # The carriage return rewrites the same console line each time.
        sys.stdout.write(progress_template.format(iteration + 1, max_iters))
        sys.stdout.flush()

        # Assignment step.
        idx = findClosestCentroids(X, centroids)

        if plot_progress:
            plotProgresskMeans(X, centroids, previous_centroids, idx,
                               num_centroids, iteration)
            previous_centroids = centroids
            input('Program paused. Press <Enter> to continue...')

        # Update step.
        centroids = computeCentroids(X, idx, num_centroids)

    # Finish the progress line.
    print('\n')

    return centroids, idx
Esempio n. 8
0
def runkMeans(X, initial_centroids, max_iters, plot_progress=False):
    """Run K-Means on the data matrix X, where each row is one example.

    Args:
        X: 2-D data matrix.
        initial_centroids: starting centroids, one per row.
        max_iters: number of K-Means iterations to execute.
        plot_progress: when truthy, the progress is plotted on every
            iteration and the function waits for <Enter> before
            continuing. Defaults to False.

    Returns:
        ``(centroids, idx)``: the centroids after the last update and the
        assignments from the last findClosestCentroids call (a zero vector
        of length m when no iteration ran).
    """
    if plot_progress:
        fig = figure()
        hold(True)
        # raw_input only exists on Python 2; input is the Python 3 name.
        try:
            wait_for_enter = raw_input
        except NameError:
            wait_for_enter = input

    # Initialize values
    m, n = shape(X)
    K = size(initial_centroids, 0)
    centroids = initial_centroids
    previous_centroids = centroids
    idx = zeros(m)

    # Run K-Means
    for i in range(max_iters):

        # Single-argument print(...) behaves the same on Python 2 and 3.
        print('K-Means iteration %d/%d...' % (i + 1, max_iters))

        # For each example in X, assign it to the closest centroid
        idx = findClosestCentroids(X, centroids)

        # Optionally, plot progress here
        if plot_progress:
            plotProgresskMeans(X, centroids, previous_centroids, idx, K, i)
            previous_centroids = centroids
            fig.show()
            print('Press enter to continue.')
            wait_for_enter()

        # Given the memberships, compute new centroids
        centroids = computeCentroids(X, idx, K)

    # NOTE(review): the original comment here read "hold off", yet the call
    # leaves hold enabled; kept unchanged to preserve behaviour.
    if plot_progress:
        hold(True)

    return centroids, idx
Esempio n. 9
0
def runkMeans(X, initial_centroids, max_iters, plot_progress=False):
    """Run K-Means on the data matrix X, where each row is one example.

    Args:
        X: 2-D data matrix.
        initial_centroids: starting centroids, one per row.
        max_iters: number of K-Means iterations to execute.
        plot_progress: when truthy, the progress is plotted on every
            iteration and the function waits for <Enter> before
            continuing. Defaults to False.

    Returns:
        ``(centroids, idx)``: the centroids after the last update and the
        assignments from the last findClosestCentroids call (a zero vector
        of length m when no iteration ran).
    """
    if plot_progress:
        fig = figure()
        hold(True)
        # raw_input only exists on Python 2; input is the Python 3 name.
        try:
            wait_for_enter = raw_input
        except NameError:
            wait_for_enter = input

    # Initialize values
    m, n = shape(X)
    K = size(initial_centroids, 0)
    centroids = initial_centroids
    previous_centroids = centroids
    idx = zeros(m)

    # Run K-Means
    for i in range(max_iters):

        # Single-argument print(...) behaves the same on Python 2 and 3.
        print('K-Means iteration %d/%d...' % (i + 1, max_iters))

        # For each example in X, assign it to the closest centroid
        idx = findClosestCentroids(X, centroids)

        # Optionally, plot progress here
        if plot_progress:
            plotProgresskMeans(X, centroids, previous_centroids, idx, K, i)
            previous_centroids = centroids
            fig.show()
            print('Press enter to continue.')
            wait_for_enter()

        # Given the memberships, compute new centroids
        centroids = computeCentroids(X, idx, K)

    # NOTE(review): the original comment here read "hold off", yet the call
    # leaves hold enabled; kept unchanged to preserve behaviour.
    if plot_progress:
        hold(True)

    return centroids, idx
Esempio n. 10
0
def runkMeans(X, initial_centroids, max_iters, K):
    """Run K-Means for max_iters iterations, plotting as it goes.

    On every iteration the centroids in use are appended to a history list
    that is handed to plotProgresskMeans. After the loop the first two
    columns of X are scattered, coloured by the final assignments, and the
    figure is shown.

    Returns:
        ``(centroids, idx)``: the final centroids and the last assignments
        (``None`` when ``max_iters`` is 0).
    """
    idx = None
    centroid_history = []
    centroids = initial_centroids

    for iteration in range(max_iters):
        idx = findClosestCentroids(X, centroids)
        centroid_history.append(centroids)
        centroids = computeCentroids(X, idx, K)
        plotProgresskMeans(iteration, centroid_history, K)

    first_feature, second_feature = X[:, 0], X[:, 1]
    plt.scatter(first_feature, second_feature, c=idx, marker='o')
    plt.show()

    return centroids, idx
def runkMeans(X, initial_centroids, max_iters, plot_progress=False):
    """Run K-Means on X (one example per row) for max_iters iterations.

    Args:
        X: 2-D data matrix.
        initial_centroids: sequence of starting centroids.
        max_iters: number of iterations to run.
        plot_progress: when truthy, a figure is opened and the progress is
            drawn every iteration, cycling through three colours (the rows
            of a 3x3 identity matrix). Defaults to False.

    Returns:
        ``(centroids, idx)``: the final centroids and the last assignments.
    """
    # Open a figure if we are plotting progress
    if plot_progress:
        plt.figure()

    # Initialize values
    m, n = X.shape
    K = len(initial_centroids)
    centroids = initial_centroids
    previous_centroids = centroids
    idx = np.zeros(m)
    c = itertools.cycle('012')
    rgb = np.eye(3)

    # Run K-Means
    for i in range(max_iters):

        # Single-argument print(...) behaves the same on Python 2 and 3.
        print('K-Means iteration %d/%d...' % (i, max_iters))

        # This variant of findClosestCentroids returns a pair; only the
        # second element (the assignments) is used here.
        _, idx = findClosestCentroids(X, centroids)

        # Optionally, plot progress here
        if plot_progress:
            color = rgb[int(next(c))]
            plotProgresskMeans(X, np.array(centroids),
                               np.array(previous_centroids), idx, K, i, color)
            previous_centroids = centroids

        # Given the memberships, compute new centroids
        centroids = computeCentroids(X, idx, K)

    return centroids, idx
Esempio n. 12
0
def runkMeans(X, initial_centroids, max_iters, plot_progress):
    """Run K-Means on X for max_iters iterations.

    Args:
        X: 2-D data matrix, one example per row.
        initial_centroids: starting centroids, one per row.
        max_iters: number of iterations to run.
        plot_progress: when truthy, the data is plotted once up front via
            plotDataPoint and plotProgresskMeans is called every iteration.

    Returns:
        ``(centroids, idx)``: the final centroids and the last assignments.
    """
    num_examples, _ = np.shape(X)
    num_centroids = np.size(initial_centroids, axis=0)
    centroids = previous_centroids = initial_centroids
    idx = np.zeros((num_examples, 1))

    if plot_progress:
        plotDataPoint(X, idx, num_centroids)

    for iteration in range(max_iters):
        # Assignment step.
        idx = findClosestCentroids(X, centroids)

        if plot_progress:
            plotProgresskMeans(X, centroids, previous_centroids, idx,
                               num_centroids, iteration)
            previous_centroids = centroids

        # Update step.
        centroids = computeCentroids(X, idx, num_centroids)

    # Called regardless of plot_progress.
    plt.show()
    return centroids, idx
Esempio n. 13
0
def runkMeans(X, initial_centroids, max_iters, plot_progress):
    """Run a manual K-Means implementation for max_iters iterations.

    Args:
        X: data matrix, one example per row; when plotting, the first two
            columns are used as the x and y coordinates.
        initial_centroids: starting centroids, one per row.
        max_iters: number of iterations to run.
        plot_progress: when truthy, the data points and current centroids
            are scattered on a shared axes every iteration and the function
            waits for user input before continuing.

    Returns:
        ``(centroids, indicies)``: the final centroids and the last
        assignments (an all-zero int32 vector when no iteration ran).
    """
    K = initial_centroids.shape[0]
    indicies = np.zeros(X.shape[0], dtype=np.int32)
    centroids = initial_centroids

    if plot_progress:
        # Set up one interactive figure that is reused by every iteration.
        pyplot.figure()
        pyplot.ion()
        ax = pyplot.axes()
        ax.set_title('K-means centroid evolution')
        pyplot.ylabel('Feature 2')
        pyplot.xlabel('Feature 1')
        pyplot.show()
        # raw_input only exists on Python 2; input is the Python 3 name.
        try:
            wait_for_key = raw_input
        except NameError:
            wait_for_key = input

    # range works on both Python 2 and Python 3, unlike xrange.
    for i in range(max_iters):
        # Assignment step: attach every example to its closest centroid.
        indicies = findClosestCentroids(X, centroids)

        if plot_progress:
            # One pre-formatted string keeps the output identical to the
            # former multi-argument print statement on any Python version.
            print('K-Means iteration  {} / {}  ...\n'.format(i, max_iters))
            ax.scatter(X[:, 0], X[:, 1], c=indicies, label='Data Points')
            ax.scatter(centroids[:, 0],
                       centroids[:, 1],
                       marker='x',
                       c='r',
                       linewidth=2,
                       label='Centroids')
            # Add the legend only once so entries are not duplicated.
            if i == 0:
                ax.legend()
            pyplot.draw()
            wait_for_key("Press any key to continue")

        # Update step: recompute the centroids from the new memberships.
        centroids = computeCentroids(X, indicies, K)

    return centroids, indicies
Esempio n. 14
0
def runkMeans(X, initial_centroids, max_iters, plot_progress=False):
    """Run K-Means on X (one example per row) for max_iters iterations.

    Args:
        X: 2-D data matrix.
        initial_centroids: starting centroids, one per row.
        max_iters: number of iterations to run.
        plot_progress: when truthy, plotProgresskMeans is called every
            iteration with all centroids accumulated so far and the figure
            is shown. Defaults to False.

    Returns:
        ``(centroids, idx)``: the final centroids and the last assignments.
    """
    import numpy as np
    from findClosestCentroids import findClosestCentroids
    from plotProgresskMeans import plotProgresskMeans
    from computeCentroids import computeCentroids
    import matplotlib.pyplot as plt

    num_examples, _ = np.shape(X)
    num_centroids = np.size(initial_centroids, 0)
    idx = np.zeros((num_examples, 1))
    centroids = initial_centroids
    # Every generation of centroids, stacked row-wise in order.
    centroid_trail = centroids

    for iteration in range(max_iters):
        print('K-Means iteration %d/%d...\n' % (iteration, max_iters))

        # Assignment step.
        idx = findClosestCentroids(X, centroids)

        if plot_progress:
            plotProgresskMeans(X, centroid_trail, idx, num_centroids,
                               iteration)
            plt.show()

        # Update step, then extend the trail with the new centroids.
        centroids = computeCentroids(X, idx, num_centroids)
        centroid_trail = np.append(centroid_trail, centroids, axis=0)

    return centroids, idx
Esempio n. 15
0
def runkMeans(name, X, initial_centroids, max_iters, plot_progress=False):
    """Run K-Means on X and optionally save one progress image per iteration.

    Args:
        name: label inserted into the saved file names
            (``figure<name>.<iteration>.png``).
        X: data matrix, one example per row.
        initial_centroids: starting centroids, one per row.
        max_iters: number of iterations to run.
        plot_progress: when truthy, each iteration is plotted on a fresh
            figure that is saved to disk. Defaults to False.

    Returns:
        ``(centroids, idx)``: the final centroids and the last assignments
        (``None`` when ``max_iters`` is 0).
    """
    K = initial_centroids.shape[0]
    centroids = initial_centroids
    # Centroids and assignments of every iteration so far, oldest first.
    previous_centroids = []
    idx_history = []
    idx = None

    for i in range(max_iters):
        print('K-Means iteration %d/%d...' % (i, max_iters))

        # Assignment step.
        idx = findClosestCentroids(X, centroids)
        idx_history.append(idx)
        previous_centroids.append(centroids)

        if plot_progress:
            fig = plt.figure()
            plotProgresskMeans(X, centroids, previous_centroids, idx_history, K, i)
            plt.savefig('figure%s.%03d.png' % (name, i))
            # Close the figure once saved; otherwise one figure per
            # iteration stays open for the lifetime of the process.
            plt.close(fig)

        # Update step.
        centroids = computeCentroids(X, idx, K)

    return (centroids, idx)
Esempio n. 16
0
def runKmeans(X, centroids, max_iters, plot_progress):
    """Run K-Means on X for max_iters iterations.

    Args:
        X: 2-D data matrix, one example per row.
        centroids: starting centroids, one per row.
        max_iters: number of iterations to run.
        plot_progress: when truthy, the progress is plotted on every
            iteration and the function waits for <Enter> before continuing.

    Returns:
        ``(centroids, idx)``: the final centroids and the last assignments.
    """
    num_examples, _ = X.shape
    num_centroids = centroids.shape[0]
    previous_centroids = centroids
    idx = np.zeros([num_examples, 1])

    for iteration in range(max_iters):
        print('K-Means iteration {}/{}...\n'.format(iteration + 1, max_iters))

        # Assignment step.
        idx = findClosestCentroids(X, centroids)

        if plot_progress:
            plotProgressKmeans(X, centroids, previous_centroids, idx,
                               num_centroids, iteration)
            previous_centroids = centroids
            input('Press enter to continue.\n')

        # Update step.
        centroids = computeCentroids(X, idx, num_centroids)

    return (centroids, idx)
def runkMeans(data, initial_centroids, max_iters, plot_progress):
    """Run K-Means on data for max_iters iterations.

    Args:
        data: 2-D data set, one example per row.
        initial_centroids: sequence of starting centroids.
        max_iters: number of iterations to run.
        plot_progress: when truthy, the data points are plotted via
            plotDataPoints on every iteration.

    Returns:
        ``(centroids, idx)``: the final centroids and the last assignments
        (a list of m zeros when no iteration ran).
    """
    # Initialize values
    m, n = np.shape(data)
    print(m)
    print(n)
    k = len(initial_centroids)
    centroids = initial_centroids
    idx = [0] * m

    for i in range(max_iters):
        # The message used to carry literal '#d' placeholders that were
        # never substituted; format the 1-based iteration counter properly.
        print('K-Means iteration %d/%d...\n' % (i + 1, max_iters))

        # Assignment step.
        idx = findClosestCentroids(data, centroids)

        if plot_progress:
            plotDataPoints(data, idx, k)
            print('Press enter to continue.\n')

        # Given the memberships, compute new centroids
        centroids = computeCentroids(data, idx, k)

    return centroids, idx
Esempio n. 18
0
def runkMeans(X, initial_centroids, max_iters, plot_progress=False):
    """Run K-Means on X for max_iters iterations.

    Args:
        X: 2-D data matrix, one example per row.
        initial_centroids: starting centroids, one per row.
        max_iters: number of iterations to run.
        plot_progress: when truthy, plotProgresskMeans is called on every
            iteration with a 1-based iteration number. Defaults to False.

    Returns:
        ``(centroids, idx)``: the final centroids and the last assignments.
    """
    num_examples, _ = X.shape
    num_centroids = initial_centroids.shape[0]
    centroids = previous_centroids = initial_centroids
    idx = np.zeros((num_examples, 1))

    for completed in range(max_iters):
        step = completed + 1  # 1-based iteration number
        print('K-Means iteration %d/%d' % (step, max_iters))

        # Assignment step.
        idx = findClosestCentroids(X, centroids)

        if plot_progress:
            plotProgresskMeans(X, centroids, previous_centroids, idx,
                               num_centroids, step)
            previous_centroids = centroids

        # Update step.
        centroids = computeCentroids(X, idx, num_centroids)

    return centroids, idx
def runkMeans(X, initial_centroids, max_iters, plot_progress=None):
    """Run K-Means on X (one example per row) for max_iters iterations.

    Args:
        X: 2-D data matrix.
        initial_centroids: starting centroids, one per row.
        max_iters: number of iterations to run.
        plot_progress: when truthy, a figure is opened, the progress is
            plotted every iteration and the function waits for <Enter>
            before continuing. ``None`` (the default) means False.

    Returns:
        ``(centroids, idx)``: the final centroids and the last assignments
        (an all-zero integer vector when no iteration ran).
    """
    # Identity test: "== None" would dispatch to a custom/array __eq__.
    if plot_progress is None:
        plot_progress = False

    # Open a figure if we are plotting progress
    if plot_progress:
        plt.figure()

    # Initialize values
    m = X.shape[0]
    n = X.shape[1]
    K = initial_centroids.shape[0]
    centroids = initial_centroids
    previous_centroids = centroids
    idx = np.zeros(m).astype(int)

    # Run K-Means
    for i in range(max_iters):

        # Output progress
        print('K-Means iteration %d/%d...\n' % (i, max_iters))

        # For each example in X, assign it to the closest centroid
        idx = findClosestCentroids(X, centroids)

        # Optionally, plot progress and wait for the user
        if plot_progress:
            plotProgresskMeans(X, centroids, previous_centroids, idx, K, i)
            previous_centroids = centroids.copy()
            input('Press enter to continue.\n')

        # Given the memberships, compute new centroids
        centroids = computeCentroids(X, idx, K)

    return centroids, idx
Esempio n. 20
0
def runkMeans(X, initial_centroids, max_iters, plot_progress=False):
    """Run the K-Means algorithm on the object-feature matrix X.

    Args:
        X: object-feature matrix; when plotting, the first two columns are
            used as the x and y coordinates.
        initial_centroids: initial positions of the means, one per row.
        max_iters: number of iterations of the algorithm.
        plot_progress: when truthy, the data and the path travelled by
            each centroid are drawn and shown once the iterations finish.
            Defaults to False.

    Returns:
        ``(centroids, idx)``: the final centroids and the last assignments
        (``None`` when ``max_iters`` is 0).
    """
    K = initial_centroids.shape[0]
    centroids = initial_centroids
    previous_centroids = centroids
    # Defined up front so that a zero-iteration run returns cleanly.
    idx = None

    if plot_progress:
        plt.plot(X[:, 0], X[:, 1], 'bo')

    for _ in range(max_iters):
        # First step of K-Means: assign each example to a centroid.
        idx = findClosestCentroids(X, centroids)

        if plot_progress:
            # Draw the segment each centroid moved along since last time.
            for j in range(K):
                plt.plot([centroids[j, 0], previous_centroids[j, 0]],
                         [centroids[j, 1], previous_centroids[j, 1]], 'r-x')

        # Remember the current centroids, then recompute them (second step
        # of K-Means).
        previous_centroids = centroids
        centroids = computeCentroids(X, idx, K)

    if plot_progress:
        plt.xlabel('Первый признак')
        plt.ylabel('Второй признак')
        plt.grid()
        plt.show()

    return (centroids, idx)
Esempio n. 21
0
def runkMeans(X, initial_centroids, max_iters, plot_progress=False):
    """Run K-Means on the data matrix X, where each row is one example.

    Args:
        X: 2-D data matrix.
        initial_centroids: starting centroids, one per row.
        max_iters: number of iterations to run.
        plot_progress: when truthy, plotProgresskMeans is called on every
            iteration. Defaults to False.

    Returns:
        ``(centroids, idx)``: the centroids after the last update and the
        assignments from the last findClosestCentroids call (an all-zero
        column vector if no iteration ran).
    """
    row_count, _ = np.shape(X)
    cluster_count = np.size(initial_centroids, 0)
    current = initial_centroids
    previous = current
    idx = np.zeros((row_count, 1))

    iteration = 0
    while iteration < max_iters:
        print('K-Means iteration #', iteration, ' out of ', max_iters)

        # Assign every example to its closest centroid.
        idx = findClosestCentroids(X, current)

        if plot_progress:
            plotProgresskMeans(X, current, previous, idx, cluster_count,
                               iteration)

        # Keep the old centroids, then compute the new ones.
        previous = current
        current = computeCentroids(X, idx, cluster_count)
        iteration += 1

    return current, idx
def output(partId):
    """Build the formatted output string for one part of the exercise.

    Fixed sine/cosine based test matrices are generated and fed to the
    function that belongs to the requested part; the result is flattened
    in column-major order and rendered through ``formatter``.

    Args:
        partId: part identifier as a string, one of '1' to '5'.

    Returns:
        Whatever ``formatter`` returns for the selected part.

    Raises:
        ValueError: if partId is not one of the known parts (previously
            this surfaced as an UnboundLocalError on ``return``).
    """
    # Deterministic test cases
    X = np.sin(np.arange(1, 166)).reshape(15, 11, order='F')
    Z = np.cos(np.arange(1, 122)).reshape(11, 11, order='F')
    C = Z[:5, :]
    idx = np.arange(1, 16) % 3
    if partId == '1':
        # Shift the assignments by one before formatting.
        idx = findClosestCentroids(X, C) + 1
        out = formatter('%0.5f ', idx.ravel('F'))
    elif partId == '2':
        centroids = computeCentroids(X, idx, 3)
        out = formatter('%0.5f ', centroids.ravel('F'))
    elif partId == '3':
        U, S = pca(X)
        # Absolute values are reported for U and the diagonal of S.
        out = formatter(
            '%0.5f ', np.abs(np.hstack([U.ravel('F'),
                                        np.diag(S).ravel('F')])))
    elif partId == '4':
        X_proj = projectData(X, Z, 5)
        out = formatter('%0.5f ', X_proj.ravel('F'))
    elif partId == '5':
        X_rec = recoverData(X[:, :5], Z, 5)
        out = formatter('%0.5f ', X_rec.ravel('F'))
    else:
        raise ValueError('Unknown partId: %r' % (partId,))
    return out
Esempio n. 23
0
## ================= Part 1: Find Closest Centroids ====================
#  The K-Means learning algorithm is divided into two functions --
#  findClosestCentroids and computeCentroids. This part exercises
#  findClosestCentroids on an example dataset.
#
print('Finding closest centroids.')

# Load the example dataset; the data matrix is stored under the key 'X'
data = scipy.io.loadmat('ex7data2.mat')
X = data['X']

# Select an initial set of centroids
K = 3  # 3 centroids
initial_centroids = np.array([[3, 3], [6, 2], [8, 5]])

# Find the closest centroid for every example using initial_centroids
idx = findClosestCentroids(X, initial_centroids)

# Show the assignments of the first three examples next to the values the
# script expects
print('Closest centroids for the first 3 examples:')
print(idx[0:3].tolist())
print('(the closest centroids should be 0, 2, 1 respectively)')

# Block until the user presses <Enter>
input('Program paused. Press <Enter> to continue...')

## ===================== Part 2: Compute Means =========================
#  With the closest-centroid assignments available, this part exercises
#  computeCentroids.
#
print('Computing centroids means.')

#  Compute means based on the closest centroids found in the previous part.
centroids = computeCentroids(X, idx, K)
Esempio n. 24
0
# ================= Part 1: Find Closest Centroids ====================
# To help you implement K-Means, we have divided the learning algorithm
# into two functions -- findClosestCentroids and computeCentroids. In this
# part, you should complete the code in the findClosestCentroids function.
print('Finding closest centroids.')

# Load the example dataset; the data matrix is stored under the key 'X'
data = loadmat('ex7data2.mat')
X = data['X']

# Select an initial set of centroids
K = 3
initial_centroids = np.array([[3, 3], [6, 2], [8, 5]])

# Find the closest centroids for the examples using the initial_centroids
idx = findClosestCentroids(X, initial_centroids)
# Show the assignments of the first three examples next to the values the
# script expects
print('Closest centroids for the first 3 examples:')
print(idx[:3].ravel())
print('(the closest centroids should be 1, 3, 2 respectively)\n')


# ===================== Part 2: Compute Means =========================
# After implementing the closest centroids function, you should now
# complete the computeCentroids function.
print('Computing centroids means.')

# Compute means based on the closest centroids found in the previous part.
centroids = computeCentroids(X, idx, K)

print('Centroids computed after initial finding of closest centroids:')
print(centroids)
Esempio n. 25
0
import time as time

# Step 1: show K-means in action.
# Every print below passes one pre-formatted string so the script emits the
# same text on Python 2 and Python 3 (it used Python 2 print statements).
print('Finding closest centroids.\n\n')

# Load the MATLAB file containing the 2-D data
mat_contents = sio.loadmat('ex7data2.mat')
X = mat_contents['X']

# Select an initial set of centroids and define the number of iterations
K = 3  # 3 centroids
initial_centroids = np.array([[3, 3], [6, 2], [8, 5]], dtype=np.float32)
max_iters = 10

# Find the closest centroids for the examples using the initial_centroids
indicies = findClosestCentroids(X, initial_centroids)

print('Closest centroids for the first 3 examples: \n')
print('{} {} {}'.format(indicies[0], indicies[1], indicies[2]))
print('\n(the closest centroids should be 0, 2, 1 respectively)\n')
# raw_input only exists on Python 2; input is the Python 3 name.
try:
    wait_for_key = raw_input
except NameError:
    wait_for_key = input
wait_for_key("Press any key to continue")

print('\nComputing centroids means.\n\n')

# Compute means based on the closest centroids found in the previous part.
centroids = computeCentroids(X, indicies, K)

print('Centroids computed after initial finding of closest centroids: \n')
print('{} \n{} \n{}'.format(centroids[0], centroids[1], centroids[2]))
print('\nthe centroids should be\n')
print('   [ 2.428301 3.157924 ]\n')
def runkMeans(X, initial_centroids, max_iters, plot_progress=False):
    """Run the K-Means algorithm on data matrix X, one example per row.

    Each iteration assigns every example to a centroid with
    fcc.findClosestCentroids and then recomputes the centroids with
    cc.computeCentroids.

    Args:
        X: 2-D data matrix; each row is a single example.
        initial_centroids: starting centroids, one per row. The number of
            rows determines K.
        max_iters: total number of K-Means iterations to execute.
        plot_progress: when true, plot the state after each assignment step
            with ppkm.plotProgresskMeans and wait for Enter before
            continuing. Defaults to False.

    Returns:
        A tuple (centroids, idx): the centroids produced by the last update
        step and the assignments returned by the last call to
        fcc.findClosestCentroids. When max_iters is 0 the loop never runs,
        so this is initial_centroids and an all-zero (m, 1) array.
    """
    # Unpacking two values also rejects X that is not 2-D.
    m, _ = X.shape
    K = initial_centroids.shape[0]
    centroids = initial_centroids
    previous_centroids = centroids
    idx = np.zeros((m, 1))

    # If plotting, set up the space for interactive graphs
    # http://stackoverflow.com/a/4098938/583834
    # http://matplotlib.org/faq/usage_faq.html#what-is-interactive-mode
    if plot_progress:
        plt.close()
        plt.ion()
        # raw_input only exists on Python 2; Python 3 renamed it to input.
        try:
            wait_for_enter = raw_input
        except NameError:
            wait_for_enter = input

    # range (not xrange) so the loop runs on both Python 2 and Python 3
    for i in range(max_iters):

        # '\r' rewrites the same console line on every iteration
        sys.stdout.write(
            '\rK-Means iteration {:d}/{:d}...'.format(i + 1, max_iters))
        sys.stdout.flush()

        # For each example in X, assign it to the closest centroid
        idx = fcc.findClosestCentroids(X, centroids)

        # Optionally, plot progress here
        if plot_progress:
            ppkm.plotProgresskMeans(
                X, centroids, previous_centroids, idx, K, i)
            previous_centroids = centroids
            wait_for_enter('Press enter to continue.')

        # Given the memberships, compute new centroids
        centroids = cc.computeCentroids(X, idx, K)

    # Terminate the '\r' progress line
    print('\n')

    return centroids, idx
Esempio n. 27
0
def ex7():
    """Run the Exercise 7 K-Means walkthrough from start to finish.

    The parts run in order: assign the example points to fixed starting
    centroids, recompute the centroid means, run the full K-Means loop on
    the example dataset, cluster the pixels of ``bird_small.png`` into 16
    colours, and save the original next to the compressed image as
    ``figure3.png``.

    Uses ``findClosestCentroids``, ``computeCentroids``,
    ``kMeansInitCentroids``, ``runkMeans`` and ``formatter`` from the
    enclosing module. Takes no arguments and returns nothing.
    """
    # ---------------- Part 1: find closest centroids ----------------
    print('Finding closest centroids.\n')

    # Example dataset
    points = scipy.io.loadmat('ex7data2.mat')['X']

    # Three fixed starting centroids
    K = 3
    start_centroids = np.array([[3, 3], [6, 2], [8, 5]])

    idx = findClosestCentroids(points, start_centroids)

    print('Closest centroids for the first 3 examples: ')
    # NOTE(review): the + 1 presumably converts 0-based assignments to the
    # 1-based numbering quoted in the expected output below -- confirm.
    print(formatter(' %d', idx[:3] + 1))
    print('\n(the closest centroids should be 1, 3, 2 respectively)')

    print('Program paused. Press enter to continue.')

    # -------------------- Part 2: compute means --------------------
    print('\nComputing centroids means.\n')

    # Means of the groups found in part 1
    centroids = computeCentroids(points, idx, K)

    print('Centroids computed after initial finding of closest centroids: ')
    print(centroids)
    print('\n(the centroids should be')
    print('   [ 2.428301 3.157924 ]')
    print('   [ 5.813503 2.633656 ]')
    print('   [ 7.119387 3.616684 ]\n')

    print('Program paused. Press enter to continue.')

    # ----------------- Part 3: K-Means clustering ------------------
    print('\nRunning K-Means clustering on example dataset.\n')

    # The example dataset is loaded again for this part
    points = scipy.io.loadmat('ex7data2.mat')['X']

    K = 3
    max_iters = 10

    # Fixed starting centroids keep this part reproducible; part 4 uses
    # kMeansInitCentroids instead.
    start_centroids = np.array([[3, 3], [6, 2], [8, 5]])

    # The trailing True asks runkMeans to plot its progress
    centroids, idx = runkMeans('1', points, start_centroids, max_iters, True)
    print('\nK-Means Done.\n')

    print('Program paused. Press enter to continue.')

    # ------------- Part 4: K-Means clustering on pixels ------------
    print('\nRunning K-Means clustering on pixels from an image.\n')

    # Read the bird image and scale it down by 255 (the factor is multiplied
    # back in before display)
    image = matplotlib.image.imread('bird_small.png') / 255

    # One row per pixel, three columns per row
    pixels = image.reshape(-1, 3)

    K = 16
    max_iters = 10

    # Starting centroids come from kMeansInitCentroids for this part
    start_centroids = kMeansInitCentroids(pixels, K)

    centroids, idx = runkMeans('2', pixels, start_centroids, max_iters)

    print('Program paused. Press enter to continue.')

    # ------------------ Part 5: image compression ------------------
    print('\nApplying K-Means to compress an image.\n')

    # Closest final centroid for every pixel
    idx = findClosestCentroids(pixels, centroids)

    # Swap each pixel for its centroid and restore the image shape
    recovered = centroids[idx, :]
    recovered = recovered.reshape(image.shape)

    _, axes = plt.subplots(1, 2, figsize=(8, 4))

    # Original on the left, compressed version on the right
    panels = (
        (image * 255, 'Original'),
        (recovered * 255, 'Compressed, with %d colors' % K),
    )
    for axis, (picture, title) in zip(axes, panels):
        axis.imshow(picture)
        axis.grid(False)
        axis.set_title(title)

    plt.savefig('figure3.png')

    print('Program paused. Press enter to continue.\n')
Esempio n. 28
0
# =============== Part 1. Finding the closest centroids ===============

print('Часть 1. Поиск ближайших средних')

# Load the data and take the object-feature matrix X from it
data = spi.loadmat('data.mat')
X = data['X']

# Initial set of centroids
K = 3  # number of centroids
initial_centroids = np.array([[3, 3], [6, 2], [8, 5]])

# Closest centroid for every example, given the initial centroid values
idx = findClosestCentroids(X, initial_centroids)

# Report the assignments of the first three examples
_first_three_template = (
    'Номера ближайших средних для первых трех примеров: {:.0f} {:.0f} {:.0f}'
)
print(_first_three_template.format(idx[0, 0], idx[1, 0], idx[2, 0]))

input('Программа остановлена. Нажмите Enter для продолжения ... \n')

# ============= Part 2. Computing the centroid values =============

print('Часть 2. Вычисление значений средних')

# Recompute the centroids from the assignments found in part 1
centroids = computeCentroids(X, idx, K)

print('Вычисленные значения средних:')
Esempio n. 29
0
# K-Means should start from randomly chosen centroids; kMeansInitCentroids
# supplies them here.
initial_centroids = kMeansInitCentroids(X, K)

# Cluster the rows of X
centroids, idx = runkMeans(X, initial_centroids, max_iters)

# ================= Part 5: Image Compression ======================
#  Use the clusters found by K-Means to compress the image: look up the
#  closest centroid for every row of X, then replace the row by it.

print('\nApplying K-Means to compress an image.\n\n')

# Closest final centroid for every row of X
idx = findClosestCentroids(X, centroids)

# Map each row to its centroid and restore the image dimensions in one go.
# NOTE(review): the "- 1" suggests idx holds 1-based centroid numbers --
# confirm against findClosestCentroids.
X_recovered = centroids[idx.astype(int) - 1, :].reshape(
    (img_size[0], img_size[1], 3))

# Show the original image in the left-hand panel
plt.subplot(1, 2, 1)
plt.imshow(A)
plt.title('Original')
Esempio n. 30
0
# Flatten the image into one row per pixel with three values per row
x, y = image.shape[:2]
image_formatted = np.reshape(image, (x * y, 3))

k = 16
max_iters = 10

initial_centroids = kMeansInitCentroids(image_formatted, k)
print(initial_centroids)

# Run K-Means (the final False is passed as runkMeans' fourth argument)
centroids, idx = runkMeans(image_formatted, initial_centroids, max_iters,
                           False)

# Closest final centroid for every pixel
idx = findClosestCentroids(image_formatted, centroids)

feature_size = len(image_formatted[0])
len_idx = len(idx)

# Replace every pixel by its centroid with a single integer-array lookup
# instead of copying values one by one in nested Python loops.
# NOTE(review): the "- 1" suggests idx holds 1-based centroid numbers --
# confirm against findClosestCentroids.
X_recovered = np.asarray(centroids)[np.asarray(idx).astype(int) - 1]

# Back to image dimensions, scaled by 255
X_recovered = np.reshape(X_recovered, (x, y, 3))
X_recovered = X_recovered * 255

plt.subplot(1, 2, 1)
plt.imshow(image1)
plt.title('Original')