def runKMeans(X, initial_centroids, max_iters, plot_progress):
    """Run a fixed number of K-Means iterations on X.

    Every iteration stores the current centroids in a history array,
    assigns examples with findClosestCentroids, optionally plots the
    progress in a new figure, and recomputes the centroids with
    computeCentroids.

    Args:
        X: data array; X.shape[0] is used as the number of examples.
        initial_centroids: 2-D array of starting centroids (one per row).
        max_iters: number of iterations to run.
        plot_progress: if truthy, open a figure, call plotProgressKMeans
            and show it on every iteration.

    Returns:
        Tuple (centroids, idx): the centroids after the last update and the
        assignments from the last iteration. When max_iters is 0 these are
        initial_centroids and a zero vector of length X.shape[0].
    """
    n_clusters = initial_centroids.shape[0]
    current = initial_centroids
    n_rows, n_cols = current.shape[0], current.shape[1]
    # One snapshot of the centroids per iteration, handed to the plot helper.
    trail = np.zeros((max_iters, n_rows, n_cols))
    idx = np.zeros(X.shape[0])

    for step in range(max_iters):
        print('K-Means iteration {}/{}'.format(step + 1, max_iters))
        trail[step, :] = current
        idx = findClosestCentroids(X, current)
        if plot_progress:
            plt.figure()
            plotProgressKMeans(X, trail, idx, n_clusters, step)
            plt.show()
        current = computeCentroids(X, idx, n_clusters)

    return current, idx
def runkMeans(X, initial_centroids, max_iters, plot_progress=False):
    """Run K-Means on X for max_iters iterations.

    Args:
        X: 2-D data array, one example per row.
        initial_centroids: starting centroids; its first-axis length is
            the number of clusters.
        max_iters: number of iterations to run.
        plot_progress: when truthy, plotProgresskMeans is called after
            each assignment step. Defaults to False.

    Returns:
        Tuple (centroids, idx): the centroids after the final update and
        the assignments from the final iteration. With max_iters == 0 the
        result is initial_centroids and an (m, 1) array of zeros.
    """
    n_examples, _ = np.shape(X)
    n_clusters = np.size(initial_centroids, 0)
    current = initial_centroids
    last_plotted = current
    idx = np.zeros((n_examples, 1))

    for step in range(max_iters):
        print('K-Means iteration #', step, ' out of ', max_iters)

        # Assignment step.
        idx = findClosestCentroids(X, current)

        if plot_progress:
            plotProgresskMeans(X, current, last_plotted, idx, n_clusters, step)
            last_plotted = current

        # Update step.
        current = computeCentroids(X, idx, n_clusters)

    return current, idx
def runkMeans(X, initial_centroids, max_iters, plot_progress):
    """Run K-Means on X and keep every intermediate set of centroids.

    Args:
        X: data matrix, one example per row.
        initial_centroids: array of starting centroids; shape[0] is the
            number of clusters.
        max_iters: number of iterations to run.
        plot_progress: when truthy, a figure is opened up front and
            plotProgresskMeans is called once with the full centroid
            history after the iterations finish.

    Returns:
        Tuple (centroids, idx): the last computed centroids and the
        assignments from the last iteration (an empty list when
        max_iters is 0).
    """
    if plot_progress:
        plt.figure()

    K = initial_centroids.shape[0]
    # trail[0] is the starting point; each iteration appends its update.
    trail = [initial_centroids]
    idx = []

    for _ in range(max_iters):
        idx = findClosestCentroids(X, trail[-1])
        trail.append(computeCentroids(X, idx, K))

    if plot_progress:
        plotProgresskMeans(X, trail, K, idx)

    return trail[-1], idx
def runkMeans(X, initial_centroids, max_iters, plot_progress=False):
    """Run K-Means on X for max_iters iterations.

    Args:
        X: 2-D data array, one example per row.
        initial_centroids: array of starting centroids; shape[0] is the
            number of clusters.
        max_iters: number of iterations to run.
        plot_progress: when truthy, plotProgresskMeans is called after
            each assignment step with a 1-based iteration number.

    Returns:
        Tuple (centroids, idx): centroids after the final update and the
        assignments from the final iteration. plt.show() is called before
        returning regardless of plot_progress.
    """
    n_examples, _ = X.shape
    n_clusters = initial_centroids.shape[0]
    current = initial_centroids
    last_plotted = current
    idx = np.zeros((n_examples, 1))

    for step in range(max_iters):
        # Assignment step.
        idx = findClosestCentroids(X, current)

        if plot_progress:
            plotProgresskMeans(X, current, last_plotted, idx, n_clusters,
                               step + 1)
            last_plotted = current

        # Update step.
        current = computeCentroids(X, idx, n_clusters)

    plt.show()
    return current, idx
def runKMeans(myX, initial_centroids, K, n_iter):
    """Run n_iter K-Means iterations and record the centroid history.

    Args:
        myX: data passed through to findClosestCentroids and
            computeCentroids.
        initial_centroids: centroids used for the first iteration.
        K: number of clusters, forwarded to computeCentroids.
        n_iter: number of iterations to run.

    Returns:
        Tuple (idxs, centroid_history). idxs is the assignment result of
        the last iteration, or None when n_iter is 0. centroid_history
        lists the centroids used at the start of each iteration; the
        centroids computed by the final update are not appended.
    """
    centroid_history = []
    current_centroids = initial_centroids
    # Defined up front so that n_iter == 0 returns cleanly instead of
    # raising UnboundLocalError at the return statement.
    idxs = None
    for _ in range(n_iter):
        centroid_history.append(current_centroids)
        idxs = findClosestCentroids(myX, current_centroids)
        current_centroids = computeCentroids(myX, idxs, K)
    return idxs, centroid_history
def runkMeans(X, initial_centroids, max_iters, plot_progress=False):
    """Run K-Means on X for max_iters iterations.

    Args:
        X: 2-D data array, one example per row.
        initial_centroids: sequence of starting centroids; its length is
            the number of clusters.
        max_iters: number of iterations to run.
        plot_progress: when truthy, a figure is created and each
            iteration is drawn through plotProgresskMeans using a colour
            that cycles through the rows of a 3x3 identity matrix.

    Returns:
        Tuple (centroids, idx): centroids after the final update and the
        assignments from the final iteration.
    """
    if plot_progress:
        fig = plt.figure()
        ax = plt.gca()

    n_examples, _ = X.shape
    K = len(initial_centroids)
    current = initial_centroids
    last_plotted = current
    idx = np.zeros(n_examples)

    # Row 0, 1, 2 of the identity matrix, repeated forever.
    rgb = np.eye(3)
    row_picker = itertools.cycle(range(3))

    for step in range(max_iters):
        print('K-Means iteration %d/%d...' % (step, max_iters))

        # Assignment step.
        idx = findClosestCentroids(X, current)

        if plot_progress:
            color = rgb[next(row_picker)]
            plotProgresskMeans(X, np.array(current), np.array(last_plotted),
                               idx, K, step, color, ax)
            last_plotted = current
            show()
            fig.canvas.draw()

        # Update step.
        current = computeCentroids(X, idx, K)

    return current, idx
def runkMeans(X, initial_centroids, max_iters, plot_progress=False):
    """Run K-Means on X for max_iters iterations.

    Args:
        X: 2-D data array, one example per row.
        initial_centroids: array of starting centroids; shape[0] is the
            number of clusters.
        max_iters: number of iterations to run.
        plot_progress: when truthy, matplotlib is switched to interactive
            mode, each iteration is drawn with plotProgresskMeans, and the
            loop waits for <Enter> before continuing.

    Returns:
        Tuple (centroids, idx): centroids after the final update and the
        assignments from the final iteration.
    """
    n_examples, _ = X.shape
    n_clusters = initial_centroids.shape[0]
    current = initial_centroids
    last_plotted = current
    idx = np.zeros((n_examples, 1))

    if plot_progress:
        # Start from a clean slate and draw without blocking.
        plt.close()
        plt.ion()

    for step in range(max_iters):
        # Carriage return keeps the progress report on a single line.
        sys.stdout.write(
            '\rK-Means iteration {:d}/{:d}...'.format(step + 1, max_iters))
        sys.stdout.flush()

        # Assignment step.
        idx = findClosestCentroids(X, current)

        if plot_progress:
            plotProgresskMeans(X, current, last_plotted, idx, n_clusters,
                               step)
            last_plotted = current
            input('Program paused. Press <Enter> to continue...')

        # Update step.
        current = computeCentroids(X, idx, n_clusters)

    print('\n')
    return current, idx
def runkMeans(X, initial_centroids, max_iters, plot_progress=False):
    """Run K-Means on X for max_iters iterations.

    Args:
        X: 2-D data array, one example per row.
        initial_centroids: starting centroids; the size of its first axis
            is the number of clusters.
        max_iters: number of iterations to run.
        plot_progress: when truthy, a figure is created, each iteration is
            drawn with plotProgresskMeans, and the loop waits for Enter.

    Returns:
        Tuple (centroids, idx): centroids after the final update and the
        assignments from the final iteration.
    """
    if plot_progress:
        fig = figure()
        hold(True)

    n_examples, _ = shape(X)
    n_clusters = size(initial_centroids, 0)
    current = initial_centroids
    last_plotted = current
    idx = zeros(n_examples)

    for step in range(max_iters):
        # Single-argument print: same output as a Python 2 print statement.
        print('K-Means iteration %d/%d...' % (step + 1, max_iters))

        # Assignment step.
        idx = findClosestCentroids(X, current)

        if plot_progress:
            plotProgresskMeans(X, current, last_plotted, idx, n_clusters,
                               step)
            last_plotted = current
            fig.show()
            print('Press enter to continue.')
            raw_input()

        # Update step.
        current = computeCentroids(X, idx, n_clusters)

    if plot_progress:
        # NOTE(review): the original comment here read "Hold off", yet the
        # call passes True; kept as written -- confirm the intended state.
        hold(True)

    return current, idx
def runkMeans(X, initial_centroids, max_iters, plot_progress=False):
    """Run K-Means on X for max_iters iterations.

    Args:
        X: 2-D data array, one example per row.
        initial_centroids: starting centroids; the size of its first axis
            is the number of clusters.
        max_iters: number of iterations to run.
        plot_progress: when truthy, a figure is created, each iteration is
            drawn with plotProgresskMeans, and the loop waits for Enter.

    Returns:
        Tuple (centroids, idx): centroids after the final update and the
        assignments from the final iteration.
    """
    if plot_progress:
        fig = figure()
        hold(True)

    row_count, _ = shape(X)
    K = size(initial_centroids, 0)
    centroids_now = initial_centroids
    centroids_before = centroids_now
    idx = zeros(row_count)

    for it in range(max_iters):
        # Single-argument print: same output as a Python 2 print statement.
        print('K-Means iteration %d/%d...' % (it + 1, max_iters))

        # Assign every example to a centroid.
        idx = findClosestCentroids(X, centroids_now)

        if plot_progress:
            plotProgresskMeans(X, centroids_now, centroids_before, idx, K, it)
            centroids_before = centroids_now
            fig.show()
            print('Press enter to continue.')
            raw_input()

        # Recompute the centroids from the assignments.
        centroids_now = computeCentroids(X, idx, K)

    if plot_progress:
        # NOTE(review): the original comment here read "Hold off", yet the
        # call passes True; kept as written -- confirm the intended state.
        hold(True)

    return centroids_now, idx
def runkMeans(X, initial_centroids, max_iters, K):
    """Run K-Means on X and plot the result.

    Args:
        X: 2-D data array; columns 0 and 1 are used for the scatter plot.
        initial_centroids: centroids used for the first iteration.
        max_iters: number of iterations to run.
        K: number of clusters.

    Returns:
        Tuple (centroids, idx): centroids after the final update and the
        assignments from the final iteration (None when max_iters is 0).
    """
    current = initial_centroids
    # Centroids as they were at the start of every iteration.
    trail = []
    idx = None

    for step in range(max_iters):
        idx = findClosestCentroids(X, current)
        trail.append(current)
        current = computeCentroids(X, idx, K)
        plotProgresskMeans(step, trail, K)

    # Final scatter of the data, coloured by assignment.
    plt.scatter(X[:, 0], X[:, 1], c=idx, marker='o')
    plt.show()
    return current, idx
def runkMeans(X, initial_centroids, max_iters, plot_progress=False):
    """Run K-Means on X for max_iters iterations.

    Args:
        X: 2-D data array, one example per row.
        initial_centroids: sequence of starting centroids; its length is
            the number of clusters.
        max_iters: number of iterations to run.
        plot_progress: when truthy, a figure is created and each
            iteration is drawn through plotProgresskMeans using a colour
            that cycles through the rows of a 3x3 identity matrix.

    Returns:
        Tuple (centroids, idx): centroids after the final update and the
        assignments from the final iteration.
    """
    if plot_progress:
        plt.figure()

    n_examples, _ = X.shape
    K = len(initial_centroids)
    current = initial_centroids
    last_plotted = current
    idx = np.zeros(n_examples)

    # Row 0, 1, 2 of the identity matrix, repeated forever.
    rgb = np.eye(3)
    row_picker = itertools.cycle(range(3))

    for step in range(max_iters):
        # Single-argument print: same output as a Python 2 print statement.
        print('K-Means iteration %d/%d...' % (step, max_iters))

        # Here findClosestCentroids returns a pair; only the second item
        # (the assignments) is used.
        _, idx = findClosestCentroids(X, current)

        if plot_progress:
            color = rgb[next(row_picker)]
            plotProgresskMeans(X, np.array(current), np.array(last_plotted),
                               idx, K, step, color)
            last_plotted = current

        # Update step.
        current = computeCentroids(X, idx, K)

    return current, idx
def runkMeans(X, initial_centroids, max_iters, plot_progress):
    """Run K-Means on X for max_iters iterations.

    Args:
        X: 2-D data array, one example per row.
        initial_centroids: starting centroids; the size of its first axis
            is the number of clusters.
        max_iters: number of iterations to run.
        plot_progress: when truthy, the data is drawn once with
            plotDataPoint and every iteration with plotProgresskMeans.

    Returns:
        Tuple (centroids, idx): centroids after the final update and the
        assignments from the final iteration. plt.show() is called before
        returning regardless of plot_progress.
    """
    n_examples, _ = np.shape(X)
    n_clusters = np.size(initial_centroids, axis=0)
    current = initial_centroids
    last_plotted = current
    idx = np.zeros((n_examples, 1))

    if plot_progress:
        plotDataPoint(X, idx, n_clusters)

    for step in range(max_iters):
        idx = findClosestCentroids(X, current)
        if plot_progress:
            plotProgresskMeans(X, current, last_plotted, idx, n_clusters,
                               step)
            last_plotted = current
        current = computeCentroids(X, idx, n_clusters)

    plt.show()
    return current, idx
def runkMeans(X, initial_centroids, max_iters, plot_progress):
    """Run K-Means on X, optionally drawing each iteration.

    Args:
        X: 2-D data array; columns 0 and 1 are used for plotting.
        initial_centroids: array of starting centroids; shape[0] is the
            number of clusters.
        max_iters: number of iterations to run.
        plot_progress: plotting is enabled only when this compares equal
            to True; the loop then also waits for input each iteration.

    Returns:
        Tuple (centroids, indicies): centroids after the final update and
        the assignments from the final iteration (an int32 zero vector
        when max_iters is 0).
    """
    # Read both dimensions so that a non-2-D X fails early, as before.
    _n_points, _n_dims = X.shape[0], X.shape[1]
    K = initial_centroids.shape[0]

    indicies = np.zeros(X.shape[0], dtype=np.int32)
    centroids = initial_centroids
    previous_centroids = centroids

    plotting = plot_progress == True
    if plotting:
        fig = pyplot.figure()
        pyplot.ion()
        ax = pyplot.axes()
        ax.set_title('K-means centroid evolution')
        pyplot.ylabel('Feature 2')
        pyplot.xlabel('Feature 1')
        pyplot.show()

    for i in xrange(max_iters):
        # Assignment step.
        indicies = findClosestCentroids(X, centroids)

        if plotting:
            # Joined with single spaces to match the output of the
            # multi-item Python 2 print statement this replaces.
            progress = ('K-Means iteration ', i, '/', max_iters, ' ...\n')
            print(' '.join(str(piece) for piece in progress))
            ax.scatter(X[:, 0], X[:, 1], c=indicies, label='Data Points')
            ax.scatter(centroids[:, 0], centroids[:, 1], marker='x', c='r',
                       linewidth=2, label='Centroids')
            previous_centroids = centroids
            if i == 0:
                # The legend only needs to be added once.
                ax.legend()
            pyplot.draw()
            raw_input("Press any key to continue")

        # Update step.
        centroids = computeCentroids(X, indicies, K)

    return centroids, indicies
def runkMeans(X, initial_centroids, max_iters, plot_progress=False):
    """Run K-Means on X for max_iters iterations.

    Args:
        X: 2-D data array, one example per row.
        initial_centroids: starting centroids; the size of its first axis
            is the number of clusters.
        max_iters: number of iterations to run.
        plot_progress: when truthy, each iteration is drawn with
            plotProgresskMeans and shown. Defaults to False.

    Returns:
        Tuple (centroids, idx): centroids after the final update and the
        assignments from the final iteration.
    """
    import matplotlib.pyplot as plt
    import numpy as np
    from computeCentroids import computeCentroids
    from findClosestCentroids import findClosestCentroids
    from plotProgresskMeans import plotProgresskMeans

    n_examples, _ = np.shape(X)
    K = np.size(initial_centroids, 0)
    current = initial_centroids
    # Every set of centroids seen so far, stacked along axis 0.
    stacked = current
    idx = np.zeros((n_examples, 1))

    for step in range(max_iters):
        print('K-Means iteration %d/%d...\n' % (step, max_iters))

        # Assignment step.
        idx = findClosestCentroids(X, current)

        if plot_progress:
            plotProgresskMeans(X, stacked, idx, K, step)
            plt.show()

        # Update step, then extend the stacked history.
        current = computeCentroids(X, idx, K)
        stacked = np.append(stacked, current, axis=0)

    return current, idx
def runkMeans(name, X, initial_centroids, max_iters, plot_progress=False):
    """Run K-Means on X, optionally saving one figure per iteration.

    Args:
        name: label embedded in the saved figure file names.
        X: 2-D data array, one example per row.
        initial_centroids: array of starting centroids; shape[0] is the
            number of clusters.
        max_iters: number of iterations to run.
        plot_progress: when truthy, each iteration is drawn in a new
            figure and saved as 'figure<name>.<iteration>.png'.

    Returns:
        Tuple (centroids, idx): centroids after the final update and the
        assignments from the final iteration (None when max_iters is 0).
    """
    _n_examples, _n_features = X.shape
    K = initial_centroids.shape[0]
    current = initial_centroids

    # Per-iteration history handed to the plot helper.
    centroid_trail = []
    assignment_trail = []
    idx = None

    for step in range(max_iters):
        print('K-Means iteration %d/%d...' % (step, max_iters))

        # Assignment step; record this iteration's state.
        idx = findClosestCentroids(X, current)
        assignment_trail.append(idx)
        centroid_trail.append(current)

        if plot_progress:
            # NOTE(review): a new figure is opened on every iteration and
            # never closed in this function.
            plt.figure()
            plotProgresskMeans(X, current, centroid_trail, assignment_trail,
                               K, step)
            plt.savefig('figure%s.%03d.png' % (name, step))

        # Update step.
        current = computeCentroids(X, idx, K)

    return (current, idx)
def runKmeans(X, centroids, max_iters, plot_progress):
    """Run K-Means on X for max_iters iterations.

    Args:
        X: 2-D data array, one example per row.
        centroids: array of starting centroids; shape[0] is the number of
            clusters.
        max_iters: number of iterations to run.
        plot_progress: when truthy, each iteration is drawn with
            plotProgressKmeans and the loop waits for Enter.

    Returns:
        Tuple (centroids, idx): centroids after the final update and the
        assignments from the final iteration.
    """
    n_examples, _ = X.shape
    n_clusters = centroids.shape[0]
    last_plotted = centroids
    idx = np.zeros([n_examples, 1])

    for step in range(max_iters):
        print('K-Means iteration {}/{}...\n'.format(step + 1, max_iters))

        # Assignment step.
        idx = findClosestCentroids(X, centroids)

        if plot_progress:
            plotProgressKmeans(X, centroids, last_plotted, idx, n_clusters,
                               step)
            last_plotted = centroids
            input('Press enter to continue.\n')

        # Update step.
        centroids = computeCentroids(X, idx, n_clusters)

    return (centroids, idx)
def runkMeans(data, initial_centroids, max_iters, plot_progress):
    """Run K-Means on data for max_iters iterations.

    Args:
        data: 2-D data, one example per row.
        initial_centroids: sequence of starting centroids; its length is
            the number of clusters.
        max_iters: number of iterations to run.
        plot_progress: when truthy, the assignments of each iteration are
            drawn with plotDataPoints.

    Returns:
        Tuple (centroids, idx): centroids after the final update and the
        assignments from the final iteration. With max_iters == 0 the
        result is initial_centroids and a list of zeros, one per example.
    """
    m, _ = np.shape(data)
    k = len(initial_centroids)
    centroids = initial_centroids
    idx = [0] * m

    for i in range(max_iters):
        # The original passed i and max_iters as extra print arguments
        # after a '#d/#d' template, so the counters were never
        # substituted into the message.
        print('K-Means iteration %d/%d...\n' % (i, max_iters))

        # Assignment step.
        idx = findClosestCentroids(data, centroids)

        if plot_progress:
            plotDataPoints(data, idx, k)
            print('Press enter to continue.\n')

        # Update step.
        centroids = computeCentroids(data, idx, k)

    return centroids, idx
def runkMeans(X, initial_centroids, max_iters, plot_progress=False):
    """Run K-Means on X for max_iters iterations.

    Args:
        X: 2-D data array, one example per row.
        initial_centroids: array of starting centroids; shape[0] is the
            number of clusters.
        max_iters: number of iterations to run.
        plot_progress: when truthy, plotProgresskMeans is called after
            each assignment step with a 1-based iteration number.

    Returns:
        Tuple (centroids, idx): centroids after the final update and the
        assignments from the final iteration.
    """
    n_examples, _ = X.shape
    n_clusters = initial_centroids.shape[0]
    current = initial_centroids
    last_plotted = current
    idx = np.zeros((n_examples, 1))

    for step in range(max_iters):
        iteration = step + 1  # reported and plotted 1-based
        print('K-Means iteration %d/%d' % (iteration, max_iters))

        idx = findClosestCentroids(X, current)
        if plot_progress:
            plotProgresskMeans(X, current, last_plotted, idx, n_clusters,
                               iteration)
            last_plotted = current
        current = computeCentroids(X, idx, n_clusters)

    return current, idx
def runkMeans(X, initial_centroids, max_iters, plot_progress=None):
    """Run K-Means on X for max_iters iterations.

    Args:
        X: data array; X.shape[0] is the number of examples.
        initial_centroids: array of starting centroids; shape[0] is the
            number of clusters.
        max_iters: number of iterations to run.
        plot_progress: when truthy, a figure is opened, each iteration is
            drawn with plotProgresskMeans, and the loop waits for Enter.
            None (the default) is treated as False.

    Returns:
        Tuple (centroids, idx): centroids after the final update and the
        assignments from the final iteration. With max_iters == 0 the
        result is initial_centroids and an integer zero vector of length
        X.shape[0].
    """
    # Identity test: None is a singleton, and '==' may be overloaded.
    if plot_progress is None:
        plot_progress = False

    if plot_progress:
        plt.figure()

    m = X.shape[0]
    K = initial_centroids.shape[0]
    centroids = initial_centroids
    previous_centroids = centroids
    idx = np.zeros(m, dtype=int)

    for i in range(max_iters):
        print('K-Means iteration %d/%d...\n' % (i, max_iters))

        # Assignment step.
        idx = findClosestCentroids(X, centroids)

        if plot_progress:
            plotProgresskMeans(X, centroids, previous_centroids, idx, K, i)
            # Copy so the stored value is independent of the centroid array.
            previous_centroids = centroids.copy()
            input('Press enter to continue.\n')

        # Update step.
        centroids = computeCentroids(X, idx, K)

    return centroids, idx
def runkMeans(X, initial_centroids, max_iters, plot_progress=False):
    """Run the K-Means algorithm on the object-feature matrix X.

    Args:
        X: 2-D data array; columns 0 and 1 are used for plotting.
        initial_centroids: array with the initial positions of the
            centroids; shape[0] is the number of clusters.
        max_iters: number of iterations of the algorithm.
        plot_progress: when truthy, the data and the path of each
            centroid are plotted and shown once the iterations finish.

    Returns:
        Tuple (centroids, idx): centroids after the final update and the
        assignments from the final iteration. idx is None when max_iters
        is 0, since no assignment step has run.
    """
    K = initial_centroids.shape[0]
    centroids = initial_centroids
    previous_centroids = centroids
    # Defined up front so that max_iters == 0 returns cleanly instead of
    # raising UnboundLocalError at the return statement.
    idx = None

    if plot_progress:
        plt.plot(X[:, 0], X[:, 1], 'bo')

    for i in range(max_iters):
        # Assignment step.
        idx = findClosestCentroids(X, centroids)

        if plot_progress:
            # Draw the segment each centroid travelled since the last plot.
            for j in range(K):
                plt.plot([centroids[j, 0], previous_centroids[j, 0]],
                         [centroids[j, 1], previous_centroids[j, 1]], 'r-x')
            previous_centroids = centroids

        # Update step.
        centroids = computeCentroids(X, idx, K)

    if plot_progress:
        plt.xlabel('Первый признак')
        plt.ylabel('Второй признак')
        plt.grid()
        plt.show()

    return (centroids, idx)
def runkMeans(X, initial_centroids, max_iters, plot_progress = False):
    """Run K-Means on X for max_iters iterations.

    Args:
        X: 2-D data array, one example per row.
        initial_centroids: starting centroids; its first-axis length is
            the number of clusters.
        max_iters: number of iterations to run.
        plot_progress: when truthy, plotProgresskMeans is called after
            each assignment step. Defaults to False.

    Returns:
        Tuple (centroids, idx): the centroids after the final update and
        the assignments from the final iteration. With max_iters == 0 the
        result is initial_centroids and an (m, 1) array of zeros.
    """
    row_count, _ = np.shape(X)
    K = np.size(initial_centroids, 0)
    centroids_now = initial_centroids
    centroids_before = centroids_now
    idx = np.zeros((row_count, 1))

    for it in range(max_iters):
        print('K-Means iteration #', it, ' out of ', max_iters)

        # Assign every example to a centroid.
        idx = findClosestCentroids(X, centroids_now)

        if plot_progress:
            plotProgresskMeans(X, centroids_now, centroids_before, idx, K, it)
            centroids_before = centroids_now

        # Recompute the centroids from the assignments.
        centroids_now = computeCentroids(X, idx, K)

    return centroids_now, idx
def output(partId):
    """Build the formatted result string for one exercise part.

    Fixed test inputs are generated from sin/cos sequences and passed to
    the function that corresponds to partId; the result is flattened in
    column-major order and rendered with the formatter helper.

    Args:
        partId: part selector, one of the strings '1' to '5'.

    Returns:
        The value produced by formatter for the selected part.

    Raises:
        ValueError: if partId is not one of '1' to '5'. The original fell
            through to the return with `out` unbound.
    """
    # Deterministic test inputs.
    X = np.sin(np.arange(1, 166)).reshape(15, 11, order='F')
    Z = np.cos(np.arange(1, 122)).reshape(11, 11, order='F')
    C = Z[:5, :]
    idx = np.arange(1, 16) % 3

    if partId == '1':
        # Results are shifted by one before formatting.
        idx = findClosestCentroids(X, C) + 1
        out = formatter('%0.5f ', idx.ravel('F'))
    elif partId == '2':
        centroids = computeCentroids(X, idx, 3)
        out = formatter('%0.5f ', centroids.ravel('F'))
    elif partId == '3':
        U, S = pca(X)
        # Absolute values are formatted, so signs do not affect the output.
        out = formatter(
            '%0.5f ',
            np.abs(np.hstack([U.ravel('F'), np.diag(S).ravel('F')])))
    elif partId == '4':
        X_proj = projectData(X, Z, 5)
        out = formatter('%0.5f ', X_proj.ravel('F'))
    elif partId == '5':
        X_rec = recoverData(X[:, :5], Z, 5)
        out = formatter('%0.5f ', X_rec.ravel('F'))
    else:
        raise ValueError('Unknown partId: %r' % (partId,))

    return out
# K-Means is split into two functions -- findClosestCentroids and
# computeCentroids. This part exercises findClosestCentroids.
print('Finding closest centroids.')

# Example dataset used below.
data = scipy.io.loadmat('ex7data2.mat')
X = data['X']

# Fixed initial set of 3 centroids.
K = 3
initial_centroids = np.array([[3, 3], [6, 2], [8, 5]])

# Closest centroid for every example, given the initial centroids.
idx = findClosestCentroids(X, initial_centroids)

print('Closest centroids for the first 3 examples:')
print(idx[:3].tolist())
print('(the closest centroids should be 0, 2, 1 respectively)')

input('Program paused. Press <Enter> to continue...')

## ===================== Part 2: Compute Means =========================
# This part exercises computeCentroids.
print('Computing centroids means.')

# Means of the examples assigned to each centroid in the previous part.
centroids = computeCentroids(X, idx, K)
# ================= Part 1: Find Closest Centroids ====================
# To help you implement K-Means, the learning algorithm is divided into
# two functions -- findClosestCentroids and computeCentroids. In this
# part, you should complete the code in the findClosestCentroids function.
print('Finding closest centroids.')

data = loadmat('ex7data2.mat')
X = data['X']

# Fixed initial set of 3 centroids.
K = 3
initial_centroids = np.array([[3, 3], [6, 2], [8, 5]])

# Closest centroid for every example, given the initial centroids.
idx = findClosestCentroids(X, initial_centroids)

print('Closest centroids for the first 3 examples:')
print(idx[:3].ravel())
print('(the closest centroids should be 1, 3, 2 respectively)\n')

# ===================== Part 2: Compute Means =========================
# After implementing the closest centroids function, you should now
# complete the computeCentroids function.
print('Computing centroids means.')

# Means of the examples assigned to each centroid in the previous part.
centroids = computeCentroids(X, idx, K)

print('Centroids computed after initial finding of closest centroids:')
print(centroids)
import time

# Every print below passes one parenthesised value, so it produces the
# same text as a Python 2 print statement or a Python 3 print call.

# Step 1: show K-means in action.
print('Finding closest centroids.\n\n')

# MATLAB file containing 2D data.
mat_contents = sio.loadmat('ex7data2.mat')
X = mat_contents['X']

# Initial set of 3 centroids and the number of iterations.
K = 3
initial_centroids = np.array([[3, 3], [6, 2], [8, 5]], dtype=np.float32)
max_iters = 10

# Closest centroid for every example, given the initial centroids.
indicies = findClosestCentroids(X, initial_centroids)

print('Closest centroids for the first 3 examples: \n')
# Items separated by single spaces, as the multi-item print emitted them.
print('%s %s %s' % (indicies[0], indicies[1], indicies[2]))
print('\n(the closest centroids should be 0, 2, 1 respectively)\n')

raw_input("Press any key to continue")

print('\nComputing centroids means.\n\n')

# Means of the examples assigned to each centroid in the previous part.
centroids = computeCentroids(X, indicies, K)

print('Centroids computed after initial finding of closest centroids: \n')
# The multi-item print wrote a space after each centroid but none after a
# newline item; the template reproduces that spacing.
print('%s \n%s \n%s' % (centroids[0], centroids[1], centroids[2]))
print('\nthe centroids should be\n')
print(' [ 2.428301 3.157924 ]\n')
def runkMeans(X, initial_centroids, max_iters, plot_progress=False):
    """Run K-Means on X for max_iters iterations.

    Args:
        X: 2-D data array, one example per row.
        initial_centroids: array of starting centroids; shape[0] is the
            number of clusters.
        max_iters: number of iterations to run.
        plot_progress: when truthy, matplotlib is switched to interactive
            mode, each iteration is drawn with ppkm.plotProgresskMeans,
            and the loop waits for Enter before continuing.

    Returns:
        Tuple (centroids, idx): centroids after the final update and the
        assignments from the final iteration.
    """
    n_examples, _ = X.shape
    n_clusters = initial_centroids.shape[0]
    current = initial_centroids
    last_plotted = current
    idx = np.zeros((n_examples, 1))

    if plot_progress:
        # Start from a clean slate and draw without blocking.
        plt.close()
        plt.ion()

    for step in xrange(max_iters):
        # Carriage return keeps the progress report on a single line.
        sys.stdout.write(
            '\rK-Means iteration {:d}/{:d}...'.format(step + 1, max_iters))
        sys.stdout.flush()

        # Assignment step.
        idx = fcc.findClosestCentroids(X, current)

        if plot_progress:
            ppkm.plotProgresskMeans(X, current, last_plotted, idx,
                                    n_clusters, step)
            last_plotted = current
            raw_input('Press enter to continue.')

        # Update step.
        current = cc.computeCentroids(X, idx, n_clusters)

    print('\n')
    return current, idx
def ex7():
    """Drive the K-Means half of Exercise 7 (Machine Learning Online Class).

    The function runs five consecutive parts and reports on stdout:

    1. load ``ex7data2.mat`` and print the closest centroid of the first
       three examples;
    2. compute and print the centroid means for that assignment;
    3. run K-Means on the example dataset with plotting enabled;
    4. run K-Means (K = 16) on the pixels of ``bird_small.png``;
    5. rebuild the image from the 16 centroids and save the side-by-side
       comparison to ``figure3.png``.

    It relies on ``findClosestCentroids``, ``computeCentroids``,
    ``kMeansInitCentroids``, ``runkMeans`` and ``formatter`` being
    available in the enclosing module. Takes no arguments and returns
    nothing.
    """
    pause_notice = 'Program paused. Press enter to continue.'

    # ================= Part 1: Find Closest Centroids ====================
    # K-Means is split into two functions, findClosestCentroids and
    # computeCentroids; this part exercises the first one.
    print('Finding closest centroids.\n')

    # Load the example dataset.
    dataset = scipy.io.loadmat('ex7data2.mat')
    X = dataset['X']

    # Select an initial set of 3 centroids.
    K = 3
    initial_centroids = np.array([[3, 3], [6, 2], [8, 5]])

    # Find the closest centroids for the examples using initial_centroids.
    idx = findClosestCentroids(X, initial_centroids)

    print('Closest centroids for the first 3 examples: ')
    # The "+ 1" shifts the printed labels so they can be compared with the
    # expected "1, 3, 2" below (presumably idx is 0-based -- confirm).
    print(formatter(' %d', idx[:3] + 1))
    print('\n(the closest centroids should be 1, 3, 2 respectively)')

    print(pause_notice)

    # ===================== Part 2: Compute Means =========================
    print('\nComputing centroids means.\n')

    # Compute means based on the closest centroids found in Part 1.
    centroids = computeCentroids(X, idx, K)

    print('Centroids computed after initial finding of closest centroids: ')
    print(centroids)
    for expected_line in (
            '\n(the centroids should be',
            ' [ 2.428301 3.157924 ]',
            ' [ 5.813503 2.633656 ]',
            ' [ 7.119387 3.616684 ]\n'):
        print(expected_line)

    print(pause_notice)

    # =================== Part 3: K-Means Clustering ======================
    print('\nRunning K-Means clustering on example dataset.\n')

    # Load the example dataset again.
    dataset = scipy.io.loadmat('ex7data2.mat')
    X = dataset['X']

    # Settings for running K-Means.
    K = 3
    max_iters = 10

    # For consistency the centroids are fixed here; in practice they would
    # be generated automatically, e.g. as random examples (see
    # kMeansInitCentroids).
    initial_centroids = np.array([[3, 3], [6, 2], [8, 5]])

    # Run K-Means; the trailing True asks for the progress plot.
    # NOTE(review): a leading tag argument ('1') is passed here; confirm the
    # runkMeans in scope accepts it -- a
    # runkMeans(X, initial_centroids, max_iters, plot_progress=False)
    # signature would not.
    centroids, idx = runkMeans('1', X, initial_centroids, max_iters, True)
    print('\nK-Means Done.\n')

    print(pause_notice)

    # ============= Part 4: K-Means Clustering on Pixels ===============
    # K-Means is run on the pixel colours of an image so that every pixel
    # can later be mapped onto its closest centroid.
    print('\nRunning K-Means clustering on pixels from an image.\n')

    # Load an image of a bird and divide by 255.
    # NOTE(review): matplotlib documents PNG data from imread as floats in
    # [0, 1]; if so this division shrinks the range further and the "* 255"
    # at display time undoes it -- confirm the intended scaling.
    bird = matplotlib.image.imread('bird_small.png') / 255

    # Reshape the image into an N x 3 matrix (N = number of pixels); each
    # row holds the Red, Green and Blue values of one pixel.
    X = bird.reshape(-1, 3)

    # Try different values of K and max_iters here.
    K = 16
    max_iters = 10

    # When using K-Means it is important to initialize the centroids
    # randomly.
    initial_centroids = kMeansInitCentroids(X, K)

    # Run K-Means (same leading-tag call convention as in Part 3).
    centroids, idx = runkMeans('2', X, initial_centroids, max_iters)

    print(pause_notice)

    # ================= Part 5: Image Compression ======================
    print('\nApplying K-Means to compress an image.\n')

    # Find closest cluster members.
    idx = findClosestCentroids(X, centroids)

    # The image is now represented by the indices in idx: recover it by
    # replacing each pixel with its centroid, then restore the image shape.
    X_recovered = centroids[idx, :].reshape(bird.shape)

    figure, axes = plt.subplots(1, 2, figsize=(8, 4))

    # Original on the left, compressed version on the right.
    panels = (
        (axes[0], bird * 255, 'Original'),
        (axes[1], X_recovered * 255, 'Compressed, with %d colors' % K),
    )
    for axis, pixels, caption in panels:
        axis.imshow(pixels)
        axis.grid(False)
        axis.set_title(caption)

    plt.savefig('figure3.png')

    print(pause_notice + '\n')
# =============== Part 1. Finding the closest centroids ===============

print('Часть 1. Поиск ближайших средних')

# Load the data and build the object-feature matrix X
data = spi.loadmat('data.mat')
X = data['X']

# Choose the initial set of centroids
K = 3  # number of centroids
initial_centroids = np.array([[3, 3], [6, 2], [8, 5]])

# Find the closest centroids, starting from their initial values
idx = findClosestCentroids(X, initial_centroids)

# Report the assignments of the first three examples; idx is indexed as a
# two-dimensional array (row, column 0).
print(
    'Номера ближайших средних для первых трех примеров: {:.0f} {:.0f} {:.0f}'
    .format(*(idx[row, 0] for row in range(3))))

input('Программа остановлена. Нажмите Enter для продолжения ... \n')

# ============= Part 2. Computing the centroid values =============

print('Часть 2. Вычисление значений средних')

# Compute the centroid values for the assignment found in Part 1
centroids = computeCentroids(X, idx, K)

print('Вычисленные значения средних:')
# Random initialization of the centroids matters for K-Means, so
# kMeansInitCentroids must be implemented before this point.
initial_centroids = kMeansInitCentroids(X, K)

# Run K-Means
centroids, idx = runkMeans(X, initial_centroids, max_iters)

# ================= Part 5: Image Compression ======================
# Use the clusters found by K-Means to compress the image: first find the
# closest cluster for every example, then replace each pixel by the value
# of its centroid.

print('\nApplying K-Means to compress an image.\n\n')

# Find closest cluster members
idx = findClosestCentroids(X, centroids)

# The image X is now represented by the indices in idx. Map every pixel to
# its centroid value (idx is cast to int and shifted by one before being
# used as a row index -- presumably it holds 1-based labels; confirm) and
# reshape the result back to the image dimensions.
X_recovered = centroids[idx.astype(int) - 1, :].reshape(
    (img_size[0], img_size[1], 3))

# Display the original image
plt.subplot(1, 2, 1)
plt.imshow(A)
plt.title('Original')
# Flatten the image into one row of 3 colour values per pixel.
x = image.shape[0]
y = image.shape[1]
image_formatted = np.reshape(image, (x * y, 3))

k = 16
max_iters = 10
initial_centroids = kMeansInitCentroids(image_formatted, k)
print(initial_centroids)

# Run K-Means
centroids, idx = runkMeans(image_formatted, initial_centroids, max_iters, False)

# Assign every pixel to its closest final centroid.
idx = findClosestCentroids(image_formatted, centroids)

# Retained as module-level names in case later code relies on them.
feature_size = len(image_formatted[0])
len_idx = len(idx)

# Replace every pixel by its centroid in a single NumPy integer-array
# indexing step instead of a Python loop over each pixel and channel.
# idx is shifted by one before indexing, exactly as the element-wise
# lookup did; np.ravel also lets a column-vector idx work.
X_recovered = np.asarray(centroids)[np.ravel(idx) - 1, :feature_size]

# Restore the image layout and rescale the values by 255.
X_recovered = np.reshape(X_recovered, (x, y, 3))
X_recovered = X_recovered * 255

# NOTE(review): the panel shows image1 rather than image -- presumably the
# unprocessed original; confirm.
plt.subplot(1, 2, 1)
plt.imshow(image1)
plt.title('Original')