def plotProgresskMeans(X, centroids, previous, idx, K, i, color):
    """Draw one k-Means iteration for 2-D data, then wait for the user.

    The examples are drawn by ``plotDataPoints(X, idx)``, the current
    centroids are overlaid as 'x' markers with a black edge colour, and each
    centroid is joined to its previous position by a segment drawn in
    ``color``.  The figure is titled with the iteration number ``i`` and
    shown, after which execution blocks until Enter is pressed.

    ``K`` is accepted for signature compatibility but is not used here.
    """
    # Examples first; the per-point colouring is delegated to plotDataPoints.
    plotDataPoints(X, idx)

    # Current centroid positions.
    xs_now, ys_now = centroids[:, 0], centroids[:, 1]
    plt.scatter(xs_now, ys_now, marker='x', s=60, lw=3, edgecolor='k')

    # One segment per centroid: current position -> previous position.
    for row in range(len(centroids)):
        seg_x = [centroids[row, 0], previous[row, 0]]
        seg_y = [centroids[row, 1], previous[row, 1]]
        plt.plot(seg_x, seg_y, c=color)

    caption = 'Iteration number %d' % i
    plt.title(caption)
    plt.show()
    input("Program paused. Press Enter to continue...")
def plotProgresskMeans(X, centroids, previous, idx, K, i):
    # PLOTPROGRESSKMEANS shows how k-Means is progressing while it runs.
    # It is intended for 2D data only.
    #
    #   plotProgresskMeans(X, centroids, previous, idx, K, i) draws the
    #   examples through plotDataPoints(X, idx, K, i), then walks the
    #   sequence `previous` from its last entry back to its first.  At each
    #   step the newer centroid set is drawn as 'x' markers with a black
    #   edge, and every centroid is connected to the matching row of the
    #   older set with drawLine.

    # Plot the examples
    plotDataPoints(X, idx, K, i)

    # Marker styling shared by every centroid set that gets drawn.
    marker_style = dict(linestyle='None', marker='x', markeredgecolor='k',
                        ms=10, lw=3)

    newer = centroids
    for older in previous[::-1]:
        # Centroids of the newer step
        plt.plot(newer[:, 0], newer[:, 1], **marker_style)

        # Segment from each newer centroid to where it was one step earlier
        for row in range(newer.shape[0]):
            drawLine(newer[row, :], older[row, :])

        newer = older

    # Title
    plt.title('Iteration number %d' % i)
def plotProgresskMeans(X, centroids, idx, K, i=0):
    '''
    PLOTPROGRESSKMEANS(X, centroids, idx, K, i) plots the examples via
    plotDataPoints(X, idx, K), marks every row of `centroids` with an 'x',
    and draws one thin black polyline per centroid index.

    The polyline for centroid j joins rows j, j+K, j+2K, ... of `centroids`,
    so `centroids` presumably stacks the K centroids of successive
    iterations row-wise (confirm against the caller).  `i` is only used in
    the figure title.
    '''
    from plotDataPoints import plotDataPoints
    import matplotlib.pyplot as plt
    import numpy as np  # not referenced below; import kept as in the original

    # Plot the examples
    plotDataPoints(X, idx, K)

    # Mark every centroid row with an 'x'
    cx = centroids[:, 0]
    cy = centroids[:, 1]
    plt.plot(cx, cy, 'x',
             markeredgecolor='#414042', markersize=7, markeredgewidth=2)

    # Trajectory of each centroid: every K-th row starting at its offset
    for offset in range(K):
        track = centroids[offset::K, :]
        plt.plot(track[:, 0], track[:, 1], color='k', linewidth=0.5)

    # Title
    plt.title('Iteration number %d' % (i))
def plotProgresskMeans(X, centroids, previous, idx, K, i):
    # PLOTPROGRESSKMEANS displays the progress of k-Means as it is running.
    # It is intended for use only with 2D data.
    #
    #   plotProgresskMeans(X, centroids, previous, idx, K, i) draws the
    #   examples through pdp.plotDataPoints(X, idx, K), marks the current
    #   centroids with black x's, and connects each centroid to the matching
    #   row of `previous` with a blue line (dl.drawLine).  The title shows
    #   i + 1, i.e. `i` is treated as a zero-based iteration index.

    # Plot the examples
    pdp.plotDataPoints(X, idx, K)

    # Current centroids as black x's
    centroid_style = dict(marker='x', s=400, c='k', linewidth=1)
    plt.scatter(centroids[:, 0], centroids[:, 1], **centroid_style)

    # Line from every centroid to its previous location
    n_centroids = centroids.shape[0]
    for row in range(n_centroids):
        dl.drawLine(centroids[row, :], previous[row, :], c='b')

    # Title
    caption = 'Iteration number {:d}'.format(i + 1)
    plt.title(caption)
def plotProgresskMeans(X, centroids, previous, idx, K, i):
    """Display the progress of k-Means for 2D data.

    Draws the examples through ``pdp.plotDataPoints(X, idx, K)``, marks the
    current centroids with black x's, and connects every centroid to the
    matching row of ``previous`` with a blue line via ``dl.drawLine``.

    Parameters
    ----------
    X, idx, K
        Passed unchanged to ``pdp.plotDataPoints``.
    centroids
        2-D array; columns 0 and 1 are plotted, one centroid per row.
    previous
        Array indexed like ``centroids``; row j is the earlier position of
        centroid j.
    i
        Zero-based iteration index; the title shows ``i + 1``.

    Returns
    -------
    None
    """
    # Plot the examples
    pdp.plotDataPoints(X, idx, K)

    # Plot the centroids as black x's
    plt.scatter(centroids[:, 0], centroids[:, 1],
                marker='x', s=400, c='k', linewidth=1)

    # Plot the history of the centroids with lines.
    # `range` instead of `xrange`: `xrange` does not exist on Python 3, and
    # `range` behaves the same here on Python 2 as well.
    for j in range(centroids.shape[0]):
        dl.drawLine(centroids[j, :], previous[j, :], c='b')

    # Title
    plt.title('Iteration number {:d}'.format(i + 1))

    return
def plotProgresskMeans(X, centroids, previous, idx, K, i):
    """Plot one k-Means step for 2-D data without blocking.

    The examples are drawn through ``plotDataPoints(X, idx)``.  The previous
    centroids are then plotted with the ``'rx'`` format (red x's) and the
    current ones with ``'kx'`` (black x's), and ``drawLine`` connects each
    current centroid to the matching row of ``previous``.  The figure is
    titled with ``i`` and shown with ``block=False``.

    ``K`` is accepted but not used here.
    """
    plotDataPoints(X, idx)

    # Previous centroids first, then the current ones on top.
    for points, fmt in ((previous, 'rx'), (centroids, 'kx')):
        plt.plot(points[:, 0], points[:, 1], fmt, lw=3)

    # Connect each centroid to where it was before.
    n_centroids = centroids.shape[0]
    for row in range(n_centroids):
        drawLine(centroids[row, :], previous[row, :])

    plt.title('Iteration number %d' % i)
    plt.show(block=False)
def plotProgressKmeans(X, centroids, previous, idx, K, i):
    """Plot one k-Means step for 2-D data and show the figure.

    Draws the examples through ``plotDataPoints(X, idx, K)``, scatters the
    current centroids as 'x' markers (``edgecolors='b'``), connects each
    centroid to the matching row of ``previous`` with ``drawLine``, titles
    the figure with the iteration number ``i`` and calls ``plt.show()``.
    """
    plotDataPoints(X, idx, K)

    # Current centroid positions.
    cx, cy = centroids[:, 0], centroids[:, 1]
    plt.scatter(cx, cy, marker='x', edgecolors='b')

    # Movement of each centroid since the previous step.
    n_centroids = centroids.shape[0]
    for row in range(n_centroids):
        drawLine(centroids[row, :], previous[row, :])

    caption = 'Iteration number {}'.format(i)
    plt.title(caption)
    plt.show()
def plotProgressKMeans(X, history_centroids, idx, K, i):
    # PLOTPROGRESSKMEANS displays the progress of k-Means as it is running.
    # It is intended for use only with 2D data.
    #
    #   plotProgressKMeans(X, history_centroids, idx, K, i) draws the
    #   examples through plotDataPoints(X, idx, K), marks every centroid
    #   recorded for iterations 0..i with a black 'x', and joins consecutive
    #   positions of each centroid with drawLine.
    #
    #   history_centroids is indexed as [iteration, centroid, coordinate];
    #   only coordinates 0 and 1 are plotted as markers.  The title shows
    #   i + 1.
    plotDataPoints(X, idx, K)

    # All centroid positions recorded up to and including iteration i.
    seen = history_centroids[0:i + 1]
    plt.plot(seen[:, :, 0], seen[:, :, 1],
             linestyle='', marker='x', markersize=10, linewidth=3, color='k')

    plt.title('Iteration number {}'.format(i + 1))

    # For each centroid, connect its position at step t to the one at t + 1.
    n_centroids = history_centroids.shape[1]
    for c in range(n_centroids):
        track = history_centroids[:, c, :]
        for t in range(i):
            drawLine(track[t, :], track[t + 1, :])
def plotProgresskMeans(X, centroids, previous, idx, K, i):
    """Plot one k-Means step for 2-D data.

    Draws the examples through ``plotDataPoints(X, idx, K)``, marks the
    current centroids with black-edged 'x' markers (no connecting line), and
    uses ``drawLine`` to join each centroid to the matching row of
    ``previous`` in blue.  The title is ``'Iteration number '`` followed by
    ``str(i)``.  Nothing is returned and the figure is not shown here.
    """
    # Examples
    plotDataPoints(X, idx, K)

    # Current centroids as black x's
    marker_style = dict(marker='x', markeredgecolor='k', markersize=10,
                        linewidth=3, linestyle='None')
    plt.plot(centroids[:, 0], centroids[:, 1], **marker_style)

    # Centroid movement since the previous step
    n_centroids = centroids.shape[0]
    for row in range(n_centroids):
        drawLine(centroids[row, :], previous[row, :], c='b')

    # Title
    plt.title('Iteration number ' + str(i))
def plotProgresskMeans(X, centroids, previous, idx, K, i):
    # PLOTPROGRESSKMEANS displays the progress of k-Means as it is running.
    # It is intended for use only with 2D data.
    #
    #   plotProgresskMeans(X, centroids, previous, idx, K, i) draws the
    #   examples through plotDataPoints(X, idx, K), marks the current
    #   centroids with black-edged x's, and connects each centroid to the
    #   matching row of `previous` with drawLine(..., 'b').  The title shows
    #   i + 1.
    #
    #   `plot`, `size` and `title` are used unqualified, so they must already
    #   be available in the module namespace.

    # Plot the examples
    plotDataPoints(X, idx, K)

    # Plot the centroids as black x's
    cx, cy = centroids[:, 0], centroids[:, 1]
    plot(cx, cy, 'x', mec='k', ms=10, mew=3)

    # Plot the history of the centroids with lines
    n_centroids = size(centroids, 0)
    for row in range(n_centroids):
        drawLine(centroids[row, :], previous[row, :], 'b')

    # Title
    caption = 'Iteration number #%d' % (i + 1)
    title(caption)
def runkMeans(data, initial_centroids, max_iters, plot_progress):
    """Run k-Means for a fixed number of iterations.

    Each iteration assigns every example to a centroid with
    ``findClosestCentroids`` and then recomputes the centroids with
    ``computeCentroids``.

    Parameters
    ----------
    data
        Two-dimensional collection of examples; its shape ``(m, n)`` is read
        with ``np.shape`` and printed.
    initial_centroids
        Starting centroids; ``len(initial_centroids)`` is used as k.
    max_iters
        Number of iterations to run.
    plot_progress
        When truthy, the current assignment is drawn with ``plotDataPoints``
        after every assignment step.

    Returns
    -------
    (centroids, idx)
        The centroids after the last update and the last assignment.  With
        ``max_iters == 0`` these are ``initial_centroids`` itself and a list
        of ``m`` zeros.
    """
    # Initialize values
    (m, n) = np.shape(data)
    print(m)
    print(n)
    k = len(initial_centroids)
    centroids = initial_centroids
    idx = [0] * m

    for i in range(max_iters):
        # The original message used '#d' placeholders with extra print
        # arguments, so the counters were never interpolated.  Report a
        # 1-based iteration number out of max_iters instead.
        print('K-Means iteration %d/%d...\n' % (i + 1, max_iters))

        # For each example, find the centroid it is assigned to
        idx = findClosestCentroids(data, centroids)

        if plot_progress:
            # Only the assignment is drawn; the centroid-trajectory helper
            # (plotProgresskMeans) is not wired in here.
            plotDataPoints(data, idx, k)
            # The message is printed but nothing waits for input.
            print('Press enter to continue.\n')

        # Given the memberships, compute new centroids
        centroids = computeCentroids(data, idx, k)

    return centroids, idx
def plotProgresskMeans(X, centroids, previous, idx, K, i, color):
    """Plot one k-Means iteration for 2-D data, then wait for the user.

    The examples are drawn by ``plotDataPoints(X, idx)``, the current
    centroids are overlaid as 'x' markers with a black edge colour, and each
    centroid is joined to its previous position by a segment drawn in
    ``color``.  The figure is titled with the iteration number ``i`` and
    shown, after which execution blocks until Enter is pressed.

    Parameters
    ----------
    X, idx
        Passed unchanged to ``plotDataPoints``.
    centroids
        2-D array of current centroids; columns 0 and 1 are plotted.
    previous
        Array indexed like ``centroids`` holding the earlier positions.
    K
        Accepted for signature compatibility; not used here.
    i
        Iteration number shown in the title.
    color
        Colour passed to ``plt.plot`` for the connecting segments.
    """
    # Plot the examples
    plotDataPoints(X, idx)

    # Plot the centroids as black x's
    plt.scatter(centroids[:, 0], centroids[:, 1],
                marker='x', s=60, lw=3, edgecolor='k')

    # Plot the history of the centroids with lines
    for j in range(len(centroids)):
        plt.plot([centroids[j, 0], previous[j, 0]],
                 [centroids[j, 1], previous[j, 1]], c=color)

    # Title
    plt.title('Iteration number %d' % i)
    # Use the pyplot module already used above rather than a bare `show`.
    plt.show()

    # `raw_input` exists only on Python 2; on Python 3 the equivalent is
    # `input`.  (Python 2's own `input` evaluates what is typed, so it is
    # only used as the fallback when `raw_input` is missing.)
    try:
        wait_for_enter = raw_input
    except NameError:
        wait_for_enter = input
    wait_for_enter("Program paused. Press Enter to continue...")
X[sel, 1], X[sel, 2], s=100, c=idx[sel], cmap=cm.hsv, vmax=K + 1, facecolors='none') title('Pixel dataset plotted in 3D. Color shows centroid memberships') fig.show() print 'Program paused. Press enter to continue.' raw_input() ## === Part 8(b): Optional (ungraded) Exercise: PCA for Visualization === # Use PCA to project this cloud to 2D for visualization # Subtract the mean to use PCA X_norm, mu, sigma = featureNormalize(X) # PCA and project the data to 2D U, s = pca(X_norm) Z = projectData(X_norm, U, 2) # Plot in 2D fig = figure() plotDataPoints(Z[sel, :], idx[sel], K) title('Pixel dataset plotted in 2D, using PCA for dimensionality reduction') fig.show() print 'Program paused. Press enter to continue.' raw_input()
marker='o', facecolors='none', lw=0.4, s=10) plt.title('Pixel dataset plotted in 3D. Color shows centroid memberships') show() input('Program paused. Press Enter to continue...') ## === Part 8(b): Optional (ungraded) Exercise: PCA for Visualization === # Use PCA to project this cloud to 2D for visualization # Subtract the mean to use PCA X_norm, mu, sigma = featureNormalize(X) # PCA and project the data to 2D U, S, V = pca(X_norm) Z = projectData(X_norm, U, 2) # Plot in 2D plt.figure() zs = np.array([Z[s] for s in sel]) idxs = np.array([idx[s] for s in sel]) # plt.scatter(zs[:,0], zs[:,1]) plotDataPoints(zs, idxs) plt.title( 'Pixel dataset plotted in 2D, using PCA for dimensionality reduction') show() input('Program paused. Press Enter to continue...')
cmap = plt.get_cmap("jet") idxn = sel.astype('float')/max(sel.astype('float')) colors = cmap(idxn) # ax = Axes3D(fig) ax.scatter3D(xs, ys, zs=zs, edgecolors=colors, marker='o', facecolors='none', lw=0.4, s=10) plt.title('Pixel dataset plotted in 3D. Color shows centroid memberships') show() raw_input('Program paused. Press Enter to continue...') ## === Part 8(b): Optional (ungraded) Exercise: PCA for Visualization === # Use PCA to project this cloud to 2D for visualization # Subtract the mean to use PCA X_norm, mu, sigma = featureNormalize(X) # PCA and project the data to 2D U, S, V = pca(X_norm) Z = projectData(X_norm, U, 2) # Plot in 2D plt.figure() zs = np.array([Z[s] for s in sel]) idxs = np.array([idx[s] for s in sel]) # plt.scatter(zs[:,0], zs[:,1]) plotDataPoints(zs, idxs) plt.title('Pixel dataset plotted in 2D, using PCA for dimensionality reduction') show() raw_input('Program paused. Press Enter to continue...')
# too expensive. If you have a fast computer, you may increase this. sel = (random.rand(1000) * size(X, 0)).astype(int) # Visualize the data and centroid memberships in 3D fig = figure() ax = Axes3D(fig) ax.scatter(X[sel, 0], X[sel, 1], X[sel, 2], s=100, c=idx[sel], cmap=cm.hsv, vmax=K+1, facecolors='none') title('Pixel dataset plotted in 3D. Color shows centroid memberships') fig.show() print 'Program paused. Press enter to continue.' raw_input() ## === Part 8(b): Optional (ungraded) Exercise: PCA for Visualization === # Use PCA to project this cloud to 2D for visualization # Subtract the mean to use PCA X_norm, mu, sigma = featureNormalize(X) # PCA and project the data to 2D U, s = pca(X_norm) Z = projectData(X_norm, U, 2) # Plot in 2D fig = figure() plotDataPoints(Z[sel, :], idx[sel], K) title('Pixel dataset plotted in 2D, using PCA for dimensionality reduction') fig.show() print 'Program paused. Press enter to continue.' raw_input()
def ex7_pca():
    """Run the Exercise 7 PCA walkthrough end to end.

    Reads 'ex7data1.mat', 'ex7faces.mat' and 'bird_small.png' from the
    current working directory, runs the project helpers (featureNormalize,
    pca, projectData, recoverData, displayData, drawLine,
    kMeansInitCentroids, runkMeans, plotDataPoints, formatter) on them,
    prints progress and expected values, and saves the figures as
    'figure1.png' ... 'figure9.png'.  Takes no arguments and returns None.
    """
    ## Machine Learning Online Class
    #  Exercise 7 | Principal Component Analysis and K-Means Clustering
    #
    #  Instructions
    #  ------------
    #
    #  This file contains code that helps you get started on the
    #  exercise. You will need to complete the following functions:
    #
    #     pca.m
    #     projectData.m
    #     recoverData.m
    #     computeCentroids.m
    #     findClosestCentroids.m
    #     kMeansInitCentroids.m
    #
    #  For this exercise, you will not need to change any code in this file,
    #  or any other files other than those mentioned above.
    #

    ## Initialization
    #clear ; close all; clc

    ## ================== Part 1: Load Example Dataset ===================
    #  We start this exercise by using a small dataset that is easy to
    #  visualize
    #
    print('Visualizing example dataset for PCA.\n')

    #  The following command loads the dataset. You should now have the
    #  variable X in your environment
    mat = scipy.io.loadmat('ex7data1.mat')
    X = mat['X']

    #  Visualize the example dataset
    plt.plot(X[:, 0], X[:, 1], 'wo', ms=10, mec='b', mew=1)
    plt.axis([0.5, 6.5, 2, 8])
    plt.savefig('figure1.png')

    print('Program paused. Press enter to continue.')
    #pause

    ## =============== Part 2: Principal Component Analysis ===============
    #  You should now implement PCA, a dimension reduction technique. You
    #  should complete the code in pca.m
    #
    print('\nRunning PCA on example dataset.\n')

    #  Before running PCA, it is important to first normalize X
    X_norm, mu, sigma = featureNormalize(X)

    #  Run PCA
    U, S = pca(X_norm)

    #  mu is the per-feature mean returned by featureNormalize above.
    #  Draw the eigenvectors centered at the mean of the data. These lines
    #  show the directions of maximum variation in the dataset.
    #hold on
    print(S)
    print(U)
    drawLine(mu, mu + 1.5 * np.dot(S[0], U[:, 0].T))
    drawLine(mu, mu + 1.5 * np.dot(S[1], U[:, 1].T))
    #hold off
    plt.savefig('figure2.png')

    print('Top eigenvector: ')
    print(' U(:,1) = %f %f ' % (U[0, 0], U[1, 0]))
    print('\n(you should expect to see -0.707107 -0.707107)')

    print('Program paused. Press enter to continue.')
    #pause

    ## =================== Part 3: Dimension Reduction ===================
    #  You should now implement the projection step to map the data onto the
    #  first k eigenvectors. The code will then plot the data in this reduced
    #  dimensional space. This will show you what the data looks like when
    #  using only the corresponding eigenvectors to reconstruct it.
    #
    #  You should complete the code in projectData.m
    #
    print('\nDimension reduction on example dataset.\n\n')

    #  Plot the normalized dataset (returned from featureNormalize)
    fig = plt.figure()
    plt.plot(X_norm[:, 0], X_norm[:, 1], 'bo')

    #  Project the data onto K = 1 dimension
    K = 1
    Z = projectData(X_norm, U, K)
    print('Projection of the first example: %f' % Z[0])
    print('\n(this value should be about 1.481274)\n')

    X_rec = recoverData(Z, U, K)
    print('Approximation of the first example: %f %f' % (X_rec[0, 0], X_rec[0, 1]))
    print('\n(this value should be about -1.047419 -1.047419)\n')

    #  Draw lines connecting the projected points to the original points
    plt.plot(X_rec[:, 0], X_rec[:, 1], 'ro')
    for i in range(X_norm.shape[0]):
        drawLine(X_norm[i, :], X_rec[i, :])
    #end
    plt.savefig('figure3.png')

    print('Program paused. Press enter to continue.\n')
    #pause

    ## =============== Part 4: Loading and Visualizing Face Data =============
    #  We start the exercise by first loading and visualizing the dataset.
    #  The following code will load the dataset into your environment
    #
    print('\nLoading face dataset.\n\n')

    #  Load Face dataset
    mat = scipy.io.loadmat('ex7faces.mat')
    X = mat['X']

    #  Display the first 100 faces in the dataset
    displayData(X[:100, :])
    plt.savefig('figure4.png')

    print('Program paused. Press enter to continue.\n')
    #pause

    ## =========== Part 5: PCA on Face Data: Eigenfaces ===================
    #  Run PCA and visualize the eigenvectors which are in this case
    #  eigenfaces. We display the first 36 eigenfaces.
    #
    print('\nRunning PCA on face dataset.\n(this mght take a minute or two ...)\n')

    #  Before running PCA, it is important to first normalize X by
    #  subtracting the mean value from each feature
    X_norm, mu, sigma = featureNormalize(X)

    #  Run PCA
    U, S = pca(X_norm)

    #  Visualize the top 36 eigenvectors found
    displayData(U[:, :36].T)
    plt.savefig('figure5.png')

    print('Program paused. Press enter to continue.')
    #pause

    ## ============= Part 6: Dimension Reduction for Faces =================
    #  Project images to the eigen space using the top k eigenvectors
    #  If you are applying a machine learning algorithm
    print('\nDimension reduction for face dataset.\n')

    K = 100
    Z = projectData(X_norm, U, K)

    print('The projected data Z has a size of: ')
    print(formatter('%d ', Z.shape))

    print('\n\nProgram paused. Press enter to continue.')
    #pause

    ## ==== Part 7: Visualization of Faces after PCA Dimension Reduction ====
    #  Project images to the eigen space using the top K eigenvectors and
    #  visualize only using those K dimensions
    #  Compare to the original input, which is also displayed
    print('\nVisualizing the projected (reduced dimension) faces.\n')

    K = 100
    X_rec = recoverData(Z, U, K)

    # Display normalized data
    #subplot(1, 2, 1)
    displayData(X_norm[:100, :])
    plt.gcf().suptitle('Original faces')
    #axis square
    plt.savefig('figure6.a.png')

    # Display reconstructed data from only k eigenfaces
    #subplot(1, 2, 2)
    displayData(X_rec[:100, :])
    plt.gcf().suptitle('Recovered faces')
    #axis square
    plt.savefig('figure6.b.png')

    print('Program paused. Press enter to continue.')
    #pause

    ## === Part 8(a): Optional (ungraded) Exercise: PCA for Visualization ===
    #  One useful application of PCA is to use it to visualize
    #  high-dimensional data. In the last K-Means exercise you ran K-Means on
    #  3-dimensional pixel colors of an image. We first visualize this output
    #  in 3D, and then apply PCA to obtain a visualization in 2D.
    #close all; close all; clc

    # Re-load the image from the previous exercise and run K-Means on it
    # For this to work, you need to complete the K-Means assignment first
    A = matplotlib.image.imread('bird_small.png')
    # If imread does not work for you, you can try instead
    #   load ('bird_small.mat')

    # matplotlib's imread already returns PNG data as floats in [0, 1];
    # dividing unconditionally would scale the pixels a second time.  Only
    # rescale when the values are still on the 0..255 scale.
    if A.max() > 1:
        A = A / 255
    X = A.reshape(-1, 3)
    K = 16
    max_iters = 10
    initial_centroids = kMeansInitCentroids(X, K)
    # NOTE(review): this project's runkMeans is called with a leading '7'
    # argument here - confirm against its definition.
    centroids, idx = runkMeans('7', X, initial_centroids, max_iters)

    #  Sample 1000 random indexes (since working with all the data is
    #  too expensive). If you have a fast computer, you may increase this.
    sel = np.random.choice(X.shape[0], size=1000)

    #  Setup Color Palette
    #palette = hsv(K)
    #colors = palette(idx(sel), :)

    #  Visualize the data and centroid memberships in 3D
    fig = plt.figure()
    ax = fig.add_subplot(111, projection='3d')
    ax.scatter(X[sel, 0], X[sel, 1], X[sel, 2], cmap='rainbow', c=idx[sel], s=8**2)
    ax.set_title('Pixel dataset plotted in 3D. Color shows centroid memberships')
    plt.savefig('figure8.png')
    print('Program paused. Press enter to continue.')
    #pause

    ## === Part 8(b): Optional (ungraded) Exercise: PCA for Visualization ===
    # Use PCA to project this cloud to 2D for visualization

    # Subtract the mean to use PCA
    X_norm, mu, sigma = featureNormalize(X)

    # PCA and project the data to 2D
    U, S = pca(X_norm)
    Z = projectData(X_norm, U, 2)

    # Plot in 2D
    fig = plt.figure()
    plotDataPoints(Z[sel, :], [idx[sel]], K, 0)
    plt.title('Pixel dataset plotted in 2D, using PCA for dimensionality reduction')
    plt.savefig('figure9.png')
    print('Program paused. Press enter to continue.\n')
# Reference: the Octave/MATLAB helper this module was ported from, kept as
# comments.  (The '#d' in the title format is reproduced as found; it was
# presumably '%d' before the comment characters were converted.)
#
# function plotProgresskMeans(X, centroids, previous, idx, K, i)
# PLOTPROGRESSKMEANS is a helper function that displays the progress of
# k-Means as it is running. It is intended for use only with 2D data.
#   PLOTPROGRESSKMEANS(X, centroids, previous, idx, K, i) plots the data
#   points with colors assigned to each centroid. With the previous
#   centroids, it also plots a line between the previous locations and
#   current locations of the centroids.
#
#   # Plot the examples
#   plotDataPoints(X, idx, K);
#
#   # Plot the centroids as black x's
#   plot(centroids(:,1), centroids(:,2), 'x', ...
#        'MarkerEdgeColor','k', ...
#        'MarkerSize', 10, 'LineWidth', 3);
#
#   # Plot the history of the centroids with lines
#   for j=1:size(centroids,1)
#       drawLine(centroids(j, :), previous(j, :));
#   end
#
#   # Title
#   title(sprintf('Iteration number #d', i))
#
# end

from plotDataPoints import plotDataPoints


def plotProgresskMeans(data, centroids, previous, idx, k, i):
    """Plot the examples for one k-Means step.

    This port only forwards ``data``, ``idx`` and ``k`` to
    ``plotDataPoints``.  ``centroids``, ``previous`` and ``i`` are accepted
    for signature compatibility with the reference helper but are not used,
    so no centroid markers, history lines or title are drawn.
    """
    plotDataPoints(data, idx, k)
def plotProgresskMeans(data, centroids, previous, idx, k, i):
    """Plot the examples for one k-Means step.

    Only ``data``, ``idx`` and ``k`` are used: they are forwarded to
    ``plotDataPoints``.  ``centroids``, ``previous`` and ``i`` are accepted
    so callers can keep the usual six-argument call, but nothing is drawn
    from them.
    """
    plotDataPoints(data, idx, k)