def test_PCA_for_Visualization(self):
    plt.close()

    # Re-load the image from the previous exercise and run K-Means on it.
    # For this to work, you need to complete the K-Means assignment first.
    # A = double(imread('bird_small.png'));
    # If imread does not work for you, load the .mat file instead:
    mat = scipy.io.loadmat('resource/bird_small.mat')
    A = mat["A"]
    A = A / 255
    image_size = np.shape(A)
    X = A.reshape(image_size[0] * image_size[1], 3)
    K = 16
    max_iters = 10

    from ex7_K_means_Clustering_and_Principal_Component_Analysis.kMeansInitCentroids import kMeansInitCentroids
    initial_centroids = kMeansInitCentroids(X, K)
    from ex7_K_means_Clustering_and_Principal_Component_Analysis.runkMeans import runkMeans
    centroids, idx = runkMeans(X, initial_centroids, max_iters, True)

    # Sample 1000 random indexes (working with all the data is too
    # expensive; if you have a fast computer, you may increase this).
    sel = np.floor(np.random.rand(1000, 1) * X.shape[0]).astype(int).flatten()

    # Set up the color palette
    from utils.hsv import hsv
    palette = hsv(K)
    colors = np.array([palette[int(i)] for i in idx[sel]])

    # fig = plt.figure()
    # ax = fig.add_subplot(111, projection='3d')
    # ax.scatter(X[sel, 0], X[sel, 1], X[sel, 2], s=100, c=colors)
    # plt.title('Pixel dataset plotted in 3D. Color shows centroid memberships')
    # plt.show(block=False)

    # === Part 8(b): Optional (ungraded) Exercise: PCA for Visualization ===
    # Use PCA to project this cloud to 2D for visualization.
    from ex5_regularized_linear_regressionand_bias_vs_variance.featureNormalize import featureNormalize

    # Subtract the mean to use PCA
    X_norm, _, _ = featureNormalize(X)

    # Run PCA and project the data to 2D
    from ex7_K_means_Clustering_and_Principal_Component_Analysis.pca import pca
    U, S = pca(X_norm)
    from ex7_K_means_Clustering_and_Principal_Component_Analysis.projectData import projectData
    Z = projectData(X_norm, U, 2)

    # Plot in 2D
    plt.figure(2)
    from ex7_K_means_Clustering_and_Principal_Component_Analysis.plotDataPoints import plotDataPoints
    plotDataPoints(Z[sel, :], idx[sel], K)
    plt.title('Pixel dataset plotted in 2D, using PCA for dimensionality reduction')
    plt.show(block=False)
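# For reference, a minimal sketch of what projectData might look like (the
# graded version lives in projectData.py). It assumes pca returns U with the
# principal directions as orthonormal columns, sorted by decreasing variance.
import numpy as np

def projectData(X, U, K):
    # Project each example (row of X) onto the top K eigenvectors;
    # the result Z has shape (m, K).
    return X @ U[:, :K]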
def test_PCA_on_Face_Data_Eigenfaces(self):
    print("Running PCA on face dataset.")
    print("This might take a minute or two ...")

    # Load the face dataset
    mat = scipy.io.loadmat('resource/ex7faces.mat')
    X = np.array(mat["X"])

    # Before running PCA, it is important to first normalize X by
    # subtracting the mean value of each feature.
    from utils.featureNormalize import featureNormalize
    X_norm, _, _ = featureNormalize(X)

    # Run PCA
    from ex7_K_means_Clustering_and_Principal_Component_Analysis.pca import pca
    U, S = pca(X_norm)

    # Visualize the top 36 eigenvectors found
    from utils.displayData import displayData
    displayData(U[:, :36].T)

    # ============= Part 6: Dimension Reduction for Faces =================
    # Project images onto the eigenspace using the top K eigenvectors.
    # If you were applying a machine learning algorithm to these images,
    # you could use the projected data instead of the raw pixels.
    print("Dimension reduction for face dataset.")
    K = 100
    from ex7_K_means_Clustering_and_Principal_Component_Analysis.projectData import projectData
    Z = projectData(X_norm, U, K)
    print("The projected data Z has a size of: {z}".format(z=np.shape(Z)))

    # ==== Part 7: Visualization of Faces after PCA Dimension Reduction ====
    # Recover the images from the eigenspace using only the top K
    # eigenvectors and visualize them using just those K dimensions.
    # Compare to the original input, which is also displayed.
    print("Visualizing the projected (reduced dimension) faces.")
    K = 100
    from ex7_K_means_Clustering_and_Principal_Component_Analysis.recoverData import recoverData
    X_rec = recoverData(Z, U, K)

    # Display normalized data
    plt.close()
    plt.subplot(1, 2, 1)
    displayData(X_norm[:100, :])
    plt.title('Original faces')
    plt.gca().set_aspect('equal', adjustable='box')

    # Display reconstructed data using only K eigenfaces
    plt.subplot(1, 2, 2)
    displayData(X_rec[:100, :])
    plt.title('Recovered faces')
    plt.gca().set_aspect('equal', adjustable='box')
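# For reference, a minimal sketch of what recoverData might look like (the
# graded version lives in recoverData.py). Because the columns of U are
# orthonormal, multiplying by U[:, :K].T maps each projection back to the
# original space as the best rank-K approximation of the normalized data.
import numpy as np

def recoverData(Z, U, K):
    # Map each K-dimensional projection back to n dimensions;
    # X_rec has shape (m, n) and only approximates X_norm.
    return Z @ U[:, :K].T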
def polyFeatures(X, p):
    # Map X (m x 1) onto polynomial features [X, X**2, ..., X**p].
    X_poly = X
    for i in range(2, p + 1):
        X_poly = np.hstack((X_poly, X**i))
    return X_poly

p = 8

# Map X onto polynomial features and normalize
X_poly = polyFeatures(X, p)
X_poly, mu, sigma = utils.featureNormalize(X_poly)
X_poly = np.concatenate([np.ones((m, 1)), X_poly], axis=1)

# Map X_poly_test and normalize (using mu and sigma from the training set)
X_poly_test = polyFeatures(Xtest, p)
X_poly_test -= mu
X_poly_test /= sigma
X_poly_test = np.concatenate([np.ones((ytest.size, 1)), X_poly_test], axis=1)

# Map X_poly_val and normalize (using mu and sigma from the training set)
X_poly_val = polyFeatures(Xval, p)
X_poly_val -= mu
X_poly_val /= sigma
X_poly_val = np.concatenate([np.ones((Xval.shape[0], 1)), X_poly_val], axis=1)
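# The key point above is that mu and sigma come from the training set and are
# re-used for the test and validation sets. A minimal sketch of what
# utils.featureNormalize might look like under that contract (the repo's own
# version may differ, e.g. in using ddof=1 for the standard deviation):
import numpy as np

def featureNormalize(X):
    # Normalize each column to zero mean and unit standard deviation,
    # returning the statistics so other splits can be scaled identically.
    mu = np.mean(X, axis=0)
    sigma = np.std(X, axis=0)
    return (X - mu) / sigma, mu, sigma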
# Visualize the example dataset
pyplot.plot(X[:, 0], X[:, 1], 'bo', ms=10, mec='k', mew=1)
pyplot.axis([0.5, 6.5, 2, 8])
pyplot.gca().set_aspect('equal')
pyplot.grid(False)
pyplot.show()

## =============== Part 2: Principal Component Analysis ===============
# You should now implement PCA, a dimension reduction technique.
# Complete the code in pca.py first.
# print('Running PCA on example dataset.')

# Before running PCA, it is important to first normalize X
# (featureNormalize also returns mu, the mean of each feature).
X_norm, mu, sigma = utils.featureNormalize(X)

# Run PCA
U, S = pca.pca(X_norm)

# Draw the eigenvectors centered at the mean of the data. These lines show
# the directions of maximum variation in the dataset.
fig, ax = pyplot.subplots()
ax.plot(X[:, 0], X[:, 1], 'bo', ms=10, mec='k', mew=0.25)
for i in range(2):
    ax.arrow(mu[0], mu[1], 1.5 * S[i] * U[0, i], 1.5 * S[i] * U[1, i],
             head_width=0.25, head_length=0.2, fc='k', ec='k')
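# For reference, a minimal sketch of what pca.pca might implement (the graded
# version belongs in pca.py). It assumes X is already mean-normalized, so the
# covariance matrix is (1/m) * X.T @ X; the SVD of this symmetric matrix
# yields the principal directions U and the corresponding variances S.
import numpy as np

def pca(X):
    m = X.shape[0]
    Sigma = (X.T @ X) / m           # covariance of the normalized data
    U, S, _ = np.linalg.svd(Sigma)  # columns of U = principal directions
    return U, S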
print('Loading data...')
X = []
Y = []
with open('ex1data3.csv') as f:
    csv_reader = csv.reader(f, delimiter=',')
    for row in csv_reader:
        X.append([row[0], row[1]])
        Y.append([row[2]])
X = np.asarray(X).astype(float)  # np.float is deprecated; use the builtin
Y = np.asarray(Y).astype(float)
m = Y.size

# Task 1 - Feature normalization
print('Normalizing features...')
X, mu, sigma = ut.featureNormalize(X)
ones_column = np.ones((m, 1))
X = np.hstack((ones_column, X))

# Task 2 - Gradient descent
print('Running gradient descent...')
alpha = 0.01
num_iters = 400
theta = np.zeros((3, 1))
theta, J_history = ut.gradient_descent(X, Y, theta, alpha, num_iters)

fig = plt.figure()
ax = plt.axes()
plt.plot(np.arange(J_history.size), J_history)
ax.set_xlabel("Number of iterations")
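# A minimal sketch of what ut.gradient_descent might implement, assuming the
# usual batch update theta := theta - (alpha / m) * X.T @ (X @ theta - Y) and
# a recorded cost history (the repo's own version may differ in details):
import numpy as np

def gradient_descent(X, Y, theta, alpha, num_iters):
    # X is (m, n+1) with a leading column of ones; Y is (m, 1).
    m = Y.size
    J_history = np.zeros(num_iters)
    for it in range(num_iters):
        error = X @ theta - Y                         # (m, 1) residuals
        J_history[it] = np.sum(error ** 2) / (2 * m)  # cost before the update
        theta = theta - (alpha / m) * (X.T @ error)   # batch update step
    return theta, J_history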