def compress(self, X):
    n = X.shape[0]

    # Compute Euclidean distances
    D = utils.euclidean_dist_squared(X, X)
    D = np.sqrt(D)  # D is a symmetric distance matrix

    geoD = np.zeros((n, n))

    # Find the nearest neighbours of each point
    for i in range(n):
        sort = np.argsort(D[:, i])
        # keep the nn nearest indices, excluding i itself
        neigh = np.setdiff1d(sort[0:self.nn + 1], i)
        for j in range(len(neigh)):
            t = neigh[j]
            geoD[i, t] = D[i, t]
            geoD[t, i] = D[t, i]

    # Geodesic distances: shortest paths through the neighbour graph
    D = utils.dijkstra(geoD)

    # For disconnected vertices (distance is Inf), set their distance to the
    # maximum distance in the graph, to encourage them to be far apart.
    D[np.isinf(D)] = D[~np.isinf(D)].max()

    # Initialize low-dimensional representation with PCA
    pca = PCA(self.k)
    pca.fit(X)
    Z = pca.compress(X)

    # Solve for the minimizer
    z, f = findMin(self._fun_obj_z, Z.flatten(), 500, D)
    Z = z.reshape(n, self.k)
    return Z

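# The compress variants in this file pass self._fun_obj_z to findMin but never
# show it. As a point of reference, here is a minimal sketch of such an
# objective: the multi-dimensional-scaling "stress", sum over pairs of
# (||z_i - z_j|| - D_ij)^2, together with its gradient. The method name,
# its signature, and the assumption that findMin expects a (f, g) pair with a
# flattened parameter vector are inferred from the call sites, not taken from
# the original implementation.

def _fun_obj_z(self, z, D):
    n = D.shape[0]
    Z = z.reshape(n, self.k)

    f = 0.0
    g = np.zeros((n, self.k))
    for i in range(n):
        for j in range(i + 1, n):
            diff = Z[i] - Z[j]
            dz = np.sqrt(np.sum(diff ** 2))
            r = dz - D[i, j]  # residual between embedded and target distance
            f += r ** 2
            if dz > 0:
                g[i] += 2 * r * diff / dz
                g[j] -= 2 * r * diff / dz

    return f, g.flatten()
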
def compress(self, X):
    n = X.shape[0]

    # Compute Euclidean distances
    D = utils.euclidean_dist_squared(X, X)
    D = np.sqrt(D)

    # Construct nearest-neighbour graph
    G = np.zeros([n, n])
    for i in range(n):
        neighbours = np.argsort(D[i])[:self.nn + 1]
        for j in neighbours:
            G[i, j] = D[i, j]
            G[j, i] = D[j, i]

    # Compute ISOMAP (geodesic) distances
    D = utils.dijkstra(G)

    # If two points are disconnected (distance is Inf)
    # then set their distance to the maximum
    # distance in the graph, to encourage them to be far apart.
    D[np.isinf(D)] = D[~np.isinf(D)].max()

    # Initialize low-dimensional representation with PCA
    pca = PCA(self.k)
    pca.fit(X)
    Z = pca.compress(X)

    # Solve for the minimizer
    z, f = findMin(self._fun_obj_z, Z.flatten(), 500, D)
    Z = z.reshape(n, self.k)
    return Z

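# utils.dijkstra(G) is used here (and in the version above) as an all-pairs
# shortest-path routine over the weighted neighbour graph, where, in the graphs
# built above, a zero entry means "no edge"; some versions further down call it
# per pair instead, as utils.dijkstra(G, i, j). The helper itself is not shown;
# the following is only an illustration of the all-pairs form using SciPy, not
# the actual implementation.

from scipy.sparse import csr_matrix
from scipy.sparse.csgraph import dijkstra as csgraph_dijkstra

def all_pairs_graph_distances(G):
    # csr_matrix drops the zero entries, so only the stored neighbour edges are
    # used; unreachable pairs come back as np.inf, which the callers above
    # replace with the largest finite distance.
    return csgraph_dijkstra(csr_matrix(G), directed=False)
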
def compress(self, X):
    n = X.shape[0]

    # Compute Euclidean distances
    D = utils.euclidean_dist_squared(X, X)
    D = np.sqrt(D)

    # Build the nearest-neighbour graph: keep edges to each point's nn nearest
    # neighbours (the first sorted index in each row is the point itself).
    sorted_indices = np.argsort(D)
    G = np.zeros((n, n))
    for i in range(n):
        for j in range(self.nn + 1):
            G[i, sorted_indices[i, j]] = D[i, sorted_indices[i, j]]
            G[sorted_indices[i, j], i] = D[sorted_indices[i, j], i]

    # Geodesic distances via shortest paths through the graph
    dist = utils.dijkstra(G)

    # Disconnected points (Inf) get the maximum finite distance
    dist[np.isinf(dist)] = dist[~np.isinf(dist)].max()

    # Initialize low-dimensional representation with PCA
    pca = PCA(self.k)
    pca.fit(X)
    Z = pca.compress(X)

    # Solve for the minimizer
    z, f = findMin(self._fun_obj_z, Z.flatten(), 500, dist)
    Z = z.reshape(n, self.k)
    return Z

def compress(self, X):
    n = X.shape[0]

    # Compute Euclidean distances
    D = utils.euclidean_dist_squared(X, X)
    D = np.sqrt(D)

    # Replace Euclidean distances with geodesic (graph) distances
    D = self.construct_dist_graph(X, D)

    # If two points are disconnected (distance is Inf)
    # then set their distance to the maximum
    # distance in the graph, to encourage them to be far apart.
    D[np.isinf(D)] = D[~np.isinf(D)].max()

    # Initialize low-dimensional representation with PCA
    pca = PCA(self.k)
    pca.fit(X)
    Z = pca.compress(X)

    # Solve for the minimizer
    z, f = findMin(self._fun_obj_z, Z.flatten(), 500, D)
    Z = z.reshape(n, self.k)
    return Z

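# The helper called above is not included in this snippet. A minimal sketch of
# what construct_dist_graph might look like, modelled on the neighbour-graph /
# shortest-path code in the other versions; the method name and signature come
# from the call site, everything else is an assumption.

def construct_dist_graph(self, X, D):
    n = X.shape[0]
    G = np.zeros((n, n))

    # Keep an edge to each point's nn nearest neighbours
    # (the closest index is the point itself, hence nn + 1).
    for i in range(n):
        neighbours = np.argsort(D[i])[:self.nn + 1]
        for j in neighbours:
            G[i, j] = D[i, j]
            G[j, i] = D[j, i]

    # Geodesic distances are shortest paths through the graph.
    return utils.dijkstra(G)
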
def compress(self, X):
    n = X.shape[0]
    k = self.k

    # Compute Euclidean distances
    D = utils.euclidean_dist_squared(X, X)
    D = np.sqrt(D)

    # Note: no neighbour graph or shortest-path step here, so this version
    # reduces to metric MDS on the raw Euclidean distances.

    # Initialize low-dimensional representation with PCA
    pca = PCA(k)
    pca.fit(X)
    Z = pca.compress(X)

    # Solve for the minimizer
    z, f = findMin(self._fun_obj_z, Z.flatten(), 500, D)
    Z = z.reshape(n, k)
    return Z

def compress(self, X):
    n = X.shape[0]

    # Compute Euclidean distances
    D = utils.euclidean_dist_squared(X, X)
    D = np.sqrt(D)
    np.fill_diagonal(D, np.inf)  # so a point is never its own neighbour

    # Find the neighbours of each point
    G = np.zeros((n, n))
    for i in range(n):
        neighbours = np.argsort(D[:, i])
        # keep only the self.nn nearest (the point itself sorts last
        # because its diagonal entry was set to Inf above)
        for j in neighbours[:self.nn]:
            G[i, j] = D[i, j]
            G[j, i] = D[j, i]

    # Weighted shortest path between points (Dijkstra's algorithm)
    D = np.zeros((n, n))
    for i in range(n):
        for j in range(i + 1, n):
            D[i, j] = utils.dijkstra(G, i, j)
            D[j, i] = D[i, j]  # distances are symmetric

    # If two points are disconnected (distance is Inf)
    # then set their distance to the maximum
    # distance in the graph, to encourage them to be far apart.
    D[np.isinf(D)] = D[~np.isinf(D)].max()

    # Initialize low-dimensional representation with PCA
    pca = PCA(self.k)
    pca.fit(X)
    Z = pca.compress(X)

    # Solve for the minimizer
    z, f = findMin(self._fun_obj_z, Z.flatten(), 500, D)
    Z = z.reshape(n, self.k)
    return Z

def compress(self, X):
    n = X.shape[0]

    # Compute Euclidean distances
    D = utils.euclidean_dist_squared(X, X)
    D = np.sqrt(D)

    # Build the nearest-neighbour graph: an edge (i, j) is kept only if
    # D[i, j] is among the nn + 1 smallest distances in row i.
    G = np.full((n, n), np.inf)
    for i in range(n):
        temp = sorted(D[i])
        for j in range(n):
            if D[i][j] in temp[:(self.nn + 1)]:
                G[i][j] = D[i][j]

    # Geodesic distance between every pair via Dijkstra's algorithm
    for i in range(n):
        for j in range(n):
            D[i][j] = utils.dijkstra(G, i, j)

    # If two points are disconnected (distance is Inf)
    # then set their distance to the maximum
    # distance in the graph, to encourage them to be far apart.
    D[np.isinf(D)] = D[~np.isinf(D)].max()

    # Initialize low-dimensional representation with PCA
    pca = PCA(self.k)
    pca.fit(X)
    Z = pca.compress(X)

    # Solve for the minimizer
    z, f = findMin(self._fun_obj_z, Z.flatten(), 500, D)
    Z = z.reshape(n, self.k)
    return Z

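# For context, any of the compress variants above would typically be driven
# along these lines. The class name ISOMAP and its constructor arguments
# (k, nn) are assumptions inferred from the self.k / self.nn attributes used
# above, not names confirmed by the snippets themselves.

dataset = load_dataset('animals.pkl')
X = utils.standardize_cols(dataset['X'].astype(float))

model = ISOMAP(k=2, nn=3)   # hypothetical constructor
Z = model.compress(X)       # n x 2 embedding

plt.scatter(Z[:, 0], Z[:, 1])
utils.savefig('isomap_animals.png')
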
        for i in range(n):
            plt.annotate(animals[i], (X[i, f1], X[i, f2]))
        utils.savefig('two_random_features.png')

    elif question == '2.2':
        dataset = load_dataset('animals.pkl')
        X = dataset['X'].astype(float)
        animals = dataset['animals']
        n, d = X.shape

        # standardize columns
        X = utils.standardize_cols(X)

        model = PCA(k=2)
        model.fit(X)
        Z = model.compress(X)

        fig, ax = plt.subplots()
        plt.ylabel('z2')
        plt.xlabel('z1')
        ax.scatter(Z[:, 0], Z[:, 1])
        for i in range(n):
            ax.annotate(animals[i], (Z[i, 0], Z[i, 1]))
        utils.savefig('q2_2_PCA_animals.png')

    elif question == '3.1':
        X = load_dataset('highway.pkl')['X'].astype(float) / 255
        n, d = X.shape
        print(n, d)
        h, w = 64, 64  # height and width of each image

                        choices=['1.2', '2.1', '3', '3.1', '3.2'])
    io_args = parser.parse_args()
    question = io_args.question

    if question == '1.2':
        dataset = utils.load_dataset('animals')
        X = dataset['X'].astype(float)
        animals = dataset['animals']
        n, d = X.shape
        k = 5

        X = utils.standardize_cols(X)  # standardize columns

        model = PCA(k=2)
        model.fit(X)
        Z = model.compress(X)

        # Plot the matrix
        plt.imshow(Z)
        utils.savefig('q1_unsatisfying_visualization_1.png')

        # Plot two features of Z, and label all points
        fig, ax = plt.subplots()
        ax.scatter(Z[:, 0], Z[:, 1])
        for i in range(n):
            ax.annotate(animals[i], (Z[i, 0], Z[i, 1]))
        utils.savefig('q1_unsatisfying_visualization_2.png')

        # Fraction of variance explained by the low-dimensional representation
        v = 1 - norm(np.dot(Z, model.W) - X, 'fro')**2 / norm(X, 'fro')**2
        print(v)

print("[+] Processing data...") X = (data[:, 1:].astype(np.int) - 127.5) / 127.5 y = data[:, 0].astype(np.int) print("[+] Running PCA...") pca = PCA() X = pca.fit_compress(X, 500) print("[+] Fitting neural net...") model = NeuralNetwork((500, 300, 100, 10), alpha=8e-2, reg=1e-3, batch_size=60, epochs=3, momentum = 0.8) model.fit(X, y) print("[+] Loading test data...") reader = csv.reader(open("mnist_test.csv", "r")) data = np.array(list(reader)) print("[+] Processing data...") X = (data[:, 1:].astype(np.int) - 127.5) / 127.5 y = data[:, 0].astype(np.int) print("[+] Compressing data...") X = pca.compress(X) print("[+] Making predictions...") predictions = np.array(model.predict(X)) print("[+] Calculating accuracy...") accuracy = sum(predictions == y) / len(y) print(accuracy)
fig = pyplot.figure()
ax = fig.add_subplot(1, 1, 1)
sns.scatterplot(features_tsne[:, 0], features_tsne[:, 1], hue=labels, legend='full')
ax.set_title("T-SNE on Iris Data-Set", fontsize=16)

##################################################

print("Plotting PCA projection of data-set and classifier.")
pca = PCA()
pca.analyze(features)
pca.save("iris_results/iris")
features_compressed = pca.compress(features, 2)

fig = pyplot.figure()
ax = fig.add_subplot(1, 1, 1)
ax.set_title('MLP-Classification of the Iris Data-Set', fontsize=16)
ax.set_xlim([-4.0, 4.0])
ax.set_xlabel("PCA Component 0", fontsize=12)
ax.set_ylim([-1.5, 1.5])
ax.set_ylabel("PCA Component 1", fontsize=12)

# Evaluate the classifier on a grid in the 2-D PCA space, mapping each grid
# point back to the original feature space before predicting.
XX, YY = np.meshgrid(np.arange(*ax.get_xlim(), 0.005), np.arange(*ax.get_ylim(), 0.005))
XY = np.vstack((XX.ravel(), YY.ravel())).T
ZZ = np.argmax(model.predict(pca.decompress(XY)), axis=1).reshape(XX.shape)
ax.contourf(XX, YY, ZZ + 1e-6, levels=3, colors=['g', 'b', 'r'], alpha=0.2)

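# pca.decompress(XY) above maps each 2-D grid point back into the original
# iris feature space so the trained classifier can score it. Using the
# (assumed) attribute names from the PCASketch sketch earlier, that inverse
# map is just the transpose of the projection plus the mean:

def decompress(self, Z):
    # Approximate reconstruction of original-space points from PCA coordinates.
    return Z @ self.W + self.mean
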
for d in dimensions[:-1]:
    name += '_' + str(d)
print(name)

##################################################

pca = PCA()
new_pca = False
if new_pca:
    eigs = pca.analyze(samples_train)
    pca.save("faces_results/faces")
else:
    pca.load("faces_results/faces")

samples_train_compressed = pca.compress(samples_train, dimensionality=dimensions[0])
samples_test_compressed = pca.compress(samples_test, dimensionality=dimensions[0])

##################################################

mlp = MLP(dimensions)
new_mlp = False
if new_mlp:
    mlp.train(samples_train_compressed, targets_train, max_epochs=200, step=0.1, gain=0.9)
    mlp.save(name)
else: