"""Check how PCA output on the raw MNIST data varies with ``random_state``.

The script fits a full-SVD PCA repeatedly, passing the loop index as
``random_state``.  For every fit it logs the smallest pairwise distance
between the projected samples and the largest absolute element-wise
difference to the projection obtained on the previous pass.
"""
import logging

import numpy as np
from scipy.spatial import distance
from sklearn.decomposition import PCA

import generate_data
import settings

logging.basicConfig(level=logging.INFO)

# Number of PCA fits to run; pass ``i`` uses ``random_state=i``.
NUM_TRIALS = 1000

X_mnist_raw = generate_data.load_x_mnist_raw()

# Plain indexing on purpose: the earlier ``.get(key, parameters[key])`` form
# evaluated the indexed default eagerly, so it raised on a missing key anyway
# and the ``.get`` call added nothing.
num_pca_dimensions = settings.parameters["num_pca_dimensions"]
# Read from settings but not used below: the loop passes its own index as the
# seed.
pca_random_seed = settings.parameters["pca_random_seed"]

# Baseline for the first comparison.  Because it is all zeros, the
# "dist to old" value logged on the first pass is simply the largest absolute
# coordinate of the first projection, not a difference between two fits.
X_mnist_old = np.zeros((X_mnist_raw.shape[0], num_pca_dimensions))

for i in range(NUM_TRIALS):
    # NOTE(review): scikit-learn documents random_state as used only by the
    # 'arpack' and 'randomized' solvers, so with svd_solver='full' the seed is
    # expected to have no effect - confirm against the installed version.
    mnist_pca = PCA(n_components=num_pca_dimensions, svd_solver='full',
                    random_state=i)
    X_mnist = mnist_pca.fit_transform(X_mnist_raw)

    # Condensed vector of all pairwise distances between projected samples.
    D = distance.pdist(X_mnist)
    min_dist = np.min(D)

    logging.info(
        "After PCA - minimum distance between samples is %f, dist to old %f",
        min_dist, np.max(np.abs(X_mnist_old - X_mnist)))

    # Keep this projection as the reference for the next pass.
    X_mnist_old = X_mnist
import generate_data import settings import pandas as pd import numpy as np import matplotlib.pyplot as plt import settings parameters = settings.parameters X_mnist_raw = generate_data.load_x_mnist_raw(parameters=parameters) letters_A, letters_A_raw = generate_data.load_A_letters(parameters=parameters) print(letters_A_raw.shape, np.max(letters_A_raw[0, :]), np.min(letters_A_raw[0, :])) width = 10 #total number to show in one row start_index = 0 height = 10 # Number of rows /2 to show. half will go to labels, half to pictures. f, ax = plt.subplots(height, width) f.set_size_inches(16, 16) f.subplots_adjust() for i in range(int(height)): for j in range(width): ax[i][j].imshow(letters_A_raw[start_index + width * i + j, :].reshape( 28, 28), cmap='gray_r') #ax[2*i+1][j].text(text=str(letters_A_labels[start_index + width*i + j]) #str(chr(