def get_mnist():
    """Load MNIST and return it as lists of column-vector pairs.

    Returns:
        A ``(training_data, testing_data)`` tuple. Each element is a list of
        ``(pixels, label)`` pairs where ``pixels`` is a ``(784, 1)`` float32
        array scaled by 1/255 and ``label`` is a ``(10, 1)`` array holding a
        single 1 at the index of the digit.
    """
    loader = MNIST('../datasets/MNIST')
    print("Loading Data ... ")
    raw_train_x, raw_train_y = loader.load_training()
    raw_test_x, raw_test_y = loader.load_testing()

    def as_pixels(images):
        # Pixel values are divided by 255 after the cast to float32.
        return np.asarray(images).astype(np.float32) / 255

    def as_digits(digits):
        return np.asarray(digits).astype(np.int32)

    train_pixels, train_digits = as_pixels(raw_train_x), as_digits(raw_train_y)
    test_pixels, test_digits = as_pixels(raw_test_x), as_digits(raw_test_y)

    print("formatting data")

    def one_hot(digit):
        # Column vector of zeros with a single 1 in row `digit`.
        column = np.zeros((10, 1))
        column[digit] = 1
        return column

    def pair_up(pixels, digits):
        targets = [one_hot(digit) for digit in digits]
        return [
            (image.reshape(784, 1), target)
            for image, target in zip(pixels, targets)
        ]

    return (pair_up(train_pixels, train_digits),
            pair_up(test_pixels, test_digits))
def MnistData(filepath = mnistpath):
    '''Loads Mnist and returns both splits as pytorch tensors

    Input
    -----
    filepath - 'string'
        Location of the MNIST dataset, handed straight to ``MNIST``

    Output
    ------
    Xtrain - 'torch.tensor'
        Training images, divided by 255.0
    trainLabels - 'torch.tensor'
        Training labels (not rescaled)
    Xtest - 'torch.tensor'
        Test images, divided by 255.0
    testLabels - 'torch.tensor'
        Test labels (not rescaled)
    '''
    loader = MNIST(filepath)

    # Each loader call yields an (images, labels) pair; both parts become tensors.
    train_images, trainLabels = (
        torch.tensor(part) for part in loader.load_training()
    )
    test_images, testLabels = (
        torch.tensor(part) for part in loader.load_testing()
    )

    # Only the images are normalised.
    return train_images / 255.0, trainLabels, test_images / 255.0, testLabels
def main():
    """Plot a PCA demo and scree plot for one clean and one noisy MNIST digit.

    Loads MNIST from ``./Data/``, normalises the training images, and passes
    the sample at index 1 to ``convert_pca``, ``plot_demo`` and
    ``plot_scree``. The same steps are then repeated on a copy of the raw
    training images that went through ``create_noise``. The helpers are
    defined elsewhere; ``convert_pca`` is only known here to return a pair
    that is forwarded to the two plotting functions.
    """
    # MNIST path data
    loader = MNIST('./Data/')
    raw_train, train_labels = loader.load_training()
    raw_test, test_labels = loader.load_testing()
    raw_train = np.asarray(raw_train)
    raw_test = np.asarray(raw_test)

    # --- clean images ------------------------------------------------------
    clean = preprocessing.normalize(raw_train)
    train_labels = np.asarray(train_labels)

    reconstruction, spectrum = convert_pca(clean[1])
    digit = np.array(clean[1]).reshape(28, 28)
    plot_demo(digit, reconstruction)
    # The scree plot is drawn from the second value returned by convert_pca.
    plot_scree(spectrum)

    # --- noisy images ------------------------------------------------------
    # Noise is applied to the raw (un-normalised) images, then normalised.
    noisy = preprocessing.normalize(np.array(create_noise(raw_train)))

    reconstruction, spectrum = convert_pca(noisy[1])
    digit = np.array(noisy[1]).reshape(28, 28)
    plot_demo(digit, reconstruction, fname='Noise_Comparison')
    plot_scree(spectrum, n=15, name='Scree_Plot_Noise')
def get_mnist_data():
    """Load MNIST from ``python-mnist/data`` as numpy arrays.

    Returns:
        ``(train_X, train_y, test_X, test_y)`` where the image arrays are
        reshaped to ``(-1, 28, 28)`` and the label arrays are left flat.
    """
    loader = MNIST('python-mnist/data')
    flat_train, train_labels = loader.load_training()
    flat_test, test_labels = loader.load_testing()

    def as_image_stack(flat_images):
        # One 28x28 grid per sample; the sample count is inferred.
        return np.array(flat_images).reshape(-1, 28, 28)

    return (
        as_image_stack(flat_train),
        np.array(train_labels),
        as_image_stack(flat_test),
        np.array(test_labels),
    )
def __init__(self, cv_iters):
    """
    Load MNIST from ``data`` and set up a binary "is this an 8?" problem.

    The training and testing splits are concatenated into one pool:
    ``self.features`` holds all images as an array and ``self.labels`` is a
    boolean column of shape (-1, 1) that is True where the digit equals 8.
    The pool is shuffled with ``self.shuffle()`` before returning.

    cv_iters -- number of cross validation folds; must be greater than 2.
    """
    assert cv_iters > 2, 'Cross validation folds must be more than 2 folds'
    self.cv_iters = cv_iters

    loader = MNIST('data')
    train_images, train_labels = loader.load_training()
    test_images, test_labels = loader.load_testing()

    # Merge both splits before converting to arrays.
    self.features = np.array(train_images + test_images)
    all_digits = np.array(train_labels + test_labels)

    # Original note: 6825 samples of 8 in a total of 70000 samples.
    self.labels = np.reshape(all_digits, (-1, 1)) == 8
    self.shuffle()
def load_dataset(ds_path, training=True, mnist_format=True):
    """Load one split of a digit dataset as an ``(images, labels)`` pair.

    Args:
        ds_path: Directory holding the dataset. For ``mnist_format`` it is
            passed to ``MNIST``; otherwise it must contain ``training.json``
            and ``testing.json``.
        training: True to load the training split, False for the test split.
        mnist_format: True to read the files through ``MNIST``; False to read
            a JSON object whose keys are Python-literal strings of the images
            and whose values are the labels.

    Returns:
        A tuple ``(imgs, labels)`` for the requested split.

    Raises:
        OSError: if the JSON file for the requested split cannot be opened.
    """
    if mnist_format:
        dataset = MNIST(ds_path)
        if training:
            return (dataset.load_training()[0], dataset.train_labels)
        return (dataset.load_testing()[0], dataset.test_labels)

    # Fix: the test split was previously read from training.json as well,
    # so "test" results were silently computed on the training data.
    split_file = "/training.json" if training else "/testing.json"
    with open(ds_path + split_file) as split_json:
        dataset = json.load(split_json)

    # Keys are stringified images; literal_eval parses them without eval().
    imgs = [ast.literal_eval(img) for img in dataset]
    labels = list(dataset.values())
    return (imgs, labels)
def MnistData(filepath=mnistpath):
    '''Loads Mnist, scales the images and one hot encodes the labels

    Input
    -----
    filepath - 'string'
        Location of the MNIST dataset, handed straight to ``MNIST``

    Output
    ------
    Xtrain - 'np.array'
        Training images divided by 255.0
    Ytrain - 'np.array'
        One-hot encoded training labels, shape (n, k)
    labels_train - 'np.array'
        Training labels as loaded
    Xtest - 'np.array'
        Test images divided by 255.0
    Ytest - 'np.array'
        One-hot encoded test labels, shape (m, k)
    labels_test - 'np.array'
        Test labels as loaded
    '''
    loader = MNIST(filepath)
    Xtrain, labels_train = (np.array(part) for part in loader.load_training())
    Xtest, labels_test = (np.array(part) for part in loader.load_testing())

    # normalize dataset
    Xtrain = Xtrain / 255.0
    Xtest = Xtest / 255.0

    n, _ = Xtrain.shape          # number of training observations
    k = labels_train.max() + 1   # number of classes, taken from the train labels
    m = len(labels_test)         # number of test observations

    def one_hot(labels, rows):
        # Row i gets a single 1 in column labels[i].
        encoded = np.zeros((rows, k))
        encoded[np.arange(rows), labels] = 1
        return encoded

    Ytrain = one_hot(labels_train, n)
    Ytest = one_hot(labels_test, m)
    return Xtrain, Ytrain, labels_train, Xtest, Ytest, labels_test
def EncodeMnist(values=(2, 7), encodingVal=(-1, 1), filepath=mnistpath):
    '''Loads Mnist, restricts it to two digits and re-codes their labels

    The filtering is delegated to ``binaryData`` and the re-coding to
    ``binarycode``; both are defined elsewhere, so their exact behaviour
    should be checked there.

    Input
    -----
    values - 'list or tuple'
        Two digits (0-9) to keep, forwarded to ``binaryData`` and ``binarycode``
    encodingVal - 'list or tuple'
        Target codes forwarded to ``binarycode``
    filepath - 'string'
        Location of the MNIST dataset (defaults to the global ``mnistpath``)

    Output
    ------
    XtrainC - 'np.array'
        Training images returned by ``binaryData``
    Ytrainlab - 'np.array'
        Encoded training labels returned by ``binarycode``
    XtestC - 'np.array'
        Test images returned by ``binaryData``
    Ytestlab - 'np.array'
        Encoded test labels returned by ``binarycode``
    '''
    loader = MNIST(filepath)
    train_pixels, train_digits = (
        np.array(part) for part in loader.load_training()
    )
    test_pixels, test_digits = (
        np.array(part) for part in loader.load_testing()
    )

    # normalize dataset
    train_pixels = train_pixels / 255.0
    test_pixels = test_pixels / 255.0

    XtrainC, kept_train = binaryData(values, train_pixels, train_digits)
    XtestC, kept_test = binaryData(values, test_pixels, test_digits)

    return (
        XtrainC,
        binarycode(kept_train, values, encodingVal),
        XtestC,
        binarycode(kept_test, values, encodingVal),
    )
# set1 = [ # SimpleExample([1, 1, 1], 1), # SimpleExample([0, 1, 1], 1), # SimpleExample([1, 1, 0], 0), # SimpleExample([0, 1, 0], 0), # SimpleExample([0, 0, 1], 0) # ] # pn = PerceptronNetwork(3, 1) # for i in range(100): # pn.fit(set1) # print(pn.testClassifier(set1)) mndata = MNIST(".") # mndata.load_training() mndata.load_testing() examples = [] # size = len(mndata.test_images) size = 50 for i in range(size): inpt = mndata.test_images[i] outpt = mndata.test_labels[i] example = SimpleExample(inpt, outpt) examples.append(example) pn = PerceptronNetwork(784, 10, 300) pn.fit(examples) print(pn.testClassifier(examples))
import numpy as np from deep_neural_network_batch_normalization import * from nn_utils import * from mnist.loader import MNIST # Loading data. db = '/Users/aclaudioquiros/Documents/NN Data/Data/MNIST_database/' mndata = MNIST(db) images, labels = mndata.load_training() images_test, labels_test = mndata.load_testing() images = np.array(images).T images_test = np.array(images_test).T labels = onehot(labels, images.shape) labels_test = onehot(labels_test, images_test.shape) images = normalize(images) images_test = normalize(images_test) # Playing with data to over fit model. # samples = 1e+3 # images = images[:, :int(samples)] # labels = labels[:, :int(samples)] # images = np.random.normal(0, 1, size=(400, 1000)) # layer_dim = [images.shape[0], 400, 400, 400, 400, 400, 400, 400, 400, 400, labels.shape[0]] # activations = [None, 'relu', 'relu', 'relu', 'relu', 'relu', 'relu', 'relu', 'relu', 'relu', 'softmax'] layer_dim = [images.shape[0], 125, 40, labels.shape[0]] activations = [None, 'relu', 'relu', 'softmax'] deep_nn = NeuralNetwork(layer_dim, activations, learning_rate=0.2, num_iterations=1000,
from keras.layers import Dense, Activation
from keras import optimizers, regularizers
from keras.models import Sequential
import time
import numpy as np
import cv2
from mnist.loader import MNIST

# MNIST files are read from ./data.
m = MNIST('./data')

import os
# NOTE(review): this is set after keras has already been imported - confirm
# that the device selection still takes effect at that point.
os.environ["CUDA_VISIBLE_DEVICES"] = "1"

classes = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]

# Load both splits and convert everything (labels included) to float32.
x_train, y_train = m.load_training()
x_test, y_test = m.load_testing()
x_train = np.asarray(x_train).astype(np.float32)
y_train = np.asarray(y_train).astype(np.float32)
x_test = np.asarray(x_test).astype(np.float32)
y_test = np.asarray(y_test).astype(np.float32)
n_classes = len(classes)

# One-hot encoding: row i of label_* gets a 1 in the column given by the
# i-th label. The labels are cast back to int64 so they can index columns.
label_train = np.zeros((y_train.shape[0], n_classes))
a = np.arange(y_train.shape[0], dtype=np.int64)
b = np.array(y_train, dtype=np.int64).reshape((y_train.shape[0], ))
label_train[a, b] = 1

label_test = np.zeros((y_test.shape[0], n_classes))
c = np.arange(y_test.shape[0], dtype=np.int64)
d = np.array(y_test, dtype=np.int64).reshape((y_test.shape[0], ))
label_test[c, d] = 1
#%% from mnist.loader import MNIST import base64 import struct #%% mndata = MNIST("./data") images, labels = mndata.load_testing() num_images = len(images) print(num_images) #%% def map_pixel_float(p): return float(p) / 256.0 #%% def image_to_float(image): return map(map_pixel_float, image) #%%
Next, we explore the use of an affine transformation of the pixel values from d features to p features in an effor to increase our accuracy. We finally see performance of choosing a p and its test accuracy. @author: Christopher Salazar """ import numpy as np from mnist.loader import MNIST import matplotlib.pyplot as plt # Load the MNIST Dataset mndata = MNIST( r'C:\Users\salez\Documents\MISE Work\CSE 546\Homework\HW1\Programming\mnist' ) X_train, labels_train = map(np.array, mndata.load_training()) X_test, labels_test = map(np.array, mndata.load_testing()) X_train = X_train / 255.0 # normalize dataset X_test = X_test / 255.0 # Transform X_train and X_test into corresponding Y form d = len(X_train[0]) # Number of outputs k = 10 # Construct Y_train array per the form required for closed form solution n_train = len(X_train) Y_train = np.zeros((n_train, k)) for i in range(n_train): Y_train[i][labels_train[i]] = 1
def main():
    """Score KNN, random forest and decision tree on noisy MNIST.

    Loads MNIST from ``./Data/``, applies ``create_noise`` to both splits,
    normalises them, and obtains a second feature set from
    ``create_training_and_testing_data`` (called with ``n=15``; judging by
    the variable names this is a PCA projection - defined elsewhere). Each
    classifier is fitted once on the noisy features and once on the second
    feature set, and the test scores are written side by side to
    ``KNNAccuracies_noise.csv``, ``RandomForestAccuracies_noise.csv`` and
    ``DecisionTreeAccuracies_noise.csv``. KNN is scored once; the two tree
    based models are scored nine times each.
    """
    # MNIST path data
    loader = MNIST('./Data/')
    train_raw, train_targets = loader.load_training()
    test_raw, test_targets = loader.load_testing()
    train_raw = np.asarray(train_raw)
    test_raw = np.asarray(test_raw)

    # Noise is added to the raw pixels, before any normalisation.
    train_noisy = create_noise(train_raw)
    test_noisy = create_noise(test_raw)
    train_targets = np.asarray(train_targets)
    test_targets = np.asarray(test_targets)

    # Normalize data. The clean images are normalised as well, although only
    # the noisy sets are used below.
    preprocessing.normalize(train_raw)
    preprocessing.normalize(test_raw)
    train_noisy = preprocessing.normalize(train_noisy)
    test_noisy = preprocessing.normalize(test_noisy)

    train_pca, test_pca = create_training_and_testing_data(
        train_noisy, train_targets, test_noisy, test_targets, n=15)

    def score_pair(make_model):
        # Fit and score on the noisy features first, then on the PCA features.
        plain_model = make_model().fit(train_noisy, train_targets)
        plain_score = plain_model.score(test_noisy, test_targets)
        pca_model = make_model().fit(train_pca, train_targets)
        return plain_score, pca_model.score(test_pca, test_targets)

    def dump_scores(pairs, csv_name):
        # One CSV row per run, with the two scores side by side.
        plain_scores = np.array([plain for plain, _ in pairs])
        pca_scores = np.array([reduced for _, reduced in pairs])
        pd.DataFrame({
            'Scores': plain_scores,
            'PCA_Scores': pca_scores
        }).to_csv(csv_name, index=False)

    print('Starting classification')

    # KNN
    print('Starting KNN')
    dump_scores([score_pair(KNeighborsClassifier)], 'KNNAccuracies_noise.csv')

    # Random Forest Accuracies
    forest_runs = []
    for n in range(1, 10):
        print('Iteration {} of Random Forest Classifier'.format(n))
        forest_runs.append(
            score_pair(lambda: RandomForestClassifier(n_estimators=100)))
    dump_scores(forest_runs, 'RandomForestAccuracies_noise.csv')

    # Decision Tree Accuracies
    tree_runs = []
    for n in range(1, 10):
        print('Iteration {} of Decision Tree Accuracies'.format(n))
        tree_runs.append(score_pair(tree.DecisionTreeClassifier))
    dump_scores(tree_runs, 'DecisionTreeAccuracies_noise.csv')
# Run configuration. learnRate, batch and runs are set before this point.
loss = "mse"
opt = "gd"
io = "../networks/mnist.obj"  # path given to getNetwork and writeNetwork
graph = False
layers = [16, 16, 10]

# Echo the configuration.
print(f"learnRate: {learnRate}")
print(f"batch: {batch}")
print(f"runs: {runs}")
print(f"loss: {loss}")
print(f"optimizer: {opt}")
print(f"io: {io}")
print(f"graph: {graph}")

# Training data comes from the MNIST files under ./samples/numbers.
mndata = MNIST('./samples/numbers')
trainData, trainLabels = mndata.load_training()
# Disabled alternative: a random 2-D toy problem.
#trainData = [[(random.random()*2)-1, (random.random()*2)-1] for i in range(1000)]
#trainLabels = [int(i[0]-0.5>i[1] or i[0]+0.5<i[1]) for i in trainData]

# The input width is taken from the first training sample.
input_size = len(trainData[0])
print(f"architecture: {input_size}, {layers}\n")

# Build the network, train it, and write it back to the same path.
# NOTE(review): getNetwork presumably loads an existing network from `io`
# when one exists - confirm against its definition.
convnn = getNetwork(io, learnRate, batch, loss, opt, layers, input_size)
convnn = train(runs, convnn, trainData, trainLabels, batch, graph)
convnn.writeNetwork(io)
print("training done")
print(" ")

# Evaluate on the MNIST test split.
testData, testLabels = mndata.load_testing()
#testData = [[random.random()*2-1, random.random()*2-1] for i in range(20)]
#testLabels = [int(i[0]-0.5>i[1]or i[0]+0.5<i[1]) for i in testData]
results = test(convnn, testData, testLabels)
print(f"final percentage: {results}")
return loss_history class LinearSVM(LinearClassifier): def loss(self, X_batch, y_batch, reg): return compute_loss(self.W, X_batch, y_batch, reg) # In[5]: mndata = MNIST('E:/Grad School/Semester 2/ML/Homeworks/MNIST/') # In[6]: X_train, Y_train = mndata.load_training() #60000 samples X_test, Y_test = mndata.load_testing() #10000 samples # In[7]: X_train = np.asarray(X_train).astype(np.float32) Y_train = np.asarray(Y_train).astype(np.int32) X_test = np.asarray(X_test).astype(np.float32) Y_test = np.asarray(Y_test).astype(np.int32) # In[8]: W = np.random.randn(784, 10) * 0.0001 plotnum1 = [] # In[9]:
predictions[i] = perceptrons[i].predict(data.images[n]) prediction = np.argmax(predictions) if prediction == data.labels[n]: correct += 1 return correct / data.size def make_conf_matrix(data, perceptrons): predictions = np.zeros(CLASSES) matrix = np.zeros((CLASSES, CLASSES)) for n in range(0, data.size): for i in range(0, CLASSES): predictions[i] = perceptrons[i].predict(data.images[n]) prediction = np.argmax(predictions) matrix[prediction, data.labels[n]] += 1 return print(matrix) if __name__ == '__main__': mndata = MNIST('./images/') train_images, train_labels = mndata.load_training() test_images, test_labels = mndata.load_testing() train_data = data.Data() train_data.load(60000, train_images, train_labels) test_data = data.Data() test_data.load(10000, test_images, test_labels) perceps = [perceptron.Perceptron(784) for i in range(CLASSES)] # weights = np.random.rand(10, 785) - .5 train_on_set(train_data, test_data, perceps, ETA, 70) make_conf_matrix(test_data, perceps)
公式リファレンス MNISTデータを使うために "https://pypi.org/project/python-mnist/" を参照してダウンロード、インストールしておく 教師付き次元削減とMetric learning """ import numpy as np from mnist.loader import MNIST import matplotlib.pyplot as plt import seaborn as sns sns.set(style='white', context='poster') mndata = MNIST('/Users/hamamatsuikadaigakubyouribu/UMAP_python/python-mnist/data') mndata train, train_labels = mndata.load_training() test, test_labels = mndata.load_testing() data = np.array(np.vstack([train, test]), dtype=np.float64) / 255.0 target = np.hstack([train_labels, test_labels]) classes = [ 'T-shirt/top', 'Trouser', 'Pullover', 'Dress', 'Coat', 'Sandal', 'Shirt', 'Sneaker', 'Bag', 'Ankle boot'] import umap
xp, yp, xn, yn = createdata(1) crossvalid(xp, yp, m, n, 0, 0) end = time.time() print("Time elapsed while crossvalid: {:.3f} ".format(end - start), "s") # In[44]: d = 784 # read data and preprocess from mnist.loader import MNIST mndata = MNIST('') X_train, Y_train = mndata.load_training() # or X_test, Y_test = mndata.load_testing() xp = np.empty((0, 784), int) yp = np.empty((0, 1), int) xn = np.empty((0, 784), int) yn = np.empty((0, 1), int) X_opt = np.empty((0, 784), int) Y_opt = np.empty((0, 1), int) for i in reversed(range(len(Y_train))): if Y_train[i] == 1: xp = np.append(xp, [X_train[i]], axis=0) yp = np.append(yp, [Y_train[i]]) elif Y_train[i] == 0: xn = np.append(xn, [X_train[i]], axis=0) yn = np.append(yn, [Y_train[i]]) for i in reversed(range(len(Y_test))):