def main():
    """Plot PCA demos and scree plots for one clean and one noisy digit.

    Loads MNIST from ``./Data/``, normalizes the training images and runs
    ``convert_pca`` on the image at index 1, then repeats the same steps on
    the output of ``create_noise``. All results are handed to ``plot_demo``
    and ``plot_scree``; nothing is returned.
    """
    loader = MNIST('./Data/')
    train_raw, train_targets = loader.load_training()
    test_raw, test_targets = loader.load_testing()
    train_pixels = np.asarray(train_raw)
    test_pixels = np.asarray(test_raw)  # loaded but not used further below
    sample_index = 1

    # --- clean image -------------------------------------------------------
    clean = preprocessing.normalize(train_pixels)
    train_targets = np.asarray(train_targets)
    x_hat, D = convert_pca(clean[sample_index])
    plot_demo(np.array(clean[sample_index]).reshape(28, 28), x_hat)
    # The scree plot shows how much of the variance each component carries.
    plot_scree(D)

    # --- noisy image -------------------------------------------------------
    noisy = preprocessing.normalize(np.array(create_noise(train_pixels)))
    x_hat, D = convert_pca(noisy[sample_index])
    plot_demo(
        np.array(noisy[sample_index]).reshape(28, 28),
        x_hat,
        fname='Noise_Comparison',
    )
    plot_scree(D, n=15, name='Scree_Plot_Noise')
def MnistData(filepath=mnistpath):
    '''Load MNIST as pytorch tensors with pixel values divided by 255

    Input
    -----
    filepath - 'string'
        Location of the MNIST dataset (defaults to the module-level
        ``mnistpath``)

    Output
    ------
    Xtrain - 'torch.tensor'
        Training images, divided by 255.0
    trainLabels - 'torch.tensor'
        Training labels
    Xtest - 'torch.tensor'
        Test images, divided by 255.0
    testLabels - 'torch.tensor'
        Test labels
    '''
    loader = MNIST(filepath)
    train_pixels, train_targets = (
        torch.tensor(part) for part in loader.load_training())
    test_pixels, test_targets = (
        torch.tensor(part) for part in loader.load_testing())

    scale = 255.0  # divisor applied to both image tensors
    return (train_pixels / scale, train_targets,
            test_pixels / scale, test_targets)
def MnistData(filepath=mnistpath):
    '''Load the MNIST training file and split its images 50000 / remainder

    Input
    -----
    filepath - 'string'
        Location of the MNIST dataset (defaults to the module-level
        ``mnistpath``)

    Output
    ------
    Xtrain - 'np.array'
        First 50000 training images, divided by 255.0
    Labels - 'np.array'
        Labels for ALL loaded training images (this array is not split)
    Xtest - 'np.array'
        Training images from index 50000 onward, divided by 255.0
    '''
    loader = MNIST(filepath)
    pixels, Labels = (np.array(part) for part in loader.load_training())

    split_at = 50000
    Xtrain = pixels[:split_at] / 255.0
    Xtest = pixels[split_at:] / 255.0
    return Xtrain, Labels, Xtest
def get_mnist():
    """Load MNIST from ``../datasets/MNIST`` as lists of ``(x, y)`` pairs.

    Returns:
        A ``(training_data, testing_data)`` tuple. Each element is a list of
        ``(x, y)`` tuples where ``x`` is a float32 array reshaped to
        ``(784, 1)`` with pixels divided by 255, and ``y`` is a ``(10, 1)``
        array of zeros with a single 1 at the label index.
    """
    loader = MNIST('../datasets/MNIST')
    print("Loading Data ... ")
    raw_train_x, raw_train_y = loader.load_training()
    raw_test_x, raw_test_y = loader.load_testing()

    x_train = np.asarray(raw_train_x).astype(np.float32) / 255
    y_train = np.asarray(raw_train_y).astype(np.int32)
    x_test = np.asarray(raw_test_x).astype(np.float32) / 255
    y_test = np.asarray(raw_test_y).astype(np.int32)

    print("formatting data")

    def as_column_one_hot(digit):
        # (10, 1) column vector with a 1 in the row of the label.
        column = np.zeros((10, 1))
        column[digit] = 1
        return column

    def pair_up(pixels, digits):
        return [(row.reshape(784, 1), as_column_one_hot(digit))
                for row, digit in zip(pixels, digits)]

    training_data = pair_up(x_train, y_train)
    testing_data = pair_up(x_test, y_test)
    return (training_data, testing_data)
def get_mnist_data():
    """Load MNIST from ``python-mnist/data`` as numpy arrays.

    Returns:
        ``(train_X, train_y, test_X, test_y)`` where both image arrays are
        reshaped to ``(-1, 28, 28)`` and both label arrays are plain
        ``np.array`` conversions of what the loader returned.
    """
    loader = MNIST('python-mnist/data')
    train_pixels, train_targets = loader.load_training()
    test_pixels, test_targets = loader.load_testing()

    image_shape = (-1, 28, 28)
    train_X = np.array(train_pixels).reshape(image_shape)
    test_X = np.array(test_pixels).reshape(image_shape)
    return train_X, np.array(train_targets), test_X, np.array(test_targets)
def classifier():
    """Return a ``KNeighborsClassifier`` fitted on the first 100 training samples.

    The samples are read from ``src/data/``; the classifier is built with
    its default constructor arguments.
    """
    sample_count = 100
    images, labels = MNIST("src/data/").load_training()
    model = KNeighborsClassifier()
    model.fit(images[:sample_count], labels[:sample_count])
    return model
def save():
    """Shuffle the MNIST training set and store it as two torch tensors.

    Reads the training data from ``PATH_RAW``, shuffles image/label pairs
    together with ``random.shuffle``, then writes:

    * ``PATH_IMAGES`` - float tensor, every pixel divided by 256.0
    * ``PATH_LABELS`` - long tensor, one single-element row per label
    """
    raw_images, raw_labels = MNIST(PATH_RAW).load_training()

    # Shuffle pairs (not the two lists separately) so labels stay aligned.
    pairs = list(zip(raw_images, raw_labels))
    random.shuffle(pairs)

    pixel_rows = []
    label_rows = []
    for pixels, label in pairs:
        pixel_rows.append([float(value) / 256.0 for value in pixels])
        label_rows.append([label])

    torch.save(torch.tensor(pixel_rows, dtype=torch.float), PATH_IMAGES)
    torch.save(torch.tensor(label_rows, dtype=torch.long), PATH_LABELS)
def __init__(self, cv_iters):
    """
    Load MNIST from 'data', merge the training and testing sets, and turn
    the labels into a boolean "is this an 8" column.

    cv_iters -- number of cross validation folds; must be greater than 2.

    After construction:
      self.features -- np.array of all training images followed by all
                       testing images
      self.labels   -- boolean array of shape (-1, 1), True where the
                       label equals 8 (the original author noted 6825 such
                       samples out of 70000)
    self.shuffle() is called last.
    """
    assert cv_iters > 2, 'Cross validation folds must be more than 2 folds'
    self.cv_iters = cv_iters

    loader = MNIST('data')
    train_images, train_labels = loader.load_training()
    test_images, test_labels = loader.load_testing()

    # Training samples first, testing samples appended after them.
    self.features = np.array(train_images + test_images)
    all_labels = np.array(train_labels + test_labels)
    self.labels = np.reshape(all_labels, (-1, 1)) == 8
    self.shuffle()
def load_dataset(ds_path, training=True, mnist_format=True):
    """Load images and labels from an MNIST directory or a JSON dump.

    Args:
        ds_path: Directory holding the dataset files.
        training: When true load the training split, otherwise the testing
            split.
        mnist_format: When true read ``ds_path`` through the ``MNIST``
            loader. Otherwise read ``training.json`` / ``testing.json``
            inside ``ds_path``; each JSON key is a Python-literal string of
            an image and each value is its label.

    Returns:
        A ``(images, labels)`` tuple.
    """
    if mnist_format:
        dataset = MNIST(ds_path)
        if training:
            return (dataset.load_training()[0], dataset.train_labels)
        return (dataset.load_testing()[0], dataset.test_labels)

    # The testing split previously re-opened "training.json", so test data
    # could never be loaded from JSON.
    json_name = "/training.json" if training else "/testing.json"
    with open(ds_path + json_name) as json_file:
        dataset = json.load(json_file)

    # Keys are stringified images; literal_eval only parses literals, so it
    # is safe on this input.
    imgs = [ast.literal_eval(img) for img in dataset]
    labels = list(dataset.values())
    return (imgs, labels)
def MnistData(filepath=mnistpath):
    '''Load MNIST and one-hot encode the labels of both splits

    Input
    -----
    filepath - 'string'
        Location of the MNIST dataset (defaults to the module-level
        ``mnistpath``)

    Output
    ------
    Xtrain - 'np.array'
        Training images, divided by 255.0
    Ytrain - 'np.array'
        One-hot encoded training labels, shape (n, k)
    labels_train - 'np.array'
        Training labels as loaded
    Xtest - 'np.array'
        Test images, divided by 255.0
    Ytest - 'np.array'
        One-hot encoded test labels, shape (m, k)
    labels_test - 'np.array'
        Test labels as loaded

    k is ``labels_train.max() + 1``.
    '''
    loader = MNIST(filepath)
    Xtrain, labels_train = (np.array(part) for part in loader.load_training())
    Xtest, labels_test = (np.array(part) for part in loader.load_testing())
    Xtrain, Xtest = Xtrain / 255.0, Xtest / 255.0

    n, _ = Xtrain.shape          # number of training observations
    m = len(labels_test)         # number of test observations
    k = labels_train.max() + 1   # number of classes

    def one_hot(rows, labels):
        encoded = np.zeros((rows, k))
        encoded[np.arange(rows), labels] = 1
        return encoded

    return (Xtrain, one_hot(n, labels_train), labels_train,
            Xtest, one_hot(m, labels_test), labels_test)
def EncodeMnist(values=(2, 7), encodingVal=(-1, 1), filepath=mnistpath):
    '''Load MNIST, pass it through ``binaryData`` for the two chosen values
    and encode the resulting labels with ``binarycode``

    Input
    -----
    values - 'list or tuple'
        Two values (0-9) used to select the binary problem
    encodingVal - 'list or tuple'
        Values handed to ``binarycode`` for encoding the labels
    filepath - 'string'
        Location of the MNIST dataset (defaults to the module-level
        ``mnistpath``)

    Output (in return order)
    ------
    XtrainC - 'np.array'
        Training data returned by ``binaryData``
    Ytrainlab - 'np.array'
        Encoded training labels returned by ``binarycode``
    XtestC - 'np.array'
        Test data returned by ``binaryData``
    Ytestlab - 'np.array'
        Encoded test labels returned by ``binarycode``
    '''
    loader = MNIST(filepath)
    train_pixels, train_digits = (
        np.array(part) for part in loader.load_training())
    test_pixels, test_digits = (
        np.array(part) for part in loader.load_testing())

    # Both image arrays are divided by 255.0 before filtering.
    train_pixels = train_pixels / 255.0
    test_pixels = test_pixels / 255.0

    XtrainC, kept_train = binaryData(values, train_pixels, train_digits)
    XtestC, kept_test = binaryData(values, test_pixels, test_digits)

    return (
        XtrainC,
        binarycode(kept_train, values, encodingVal),
        XtestC,
        binarycode(kept_test, values, encodingVal),
    )
import keras
#from __future__ import print_function
from keras.datasets import fashion_mnist
from mnist.loader import MNIST
import numpy as np  # was missing: every np.asarray call below raised NameError

# Alternative loader kept for reference (downloads through keras):
# (x_train, y_train), (x_test, y_test) = fashion_mnist.load_data()
# print('x_train shape:\t', x_train.shape)
# print('x_test shape:\t', x_test.shape)
# print('y_train shape:\t', y_train.shape)
# print('y_test shape:\t', y_test.shape)

# Read the local files; the loader keeps the results on the instance.
mndata = MNIST('E:/AI/example/fashion')
mndata.load_testing()
mndata.load_training()

y_test = np.asarray(mndata.test_labels)
y_train = np.asarray(mndata.train_labels)

# One 28x28 image per row; -1 lets numpy infer the sample count instead of
# hard-coding 10000 / 60000.
x_test = np.asarray(mndata.test_images).reshape(-1, 28, 28)
x_train = np.asarray(mndata.train_images).reshape(-1, 28, 28)

print('x_train shape:\t', x_train.shape)
print('x_test shape:\t', x_test.shape)
import numpy as np
import cv2
from collections import deque
from mnist.loader import MNIST
from sklearn.model_selection import train_test_split
from keras.utils import np_utils
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.callbacks import ModelCheckpoint

# Use python-mnist library to import pre-shuffled EMNIST letters data
# NOTE(review): 'data\\' is a Windows-style path - confirm for other platforms.
emnist_data = MNIST(path='data\\', return_type='numpy')
emnist_data.select_emnist('letters')
X, y = emnist_data.load_training()

# Reshape the data: one 28x28 image per sample, labels as a column.
# The sample count 124800 is hard-coded; reshape raises if it does not match.
X = X.reshape(124800, 28, 28)
y = y.reshape(124800, 1)

# Make it 0 based indices
y = y - 1

# Split test and train (25% held out, fixed seed for reproducibility)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=111)

# Rescale the Images by Dividing Every Pixel in Every Image by 255
# Rescale [0,255] --> [0,1]
X_train = X_train.astype('float32') / 255
predictions[i] = perceptrons[i].predict(data.images[n]) prediction = np.argmax(predictions) if prediction == data.labels[n]: correct += 1 return correct / data.size def make_conf_matrix(data, perceptrons): predictions = np.zeros(CLASSES) matrix = np.zeros((CLASSES, CLASSES)) for n in range(0, data.size): for i in range(0, CLASSES): predictions[i] = perceptrons[i].predict(data.images[n]) prediction = np.argmax(predictions) matrix[prediction, data.labels[n]] += 1 return print(matrix) if __name__ == '__main__': mndata = MNIST('./images/') train_images, train_labels = mndata.load_training() test_images, test_labels = mndata.load_testing() train_data = data.Data() train_data.load(60000, train_images, train_labels) test_data = data.Data() test_data.load(10000, test_images, test_labels) perceps = [perceptron.Perceptron(784) for i in range(CLASSES)] # weights = np.random.rand(10, 785) - .5 train_on_set(train_data, test_data, perceps, ETA, 70) make_conf_matrix(test_data, perceps)
def _fit_and_score_pair(make_classifier, raw_sets, pca_sets, labels_training,
                        labels_testing):
    """Fit a fresh classifier on raw and on PCA features; return both scores."""
    raw_train, raw_test = raw_sets
    pca_train, pca_test = pca_sets
    raw_model = make_classifier().fit(raw_train, labels_training)
    raw_score = raw_model.score(raw_test, labels_testing)
    pca_model = make_classifier().fit(pca_train, labels_training)
    pca_score = pca_model.score(pca_test, labels_testing)
    return raw_score, pca_score


def _write_scores(raw_scores, pca_scores, csv_name):
    """Write paired raw/PCA accuracies to ``csv_name`` as two columns."""
    pd.DataFrame({
        'Scores': np.array(raw_scores),
        'PCA_Scores': np.array(pca_scores)
    }).to_csv(csv_name, index=False)


def main():
    """Benchmark KNN, random forest and decision tree on noisy MNIST.

    Noise is added with ``create_noise`` to the raw images, the noisy sets
    are normalized, and a 15-component representation is built with
    ``create_training_and_testing_data``. Every classifier is fitted once on
    the noisy pixels and once on that representation; test accuracies are
    written to ``KNNAccuracies_noise.csv``,
    ``RandomForestAccuracies_noise.csv`` and
    ``DecisionTreeAccuracies_noise.csv``. KNN runs once, the two tree-based
    models run 9 times each.
    """
    # MNIST path data
    data_path = './Data/'
    mndata = MNIST(data_path)
    images_training, labels_training = mndata.load_training()
    images_testing, labels_testing = mndata.load_testing()
    images_training = np.asarray(images_training)
    images_testing = np.asarray(images_testing)
    labels_training = np.asarray(labels_training)
    labels_testing = np.asarray(labels_testing)

    # Noise is added to the un-normalized images, then normalized. The clean
    # images were also normalized originally but never used afterwards.
    images_training_noise = preprocessing.normalize(
        create_noise(images_training))
    images_testing_noise = preprocessing.normalize(
        create_noise(images_testing))

    images_training_pca_noise, images_testing_pca_noise = create_training_and_testing_data(
        images_training_noise,
        labels_training,
        images_testing_noise,
        labels_testing,
        n=15)

    raw_sets = (images_training_noise, images_testing_noise)
    pca_sets = (images_training_pca_noise, images_testing_pca_noise)

    print('Starting classification')

    # KNN (single run)
    print('Starting KNN')
    knn_score, knn_pca_score = _fit_and_score_pair(
        KNeighborsClassifier, raw_sets, pca_sets, labels_training,
        labels_testing)
    _write_scores([knn_score], [knn_pca_score], 'KNNAccuracies_noise.csv')

    # Random Forest Accuracies
    clf_score = []
    clf_pca_score = []
    for n in range(1, 10):
        print('Iteration {} of Random Forest Classifier'.format(n))
        raw, pca = _fit_and_score_pair(
            lambda: RandomForestClassifier(n_estimators=100), raw_sets,
            pca_sets, labels_training, labels_testing)
        clf_score.append(raw)
        clf_pca_score.append(pca)
    _write_scores(clf_score, clf_pca_score,
                  'RandomForestAccuracies_noise.csv')

    # Decision Tree Accuracies
    dtc_score = []
    dtc_pca_score = []
    for n in range(1, 10):
        print('Iteration {} of Decision Tree Accuracies'.format(n))
        raw, pca = _fit_and_score_pair(
            tree.DecisionTreeClassifier, raw_sets, pca_sets, labels_training,
            labels_testing)
        dtc_score.append(raw)
        dtc_pca_score.append(pca)
    _write_scores(dtc_score, dtc_pca_score,
                  'DecisionTreeAccuracies_noise.csv')
from mnist.loader import MNIST import random import numpy as np mndata = MNIST('data') features, labels = mndata.load_training() images, tlabels = mndata.load_testing() features = features + images features = np.array(features) labels = labels + tlabels labels = np.reshape(np.array(labels),(-1,1)) for i in range(50): from PIL import Image img = Image.fromarray(np.reshape(features[i].astype(np.uint8),(28,28)), 'L') img.resize((256,256)).save('HD{}.png'.format(labels[i])) ''' labels = labels == 8 ones = np.array([i for i in range(labels.shape[0]) if labels[i]==1]) np.random.shuffle(ones) ones = np.reshape(ones,(5, -1)) #keys to feature where label is 0 zeros = np.array([i for i in range(labels.shape[0]) if labels[i]==0]) np.random.shuffle(zeros) zeros = np.reshape(zeros,(5, -1)) ind = list(range(5)) ind = np.delete(ind, 2) print(zeros[ind].flatten())
from keras.layers import Dense, Activation
from keras import optimizers, regularizers
from keras.models import Sequential
import time
import numpy as np
import cv2
from mnist.loader import MNIST

# Loader for the MNIST files under ./data
m = MNIST('./data')

import os
# NOTE(review): set after keras is imported - confirm the backend still
# honours it at this point.
os.environ["CUDA_VISIBLE_DEVICES"] = "1"

classes = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]

x_train, y_train = m.load_training()
x_test, y_test = m.load_testing()

# Everything, labels included, is converted to float32 arrays here.
x_train = np.asarray(x_train).astype(np.float32)
y_train = np.asarray(y_train).astype(np.float32)
x_test = np.asarray(x_test).astype(np.float32)
y_test = np.asarray(y_test).astype(np.float32)

n_classes = len(classes)

#0-1 Hot encoding
# Row index `a` / column index `b` select one cell per training sample;
# labels are cast back to int64 so they can be used as indices.
label_train = np.zeros((y_train.shape[0], n_classes))
a = np.arange(y_train.shape[0], dtype=np.int64)
b = np.array(y_train, dtype=np.int64).reshape((y_train.shape[0], ))
label_train[a, b] = 1

# Same encoding for the test labels.
label_test = np.zeros((y_test.shape[0], n_classes))
c = np.arange(y_test.shape[0], dtype=np.int64)
d = np.array(y_test, dtype=np.int64).reshape((y_test.shape[0], ))
label_test[c, d] = 1
Next, we explore the use of an affine transformation of the pixel values from
d features to p features in an effort to increase our accuracy. We finally see
performance of choosing a p and its test accuracy.

@author: Christopher Salazar
"""
import numpy as np
from mnist.loader import MNIST
import matplotlib.pyplot as plt

# Load the MNIST Dataset
# NOTE(review): absolute, user-specific Windows path - will not resolve on
# other machines.
mndata = MNIST(
    r'C:\Users\salez\Documents\MISE Work\CSE 546\Homework\HW1\Programming\mnist'
)
X_train, labels_train = map(np.array, mndata.load_training())
X_test, labels_test = map(np.array, mndata.load_testing())
X_train = X_train / 255.0  # normalize dataset
X_test = X_test / 255.0

# Transform X_train and X_test into corresponding Y form
d = len(X_train[0])  # length of one training row
# Number of outputs
k = 10

# Construct Y_train array per the form required for closed form solution:
# one row per sample with a single 1 in the column of its label.
n_train = len(X_train)
Y_train = np.zeros((n_train, k))
for i in range(n_train):
    Y_train[i][labels_train[i]] = 1
# Run configuration. `learnRate` and `batch` are read below but are not
# assigned in this part of the file.
runs = 10
loss = "mse"
opt = "gd"
io = "../networks/mnist.obj"  # passed to getNetwork() and writeNetwork()
graph = False
layers = [16, 16, 10]

# Echo the configuration.
print(f"learnRate: {learnRate}")
print(f"batch: {batch}")
print(f"runs: {runs}")
print(f"loss: {loss}")
print(f"optimizer: {opt}")
print(f"io: {io}")
print(f"graph: {graph}")

mndata = MNIST('./samples/numbers')
trainData, trainLabels = mndata.load_training()
#trainData = [[(random.random()*2)-1, (random.random()*2)-1] for i in range(1000)]
#trainLabels = [int(i[0]-0.5>i[1] or i[0]+0.5<i[1]) for i in trainData]

# The input width is taken from the first training sample.
input_size = len(trainData[0])
print(f"architecture: {input_size}, {layers}\n")

# Build (or load - depends on getNetwork) the network, train it, and write
# it back to `io`.
convnn = getNetwork(io, learnRate, batch, loss, opt, layers, input_size)
convnn = train(runs, convnn, trainData, trainLabels, batch, graph)
convnn.writeNetwork(io)
print("training done")
print(" ")

# Evaluate on the testing split.
testData, testLabels = mndata.load_testing()
#testData = [[random.random()*2-1, random.random()*2-1] for i in range(20)]
#testLabels = [int(i[0]-0.5>i[1]or i[0]+0.5<i[1]) for i in testData]
results = test(convnn, testData, testLabels)
print(f"final percentage: {results}")
""" Render the image in each of it's retinal 'zones' This will be the basis of what each retinal unit views as we progress. Not that the pixels that will have the strongest stimuli are white (as they are closer to 1), areas of least stimuli are black (value close to 0). """ import random import numpy as np import matplotlib.pyplot as plt from mnist.loader import MNIST mndata = MNIST('/home/niloofar/snn-Akhil/Spiking-Neural-Network/mnist') images, labels = mndata.load_training() def get_next_image(index=0, pick_random = False, display=True): if pick_random: index = random.randint(0, len(images)-1) image = images[index] label = labels[index] if display: print('Label: {}'.format(label)) print(mndata.display(image)) image = np.asarray(image).reshape((28,28)) image_norm = (image * 255.0/image.max()) / 255. return image_norm, label def graph_retinal_image(image, stride): fig = plt.figure()
from MNISTModule.classification_set import ClassificationSet
from MNISTModule.classification import Classification
from mnist.loader import MNIST
import numpy as np
import os

samples_path = './SampleData/MNIST'
write_path = './accuracy.txt'

mndata = MNIST(samples_path)
mnist_training_set = ClassificationSet()

# Make sure the accuracy log exists. The file used to be opened with a bare
# open() and never closed; the with-block releases the handle immediately.
if os.path.exists(write_path):
    mode = 'a+'
else:
    with open(write_path, "w+") as f:
        pass

test_sigma = 8.5
images_training, labels_training = mndata.load_training()
images_testing, labels_testing = mndata.load_testing()

# Build the model on the training split and report accuracy on the testing
# split for the chosen sigma.
classification = Classification(images_training,
                                labels_training,
                                images_testing,
                                labels_testing,
                                sigma=test_sigma)
classification.create_model()
print(classification.calculate_accuracy(mode='verbose'))
# with open(write_path, 'a+') as file:
#     file.write(str(test_sigma) + ',' + classification.calculate_accuracy(calculate=False, mode='return'))
#     file.write('\n')
import pandas as pd
from sklearn.model_selection import StratifiedKFold
from mnist.loader import MNIST
import numpy as np

if __name__ == '__main__':
    # Build a shuffled training table with a stratified 5-fold assignment
    # and save it as ../input/mnist_train_folds.csv.
    mnist = MNIST('../input')
    x_train, y_train = mnist.load_training()
    #x_test, y_test = mnist.load_testing()

    x_train = np.asarray(x_train).astype(np.float32)
    y_train = np.asarray(y_train).astype(np.int32)
    #x_test = np.asarray(x_test).astype(np.float32)
    #y_test = np.asarray(y_test).astype(np.int32)

    print(f'Shape of X Train is {x_train.shape}')

    df = pd.DataFrame(data=x_train)
    df["label"] = y_train

    # Shuffle the rows; the index is reset so positions and index labels
    # coincide for the .loc assignment below.
    df = df.sample(frac=1).reset_index(drop=True)
    df['kfold'] = -1

    kf = StratifiedKFold(n_splits=5)
    # Stratify on the labels of the SHUFFLED frame. Using the original
    # y_train here pairs every row with another row's label, so the folds
    # are not stratified with respect to the data they contain.
    shuffled_labels = df["label"].values
    for f, (t_, v_) in enumerate(kf.split(X=df, y=shuffled_labels)):
        df.loc[v_, 'kfold'] = f

    print(df.head())
    df.to_csv("../input/mnist_train_folds.csv", index=False)
plt.plot() return loss_history class LinearSVM(LinearClassifier): def loss(self, X_batch, y_batch, reg): return compute_loss(self.W, X_batch, y_batch, reg) # In[5]: mndata = MNIST('E:/Grad School/Semester 2/ML/Homeworks/MNIST/') # In[6]: X_train, Y_train = mndata.load_training() #60000 samples X_test, Y_test = mndata.load_testing() #10000 samples # In[7]: X_train = np.asarray(X_train).astype(np.float32) Y_train = np.asarray(Y_train).astype(np.int32) X_test = np.asarray(X_test).astype(np.float32) Y_test = np.asarray(Y_test).astype(np.int32) # In[8]: W = np.random.randn(784, 10) * 0.0001 plotnum1 = [] # In[9]:
https://umap-learn.readthedocs.io/en/latest/supervised.html#using-labels-to-separate-classes-supervised-umap
Official reference

To use the MNIST data, follow
"https://pypi.org/project/python-mnist/"
to download and install it beforehand.

Supervised dimension reduction and metric learning
"""
import numpy as np
from mnist.loader import MNIST
import matplotlib.pyplot as plt
import seaborn as sns

sns.set(style='white', context='poster')

mndata = MNIST('/Users/hamamatsuikadaigakubyouribu/UMAP_python/python-mnist/data')
# Bare expression: has no effect when run as a script.
mndata

train, train_labels = mndata.load_training()
test, test_labels = mndata.load_testing()

# Stack the training and testing splits into one array, divided by 255.
data = np.array(np.vstack([train, test]), dtype=np.float64) / 255.0
target = np.hstack([train_labels, test_labels])

# NOTE(review): these are the Fashion-MNIST class names; presumably the data
# directory holds Fashion-MNIST files - confirm.
classes = [
    'T-shirt/top',
    'Trouser',
    'Pullover',
    'Dress',
    'Coat',
    'Sandal',
    'Shirt',
    'Sneaker',
    'Bag',
    'Ankle boot']
n = 200 start = time.time() xp, yp, xn, yn = createdata(1) crossvalid(xp, yp, m, n, 0, 0) end = time.time() print("Time elapsed while crossvalid: {:.3f} ".format(end - start), "s") # In[44]: d = 784 # read data and preprocess from mnist.loader import MNIST mndata = MNIST('') X_train, Y_train = mndata.load_training() # or X_test, Y_test = mndata.load_testing() xp = np.empty((0, 784), int) yp = np.empty((0, 1), int) xn = np.empty((0, 784), int) yn = np.empty((0, 1), int) X_opt = np.empty((0, 784), int) Y_opt = np.empty((0, 1), int) for i in reversed(range(len(Y_train))): if Y_train[i] == 1: xp = np.append(xp, [X_train[i]], axis=0) yp = np.append(yp, [Y_train[i]]) elif Y_train[i] == 0: xn = np.append(xn, [X_train[i]], axis=0) yn = np.append(yn, [Y_train[i]])