def data_mnist(datadir='/tmp/', train_start=0, train_end=60000, test_start=0,
               test_end=10000):
    """
    Load and preprocess MNIST dataset
    :param datadir: path to folder where data should be stored
    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :return: tuple of four arrays containing training data, training labels,
             testing data and testing labels.
    """
    assert isinstance(train_start, int)
    assert isinstance(train_end, int)
    assert isinstance(test_start, int)
    assert isinstance(test_end, int)

    import mnist

    X_train = mnist.train_images() / 255.
    Y_train = mnist.train_labels()
    X_test = mnist.test_images() / 255.
    Y_test = mnist.test_labels()

    X_train = np.expand_dims(X_train, -1)
    X_test = np.expand_dims(X_test, -1)

    X_train = X_train[train_start:train_end]
    Y_train = Y_train[train_start:train_end]
    X_test = X_test[test_start:test_end]
    Y_test = Y_test[test_start:test_end]

    Y_train = utils.to_categorical(Y_train, num_classes=10)
    Y_test = utils.to_categorical(Y_test, num_classes=10)
    return X_train, Y_train, X_test, Y_test
# written by jorge orlando miranda ñahui
# import the necessary modules
import numpy as np
import mnist
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Dense, Flatten
from keras.utils import to_categorical

# load the data from the mnist data set
# training images (60000 records)
train_images = mnist.train_images()
train_labels = mnist.train_labels()
# testing images (10000 records)
test_images = mnist.test_images()
test_labels = mnist.test_labels()

# normalize the values to the range [-0.5, 0.5]
norm_train = train_images / 255 - 0.5
test_images = test_images / 255 - 0.5
norm_train = np.expand_dims(norm_train, axis=3)
test_images = np.expand_dims(test_images, axis=3)

CNN_model = Sequential()
# number of filters (9)
num_filters = 9
# size of each filter (3x3)
filter_size = 3
# dimension of input 28x28x1 (grayscale image)
shape_input = (28, 28, 1)
CNN_model.add(Conv2D(num_filters, filter_size, input_shape=shape_input))
# size of the pooling layer (2x2)
size_pool = 2
# max pooling layer
CNN_model.add(MaxPooling2D(pool_size=size_pool))
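# Added sketch (not from the original file): the snippet stops after the pooling
# layer. A minimal, assumed completion of such a Keras model: flatten the pooled
# feature maps, add a 10-way softmax output, and train on one-hot labels. The
# epoch count is illustrative only.
CNN_model.add(Flatten())
CNN_model.add(Dense(10, activation='softmax'))

CNN_model.compile(optimizer='adam',
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])

CNN_model.fit(norm_train, to_categorical(train_labels),
              epochs=3,
              validation_data=(test_images, to_categorical(test_labels)))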
import mnist
import numpy as np
from conv import Conv
from maxpool import MaxPool
from softmax import Softmax
import cv2

'''
input_img = np.array([[0,0,0,0,0,0],
                      [0,0,50,0,29,0],
                      [0,0,80,31,2,0],
                      [0,33,90,0,75,0],
                      [0,0,9,0,95,0],
                      [0,0,0,0,0,0]
                      ])
'''

train_images = mnist.train_images()[:1000]
train_labels = mnist.train_labels()[:1000]
test_images = mnist.test_images()[:1000]  # test image size: 28 x 28
test_labels = mnist.test_labels()[:1000]

conv = Conv(8)                  # 28 x 28 -> 26 x 26 x 8
pool = MaxPool()                # 26 x 26 x 8 -> 13 x 13 x 8
softmax = Softmax(13*13*8, 10)  # 10 nodes for 10 digits 0 -> 9


def forward(images, labels):
    # transform image from [0->255] to [-0.5->0.5]
    out = conv.forward((images / 255) - 0.5)
    out = pool.forward(out)
    out = softmax.forward(out)

    loss = -np.log(out[labels])
    if np.argmax(out) == labels:
import mnist  # pip install mnist
import numpy as np
import matplotlib.pyplot as plt

from NeuralNetwork import NeuralNetwork
from activations import relu, d_relu, softmax, d_softmax
from loss_functions import categorical_crossentropy, d_categorical_crossentropy
from optimizers import SGD

x_train, y_train = mnist.train_images(), mnist.train_labels()
n_features = x_train.shape[1] * x_train.shape[2]
x_train_flatten = x_train.reshape(
    (x_train.shape[0], n_features)).astype(np.float64)

x_test, y_test = mnist.test_images(), mnist.test_labels()
x_test_flatten = x_test.reshape(
    (x_test.shape[0], n_features)).astype(np.float64)

nn_mnist = NeuralNetwork(layers=[n_features, 100, 10],
                         hidden_activation=(relu, d_relu),
                         output_activation=(softmax, d_softmax),
                         loss=(categorical_crossentropy,
                               d_categorical_crossentropy),
                         optimizer=SGD())

mnist_loss_hist = nn_mnist.fit(x=x_train_flatten,
                               y=y_train,
                               batch_size=512,
                               epochs=100,
import numpy as np
import mnist as mn
import matplotlib.pyplot as plt

# SETTING UP THE VARIABLES
eig_vec = np.genfromtxt('eigvec.csv', delimiter=',')
# -------------
images = mn.train_images()
images = images.reshape(60000, 784)
#images1 = images1.reshape(60000,784)
#images2 = mn.test_images()
#images2 = images2.reshape(10000,784)
#images = np.vstack((images1, images2))
images = np.dot(eig_vec.T, images.T)
images = images[:, :100]  # change this to whatever you want, but the labels must be cut to the same number
#images = images.reshape(5,10)
images = images.T
images = images / np.std(images)

for i in range(images.shape[0]):
    j = 0
    for j in range(images.shape[1]):
        if images[i][j] > 0:
            images[i][j] = 1
        elif images[i][j] >= -0.6 and images[i][j] <= 0.6:
            images[i][j] = 0
        elif images[i][j] < 0:
import mnist
import numpy as np
import tensorflow as tf
import tensorflow.distributions as tfds
import matplotlib.pyplot as plt
from vptsne import (VAE, PTSNE, VPTSNE)
from vptsne.helpers import *
from common import *
from sklearn.decomposition import PCA
from sklearn.manifold.t_sne import trustworthiness
from sklearn.neighbors import KNeighborsClassifier as KNC

np.random.seed(0)
color_palette = np.random.rand(100, 3)

mnist_train_images, mnist_train_labels = mnist.train_images().reshape(60000, 784) / 255, mnist.train_labels()
mnist_test_images, mnist_test_labels = mnist.test_images().reshape(10000, 784) / 255, mnist.test_labels()
n_input_dimensions = mnist_train_images.shape[1]


def run_training(n_latent_dimensions, perplexity, batch_size, percent_missing):
    data_points = mnist_train_images.shape[0]
    indices = np.random.choice(data_points, int(data_points * (1 - percent_missing)), replace=False)
    train_data = mnist_train_images[indices]
    train_labels = mnist_train_labels[indices]
    test_data = mnist_test_images
    test_labels = mnist_test_labels

    vae = VAE(
        [n_input_dimensions],
        get_gaussian_network_builder(vae_encoder_layers, n_latent_dimensions),
        gaussian_prior_supplier,
import numpy as np
import mnist as mn

# SETTING UP THE VARIABLES
eig_vec = np.genfromtxt('eig_vec.csv', delimiter=',')
# -------------
images1 = mn.train_images()
images1 = images1.reshape(60000, 784)
images2 = mn.test_images()
images2 = images2.reshape(10000, 784)
images = np.vstack((images1, images2))
images = np.dot(eig_vec.T, images.T)
images = images[:, :60000]  # change this to whatever you want, but the labels must be cut to the same number
#images = images.reshape(5,10)
images = images.T
images = images / np.std(images)

for i in range(images.shape[0]):
    j = 0
    for j in range(images.shape[1]):
        if images[i][j] > 0.5:
            images[i][j] = 1
        elif images[i][j] >= -0.5 and images[i][j] <= 0.5:
            images[i][j] = 0
        elif images[i][j] < -0.5:
            images[i][j] = -1
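# Added for comparison (not from the original file): the element-wise thresholding
# loop above can be written as one vectorized expression. Applying it here to the
# already quantized values is a no-op, since every entry is in {-1, 0, 1}.
images = np.where(images > 0.5, 1, np.where(images < -0.5, -1, 0))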
def training_data():
    load_data = [train_images(), train_labels()]
    training_inputs = [np.reshape(x, (784, 1)) for x in load_data[0]]
    training_results = [vectorized_result(y) for y in load_data[1]]
    train = zip(training_inputs, training_results)
    return train
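# vectorized_result is referenced above but not shown in this excerpt. It
# presumably builds a one-hot column vector for a digit label; a sketch of that
# assumption, matching the (n, 1) column layout used for the inputs:
def vectorized_result(y):
    """Return a (10, 1) array with 1.0 at index y and zeros elsewhere."""
    e = np.zeros((10, 1))
    e[y] = 1.0
    return e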
            i += 1
            err_best_g_prev = err_best_g
            t_new = time.time()
            if verbose:
                print('iter: {}, best solution: {} time elapsed in secs:{} Tot: {}'.format(
                    i, err_best_g, float(t_new - t_old), float(t_new - t_init)))
            f.write(str(float(err_best_g)) + '\n')

        print('\nFINAL SOLUTION:')
        #print(' > {}'.format(self.pos_best_g))
        print(' > {}\n'.format(err_best_g))
        t_total_new = time.time()
        print('total time elapsed:{}secs'.format(t_total_new - t_total_old))
        return pos_best_g, err_best_g, maxiter, backpropsteps


a = mnist.train_images()
X_train = mnist.train_images().reshape(a.shape[0], (a.shape[1] * a.shape[2]))
Y_train = mnist.train_labels()
a = mnist.test_images()
X_test = mnist.test_images().reshape(a.shape[0], (a.shape[1] * a.shape[2]))
Y_test = mnist.test_labels()

#oneHot = [np.zeros(3) for i in Y]
##for i in range(len(Y)):
##    oneHot[i][Y[i]] = 1
#Y = np.array(oneHot)
#X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2)

#n_hidden = int(sys.argv[1])
__author__ = 'Jie'
"""
a code to predict handwritten digits based on a 3-layer nn.
the dataset is from an existing dataset library, mnist
"""
import mnist
from keras.models import Sequential
from keras.layers import Dense
from keras.utils import to_categorical
import numpy as np
import pandas as pd

# load data
trains_X_orig = mnist.train_images()  # shape (60000,28,28)
trains_y = mnist.train_labels()
test_X_orig = mnist.test_images()  # shape (10000,28,28)
test_y = mnist.test_labels()

# flatten the image data into 2D. (60000,784)
trains_X = trains_X_orig.reshape((trains_X_orig.shape[0], -1))  # input shape (60000, 28*28)
test_X = test_X_orig.reshape((test_X_orig.shape[0], -1))

# normalize data by 255 into [-0.5, 0.5]
trains_X = trains_X / 255 - 0.5
test_X = test_X / 255 - 0.5

# create model, 2 hidden layers + 1 output layer = 3-layer nn.
model = Sequential([
    Dense(64, activation='relu', input_shape=(784, )),
    Dense(64, activation='relu'),
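    # A plausible, assumed completion of the truncated model definition: the usual
    # 10-way softmax output layer (this layer is not shown in the original excerpt).
    Dense(10, activation='softmax'),
])

# Assumed, typical compile/train configuration for this kind of model; the
# original file may differ.
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])
model.fit(trains_X, to_categorical(trains_y), epochs=5, batch_size=32)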
import numpy as np
import mnist

from beacon.nn.models import Sequential
from beacon.optim import SGD
from beacon.data import data_generator
from beacon.functional import functions as F

model = Sequential(
    Linear(784, 64),
    BatchNorm(input_shape=(1, 64)),
    ReLU(),
    Linear(64, 10),
    Softmax()
)

optimizer = SGD(model.parameters(), lr=0.01)

# Preparing training data
x_train = mnist.train_images().reshape(60000, 784) / 255.0
y_train = mnist.train_labels().reshape(60000, 1)
y_train = np.eye(10)[y_train].reshape(60000, 10)

# Preparing validation data
x_test = mnist.test_images().reshape(10000, 784) / 255.0
y_test = mnist.test_labels().reshape(10000, 1)

# Training
model.train()
for epoch in range(1, 6):
    full_loss = 0
    n_loss = 0
    for x, y in data_generator(x_train, y_train, batch_size=128, shuffle=True):
        optimizer.zero_grad()
        output = model(x)
from __future__ import print_function
from __future__ import division

import numpy as np
import matplotlib.pyplot as plt
import mnist

plt.ion()

# ------------------------------------------------------------------------------
# Data
num_clusters = 20
epoche = 30  # number of epochs

data = mnist.train_images() / 255.0
num_patterns, pattern_side, _ = data.shape
pattern_len = pattern_side * pattern_side
data = data.reshape(num_patterns, pattern_len)

centroids = np.zeros([num_clusters, pattern_len])

# ------------------------------------------------------------------------------
# Plotting: initialize the plot of the weights
plot_centroids = []
fig = plt.figure(figsize=(10, 8))
for i in range(num_clusters):
    ax = fig.add_subplot(4, num_clusters // 4, i + 1, aspect="equal")
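# The excerpt stops before any training code. Purely as an illustration (assumed,
# not from the original file), one plain k-means epoch over these variables could
# look like the sketch below; the original may well use a different update rule
# (e.g. an online or competitive one).
rng = np.random.default_rng(0)
centroids = data[rng.choice(num_patterns, num_clusters, replace=False)].copy()
for epoch in range(epoche):
    # squared distance of every pattern to every centroid: (num_patterns, num_clusters)
    dists = np.stack([((data - c) ** 2).sum(axis=1) for c in centroids], axis=1)
    assignment = dists.argmin(axis=1)
    # move each centroid to the mean of the patterns assigned to it
    for k in range(num_clusters):
        if np.any(assignment == k):
            centroids[k] = data[assignment == k].mean(axis=0)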
model_type = 'cnn'
assert model_type in ('dnn', 'cnn')
flatten = True if model_type == 'dnn' else False

## Dataset
import mnist
from sklearn.model_selection import train_test_split


def preprocess(x, y, flatten):
    x = x.astype("float32") / 255.
    y = y.astype("int64")
    x = x.reshape(-1, 784) if flatten else x.reshape(-1, 28, 28, 1)
    return x, y


images, labels = mnist.train_images(), mnist.train_labels()
x_train, x_valid, y_train, y_valid = train_test_split(images, labels, test_size=0.2, random_state=seed)
x_test, y_test = mnist.test_images(), mnist.test_labels()

x_train, y_train = preprocess(x_train, y_train, flatten)
x_valid, y_valid = preprocess(x_valid, y_valid, flatten)
x_test, y_test = preprocess(x_test, y_test, flatten)

## Model
model = SimpleDNN() if flatten else SimpleCNN()
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy()
optim = tf.keras.optimizers.Adam()

## Training
best_loss, best_epoch, best_model = 1e12, 0, None
def Training(weightList, dimLayers): images = mnist.train_images() labels = mnist.train_labels() images = images.reshape((images.shape[0], images.shape[1] * images.shape[2])) epochs = 4 iterations = 50 # number of batches batchSize = 1000 masks = None dropOut = True minP = .5 maxP = .5 # get random permutation of images and labels combo = [] for j in range(iterations*batchSize): combo.append((images[j],labels[j])) random.shuffle(combo) numLayers = len(weightList) numNeurons = sum([pair[0]*pair[1] for pair in dimLayers]) counts = [] example = 0 for k in range(epochs): for batch in range (iterations): changes = [] # changes made to weights after 'batchsize' trials error = 0 count = 0 if batch%10 == 0: print(batch) # create masks for dropout if dropOut: masks = DropOut(dimLayers,minP,maxP) for trial in range(batchSize): # shuffle which ones drop out, faster than generating a new set of masks if dropOut: for mask in masks: np.random.shuffle(mask[0]) tempChanges = [] # has the changes backwards actLayers = [] idx = batchSize * batch + trial (img,ans) = combo[idx] img = np.asarray([img]) / 255 # make pixel values between 0 and 1 if dropOut: img = img * masks[0] actLayers = ProduceActLayers(img,numLayers,weightList,masks) # Backpropagation guess = actLayers.pop() # ensure we don't divide by 0. Should never be triggered if np.linalg.norm(guess) == 0: print(guess) guess /= np.linalg.norm(guess) # normalize guess if np.linalg.norm(guess) == 0 or np.linalg.norm(guess) > 1000: print("norm too big") sys.exit() guess = np.ndarray.tolist(guess) # if guess.index(max(guess)) == ans: # count += 1 costs = Cost(guess, ans) error += Error(costs) dCdG = dCdGuess(guess, ans) # change in cost WRT final act layer # change the learning rate if example < 300 * batchSize: delta = 0.03 elif example < 450 * batchSize: delta = 0.015 elif example < 600 * batchSize: delta = 0.005 elif example < 800 * batchSize: delta = 0.001 else: delta = 0.0005 # dCdL is vertical, 10x1 dCdL = dCdG # dCdL is dCdG multiplied by some activation Layers in reverse dCdL = delta * np.multiply(np.asarray([costs]).T,dCdL) c = .06 mult=[c,78.4*c] for layer in range(numLayers): # dC/dW = dC/dL * dL/dW. dL/dW is the previous actLayer's neurons if len(actLayers) != 0: dCdW = np.multiply(dCdL, actLayers.pop()).transpose()#*mult[layer] else: dCdW = np.multiply(dCdL, img).transpose()#*mult[layer] # for row in dCdW: # make each change thingy 1/(dC/dW) if it's not 0 # for i in range(len(row)): # if row[i] != 0: row[i] = delta * row[i] tempChanges.append(-dCdW) # dC/dL^(i-1) = dL^i/dL^(i-1) * dC/dL^i. the second term is W^(i-1) if (layer != numLayers - 1): dCdL = (weightList[numLayers-1-layer] @ dCdL) if dropOut: dCdL *= masks[numLayers-1-layer].T # average changes over a batch if len(changes) == 0: # the first trial of the batch for i in range(numLayers): changes.append(tempChanges[numLayers - 1 - i]) else: for i in range(numLayers): changes[i] += tempChanges[numLayers - 1 - i] example += 1 for layer in range(numLayers): # make changes weightList[layer] += changes[layer] counts.append(error) plt.plot(counts) plt.show() return weightList
def test_mnist_one_hot(num_train_examples=-1, num_test_examples=-1, hidden_layers=(100,), sigmoid='tanh', learning_rate=0.01, layer_decay=1.0, momentum=0.0, batch_size=100, num_epochs=100, csv_filename=None, return_test_accuracies=True): # Collect and preprocess the data. if sigmoid == 'logistic': train_input = convert_mnist_images_logistic(mnist.train_images()[:num_train_examples]) train_output = convert_mnist_labels_one_hot( mnist.train_labels()[:num_train_examples], positive=0.9, negative=0.1) test_input = convert_mnist_images_logistic(mnist.test_images()[:num_test_examples]) test_output = convert_mnist_labels_one_hot(mnist.test_labels()[:num_test_examples], positive=0.9, negative=0.1) elif sigmoid == 'tanh': train_input, mean_shift, std_scale = convert_mnist_images_train_tanh(mnist.train_images()[:num_train_examples]) train_output = convert_mnist_labels_one_hot( mnist.train_labels()[:num_train_examples], positive=1.0, negative=-1.0) test_input = convert_mnist_images_test_tanh(mnist.test_images()[:num_test_examples], mean_shift, std_scale) test_output = convert_mnist_labels_one_hot(mnist.test_labels()[:num_test_examples], positive=1.0, negative=-1.0) else: raise ValueError('Invalid sigmoid function.') # Create and train the neural network. layer_sizes = (784,) + hidden_layers + (10,) weight_decay = 0.0 nn = NeuralNetwork(layer_sizes, sigmoid=sigmoid, weight_decay=weight_decay) num_examples = train_input.shape[0] num_iterations = (num_examples // batch_size) * num_epochs rows = None if csv_filename is not None: rows = [] test_accuracies = None if return_test_accuracies: test_accuracies = [] def callback(iteration): if iteration % (num_examples // batch_size) == 0: epoch = iteration // (num_examples // batch_size) training_prediction_accuracy = get_prediction_accuracy(nn, train_input, train_output) test_prediction_accuracy = get_prediction_accuracy(nn, test_input, test_output) training_loss = nn.get_loss(train_input, train_output) test_loss = nn.get_loss(test_input, test_output) print('{},{:.6f},{:.6f},{:.6f},{:.6f}'.format(epoch, training_prediction_accuracy, test_prediction_accuracy, training_loss, test_loss)) if csv_filename is not None: rows.append((epoch, training_prediction_accuracy, test_prediction_accuracy, training_loss, test_loss)) if return_test_accuracies: test_accuracies.append(test_prediction_accuracy) print('Network Parameters') print('layer_sizes: {}, sigmoid: {}, weight_decay: {}'.format(layer_sizes, sigmoid, weight_decay)) print('Training Parameters') print('num_iterations: {}, learning_rate: {}, layer_decay: {}, momentum: {}, batch_size: {}'.format( num_iterations, learning_rate, layer_decay, momentum, batch_size)) print('') header = 'epoch,training_accuracy,test_accuracy,training_loss,test_loss' print(header) stochastic_gradient_descent(nn, train_input, train_output, num_iterations=num_iterations, learning_rate=learning_rate, layer_decay=layer_decay, momentum=momentum, batch_size=batch_size, callback=callback) if csv_filename is not None: save_rows_to_csv(csv_filename, rows, header.split(',')) if return_test_accuracies: return test_accuracies
import numpy as np
import mnist
from model.network import Net

print('Loading data......')
num_classes = 10
train_images = mnist.train_images()  # [60000, 28, 28]
train_labels = mnist.train_labels()
test_images = mnist.test_images()
test_labels = mnist.test_labels()

print('Preparing data......')
train_images = (train_images - np.mean(train_images)) / np.std(train_images)
test_images = (test_images - np.mean(test_images)) / np.std(test_images)
#train_images = train_images/255
#test_images = test_images/255
training_data = train_images.reshape(60000, 1, 28, 28)
training_labels = np.eye(num_classes)[train_labels]
testing_data = test_images.reshape(10000, 1, 28, 28)
testing_labels = np.eye(num_classes)[test_labels]

net = Net()
#print('Training Lenet......')
#net.train(training_data, training_labels, 100, 1, 'weights_fp.pkl')
#print('Testing Lenet......')
#net.test(testing_data, testing_labels, 100)

print('Testing with pretrained weights......')
net.test_with_pretrained_weights(testing_data, testing_labels, 1, 'pretrained_weights.pkl')

print('Predicting with pretrained weights......')
print(net.predict_with_pretrained_weights(testing_data[0], 'pretrained_weights.pkl'))
import numpy as np
import matplotlib
import mnist

X_train = mnist.train_images()
Y_train = mnist.train_labels()
X_test = mnist.test_images()
Y_test = mnist.test_labels()

# flatten to (num_images, 784) and scale pixels to [0, 1]
X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1]**2)) / 255
X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1]**2)) / 255
print(X_train.shape)

# one-hot encode the labels
y = np.eye(10)[Y_train.astype('int32')]
Y_train = y
y = np.eye(10)[Y_test.astype('int32')]
Y_test = y


def relu(z):
    # note: modifies z in place
    z[z < 0] = 0
    return z


def relu1(z):
    # derivative of relu; also modifies z in place
    z[z <= 0] = 0
    z[z > 0] = 1
    return z
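# Both helpers above overwrite their argument, which can silently corrupt a cached
# pre-activation during backpropagation. A non-mutating variant (an added sketch;
# the names relu_safe and relu_grad are not from the original file):
def relu_safe(z):
    return np.maximum(z, 0)


def relu_grad(z):
    return (z > 0).astype(z.dtype)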
    idcs = np.argsort(abs(d))
    # sort the eigenvectors accordingly
    E = E[:, idcs[::-1]]
    # choose the first 'num_PCs' components
    E = E[:, :num_PCs]
    # project the original data onto the chosen components
    pca_data = np.dot(X, E)

    return pca_data, E, d[idcs[::-1]][:num_PCs]


if __name__ == "__main__":
    import matplotlib.pyplot as plt
    import mnist

    data = mnist.train_images()[:200] / 255.0
    labels = mnist.train_labels()[:200]
    num_patterns, pattern_side, _ = data.shape
    pattern_len = pattern_side * pattern_side
    data = data.reshape(num_patterns, pattern_len)

    data_out, E, d = PCA(data, 2)

    plt.imshow(E[:, 1].reshape(pattern_side, pattern_side))
    plt.show()
    plt.hist(labels)
    plt.show()
from mnist import train_images, test_images, train_labels, test_labels

if __name__ == '__main__':
    X_train, Y_train = train_images(), train_labels()
    X_test, Y_test = test_images(), test_labels()
import numpy as np
import mnist
import matplotlib.pyplot as plt
from sklearn import linear_model
from sklearn.metrics import accuracy_score

N = 1000
train_imgs = mnist.train_images()[:N]
train_labels = mnist.train_labels()[:N]
Xtrain_all = np.asarray(train_imgs)
ytrain_all = np.array(train_labels.tolist())
dimension = Xtrain_all.shape[1] * Xtrain_all.shape[2]
print(dimension)

test_imgs = mnist.test_images()[:N]
test_labels = mnist.test_labels()[:N]
Xtest_all = np.asarray(test_imgs)
ytest_all = np.array(test_labels.tolist())

cls = [[0], [1]]
img_size = Xtrain_all.shape[1] * Xtrain_all.shape[2]


def extract_data(X, y, classes):
    """
    X: numpy array, matrix of size (N, d), d is data dim
    y: numpy array, size (N, )
    cls: two lists of labels. For example: cls = [[1, 4, 7], [5, 6, 8]]
    return:
        X: extracted data
import numpy as np
import mnist
import tensorflow as tf
from tensorflow import keras

# Load Mnist Data
print('Loading Data...')
X_train, y_train, X_test, y_test = (mnist.train_images(), mnist.train_labels(),
                                    mnist.test_images(), mnist.test_labels())

# Normalize Data
X_train = (X_train / 255)
X_test = (X_test / 255)


# Augment images to account for cell boundaries
def augment_img(image):
    img = image.copy()
    top, right, bot, left = np.random.random(4) > 0.6
    if top:
        size = np.random.randint(0, 3)
        img[:size] = min(np.abs(np.random.normal(loc=0.5, scale=0.5)), 1)
    if right:
        size = np.random.randint(0, 3)
        img[:, 28 - size:] = min(np.abs(np.random.normal(loc=0.5, scale=0.5)), 1)
    if bot:
        size = np.random.randint(0, 3)
        img[28 - size:] = min(np.abs(np.random.normal(loc=0.5, scale=0.5)), 1)
    if left:
import mnist


def get_train_data():
    train_images = mnist.train_images()
    return train_images.reshape(reshape_image_to_1D(train_images))
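# reshape_image_to_1D is not defined in this excerpt. From the way its result is
# passed straight to .reshape, it presumably returns the flattening target shape;
# a sketch of that assumption:
def reshape_image_to_1D(images):
    """Return the (num_images, height*width) shape tuple for a stack of images."""
    return (images.shape[0], images.shape[1] * images.shape[2])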
# Description: This program classifies the MNIST handwritten digit images
# as a number 0 - 9

# Install packages: pip install tensorflow keras numpy mnist matplotlib

# import the packages / dependencies
import numpy as np
import mnist                            # Get the data set from here
from keras.models import Sequential     # ANN architecture
from keras.layers import Dense          # The layers in the ANN
from keras.utils import to_categorical
import matplotlib.pyplot as plt         # Graph

# Load the data set
train_images = mnist.train_images()  # training data of images
train_labels = mnist.train_labels()  # training data of the labels
test_images = mnist.test_images()    # testing data images
test_labels = mnist.test_labels()    # testing data labels

# Normalize the images
# Normalize the pixel values from [0, 255] to [-0.5, 0.5]
# This makes the network easier to train
train_images = (train_images / 255) - 0.5
test_images = (test_images / 255) - 0.5

# Flatten the images. Flatten each 28 x 28 image into a 784 = 28^2
# dimensional vector and pass it into the neural network
train_images = train_images.reshape((-1, 784))
test_images = test_images.reshape((-1, 784))

# print the new image shape
    wigstring += '\n\n\n'
    wigstring += 'who\n'
    wigstring += str(who)
    wigstring += '\n\n\n'
    wigstring += 'bho\n'
    wigstring += str(bho)
    wigstring += '\n\n\n'
    file.write(wigstring)
    file.close()


train_data = mnist.train_images()
train_label = mnist.train_labels()
testing_data = mnist.test_images()
testing_label = mnist.test_labels()

train_data = 1. / train_data.max() * (train_data - train_data.mean())
testing_data = 1. / testing_data.max() * (testing_data - testing_data.mean())

start = time.time()  # get current time

train_accu = []
epoch_time = 1
batch_size = 250
for i in range(epoch_time):
    arr = np.arange(50000)
# this program classifies the MNIST handwritten digit images
import numpy as np
import mnist                            # The data set
import matplotlib.pyplot as plt
from keras.models import Sequential     # ANN architecture
from keras.layers import Dense          # Will provide the layers for the ANN
from keras.utils import to_categorical

# Loading the data set
train_images = mnist.train_images()  # Training data images
train_labels = mnist.train_labels()  # Training data labels
test_images = mnist.test_images()    # Testing data images
test_labels = mnist.test_labels()    # Testing data labels

# Normalizing the images from [0, 255] to [-0.5, 0.5]
# This is to make the ANN easier to train
train_images = (train_images / 255) - 0.5
test_images = (test_images / 255) - 0.5

# Flatten the images: each 28 * 28 image becomes a 28^2 = 784 dimensional vector
# to pass into the ANN
train_images = train_images.reshape((-1, 784))
test_images = test_images.reshape((-1, 784))

# Print the shape of the images
print(train_images.shape)  # (60000 (rows), 784 (columns))
# Classify MNIST handwritten digit images as 0 - 9

# Import dependencies
import numpy as np
import matplotlib.pyplot as plt
from keras.models import Sequential
from keras.layers import Dense
from keras.utils import to_categorical
import mnist

# Load the data
train_images = mnist.train_images()  # training data images - stored in numpy arrays
print("The initial size of the training images: ", train_images.shape)  # 60k
train_labels = mnist.train_labels()  # training data labels
print("The number of training labels is: ", train_labels.shape)  # 60k
test_images = mnist.test_images()  # testing data images
print("The initial size of the testing images: ", test_images.shape)
test_labels = mnist.test_labels()  # testing data labels
print("The initial size of the testing labels: ", test_labels.shape)

# Normalize the images - normalize pixel values from [0, 255] to [-0.5, 0.5] to make our network easier to train
train_images = (train_images / 255) - 0.5
test_images = (test_images / 255) - 0.5

# Flatten the images - flatten each 28 x 28 image into a 784 dimensional vector to pass to NN
train_images = train_images.reshape((-1, 784))
test_images = test_images.reshape((-1, 784))

# Print the shape
print(train_images.shape)  # 60,000 rows and 784 columns
    bv4 = beta1*bv4 + (1-beta1)*db4/batch_size
    bs4 = beta2*bs4 + (1-beta2)*(db4/batch_size)**2
    b4 -= lr * bv4 / np.sqrt(bs4+1e-7)

    params = [f1, f2, w3, w4, b1, b2, b3, b4]

    return params


# In[10]:

num_images = 60000
batch_size = 32
X = np.asarray(mnist.train_images()[:num_images], dtype=np.float32)
y = mnist.train_labels()[:num_images]
num_classes = 10
imgdim = X.shape[1]

# Random Shuffle the data
permutation = np.random.permutation(num_images)
X = X[permutation]
y = y[permutation]

# Normalizing the images
X = X - int(np.mean(X))
X = X / int(np.std(X))
Y = np.array([np.eye(num_classes)[int(y[k])].reshape(num_classes, 1) for k in range(0, num_images)])
X = X.reshape(num_images, 1, imgdim, imgdim)
#Import Packages
import numpy as np
import mnist                            #Get Data Set From
import matplotlib.pyplot as plt         #Graph
from keras.models import Sequential     #ANN Architecture
from keras.layers import Dense          #The Layers In The ANN
from keras.utils import to_categorical

#Load Data To Set
train_images = mnist.train_images()  #Training Data - Images
train_labels = mnist.train_labels()  #Training Data - Labels
test_images = mnist.test_images()    #Test Data - Images
test_labels = mnist.test_labels()    #Test Data - Labels

#Normalize The Images. Normalize The Pixel Values From [0, 255]
#To [-0.5, 0.5] To Make Our Network Easier To Train
train_images = (train_images / 255) - 0.5
test_images = (test_images / 255) - 0.5

#Flatten The Images - Each 28x28 Into A 784 Dimensional Vector To Pass Into NN
train_images = train_images.reshape((-1, 784))
test_images = test_images.reshape((-1, 784))

#Print The Shape
print(train_images.shape)  #60,000 Rows, 784 Columns
print(test_images.shape)   #10,000 Rows, 784 Columns

#Build The Model
#With 3 Layers:
#2 Layers With 64 Neurons And The ReLU Function,
#1 Layer With 10 Neurons And Softmax Function
model = Sequential()
model.add(Dense(64, activation='relu', input_dim=784))
import mnist
import numpy as np
import matplotlib.pyplot as plt

images = np.append(mnist.train_images(), mnist.test_images(), axis=0)
labels = np.append(mnist.train_labels(), mnist.test_labels(), axis=0)

np.save('mnist_images.npy', images)
np.save('mnist_labels.npy', labels)
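# Added usage example: the cached arrays can later be read back with np.load
# instead of re-downloading MNIST.
images = np.load('mnist_images.npy')
labels = np.load('mnist_labels.npy')
assert images.shape == (70000, 28, 28) and labels.shape == (70000,)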
class ProtoDash(): def create_spark_session(): global sc sc = SparkContext.getOrCreate() sc.setLogLevel("ERROR") return SparkSession(sc) def create_vec_rdd(X, part=4): """ Function returning a DenseVector RDD from a dataset X. Args: -X: a dataset with rows corresponding to observations and columns corresponding to features. -part: n of partitions. Returns: the RDD for X """ # creating a Spark session spark_session = ProtoDash.create_spark_session() X_rdd = (sc.parallelize( X, part).map(lambda x: DenseVector(x)).zipWithIndex()) return X_rdd def mean_inner_product(inp, sigma, n): """ Function computing the gaussian kernel inner product of a vector in Y vs. a vector in X, divided by n the total number of observations in X. """ index_1 = inp[0][1] inner_product = float( np.exp(inp[0][0].squared_distance(inp[1][0]) / (-2 * sigma**2)) / n) return (index_1, inner_product) def inner_product(inp, sigma): """ Function computing the gaussian kernel inner product of a vector vs. another. """ index_1 = inp[0][1] index_2 = inp[1][1] inner_product = float( np.exp(inp[0][0].squared_distance(inp[1][0]) / (-2 * sigma**2))) return (index_1, [(index_2, inner_product)]) def weighted_sum(inp, w_arr): """ compute the weighted sum of matrix values for a set of indices and weights. Note it is fine using a list comprehension here since the number of prototypes m << |X^{(1)}|. """ return float(np.sum(np.array([x[1] for x in inp]) * w_arr)) def udf_weighted_sum(w_arr): """ UDF instance of the weighted_sum function. """ return F.udf(lambda l: ProtoDash.weighted_sum(l, w_arr)) def merge_lists(x, y): """ merge lists. """ return sorted(x + y, key=lambda tup: tup[0]) # Create UDF corresponding to merge_lists function. DType = ArrayType( StructType( [StructField("_1", LongType()), StructField("_2", FloatType())])) udf_merge_lists = F.udf(merge_lists, DType) def optimize(K, u, opt_w0, init_val, max_w=10000): """ Function solving quadratic optimization problem. Args: -K: inner product matrix -u: mean inner product of each prototype -opt_w0: initial weights vector -init_val: starting run -max_w: an upper bound on weight value Returns: -weights and the objective value """ dim = u.shape[0] low_b = np.zeros((dim, 1)) upper_b = max_w * np.ones((dim, 1)) x_init = np.append(opt_w0, init_val / K[dim - 1, dim - 1]) G = np.vstack((np.identity(dim), -1 * np.identity(dim))) h = np.vstack((upper_b, -1 * low_b)) # solve constrained quadratic problem soltn = solve_qp(K, -u, G, h, A=None, b=None, solver='cvxopt', initvals=x_init) # calculate the objective function value for optimal solution x_sol = soltn.reshape(soltn.shape[0], 1) q = -u.reshape(u.shape[0], 1) obj_value = 1 / 2 * np.matmul(np.matmul(x_sol.T, K), x_sol) + np.matmul(q.T, x_sol) return (soltn, obj_value[0, 0]) def ProtoDashAlgoritm(X, Y, m, sigma, partitions=20, verbose=True): """ Implementation of the ProtoDash algorithm Args: -X (RDD of indexed DenseVector rows): Target dataset/ the dataset to be represented. -Y (RDD of indexed DenseVector rows): Source dataset/ the dataset to select prototypes from. -m (integer): total number of prototypes to select. -sigma (strictly positive float): gaussian kernel parameter. -partitions (integer): number of RDD partitions to compute inner product RDDs with. -verbose (boolean): whether or not to print the cumulative number of prototypes selected at each iteration. Returns: -L (integer list): the set of indices corresponding to selected prototypes. -w (float list): the optimal set of weights corresponding to each selected prototype. 
""" # get count of observations in X n_X = X.count() # build mu DataFrame mu_df = (Y.cartesian(X).map(lambda x: ProtoDash.mean_inner_product( x, sigma, n_X)).reduceByKey(lambda x, y: x + y).toDF(["obs", "mu"])) # initialise key variables L = np.zeros(m, dtype=int) # set of prototype indices L w = np.zeros(m, dtype=float) # set of optimal prototype weights f_eval = np.zeros( m, dtype=float) # set of the f(w) eval. at prototype selection n_L = 0 # count of prototypes selected so far # find the index corresponding to the maximum mu value max_grad_0 = mu_df.orderBy(F.desc("mu")).limit(1).collect()[0] # collect values L[n_L] = max_grad_0.obs w[n_L] = max_grad_0.mu f_eval[n_L] = 1 / 2 * max_grad_0.mu**2 n_L += 1 # select the row of Y corresponding to the first chosen index Y_row_j0 = Y.filter(lambda x: x[1] == L[:n_L]).collect()[0] # take its inner product with all rows of Y to build the starting K dataframe K_init_df = (Y.map(lambda x: ProtoDash.inner_product( (x, Y_row_j0), sigma)).toDF(["obs", "K"])) # join mu and K dataframes join_df = (mu_df.join(K_init_df, "obs").repartition(partitions)) # cache join_df as it is reused often join_df.cache() # compute the new gradient vector grad_df = (join_df.withColumn( "K_weighted", ProtoDash.udf_weighted_sum(w[:n_L])(F.col("K"))).withColumn( "grad", F.col("mu") - F.col("K_weighted")).select("obs", "grad")) # begin while loop while n_L < m: # remove the rows that have an index already included in L grad_df = grad_df.filter( ~grad_df.obs.isin([int(x) for x in L[:n_L]])) # find the row that has the maximum value in the filtered gradient vector argmax_grad = grad_df.orderBy(F.desc("grad")).limit(1).collect()[0] # update L L[n_L] = argmax_grad.obs # select the row of Y corresponding to the chosen index Y_row_j = Y.filter(lambda x: x[1] == L[n_L]).collect()[0] # take its inner product with all rows of Y to build new K K_int_df = (Y.map(lambda x: ProtoDash.inner_product( (x, Y_row_j), sigma)).toDF(["obs", "new_K_col"])) # add new K col to previous K col join_df = (join_df.join(K_int_df, "obs").withColumn( "K_merged", ProtoDash.udf_merge_lists(F.col("K"), F.col("new_K_col"))).select( "obs", "mu", "K_merged").withColumnRenamed( "K_merged", "K")) # cache new joined_df join_df.cache() # increment n_L n_L += 1 # sort L L[:n_L] = sorted(L[:n_L]) if verbose is True and n_L % 5 == 0: print("Prototypes selected - " + str(n_L)) # take max gradient val. max_grad = argmax_grad.grad # filter join dataframe for given indices in L filt_df = (join_df.filter( join_df.obs.isin([int(x) for x in L[:n_L] ])).orderBy(F.col("obs").asc())) # take mu vector mu_arr = np.array(filt_df.select("mu").collect(), dtype=float) # take K matrix K_mat = np.array( filt_df.rdd.map(lambda x: [y[1] for y in x[2]]).collect(), dtype=float) # find optimal weights for the index set L opt_res = ProtoDash.optimize(K_mat, mu_arr, w[:n_L - 1], max_grad) (w[:n_L], f_eval[n_L - 1]) = opt_res[0], -opt_res[1] # compute gradient vector with new optimal weights grad_df = (join_df.withColumn( "K_weighted", ProtoDash.udf_weighted_sum(w[:n_L])(F.col("K"))).withColumn( "grad", F.col("mu") - F.col("K_weighted")).select("obs", "grad")) # tuple of indices and their corresponding weight, sorted by weight in descending order. 
res = sorted([(w[i], L[i]) for i in range(m)], key=lambda tup: -tup[0]) # return tuple of index set L and optimal weight set w, set of f_eval return res, f_eval ####################################################### ######### RDF IMPLEMENTATION ########### ####################################################### rdf_dataset = None numeric_dataset = None subject_index = {} predicate_index = {} object_index = {} def infer_index(token, token_indices): """ Enumerate the distinct tokens. If the token is found in the token_indices, then return it, else assign the next integer number (after the last assigned index) which is also the size of the token_indices. """ if token in token_indices: return token_indices[token] else: token_index = len(token_indices) token_indices[token] = token_index return token_index def convert_rdf_to_ntriples(dataset): """ Loads rdf data and converts into n-triples, treating each triple as a datapoint in the dataset. """ g = Graph() g.load(dataset) rem_object = URIRef( "http://www.w3.org/2002/07/owl#NamedIndividual" ) # deleting the triples that have object value as 'owl#NamedIndividual' for s, p, o in g: g.remove((s, p, rem_object)) global rdf_dataset global numeric_dataset # create n-triples of strings rdf_dataset = [(str(s), str(p), str(o)) for s, p, o in g] # preprocess and create a numeric dataset in order to input to ProtoDash numeric_dataset = list( map( lambda e: (ProtoDash.infer_index(e[0], ProtoDash.subject_index), ProtoDash.infer_index(e[1], ProtoDash.predicate_index), ProtoDash.infer_index(e[2], ProtoDash.object_index)), rdf_dataset)) #print(rdf_dataset) #print('************************************') #print('Size of dataset:', len(rdf_dataset)) #print('Subjects cardinality:', len(subject_index)) #print('Predicates cardinality:', len(predicate_index)) #print('Objects cardinality:', len(object_index)) print('************************************') return numeric_dataset def strip_rdf_prefix(triple): """ Strips the common URL-like prefixes from the RDF data and takes the suffix after '#'. Example: Input triple: ('http://www.semanticweb.org/vinu/ontologies/2014/6/untitled-ontology-91#naomi_watts', 'http://www.semanticweb.org/vinu/ontologies/2014/6/untitled-ontology-91#acted_in', 'http://www.semanticweb.org/vinu/ontologies/2014/6/untitled-ontology-91#rabbits') Output: naomi_watts acted_in rabbits """ return ' '.join(tuple(map(lambda e: e[e.find('#') + 1:], triple))) def get_sample_index(dataset, sample): # Function returns the index of the triple in the dataset global dataset_rdd dataset_rdd = ProtoDash.convert_rdf_to_ntriples(dataset) index_list = [x for x, y in enumerate(rdf_dataset)] for i in range(len(rdf_dataset)): if rdf_dataset[i] == sample: return index_list[i] def get_rdf_prototypes(dataset, sample_triple, num_proto): # Index of the sample from the dataset to be given to the ProtoDash as dataset to be explained # These prototypes that come out of ProtoDash can be thought as the cluster that this sample belongs to. # Or vice versa, the sampled datapoint can be thought as cluster centroid, and the explaining prototypes # as the data that belong to that cluster. 
sample_index = ProtoDash.get_sample_index(dataset, sample_triple) if sample_index is not None: # Create a target dataset comprising of the selected sample target = [numeric_dataset[sample_index]] # Create a source dataset comprising of all triples but the selected sample source = numeric_dataset[:sample_index] + numeric_dataset[ sample_index + 1:] # Convert the datasets to PySpark RDDs target_rdd = ProtoDash.create_vec_rdd(target) source_rdd = ProtoDash.create_vec_rdd(source) print('Starting ProtoDash on RDF') res, f = ProtoDash.ProtoDashAlgoritm(target_rdd, source_rdd, num_proto, 50, partitions=4, verbose=True)[:2] print('Finished ProtoDash on RDF') print('The chosen sample_index:', sample_index) # Raw RDF triples has a long common prefixes, for the sake presentation (to keep it short), # I strip the common long URL-like prefixes and take the suffix after '#' - the data that matters. stripped_target = ProtoDash.strip_rdf_prefix( rdf_dataset[sample_index]) # Print the target datapoint print('Target (sampled) datapoint: ', stripped_target) # create the Y and X axis of the plot # The result (res) that comes from the ProtoDash is a list of pairs of weight and index # I use the index find the triples from the raw dataset to be used X-axis # and the weights are used as Y-coordinates values = list(map(lambda e: e[0], res)) # e[0] is weight names = list(map(lambda e: rdf_dataset[e[1]], res)) # e[1] is index # strip the names to fit into the plot names = list(map(ProtoDash.strip_rdf_prefix, names)) plt.barh(names, values) plt.title(stripped_target) plt.show() else: print("Please enter a valid triple") def ProtoDashOnRDF(dataset, num_proto, sample_triple): # dataset: path to the file # num_proto: number of prototypes for ProtoDash to select # sample_triple: the sample_triple is string which refer to the triple if os.path.isfile(dataset): if num_proto.isdigit(): sample_triple = tuple(sample_triple.split(',')) ProtoDash.get_rdf_prototypes(dataset, sample_triple, int(num_proto)) else: print("Number of prototypes can be only integer") else: print("File do not exists") ####################################################### ######### Image IMPLEMENTATION ########### ####################################################### # collect MNIST train/test sets train_images = np.array(mnist.train_images(), dtype='float') train_labels = mnist.train_labels() test_images = np.array(mnist.test_images(), dtype='float') test_labels = mnist.test_labels() def create_target_set(labels, images, digit, target_n, percentage): """ This function creates a MNIST image dataset in which a specified percentage of the total observations correspond to a specific digit, while the remaining observations correspond to other randomly chosen digits. Args: -labels: the digit label for each MNIST image. -images: the MNIST image. -digit: a digit between 0 and 9. -target_n: the number of total observations required in the target dataset. -percentage: the percentage of images in the target dataset that correspond to the specified digit. Returns: -the target images. """ # take integer number of obs. 
corresponding to digit n_dig = int(np.floor(percentage * target_n)) # get indices corresponding to digit idx = np.where(labels == digit)[0] # reduce indices to specific % idx_red = idx[:n_dig] # slice images with index and reshape target_set_dig = images[idx_red, :] target_set_dig = np.reshape(target_set_dig, (target_set_dig.shape[0], 28 * 28)) # get remaining indices rem = target_n - n_dig rem_ind = np.setdiff1d(np.arange(len(labels)), idx_red)[:rem] # fill the remaining observations with images corresponding to other digits target_set_non_dig = images[rem_ind] target_set_non_dig = np.reshape(target_set_non_dig, (target_set_non_dig.shape[0], 28 * 28)) # create the dataset target_set = np.vstack((target_set_non_dig, target_set_dig)) # shuffle it arr = np.arange(target_n) np.random.shuffle(arr) return target_set def get_image_prototypes(num_proto, digit): part = 6 # number of Pyspark RDD partitions to use sigma = 50 # gaussian kernel parameter n_1 = 5420 # the number of observations in X_1 n_2 = 1500 # the number of observations in X_2 #percentages = [.3, .5, .7, .9, 1.] percentages = [ 1. ] # the percentage of X_1 that will correspond to the chosen digit # list of experiment results exp_1_res_list = [] # list of f_eval sequences exp_1_f_eval_list = [] # set source dataset and labels source_set = np.reshape(ProtoDash.test_images[:n_2], (n_2, 28 * 28)) # select the target datasets target_set = ProtoDash.create_target_set(ProtoDash.train_labels, ProtoDash.train_images, digit, n_1, 1) # convert target and source datasets to RDDs target_rdd = ProtoDash.create_vec_rdd(target_set, part) source_rdd = ProtoDash.create_vec_rdd(source_set, part) # collect the indices of m prototypes along with their ascribed weight res, f = ProtoDash.ProtoDashAlgoritm(target_rdd, source_rdd, num_proto, sigma, partitions=part, verbose=True)[:2] # collect the results exp_1_res_list.append(res) exp_1_f_eval_list.append(f) fig, axes = plt.subplots(num_proto, 1, figsize=(12, 10), squeeze=False) for i in range(num_proto): for j in range(len(percentages)): axes[i][j].imshow( np.reshape(source_set[exp_1_res_list[j][i][1], :], (28, 28))) axes[i][j].get_xaxis().set_ticks([]) axes[i][j].get_yaxis().set_ticks([]) fig.suptitle("\n".join( wrap( "Top %d prototypes selected by ProtoDash corresponding to the digit %d" % (num_proto, digit), 60)), fontsize=20) plt.show() spark.stop() def ProtoDashOnImage(digit, num_proto): # digit: the digit to be represented in the target dataset X_1 # num_proto: number of prototypes for ProtoDash to select if digit.isdigit() and 0 <= int(digit) <= 9: if num_proto.isdigit(): ProtoDash.get_image_prototypes(int(num_proto), int(digit)) else: print("Please enter an integer value for number of prototypes") else: print("Please enter a digit between 0-9")
def main(): # XOR revisited # training data xs = [[0., 0], [0., 1], [1., 0], [1., 1]] ys = [[0.], [1.], [1.], [0.]] random.seed(0) net = Sequential([ Linear(input_dim=2, output_dim=2), Sigmoid(), Linear(input_dim=2, output_dim=1) ]) import tqdm optimizer = GradientDescent(learning_rate=0.1) loss = SSE() with tqdm.trange(3000) as t: for epoch in t: epoch_loss = 0.0 for x, y in zip(xs, ys): predicted = net.forward(x) epoch_loss += loss.loss(predicted, y) gradient = loss.gradient(predicted, y) net.backward(gradient) optimizer.step(net) t.set_description(f"xor loss {epoch_loss:.3f}") for param in net.params(): print(param) # FizzBuzz Revisited from scratch.neural_networks import binary_encode, fizz_buzz_encode, argmax xs = [binary_encode(n) for n in range(101, 1024)] ys = [fizz_buzz_encode(n) for n in range(101, 1024)] NUM_HIDDEN = 25 random.seed(0) net = Sequential([ Linear(input_dim=10, output_dim=NUM_HIDDEN, init='uniform'), Tanh(), Linear(input_dim=NUM_HIDDEN, output_dim=4, init='uniform'), Sigmoid() ]) def fizzbuzz_accuracy(low: int, hi: int, net: Layer) -> float: num_correct = 0 for n in range(low, hi): x = binary_encode(n) predicted = argmax(net.forward(x)) actual = argmax(fizz_buzz_encode(n)) if predicted == actual: num_correct += 1 return num_correct / (hi - low) optimizer = Momentum(learning_rate=0.1, momentum=0.9) loss = SSE() with tqdm.trange(1000) as t: for epoch in t: epoch_loss = 0.0 for x, y in zip(xs, ys): predicted = net.forward(x) epoch_loss += loss.loss(predicted, y) gradient = loss.gradient(predicted, y) net.backward(gradient) optimizer.step(net) accuracy = fizzbuzz_accuracy(101, 1024, net) t.set_description(f"fb loss: {epoch_loss:.2f} acc: {accuracy:.2f}") # Now check results on the test set print("test results", fizzbuzz_accuracy(1, 101, net)) random.seed(0) net = Sequential([ Linear(input_dim=10, output_dim=NUM_HIDDEN, init='uniform'), Tanh(), Linear(input_dim=NUM_HIDDEN, output_dim=4, init='uniform') # No final sigmoid layer now ]) optimizer = Momentum(learning_rate=0.1, momentum=0.9) loss = SoftmaxCrossEntropy() with tqdm.trange(100) as t: for epoch in t: epoch_loss = 0.0 for x, y in zip(xs, ys): predicted = net.forward(x) epoch_loss += loss.loss(predicted, y) gradient = loss.gradient(predicted, y) net.backward(gradient) optimizer.step(net) accuracy = fizzbuzz_accuracy(101, 1024, net) t.set_description(f"fb loss: {epoch_loss:.3f} acc: {accuracy:.2f}") # Again check results on the test set print("test results", fizzbuzz_accuracy(1, 101, net)) # Load the MNIST data import mnist # This will download the data, change this to where you want it. # (Yes, it's a 0-argument function, that's what the library expects.) # (Yes, I'm assigning a lambda to a variable, like I said never to do.) mnist.temporary_dir = lambda: '/tmp' # Each of these functions first downloads the data and returns a numpy array. # We call .tolist() because our "tensors" are just lists. train_images = mnist.train_images().tolist() train_labels = mnist.train_labels().tolist() assert shape(train_images) == [60000, 28, 28] assert shape(train_labels) == [60000] import matplotlib.pyplot as plt fig, ax = plt.subplots(10, 10) for i in range(10): for j in range(10): # Plot each image in black and white and hide the axes. 
ax[i][j].imshow(train_images[10 * i + j], cmap='Greys') ax[i][j].xaxis.set_visible(False) ax[i][j].yaxis.set_visible(False) # plt.show() # Load the MNIST test data test_images = mnist.test_images().tolist() test_labels = mnist.test_labels().tolist() assert shape(test_images) == [10000, 28, 28] assert shape(test_labels) == [10000] # Recenter the images # Compute the average pixel value avg = tensor_sum(train_images) / 60000 / 28 / 28 # Recenter, rescale, and flatten train_images = [[(pixel - avg) / 256 for row in image for pixel in row] for image in train_images] test_images = [[(pixel - avg) / 256 for row in image for pixel in row] for image in test_images] assert shape(train_images) == [60000, 784], "images should be flattened" assert shape(test_images) == [10000, 784], "images should be flattened" # After centering, average pixel should be very close to 0 assert -0.0001 < tensor_sum(train_images) < 0.0001 # One-hot encode the test data train_labels = [one_hot_encode(label) for label in train_labels] test_labels = [one_hot_encode(label) for label in test_labels] assert shape(train_labels) == [60000, 10] assert shape(test_labels) == [10000, 10] # Training loop import tqdm def loop(model: Layer, images: List[Tensor], labels: List[Tensor], loss: Loss, optimizer: Optimizer = None) -> None: correct = 0 # Track number of correct predictions. total_loss = 0.0 # Track total loss. with tqdm.trange(len(images)) as t: for i in t: predicted = model.forward(images[i]) # Predict. if argmax(predicted) == argmax(labels[i]): # Check for correct += 1 # correctness. total_loss += loss.loss(predicted, labels[i]) # Compute loss. # If we're training, backpropagate gradient and update weights. if optimizer is not None: gradient = loss.gradient(predicted, labels[i]) model.backward(gradient) optimizer.step(model) # And update our metrics in the progress bar. avg_loss = total_loss / (i + 1) acc = correct / (i + 1) t.set_description(f"mnist loss: {avg_loss:.3f} acc: {acc:.3f}") # The logistic regression model for MNIST random.seed(0) # Logistic regression is just a linear layer followed by softmax model = Linear(784, 10) loss = SoftmaxCrossEntropy() # This optimizer seems to work optimizer = Momentum(learning_rate=0.01, momentum=0.99) # Train on the training data loop(model, train_images, train_labels, loss, optimizer) # Test on the test data (no optimizer means just evaluate) loop(model, test_images, test_labels, loss) # A deep neural network for MNIST random.seed(0) # Name them so we can turn train on and off dropout1 = Dropout(0.1) dropout2 = Dropout(0.1) model = Sequential([ Linear(784, 30), # Hidden layer 1: size 30 dropout1, Tanh(), Linear(30, 10), # Hidden layer 2: size 10 dropout2, Tanh(), Linear(10, 10) # Output layer: size 10 ]) # Training the deep model for MNIST optimizer = Momentum(learning_rate=0.01, momentum=0.99) loss = SoftmaxCrossEntropy() # Enable dropout and train (takes > 20 minutes on my laptop!) dropout1.train = dropout2.train = True loop(model, train_images, train_labels, loss, optimizer) # Disable dropout and evaluate dropout1.train = dropout2.train = False loop(model, test_images, test_labels, loss)
        vectorized_y = np.zeros(shape)
        for idx, label in enumerate(ys):
            vectorized_y[idx][label] = 1
        data = []
        for x, y in zip(xs, vectorized_y):
            data.append((x.reshape(-1, 1).astype(float), y.reshape(-1, 1)))
    else:
        data = []
        for x, y in zip(xs, ys):
            data.append((x.reshape(-1, 1).astype(float), y.reshape(-1, 1)))
    return data


train = mnist.train_images()[:-10000], mnist.train_labels()[:-10000]
validation = mnist.train_images()[-10000:], mnist.train_labels()[-10000:]
test = mnist.test_images(), mnist.test_labels()

train = format_data(*train)
validation = format_data(*validation, vector_y=False)
test = format_data(*test, vector_y=False)

# Build the network
net = Network([784, 50, 50, 10])

# Train the network
net.SGD(train, 30, 10, 3, validation)

# Test the network
print("Test data: {:3.2f}% correct".format(100 * net.evaluate(test) /