# Shared imports assumed by each of the scripts in this section.
import pickle

import numpy as np
import matplotlib.pyplot as plt

import neuralnet


def main():
    train_data_fname = 'MNIST_train.pkl'
    valid_data_fname = 'MNIST_valid.pkl'
    test_data_fname = 'MNIST_test.pkl'

    X_train, y_train = neuralnet.load_data(train_data_fname)
    X_valid, y_valid = neuralnet.load_data(valid_data_fname)
    X_test, y_test = neuralnet.load_data(test_data_fname)

    neuralnet.config['epochs'] = 100
    nnet = neuralnet.Neuralnetwork(neuralnet.config)
    training_errors, validation_errors, best_model, numEpochs = neuralnet.trainer(
        nnet, X_train, y_train, X_valid, y_valid, nnet.config)
    print("Optimal Number of Epochs: ", numEpochs)

    # Set the model to the best weights and biases found during training.
    nnet.layers = best_model
    accuracy = neuralnet.test(nnet, X_test, y_test, nnet.config)
    print("Accuracy on Test Set: ", accuracy)

    # Plot training and validation accuracy per epoch.
    plt.plot(range(len(training_errors)), training_errors, "o",
             color="blue", label='Training Set Accuracy')
    plt.plot(range(len(validation_errors)), validation_errors, "o",
             color="red", label='Validation Set Accuracy')
    plt.legend(loc='upper left')
    plt.xlabel("Epochs")
    plt.ylabel("Percentage Correct")
    plt.title("Training on MNIST Dataset")
    plt.savefig('partC.png')
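# For reference, a minimal sketch of the module-level `config` dict that the
# scripts in this section mutate before building a Neuralnetwork. The keys
# match the ones exercised here (see the checker's explicit config further
# down); the default values are assumptions for illustration, not the actual
# contents of neuralnet.py.
config = {
    'layer_specs': [784, 100, 10],  # input size, hidden layer size(s), output size
    'activation': 'sigmoid',        # 'sigmoid', 'tanh' or 'ReLU'
    'batch_size': 1000,
    'epochs': 50,
    'early_stop': True,
    'early_stop_epoch': 5,
    'L2_penalty': 0,
    'momentum': False,
    'momentum_gamma': 0.9,
}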
def main():
    train_data_fname = 'MNIST_train.pkl'
    valid_data_fname = 'MNIST_valid.pkl'
    test_data_fname = 'MNIST_test.pkl'

    X_train, y_train = neuralnet.load_data(train_data_fname)
    X_valid, y_valid = neuralnet.load_data(valid_data_fname)
    X_test, y_test = neuralnet.load_data(test_data_fname)

    # 26 was found to be the optimal number of epochs in Part C.
    neuralnet.config['epochs'] = 26

    testshapes = [[784, 25, 10], [784, 100, 10], [784, 47, 47, 10]]
    for shape in testshapes:
        neuralnet.config['layer_specs'] = shape
        network = neuralnet.Neuralnetwork(neuralnet.config)
        training_errors, validation_errors, best_model, numEpochs = neuralnet.trainer(
            network, X_train, y_train, X_valid, y_valid, network.config)

        network.layers = best_model
        accuracy = neuralnet.test(network, X_test, y_test, network.config)
        print("Shape: ", shape)
        print("Accuracy: ", accuracy)

        plt.plot(range(len(training_errors)), training_errors, "o",
                 color="blue", label='Training Set Accuracy')
        plt.plot(range(len(validation_errors)), validation_errors, "o",
                 color="red", label='Validation Set Accuracy')
        plt.legend(loc='upper left')
        plt.xlabel("Epochs")
        plt.ylabel("Percentage Correct")
        plt.title("Training with " + str(shape) + " Shape")
        plt.savefig("partF_" + str(shape) + ".png")
        plt.close()
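# The three topologies above differ considerably in capacity. A small helper
# (not part of neuralnet.py; added here only for illustration) that counts
# the weights and biases implied by a layer_specs list:
def param_count(layer_specs):
    """Total number of weights and biases in a fully connected network."""
    return sum(n_in * n_out + n_out
               for n_in, n_out in zip(layer_specs[:-1], layer_specs[1:]))

# param_count([784, 25, 10])     -> 19,885
# param_count([784, 100, 10])    -> 79,510
# param_count([784, 47, 47, 10]) -> 39,631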
def sanity_network(data, default_config):
    """
    Check the implementation of the neural network's forward and backward passes.
    """
    # Set seed to reproduce results.
    np.random.seed(42)

    # Random input for our network.
    random_image = np.random.randn(1, 784)

    # Initialize the network using the default configuration.
    nnet = neuralnet.Neuralnetwork(default_config)

    # Compute the forward pass.
    nnet(random_image, targets=np.array([1, 0, 0, 0, 0, 0, 0, 0, 0, 0]))

    # Compute the backward pass.
    nnet.backward()

    layer_no = 0
    for layer_idx, layer in enumerate(nnet.layers):
        if isinstance(layer, neuralnet.Layer):
            layer_no += 1
            error_x = np.sum(np.abs(data['nnet'].layers[layer_idx].x - layer.x))
            error_w = np.sum(np.abs(data['nnet'].layers[layer_idx].w - layer.w))
            error_b = np.sum(np.abs(data['nnet'].layers[layer_idx].b - layer.b))
            error_d_w = np.sum(np.abs(data['nnet'].layers[layer_idx].d_w - layer.d_w))
            error_d_b = np.sum(np.abs(data['nnet'].layers[layer_idx].d_b - layer.d_b))

            check_error(error_x, f"Layer{layer_no}: Input")
            check_error(error_w, f"Layer{layer_no}: Weights")
            check_error(error_b, f"Layer{layer_no}: Biases")
            check_error(error_d_w, f"Layer{layer_no}: Weight Gradient")
            check_error(error_d_b, f"Layer{layer_no}: Bias Gradient")
            print(20 * "-", "\n")
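# `check_error` is called throughout these checks but never shown. A minimal
# sketch consistent with how it is used (an absolute-error sum and a label);
# the 1e-6 tolerance is an assumption, not the grader's actual threshold.
def check_error(error, name, tolerance=1e-6):
    status = "PASSED" if error < tolerance else "FAILED"
    print(f"{name}: {status} (absolute error = {error})")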
def main():
    train_data_fname = 'MNIST_train.pkl'
    valid_data_fname = 'MNIST_valid.pkl'
    test_data_fname = 'MNIST_test.pkl'

    X_train, y_train = neuralnet.load_data(train_data_fname)
    X_valid, y_valid = neuralnet.load_data(valid_data_fname)
    X_test, y_test = neuralnet.load_data(test_data_fname)

    # Part C found roughly 26 epochs to be optimal; to test regularization we
    # train for a few epochs beyond that.
    neuralnet.config['epochs'] = 30

    regularization_constant_testers = [0.0001, 0.001]
    for regFactor in regularization_constant_testers:
        neuralnet.config['L2_penalty'] = regFactor
        network = neuralnet.Neuralnetwork(neuralnet.config)
        training_errors, validation_errors, best_model, numEpochs = neuralnet.trainer(
            network, X_train, y_train, X_valid, y_valid, network.config)

        network.layers = best_model
        accuracy = neuralnet.test(network, X_test, y_test, network.config)
        print("Regularization Constant: ", regFactor)
        print("Accuracy on Test Set: ", accuracy)
        print()

        plt.plot(range(len(training_errors)), training_errors, "o",
                 color="blue", label='Training Set Accuracy')
        plt.plot(range(len(validation_errors)), validation_errors, "o",
                 color="red", label='Validation Set Accuracy')
        plt.legend(loc='upper left')
        plt.xlabel("Epochs")
        plt.ylabel("Percentage Correct")
        plt.title("Training with regularization factor: " + str(regFactor))
        plt.savefig("partD_" + str(regFactor) + ".png")
        plt.close()
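# For context, a sketch of how an L2 penalty is typically folded into the
# weight update (hypothetical helper; neuralnet.py's trainer may implement it
# differently): the penalty term (L2_penalty / 2) * sum(w ** 2) in the loss
# contributes an extra L2_penalty * w to each weight gradient, which is why
# only small constants (1e-4, 1e-3) are swept above.
def l2_regularized_step(w, d_w_data, l2_penalty, lr):
    """One gradient step with an L2 (weight decay) term added."""
    d_w = d_w_data + l2_penalty * w
    return w - lr * d_w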
def main():
    train_data_fname = 'MNIST_train.pkl'
    valid_data_fname = 'MNIST_valid.pkl'
    test_data_fname = 'MNIST_test.pkl'

    X_train, y_train = neuralnet.load_data(train_data_fname)
    X_valid, y_valid = neuralnet.load_data(valid_data_fname)
    X_test, y_test = neuralnet.load_data(test_data_fname)

    activation_functions = ["tanh", "sigmoid", "ReLU"]

    # 26 was found to be the optimal number of epochs in Part C.
    neuralnet.config['epochs'] = 26

    for function in activation_functions:
        neuralnet.config['activation'] = function
        network = neuralnet.Neuralnetwork(neuralnet.config)
        training_errors, validation_errors, best_model, numEpochs = neuralnet.trainer(
            network, X_train, y_train, X_valid, y_valid, network.config)

        network.layers = best_model
        accuracy = neuralnet.test(network, X_test, y_test, network.config)
        print("Activation Function Used: ", function)
        print("Accuracy: ", accuracy)

        plt.plot(range(len(training_errors)), training_errors, "o",
                 color="blue", label='Training Set Accuracy')
        plt.plot(range(len(validation_errors)), validation_errors, "o",
                 color="red", label='Validation Set Accuracy')
        plt.legend(loc='upper left')
        plt.xlabel("Epochs")
        plt.ylabel("Percentage Correct")
        plt.title("Training with " + function + " Function")
        plt.savefig("partE_" + function + ".png")
        plt.close()
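# For reference, the three activations being compared, written out with their
# derivatives (standard definitions; the graded implementations live in
# neuralnet.Activation and may differ in detail):
import numpy as np

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))  # derivative: s * (1 - s)

def tanh(x):
    return np.tanh(x)                # derivative: 1 - np.tanh(x) ** 2

def relu(x):
    return np.maximum(0.0, x)        # derivative: 1 where x > 0, else 0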
def main():
    # make_pickle()
    benchmark_data = pickle.load(open('validate_data.pkl', 'rb'), encoding='latin1')

    config = {}
    # First element is the input size, last is the output size; the elements
    # in between give the number of neurons in each hidden layer.
    config['layer_specs'] = [784, 100, 100, 10]
    # Activation function for the hidden layers: 'sigmoid', 'tanh' or 'ReLU'.
    config['activation'] = 'sigmoid'
    # Number of training samples per batch passed to the network.
    config['batch_size'] = 1000
    # Number of epochs to train the model.
    config['epochs'] = 50
    # Whether to use early stopping.
    config['early_stop'] = True
    # Number of epochs of increasing validation loss counted as overfitting.
    config['early_stop_epoch'] = 5
    # Regularization constant.
    config['L2_penalty'] = 0
    # Whether momentum is applied, and the constant 'gamma' in its expression.
    config['momentum'] = False
    config['momentum_gamma'] = 0.9

    np.random.seed(42)
    x = np.random.randn(1, 100)

    act_sigmoid = neuralnet.Activation('sigmoid')
    act_tanh = neuralnet.Activation('tanh')
    act_ReLU = neuralnet.Activation('ReLU')

    out_sigmoid = act_sigmoid.forward_pass(x)
    err_sigmoid = np.sum(np.abs(benchmark_data['out_sigmoid'] - out_sigmoid))
    check_error(err_sigmoid, "Sigmoid Forward Pass")

    out_tanh = act_tanh.forward_pass(x)
    err_tanh = np.sum(np.abs(benchmark_data['out_tanh'] - out_tanh))
    check_error(err_tanh, "Tanh Forward Pass")

    out_ReLU = act_ReLU.forward_pass(x)
    err_ReLU = np.sum(np.abs(benchmark_data['out_ReLU'] - out_ReLU))
    check_error(err_ReLU, "ReLU Forward Pass")

    print("**************")

    grad_sigmoid = act_sigmoid.backward_pass(1.0)
    err_sigmoid_grad = np.sum(np.abs(benchmark_data['grad_sigmoid'] - grad_sigmoid))
    check_error(err_sigmoid_grad, "Sigmoid Gradient")

    grad_tanh = act_tanh.backward_pass(1.0)
    err_tanh_grad = np.sum(np.abs(benchmark_data['grad_tanh'] - grad_tanh))
    check_error(err_tanh_grad, "Tanh Gradient")

    grad_ReLU = act_ReLU.backward_pass(1.0)
    err_ReLU_grad = np.sum(np.abs(benchmark_data['grad_ReLU'] - grad_ReLU))
    check_error(err_ReLU_grad, "ReLU Gradient")

    np.random.seed(42)
    x_image = np.random.randn(1, 784)

    nnet = neuralnet.Neuralnetwork(config)
    nnet.forward_pass(x_image, targets=np.array([1, 0, 0, 0, 0, 0, 0, 0, 0, 0]))
    nnet.backward_pass()

    layer_no = 0
    for layer_idx, layer in enumerate(nnet.layers):
        if isinstance(layer, neuralnet.Layer):
            layer_no += 1
            error_x = np.sum(np.abs(benchmark_data['nnet'].layers[layer_idx].x - layer.x))
            error_w = np.sum(np.abs(benchmark_data['nnet'].layers[layer_idx].w - layer.w))
            error_b = np.sum(np.abs(benchmark_data['nnet'].layers[layer_idx].b - layer.b))
            error_d_w = np.sum(np.abs(benchmark_data['nnet'].layers[layer_idx].d_w - layer.d_w))
            error_d_b = np.sum(np.abs(benchmark_data['nnet'].layers[layer_idx].d_b - layer.d_b))

            check_error(error_x, "Layer{} Input".format(layer_no))
            check_error(error_w, "Layer{} Weights".format(layer_no))
            check_error(error_b, "Layer{} Biases".format(layer_no))
            check_error(error_d_w, "Layer{} Weight Gradient".format(layer_no))
            check_error(error_d_b, "Layer{} Bias Gradient".format(layer_no))
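# `grad_diff` is used by the numerical gradient checker below but never
# shown. A minimal sketch consistent with its usage (two scalars in, a
# non-negative difference out); absolute difference is an assumption here, as
# a relative difference would also fit the call sites.
def grad_diff(grad, approx_grad):
    return abs(grad - approx_grad)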
def main():
    train_data_fname = 'MNIST_train.pkl'
    X_train, y_train = neuralnet.load_data(train_data_fname)

    # Use a single training example throughout the check.
    x0 = X_train[0].reshape(1, 784)
    y0 = y_train[0]

    epsilon = 0.1
    # The backprop gradient and the numerical approximation must agree to
    # within this tolerance (1e-4) to be counted as correct.
    tolerance = np.power(0.01, 2)

    nnet = neuralnet.Neuralnetwork(neuralnet.config)
    gradients = []  # will store [backprop gradient, approximate gradient] pairs
    all_correct = True

    nnet.forward_pass(x0, y0)
    num_layers = len(nnet.layers)

    # Train briefly on this example so the check does not run at the freshly
    # initialized weights.
    for i in range(400):
        nnet.forward_pass(x0, y0)
        nnet.backward_pass()
        for layer in nnet.layers:
            if isinstance(layer, neuralnet.Layer):
                layer.w = layer.w + 0.001 * layer.d_w
                layer.b = layer.b + 0.001 * layer.d_b

    # Check the gradient: for each chosen weight, add and subtract epsilon,
    # run forward passes to get E(w + e) and E(w - e), and compare the
    # central difference (E(w + e) - E(w - e)) / (2 * e) against backprop.
    j = 0
    for layer in nnet.layers:
        if isinstance(layer, neuralnet.Layer):
            if j == 0:  # input-to-hidden layer
                # First input-to-hidden weight.
                original_w1 = layer.w[0][0]
                layer.w[0][0] = original_w1 + epsilon  # compute E(w + e)
                input_to_hidden_w1_plus_loss = nnet.forward_pass(x0, y0)[0]
                layer.w[0][0] = original_w1 - epsilon  # compute E(w - e)
                input_to_hidden_w1_minus_loss = nnet.forward_pass(x0, y0)[0]
                approx_grad1 = (input_to_hidden_w1_plus_loss -
                                input_to_hidden_w1_minus_loss) / (2 * epsilon)
                layer.w[0][0] = original_w1  # restore the original weight
                # Recompute the backprop gradient at the restored weights.
                nnet.forward_pass(x0, y0)
                nnet.backward_pass()
                grad = layer.d_w[0][0]
                if grad_diff(grad, approx_grad1) > tolerance:
                    all_correct = False
                    print('Input to hidden gradient is incorrect')
                gradients.append([grad, approx_grad1])

                # Second input-to-hidden weight.
                original_w2 = layer.w[0][1]
                layer.w[0][1] = original_w2 + epsilon
                input_to_hidden_w2_plus_loss = nnet.forward_pass(x0, y0)[0]
                layer.w[0][1] = original_w2 - epsilon
                input_to_hidden_w2_minus_loss = nnet.forward_pass(x0, y0)[0]
                approx_grad2 = (input_to_hidden_w2_plus_loss -
                                input_to_hidden_w2_minus_loss) / (2 * epsilon)
                layer.w[0][1] = original_w2
                nnet.forward_pass(x0, y0)
                nnet.backward_pass()
                grad = layer.d_w[0][1]
                if grad_diff(grad, approx_grad2) > tolerance:
                    all_correct = False
                    print('Input to hidden gradient is incorrect')
                gradients.append([grad, approx_grad2])

                # Bias of the first hidden unit.
                original_b1 = layer.b[0][0]
                layer.b[0][0] = original_b1 + epsilon
                hidden_bias_plus_loss = nnet.forward_pass(x0, y0)[0]
                layer.b[0][0] = original_b1 - epsilon
                hidden_bias_minus_loss = nnet.forward_pass(x0, y0)[0]
                approx_grad3 = (hidden_bias_plus_loss -
                                hidden_bias_minus_loss) / (2 * epsilon)
                layer.b[0][0] = original_b1
                nnet.forward_pass(x0, y0)
                nnet.backward_pass()
                grad = layer.d_b[0][0]
                if grad_diff(grad, approx_grad3) > tolerance:
                    all_correct = False
                    print('Hidden bias gradient is incorrect')
                gradients.append([grad, approx_grad3])

            if j == num_layers - 1:  # hidden-to-output layer
                # First hidden-to-output weight.
                original_w4 = layer.w[0][0]
                layer.w[0][0] = original_w4 + epsilon
                hidden_to_output_w1_plus_loss = nnet.forward_pass(x0, y0)[0]
                layer.w[0][0] = original_w4 - epsilon
                hidden_to_output_w1_minus_loss = nnet.forward_pass(x0, y0)[0]
                approx_grad4 = (hidden_to_output_w1_plus_loss -
                                hidden_to_output_w1_minus_loss) / (2 * epsilon)
                layer.w[0][0] = original_w4
                nnet.forward_pass(x0, y0)
                nnet.backward_pass()
                grad = layer.d_w[0][0]
                if grad_diff(grad, approx_grad4) > tolerance:
                    all_correct = False
                    print('Hidden to output gradient is incorrect')
                gradients.append([grad, approx_grad4])

                # Second hidden-to-output weight.
                original_w5 = layer.w[0][1]
                layer.w[0][1] = original_w5 + epsilon
                hidden_to_output_w2_plus_loss = nnet.forward_pass(x0, y0)[0]
                layer.w[0][1] = original_w5 - epsilon
                hidden_to_output_w2_minus_loss = nnet.forward_pass(x0, y0)[0]
                approx_grad5 = (hidden_to_output_w2_plus_loss -
                                hidden_to_output_w2_minus_loss) / (2 * epsilon)
                layer.w[0][1] = original_w5
                nnet.forward_pass(x0, y0)
                nnet.backward_pass()
                grad = layer.d_w[0][1]
                if grad_diff(grad, approx_grad5) > tolerance:
                    all_correct = False
                    print('Hidden to output gradient is incorrect')
                gradients.append([grad, approx_grad5])

                # Bias of one output unit (index 6, chosen arbitrarily).
                original_b2 = layer.b[0][6]
                layer.b[0][6] = original_b2 + epsilon
                output_bias_plus_loss = nnet.forward_pass(x0, y0)[0]
                layer.b[0][6] = original_b2 - epsilon
                output_bias_minus_loss = nnet.forward_pass(x0, y0)[0]
                approx_grad6 = (output_bias_plus_loss -
                                output_bias_minus_loss) / (2 * epsilon)
                layer.b[0][6] = original_b2
                nnet.forward_pass(x0, y0)
                nnet.backward_pass()
                grad = layer.d_b[0][6]
                if grad_diff(grad, approx_grad6) > tolerance:
                    all_correct = False
                    print('Output bias gradient is incorrect')
                gradients.append([grad, approx_grad6])
        j = j + 1

    if all_correct:
        print('All gradients are correct')
    print('***********************************************')
    print('Input to hidden weight 1:')
    print('Gradient approximation:', gradients[0][1])
    print('Actual gradient:', gradients[0][0])
    print('Input to hidden weight 2:')
    print('Gradient approximation:', gradients[1][1])
    print('Actual gradient:', gradients[1][0])
    print('Hidden bias weight:')
    print('Gradient approximation:', gradients[2][1])
    print('Actual gradient:', gradients[2][0])
    print('Hidden to output weight 1:')
    print('Gradient approximation:', gradients[3][1])
    print('Actual gradient:', gradients[3][0])
    print('Hidden to output weight 2:')
    print('Gradient approximation:', gradients[4][1])
    print('Actual gradient:', gradients[4][0])
    print('Output bias weight:')
    print('Gradient approximation:', gradients[5][1])
    print('Actual gradient:', gradients[5][0])
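# The six near-identical blocks above can be collapsed into one helper. A
# sketch (hypothetical; not part of neuralnet.py) of the same central
# difference applied to any single parameter entry:
def numerical_gradient(nnet, x, y, param, idx, epsilon=0.1):
    """Approximate dE/d(param[idx]) via (E(w + e) - E(w - e)) / (2 * e)."""
    original = param[idx]
    param[idx] = original + epsilon
    loss_plus = nnet.forward_pass(x, y)[0]
    param[idx] = original - epsilon
    loss_minus = nnet.forward_pass(x, y)[0]
    param[idx] = original  # restore the weight
    return (loss_plus - loss_minus) / (2 * epsilon)

# Example usage against the backprop gradient of the first input-to-hidden
# weight, mirroring the checks above:
#   approx = numerical_gradient(nnet, x0, y0, nnet.layers[0].w, (0, 0))
#   nnet.forward_pass(x0, y0)
#   nnet.backward_pass()
#   assert grad_diff(nnet.layers[0].d_w[0][0], approx) <= tolerance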