def main():

	train_data_fname = 'MNIST_train.pkl'
	valid_data_fname = 'MNIST_valid.pkl'
	test_data_fname = 'MNIST_test.pkl'
	X_train, y_train = neuralnet.load_data(train_data_fname)
	X_valid, y_valid = neuralnet.load_data(valid_data_fname)
	X_test, y_test = neuralnet.load_data(test_data_fname)

	neuralnet.config['epochs'] = 100

	nnet = neuralnet.Neuralnetwork(neuralnet.config)

	training_errors, validation_errors, best_model, numEpochs = neuralnet.trainer(nnet, X_train, y_train, X_valid, y_valid, nnet.config)

	print("Optimal Number of Epochs: ", numEpochs)

	# set the model to the best weights and biases
	nnet.layers = best_model
	accuracy = neuralnet.test(nnet, X_test, y_test, nnet.config)
	print("Accuracy on Test Set: ", accuracy)


	#plotting results
	plt.plot(range(len(training_errors)), training_errors, "ro", color = "blue", label='Training Set Accuracy')
	plt.plot(range(len(validation_errors)), validation_errors, "ro", color = "red", label='Validation Set Accuracy')
	plt.legend(loc='upper left')
	plt.xlabel("Epochs")
	plt.ylabel("Percentage Correct")
	plt.title("Training on MNIST Dataset")
	plt.savefig('partC.png')
Example #2
def main():

    train_data_fname = 'MNIST_train.pkl'
    valid_data_fname = 'MNIST_valid.pkl'
    test_data_fname = 'MNIST_test.pkl'
    X_train, y_train = neuralnet.load_data(train_data_fname)
    X_valid, y_valid = neuralnet.load_data(valid_data_fname)
    X_test, y_test = neuralnet.load_data(test_data_fname)

    #found this as the optimal number of epochs from Part C
    neuralnet.config['epochs'] = 26

    testshapes = [[784, 25, 10], [784, 100, 10], [784, 47, 47, 10]]

    for shape in testshapes:
        neuralnet.config['layer_specs'] = shape

        network = neuralnet.Neuralnetwork(neuralnet.config)

        training_errors, validation_errors, best_model, numEpochs = neuralnet.trainer(
            network, X_train, y_train, X_valid, y_valid, network.config)

        network.layers = best_model
        accuracy = neuralnet.test(network, X_test, y_test, network.config)

        print("Shape: ", shape)
        print("Accuracy", accuracy)

        plt.plot(range(len(training_errors)),
                 training_errors,
                 "o",
                 color="blue",
                 label='Training Set Accuracy')
        plt.plot(range(len(validation_errors)),
                 validation_errors,
                 "o",
                 color="red",
                 label='Validation Set Accuracy')
        plt.legend(loc='upper left')
        plt.xlabel("Epochs")
        plt.ylabel("Percentage Correct")
        plt.title("Training with " + str(shape) + " Shape")
        name = "partF_" + str(shape) + ".png"
        plt.savefig(name)
        plt.close()
Example #3
def sanity_network(data, default_config):
    """
    Check implementation of the neural network's forward pass and backward pass.
    """
    # Set seed to reproduce results.
    np.random.seed(42)

    # Random input for our network.
    random_image = np.random.randn(1, 784)

    # Initialize the network using the default configuration
    nnet = neuralnet.Neuralnetwork(default_config)

    # Compute the forward pass.
    nnet(random_image, targets=np.array([1, 0, 0, 0, 0, 0, 0, 0, 0, 0]))

    # Compute the backward pass.
    nnet.backward()

    layer_no = 0
    for layer_idx, layer in enumerate(nnet.layers):
        if isinstance(layer, neuralnet.Layer):
            layer_no += 1
            error_x = np.sum(np.abs(data['nnet'].layers[layer_idx].x -
                                    layer.x))
            error_w = np.sum(np.abs(data['nnet'].layers[layer_idx].w -
                                    layer.w))
            error_b = np.sum(np.abs(data['nnet'].layers[layer_idx].b -
                                    layer.b))
            error_d_w = np.sum(
                np.abs(data['nnet'].layers[layer_idx].d_w - layer.d_w))
            error_d_b = np.sum(
                np.abs(data['nnet'].layers[layer_idx].d_b - layer.d_b))

            check_error(error_x, f"Layer{layer_no}: Input")
            check_error(error_w, f"Layer{layer_no}: Weights")
            check_error(error_b, f"Layer{layer_no}: Biases")
            check_error(error_d_w, f"Layer{layer_no}: Weight Gradient")
            check_error(error_d_b, f"Layer{layer_no}: Bias Gradient")

    print(20 * "-", "\n")
Example #4
def main():

	train_data_fname = 'MNIST_train.pkl'
	valid_data_fname = 'MNIST_valid.pkl'
	test_data_fname = 'MNIST_test.pkl'
	X_train, y_train = neuralnet.load_data(train_data_fname)
	X_valid, y_valid = neuralnet.load_data(valid_data_fname)
	X_test, y_test = neuralnet.load_data(test_data_fname)


	# found this as the optimal number of epochs from Part C
	# optimal value is about 26 epochs
	# To test regularization, going to check for a few more epochs
	neuralnet.config['epochs'] = 30

	regularization_constant_testers = [0.0001, 0.001]


	for regFactor in regularization_constant_testers:

		neuralnet.config['L2_penalty'] = regFactor
		network = neuralnet.Neuralnetwork(neuralnet.config)

		training_errors, validation_errors, best_model, numEpochs = neuralnet.trainer(network, X_train, y_train, X_valid, y_valid, network.config)
		
		network.layers = best_model
		accuracy = neuralnet.test(network, X_test, y_test, network.config)

		print("Regularization Constant: ", regFactor)
		print("Accuracy on Test Set: ", accuracy)
		print()
		
		plt.plot(range(len(training_errors)), training_errors,"ro", color = "blue", label= 'Training Set Accuracy')
		plt.plot(range(len(validation_errors)), validation_errors,"ro", color = "red", label= 'Validation Set Accuracy')
		plt.legend(loc='upper left')
		plt.xlabel("Epochs")
		plt.ylabel("Percentage Correct")
		plt.title("Training with regularization factor: " + str(regFactor))
		name = "partD_" + str(regFactor) + ".png"
		plt.savefig(name)
		plt.close()
def main():
	
	train_data_fname = 'MNIST_train.pkl'
	valid_data_fname = 'MNIST_valid.pkl'
	test_data_fname = 'MNIST_test.pkl'
	X_train, y_train = neuralnet.load_data(train_data_fname)
	X_valid, y_valid = neuralnet.load_data(valid_data_fname)
	X_test, y_test = neuralnet.load_data(test_data_fname)



	activation_functions = ["tanh", "sigmoid", "ReLU"]

	#found this as the optimal number of epochs from Part C
	neuralnet.config['epochs'] = 26
	

	for function in activation_functions:
		neuralnet.config['activation'] = function
		network = neuralnet.Neuralnetwork(neuralnet.config)

		training_errors, validation_errors, best_model, numEpochs = neuralnet.trainer(network, X_train, y_train, X_valid, y_valid, network.config)
		
		network.layers = best_model
		accuracy = neuralnet.test(network, X_test, y_test, network.config)

		print("Activation Function Used: ", function)
		print("Accuracy: ", accuracy)

		plt.plot(range(len(training_errors)), training_errors,"ro", color = "blue", label='Training Set Accuracy')
		plt.plot(range(len(validation_errors)), validation_errors,"ro", color = "red", label='Validation Set Accuracy')
		plt.legend(loc='upper left')
		plt.xlabel("Epochs")
		plt.ylabel("Percentage Correct")
		plt.title("Training with " + function + " Function")
		name = "partE_" + str(function) + ".png"
		plt.savefig(name)
		plt.close()
def main():
    # make_pickle()
    benchmark_data = pickle.load(open('validate_data.pkl', 'rb'),
                                 encoding='latin1')

    config = {}
    # First element is the input layer size, last is the output layer size,
    # and the elements in between are the hidden layer sizes.
    config['layer_specs'] = [784, 100, 100, 10]
    config['activation'] = 'sigmoid'  # Hidden-layer activation: 'sigmoid', 'tanh' or 'ReLU'
    config['batch_size'] = 1000  # Number of training samples per batch passed to the network
    config['epochs'] = 50  # Number of epochs to train the model
    config['early_stop'] = True  # Whether to apply early stopping
    config['early_stop_epoch'] = 5  # Number of epochs of increasing validation loss counted as overfitting
    config['L2_penalty'] = 0  # Regularization constant
    config['momentum'] = False  # Whether momentum is applied
    config['momentum_gamma'] = 0.9  # The constant 'gamma' in the momentum expression
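    # For illustration (this describes an assumption about how Neuralnetwork
    # interprets layer_specs; the constructor itself is not shown here):
    # [784, 100, 100, 10] presumably expands into
    # Layer(784, 100) -> Activation -> Layer(100, 100) -> Activation -> Layer(100, 10),
    # with the configured activation inserted between consecutive Layer objects.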

    np.random.seed(42)
    x = np.random.randn(1, 100)
    act_sigmoid = neuralnet.Activation('sigmoid')
    act_tanh = neuralnet.Activation('tanh')
    act_ReLU = neuralnet.Activation('ReLU')

    out_sigmoid = act_sigmoid.forward_pass(x)
    err_sigmoid = np.sum(np.abs(benchmark_data['out_sigmoid'] - out_sigmoid))
    check_error(err_sigmoid, "Sigmoid Forward Pass")

    out_tanh = act_tanh.forward_pass(x)
    err_tanh = np.sum(np.abs(benchmark_data['out_tanh'] - out_tanh))
    check_error(err_tanh, "Tanh Forward Pass")

    out_ReLU = act_ReLU.forward_pass(x)
    err_ReLU = np.sum(np.abs(benchmark_data['out_ReLU'] - out_ReLU))
    check_error(err_ReLU, "ReLU Forward Pass")

    print("**************")

    grad_sigmoid = act_sigmoid.backward_pass(1.0)
    err_sigmoid_grad = np.sum(
        np.abs(benchmark_data['grad_sigmoid'] - grad_sigmoid))
    check_error(err_sigmoid_grad, "Sigmoid Gradient")

    grad_tanh = act_tanh.backward_pass(1.0)
    err_tanh_grad = np.sum(np.abs(benchmark_data['grad_tanh'] - grad_tanh))
    check_error(err_tanh_grad, "Tanh Gradient")

    grad_ReLU = act_ReLU.backward_pass(1.0)
    err_ReLU_grad = np.sum(np.abs(benchmark_data['grad_ReLU'] - grad_ReLU))
    check_error(err_ReLU_grad, "ReLU Gradient")

    np.random.seed(42)
    x_image = np.random.randn(1, 784)

    nnet = neuralnet.Neuralnetwork(config)
    nnet.forward_pass(x_image,
                      targets=np.array([1, 0, 0, 0, 0, 0, 0, 0, 0, 0]))
    nnet.backward_pass()

    layer_no = 0
    for layer_idx, layer in enumerate(nnet.layers):
        if isinstance(layer, neuralnet.Layer):
            layer_no += 1
            error_x = np.sum(
                np.abs(benchmark_data['nnet'].layers[layer_idx].x - layer.x))
            error_w = np.sum(
                np.abs(benchmark_data['nnet'].layers[layer_idx].w - layer.w))
            error_b = np.sum(
                np.abs(benchmark_data['nnet'].layers[layer_idx].b - layer.b))
            error_d_w = np.sum(
                np.abs(benchmark_data['nnet'].layers[layer_idx].d_w -
                       layer.d_w))
            error_d_b = np.sum(
                np.abs(benchmark_data['nnet'].layers[layer_idx].d_b -
                       layer.d_b))

            check_error(error_x, "Layer{} Input".format(layer_no))
            check_error(error_w, "Layer{} Weights".format(layer_no))
            check_error(error_b, "Layer{} Biases".format(layer_no))
            check_error(error_d_w, "Layer{} Weight Gradient".format(layer_no))
            check_error(error_d_b, "Layer{} Bias Gradient".format(layer_no))
def main():
    train_data_fname = 'MNIST_train.pkl'

    X_train, y_train = neuralnet.load_data(train_data_fname)
    epsilon = 0.1
    epsilon_squared = np.power(epsilon, 2)    # tolerance: the two gradients should agree to within epsilon^2
    nnet = neuralnet.Neuralnetwork(neuralnet.config)

    gradients = []    # will store lists of gradients and approximate gradients
    all_correct = True

    nnet.forward_pass(X_train[0].reshape(1,784), y_train[0])

    num_layers = len(nnet.layers)

    for i in range(400):       # run the network on this training example for a number of iterations
        nnet.forward_pass(X_train[0].reshape(1,784), y_train[0])
        nnet.backward_pass()
        for layer in nnet.layers:
            if isinstance(layer, neuralnet.Layer):
                layer.w = layer.w + 0.001 * layer.d_w
                layer.b = layer.b + 0.001 * layer.d_b


    # Check the gradient
    j = 0
    for layer in nnet.layers:  # add and subtract epsilon to the weights and do forward_pass to find E(w + e) and E(w - e)
        if isinstance(layer, neuralnet.Layer):
            if j == 0:    # input to hidden Layer
                original_w1 = layer.w[0][0]     # save original weight
                layer.w[0][0] = layer.w[0][0] + epsilon     # add epsilon and compute loss
                input_to_hidden_w1_plus_loss = nnet.forward_pass(X_train[0].reshape(1,784), y_train[0])[0]
                layer.w[0][0] = original_w1 - epsilon    # set to w - epsilon and compute loss
                input_to_hidden_w1_minus_loss = nnet.forward_pass(X_train[0].reshape(1,784), y_train[0])[0]
                approx_grad1 = (input_to_hidden_w1_plus_loss - input_to_hidden_w1_minus_loss) / (2 * epsilon)    # approx_grad = (E(w + e) - E(w - e)) / 2e
                layer.w[0][0] = original_w1    # set weight back to original weight
                nnet.forward_pass(X_train[0].reshape(1,784), y_train[0])    # recompute activations at the original weights
                nnet.backward_pass()       # back pass to find gradient
                for each_layer in nnet.layers:
                    if nnet.layers.index(each_layer) == 0:
                        grad = each_layer.d_w[0][0]     # find the gradient
                if grad_diff(grad, approx_grad1) > epsilon_squared:    # if the gradients differ by more than epsilon squared, the gradient is incorrect
                    all_correct = False
                    print('Input to hidden gradient is incorrect')
                gradients.append([grad, approx_grad1])     # append the back pass gradient and the approximate gradient as a list to gradients

                original_w2 = layer.w[0][1]
                layer.w[0][1] = layer.w[0][1] + epsilon
                input_to_hidden_w2_plus_loss = nnet.forward_pass(X_train[0].reshape(1,784), y_train[0])[0]
                layer.w[0][1] = original_w2 - epsilon
                input_to_hidden_w2_minus_loss = nnet.forward_pass(X_train[0].reshape(1,784), y_train[0])[0]
                approx_grad2 = (input_to_hidden_w2_plus_loss - input_to_hidden_w2_minus_loss) / (2 * epsilon)
                layer.w[0][1] = original_w2
                nnet.forward_pass(X_train[0].reshape(1,784), y_train[0])
                nnet.backward_pass()
                for each_layer in nnet.layers:
                    if nnet.layers.index(each_layer) == 0:
                        grad = each_layer.d_w[0][1]
                if grad_diff(grad, approx_grad2) > epsilon_squared:
                    all_correct = False
                    print('Input to hidden gradient is incorrect')
                gradients.append([grad, approx_grad2])

                original_w3 = layer.b[0][0]
                layer.b[0][0] = layer.b[0][0] + epsilon
                hidden_bias_w_plus_loss = nnet.forward_pass(X_train[0].reshape(1,784), y_train[0])[0]
                layer.b[0][0] = original_w3 - epsilon
                hidden_bias_w_minus_loss = nnet.forward_pass(X_train[0].reshape(1,784), y_train[0])[0]
                approx_grad3 = (hidden_bias_w_plus_loss - hidden_bias_w_minus_loss) / (2 * epsilon)
                layer.b[0][0] = original_w3
                nnet.forward_pass(X_train[0].reshape(1,784), y_train[0])
                nnet.backward_pass()
                for each_layer in nnet.layers:
                    if nnet.layers.index(each_layer) == 0:
                        grad = each_layer.d_b[0][0]
                if grad_diff(grad, approx_grad3) > epsilon_squared:
                    all_correct = False
                    print('Hidden bias gradient is incorrect')
                gradients.append([grad, approx_grad3])

            if j == num_layers - 1:        # hidden layer to output layer
                original_w4 = layer.w[0][0]
                layer.w[0][0] = layer.w[0][0] + epsilon
                hidden_to_output_w1_plus_loss = nnet.forward_pass(X_train[0].reshape(1,784), y_train[0])[0]
                layer.w[0][0] = original_w4 - epsilon
                hidden_to_output_w1_minus_loss = nnet.forward_pass(X_train[0].reshape(1,784), y_train[0])[0]
                approx_grad4 = (hidden_to_output_w1_plus_loss - hidden_to_output_w1_minus_loss) / (2 * epsilon)
                layer.w[0][0] = original_w4
                nnet.forward_pass(X_train[0].reshape(1,784), y_train[0])
                nnet.backward_pass()
                for each_layer in nnet.layers:
                    if nnet.layers.index(each_layer) == num_layers - 1:
                        grad = each_layer.d_w[0][0]
                if grad_diff(grad, approx_grad4) > epsilon_squared:
                    all_correct = False
                    print('Hidden to output gradient is incorrect')
                gradients.append([grad, approx_grad4])

                original_w5 = layer.w[0][1]
                layer.w[0][1] = layer.w[0][1] + epsilon
                hidden_to_output_w2_plus_loss = nnet.forward_pass(X_train[0].reshape(1,784), y_train[0])[0]
                layer.w[0][1] = original_w5 - epsilon
                hidden_to_output_w2_minus_loss = nnet.forward_pass(X_train[0].reshape(1,784), y_train[0])[0]
                approx_grad5 = (hidden_to_output_w2_plus_loss - hidden_to_output_w2_minus_loss) / (2 * epsilon)
                layer.w[0][1] = original_w5    # set weight back to original weight
                nnet.forward_pass(X_train[0].reshape(1,784), y_train[0])
                nnet.backward_pass()
                for each_layer in nnet.layers:
                    if nnet.layers.index(each_layer) == num_layers - 1:
                        grad = each_layer.d_w[0][1]
                if grad_diff(grad, approx_grad5) > epsilon_squared:
                    all_correct = False
                    print('Hidden to output gradient is incorrect')
                gradients.append([grad, approx_grad5])

                original_w6 = layer.b[0][6]
                layer.b[0][6] = layer.b[0][6] + epsilon
                output_bias_w_plus_loss = nnet.forward_pass(X_train[0].reshape(1,784), y_train[0])[0]
                layer.b[0][6] = original_w6 - epsilon
                output_bias_w_minus_loss = nnet.forward_pass(X_train[0].reshape(1,784), y_train[0])[0]
                approx_grad6 = (output_bias_w_plus_loss - output_bias_w_minus_loss) / (2 * epsilon)
                layer.b[0][6] = original_w6
                nnet.forward_pass(X_train[0].reshape(1,784), y_train[0])
                nnet.backward_pass()
                for each_layer in nnet.layers:
                    if nnet.layers.index(each_layer) == num_layers - 1:
                        grad = each_layer.d_b[0][6]
                if grad_diff(grad, approx_grad6) > epsilon_squared:
                    print(grad_diff(grad, approx_grad6))
                    all_correct = False
                    print('Output bias gradient is incorrect')
                gradients.append([grad, approx_grad6])
        j = j + 1

    if all_correct:
        print('All gradients are correct')

    print('***********************************************')
    print('Input to hidden weight 1:')
    print('Gradient approximation:', gradients[0][1])
    print('Actual gradient:', gradients[0][0])
    print('Input to hidden weight 2:')
    print('Gradient approximation:', gradients[1][1])
    print('Actual gradient:', gradients[1][0])
    print('Hidden bias weight:')
    print('Gradient approximation:', gradients[2][1])
    print('Actual gradient:', gradients[2][0])
    print('Hidden to output weight 1:')
    print('Gradient approximation:', gradients[3][1])
    print('Actual gradient:', gradients[3][0])
    print('Hidden to output weight 2:')
    print('Gradient approximation:', gradients[4][1])
    print('Actual gradient:', gradients[4][0])
    print('Output bias weight:')
    print('Gradient approximation:', gradients[5][1])
    print('Actual gradient:', gradients[5][0])
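# Each of the six checks in main() repeats the same central-difference pattern.
# A compact sketch of that pattern is given below; the helper name and its
# parameters are hypothetical (they are not part of the original assignment
# code), and it assumes the forward_pass()/backward_pass() interface used
# above, where forward_pass() returns the loss as its first element.
def numerical_gradient_check(nnet, x, y, layer, idx, param='w', epsilon=0.1):
    """Compare one backprop gradient against (E(w + e) - E(w - e)) / (2 * e)."""
    weights = layer.w if param == 'w' else layer.b
    original = weights[idx]

    weights[idx] = original + epsilon              # E(w + e)
    loss_plus = nnet.forward_pass(x, y)[0]
    weights[idx] = original - epsilon              # E(w - e)
    loss_minus = nnet.forward_pass(x, y)[0]
    weights[idx] = original                        # restore the parameter
    approx_grad = (loss_plus - loss_minus) / (2 * epsilon)

    nnet.forward_pass(x, y)                        # recompute activations at the original weights
    nnet.backward_pass()
    grad = (layer.d_w if param == 'w' else layer.d_b)[idx]
    return grad, approx_grad

# Example usage, mirroring the first check in main() above:
#   grad, approx = numerical_gradient_check(nnet, X_train[0].reshape(1, 784), y_train[0],
#                                           nnet.layers[0], (0, 0), param='w', epsilon=0.1)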