Example 1
from neural_network import NeuralNetwork
from mnist_data import get_mnist_data
import time
import numpy as np
import pickle  # cPickle was merged into pickle in Python 3

# Automated simulations that record accuracy, runtime, and number of iterations.
# Each (algorithm, noise type) pair is run 15 times.
# Results are saved to .pkl files.

# Training: 55,000 examples; validation: 5,000 examples; testing: 10,000 examples.
train_dataset, train_labels, valid_dataset, valid_labels, test_dataset, test_labels = get_mnist_data()

optimizer_parameters = {}
optimizer_parameters['OriginalGradientDescent'] = {'learning_rate': 0.5}
optimizer_parameters['CustomGradientDescent'] = {'learning_rate': 0.5}
optimizer_parameters['OriginalAdam'] = {}
optimizer_parameters['CustomAdam'] = {}
optimizer_parameters['LBFGS'] = {'max_hist': 1000}
optimizer_parameters['ConjugateGradient'] = {
    'learning_rate': 0.0001,
    'min_step': 0.02
}
optimizer_parameters['HessianFree'] = {}
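
# A minimal sketch (hypothetical helper names) of how the per-(algorithm,
# noise type) results described above could be gathered and pickled; the
# actual logic lives in run_single_set below:
#
#     results = {}
#     for algorithm in optimizer_parameters:
#         for noise_type in noise_types:  # noise_types: assumed list of labels
#             runs = [run_once(algorithm, noise_type) for _ in range(15)]
#             results[(algorithm, noise_type)] = runs
#     with open('results.pkl', 'wb') as f:
#         pickle.dump(results, f)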


def run_single_set(num_runs,
                   num_hidden_layers,
                   num_hidden_nodes,
                   auto_terminate_num_iter,
Example 2
import numpy as np
from mnist_data import get_mnist_data

from MLP import MLP
from MLP_SGD import MLP_SGD

# get the data from the mnist_data module
test_data, test_targets, train_data, train_targets = get_mnist_data()

# initialize a stochastic gradient descent multi-layer perceptron

mlp_sgd = MLP_SGD(lr=1,
                  sizes=[784, 30, 16, 10],
                  activation_list=['sigmoid', 'softmax'])
outputs1 = mlp_sgd.forward(test_data)
print("Loss before update: ", mlp_sgd.loss(outputs1, test_targets))
print("Accuracy: ", mlp_sgd.evaluate(test_data, test_targets))

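# the positional arguments are presumably the number of epochs (5) and the
# mini-batch size (10); order assumed, since MLP_SGD.fit is not shown here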
mlp_sgd.fit(train_data, train_targets, 5, 10)

outputs2 = mlp_sgd.forward(test_data)
print("Loss after update: ", mlp_sgd.loss(outputs2, test_targets))
print("Accuracy: ", mlp_sgd.evaluate(test_data, test_targets))
Example 3
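    # Assumed context: this method belongs to a unittest.TestCase class whose
    # module imports tensorflow as tf and the project's NeuralNetwork,
    # ConvolutionalLayer, LinearLayer, and get_mnist_data (not shown here).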
    def test_network_lesson09(self):
        """
        Test initializing a NeuralNetwork
        """
        input_shape = (28, 28, 1)
        output_count = 10
        network = NeuralNetwork('convnet for MNIST', input_shape, output_count)

        # normal distribution parameters for random weights
        mean = 0.0
        stddev = 0.1

        # General convolution shapes and parameters common to all convolutional layers
        conv_stride_shape = (1, 1)
        conv_pad_shape = (0, 0)
        conv_pad_type = 'SAME'

        pool_stride_shape = (2, 2)
        pool_shape = (2, 2)
        pool_pad_type = 'SAME'

        activation = 'relu'

        # Kernel depths and sizes for each convolution layer
        depths = [32, 64, 128]
        kernel_shapes = [(5, 5, depths[0]), (5, 5, depths[1]), (5, 5, depths[2])]
        conv_layer_count = len(depths)

        # Expected values for assertions
        after_conv_output_shapes = [(28, 28, depths[0]), (14, 14, depths[1]), (7, 7, depths[2])]
        after_pool_output_shapes = [(14, 14, depths[0]), (7, 7, depths[1]), (4, 4, depths[2])]
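        # Shape check: with 'SAME' padding and stride 1 each convolution keeps
        # height and width, and each 2x2, stride-2 max pool halves them
        # (rounding up under 'SAME'): 28 -> 14 -> 7 -> 4.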

        # Create convolutional layers
        conv = None
        for i in range(conv_layer_count):
            name = 'l{:d}'.format(i)
            if i > 0:
                input_shape = conv.output_shape
            conv = ConvolutionalLayer(name, input_shape, kernel_shapes[i], conv_stride_shape,
                                      conv_pad_shape, conv_pad_type, activation)
            self.assertEqual(after_conv_output_shapes[i], conv.output_shape)
            conv.add_pooling('max', pool_shape, pool_stride_shape, pool_pad_type)
            self.assertEqual(after_pool_output_shapes[i], conv.output_shape)
            network.add_layer(conv, mean, stddev)

        # Create linear layers

        # Input and output sizes for the linear layers; 4 * 4 * 128 is the
        # flattened output of the final conv/pool stage
        linear_input_sizes = [4 * 4 * 128, 512]
        linear_output_sizes = [512, 10]
        linear_activations = ['tanh', None]

        for i, input_size in enumerate(linear_input_sizes):
            layer_index = i + conv_layer_count
            name = 'l{:d}'.format(layer_index)
            linear = LinearLayer(name, input_size, linear_output_sizes[i], linear_activations[i])
            network.add_layer(linear, mean, stddev)

        # MNIST: classify the 10 digit classes
        network.define_network()

        learning_rate = 0.001
        network.define_operations(learning_rate, 'gradient_descent')

        epochs = 10
        batch_size = 128
        saver = tf.train.Saver()
        (train_inputs, train_labels, valid_inputs, valid_labels, test_inputs, test_labels) = \
            get_mnist_data()

        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())  # replaces deprecated initialize_all_variables

            network.train_with_validate(sess, train_inputs, train_labels, valid_inputs,
                                        valid_labels, epochs, batch_size)

            test_accuracy = network.evaluate_in_batches(sess, test_inputs, test_labels, batch_size)
            print("Test accuracy:", test_accuracy)

            saver.save(sess, 'convnet')
            print("Model saved")
Example 4
# Imports this snippet needs (module paths for get_mnist_data and MLP assumed
# to match the earlier examples)
import argparse

from sklearn.metrics import classification_report

from MLP import MLP
from mnist_data import get_mnist_data


def main():
    """Takes the MLP class for a test drive"""

    parser = argparse.ArgumentParser(description='Train MLP on MNIST dataset')

    parser.add_argument(
        '-mi',
        '--max_iter',
        required=False,
        default=50,
        type=int,
        help='Number of iterations for stochastic gradient descent')
    parser.add_argument('-HL_0',
                        '--HL_0',
                        type=int,
                        required=False,
                        default=784,
                        help='Set the size of the input layer.')
    parser.add_argument('-HL_1',
                        '--HL_1',
                        type=int,
                        required=False,
                        default=500,
                        help='Set the size of the second layer.')
    parser.add_argument('-HL_2',
                        '--HL_2',
                        type=int,
                        required=False,
                        default=500,
                        help='Set the size of the third layer.')
    parser.add_argument('-HL_3',
                        '--HL_3',
                        type=int,
                        required=False,
                        default=500,
                        help='Set the size of the fourth layer.')
    parser.add_argument('-HL_4',
                        '--HL_4',
                        type=int,
                        required=False,
                        default=10,
                        help='Set the size of the output layer.')
    parser.add_argument(
        '-bs',
        '--batch_size',
        required=False,
        type=int,
        default=100,
        help='Set the size of the random samples chosen in each stochastic '
        'gradient computation.')
    parser.add_argument(
        '-lr',
        '--learning_rate',
        required=False,
        type=float,
        help='Set the learning rate for the stochastic gradient descent.',
        default=0.03)
    parser.add_argument(
        '-rp',
        '--reg_param',
        required=False,
        type=float,
        default=0,
        help='Set weight parameter for regularization penalty term.')
    parser.add_argument(
        '-ot',
        '--output_type',
        required=False,
        default='softmax',
        help='Set the type of the output layer activation function.',
        choices=['sigmoid', 'softmax'])

    opts = vars(parser.parse_args())

    max_iter = opts['max_iter']
    HL_0 = opts['HL_0']
    HL_1 = opts['HL_1']
    HL_2 = opts['HL_2']
    HL_3 = opts['HL_3']
    HL_4 = opts['HL_4']
    reg_param = opts['reg_param']
    output_type = opts['output_type']
    batch_size = opts['batch_size']
    learning_rate = opts['learning_rate']

    print("Getting data...")
    X_train, y_train, X_test, y_test = get_mnist_data()
    print("Got data. Creating...")

    model = MLP()

    model.add_layer(HL_0)
    model.add_layer(HL_1)
    model.add_layer(HL_2)
    model.add_layer(HL_3)
    model.add_layer(HL_4, output_type)
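    # with the defaults this is a 784-500-500-500-10 network; the final layer
    # uses the selected output activation (sigmoid or softmax)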

    model.fix()

    input("Created model. Press enter to view blueprint.")

    print(model)

    input("Press enter to fit model on training data.")
    # testing fit method
    model.fit(X_train=X_train,
              y_train=y_train,
              reg_param=reg_param,
              batch_size=batch_size,
              max_iter=max_iter,
              learning_rate=learning_rate)

    input("Press enter to transform test data.")
    print("Predicting...")

    # testing transform method
    y_pred_test = model.transform(X_test)

    input("Test data transformed. Press enter to view evaluation summary.")
    # testing evaluate method (on the training data)
    model.evaluate(X_train, y_train)

    print(classification_report(y_test, y_pred_test))

    input("Press enter to quit.")
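
# The snippet defines main() but never calls it; the standard entry-point guard
# makes it runnable as a script. An invocation overriding some defaults might
# look like (file name hypothetical):
#
#     python train_mlp.py --max_iter 100 --batch_size 64 --learning_rate 0.01
if __name__ == '__main__':
    main()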
Example 5
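    # Assumed context: as in Example 3, this method belongs to a
    # unittest.TestCase class whose module imports tensorflow as tf and the
    # project's NeuralNetwork, ConvolutionalLayer, LinearLayer, and
    # get_mnist_data.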
    def test_network_lenet(self):
        """
        Test using the LeNet-5 architecture
        """
        input_shape = (32, 32, 1)
        output_count = 10
        network = NeuralNetwork('lenet5 for MNIST', input_shape, output_count)

        # normal distribution parameters for random weights
        mean = 0.0
        stddev = 0.1

        # General convolution shapes and parameters common to all convolutional layers
        conv_stride_shape = (1, 1)
        conv_pad_shape = (0, 0)
        conv_pad_type = 'VALID'

        pool_stride_shape = (2, 2)
        pool_shape = (2, 2)
        pool_pad_type = 'VALID'

        activation = 'relu'

        # Kernel depths and sizes for each convolution layer
        depths = [6, 16]
        kernel_shapes = [(5, 5, depths[0]), (5, 5, depths[1])]
        conv_layer_count = len(depths)

        # Create convolutional layers
        conv = None
        for i in range(conv_layer_count):
            name = 'l{:d}'.format(i)
            if i > 0:
                input_shape = conv.output_shape
            conv = ConvolutionalLayer(name, input_shape, kernel_shapes[i], conv_stride_shape,
                                      conv_pad_shape, conv_pad_type, activation)
            conv.add_pooling('max', pool_shape, pool_stride_shape, pool_pad_type)
            network.add_layer(conv, mean, stddev)

        # Linear layer dimensions
        linear_input_sizes = [400, 120, 84]
        linear_output_sizes = [120, 84, 10]
        linear_activations = ['relu', 'relu', None]
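        # Dimension check: 32 -> conv 5x5 'VALID' -> 28 -> pool 2x2 -> 14
        # -> conv 5x5 'VALID' -> 10 -> pool 2x2 -> 5, so the flattened input
        # to the first linear layer is 5 * 5 * 16 = 400.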

        # Create linear layers
        for i, input_size in enumerate(linear_input_sizes):
            layer_index = i + conv_layer_count
            name = 'l{:d}'.format(layer_index)
            linear = LinearLayer(name, input_size, linear_output_sizes[i], linear_activations[i])
            linear.init_weights_and_biases(mean, stddev)
            network.add_layer(linear, mean, stddev)

        network.define_network()

        learning_rate = 0.001
        network.define_operations(learning_rate, 'adam')

        # Prepare data
        (train_inputs, train_labels, valid_inputs, valid_labels, test_inputs, test_labels) = \
            get_mnist_data(padding=(2, 2))
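        # MNIST images are 28x28; (2, 2) padding on height and width yields
        # the 32x32 inputs this LeNet-5 style network expects.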

        epochs = 10
        batch_size = 128
        saver = tf.train.Saver()

        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())  # replaces deprecated initialize_all_variables

            network.train_with_validate(sess, train_inputs, train_labels, valid_inputs,
                                        valid_labels, epochs, batch_size)

            test_accuracy = network.evaluate_in_batches(sess, test_inputs, test_labels, batch_size)
            print("Test accuracy:", test_accuracy)

            saver.save(sess, 'lenet')
            print("Model saved")