def test_MLP_model_mnist(dataset_name='mnist.pkl.gz',
                         learning_rate=0.01,
                         L1_reg=0.00,
                         L2_reg=0.0001,
                         n_epochs=1000,
                         batch_size=20,
                         n_hidden=500):
    """Train an MLP classifier with minibatch SGD and early stopping.

    Builds an ``MLP`` over 28x28 rasterized images with 10 output classes,
    trains it on the training split and periodically evaluates it on the
    validation split. Whenever the validation error improves, the test error
    is measured as well. Progress is printed to stdout; the total run time is
    written to stderr. Nothing is returned.

    :param dataset_name: value handed to ``load_data`` to obtain the dataset
    :param learning_rate: step size of the gradient-descent parameter update
    :param L1_reg: weight of ``classifier.L1`` in the training cost
    :param L2_reg: weight of ``classifier.L2_sqr`` in the training cost
    :param n_epochs: maximum number of passes over the training minibatches
    :param batch_size: number of rows per minibatch; a trailing partial
        minibatch is dropped by the floor division below
    :param n_hidden: forwarded to ``MLP`` as ``n_hidden``
    """
    # Local import: ``time.clock`` no longer exists on Python >= 3.8, and
    # ``timeit.default_timer`` is available on both Python 2 and 3.
    import timeit

    dataset = load_data(dataset_name)
    # NOTE(review): index 1 is taken as the validation split and index 2 as
    # the test split, matching how ``test_mlp`` unpacks ``load_data``. The
    # previous code had the two swapped, which made early stopping select the
    # model on the test split. Confirm against ``load_data``.
    train_data, train_labels = dataset[0]
    validation_data, validation_labels = dataset[1]
    test_data, test_labels = dataset[2]

    # Floor division keeps the batch counts integral on Python 3 as well, so
    # they can be fed to ``range``.
    n_train_batches = train_data.get_value(borrow=True).shape[0] // batch_size
    n_valid_batches = validation_data.get_value(borrow=True).shape[0] // batch_size
    n_test_batches = test_data.get_value(borrow=True).shape[0] // batch_size
    data_dim = (28, 28)  # The dimension of each image in the dataset
    data_classes = 10  # The number of classes within the data

    # Build the model
    # ---------------

    # Allocate symbolic variables for data
    index = T.lscalar()  # This is the index to a minibatch
    x = T.matrix('x')  # Data (rasterized images)
    y = T.ivector('y')  # Labels (1d vector of ints)

    rng = np.random.RandomState(1234)

    classifier = MLP(rng=rng,
                     input=x,
                     n_in=data_dim[0] * data_dim[1],
                     n_hidden=n_hidden,
                     n_out=data_classes)

    # Cost to minimize during training, including both regularization terms
    cost = (classifier.negative_log_likelihood(y)
            + L1_reg * classifier.L1
            + L2_reg * classifier.L2_sqr)

    # Rows belonging to minibatch ``index``; shared by all compiled functions
    batch = slice(index * batch_size, (index + 1) * batch_size)

    # Compiled functions returning ``classifier.errors(y)`` on one minibatch
    test_model = th.function(inputs=[index],
                             outputs=classifier.errors(y),
                             givens={x: test_data[batch],
                                     y: test_labels[batch]})
    validate_model = th.function(inputs=[index],
                                 outputs=classifier.errors(y),
                                 givens={x: validation_data[batch],
                                         y: validation_labels[batch]})

    # Gradient of the cost with respect to every model parameter
    grad_params = [T.grad(cost, param) for param in classifier.params]

    # Plain gradient-descent step as (variable, update expression) pairs
    updates = [(param, param - learning_rate * grad_param)
               for param, grad_param in zip(classifier.params, grad_params)]

    # Compiled function that returns the cost on one training minibatch and
    # applies the parameter updates
    train_model = th.function(inputs=[index],
                              outputs=cost,
                              updates=updates,
                              givens={x: train_data[batch],
                                      y: train_labels[batch]})

    # Train the model
    # ---------------

    # Early-stopping parameters
    patience = 10000  # Stop once this many minibatch iterations have run...
    patience_increase = 2  # ...unless extended to this multiple of the current iteration
    improvement_threshold = 0.995  # Relative improvement that counts as significant
    # Number of minibatches between two validation passes
    validation_frequency = min(n_train_batches, patience // 2)
    best_validation_loss = np.inf
    test_score = 0
    start_time = timeit.default_timer()

    done_looping = False
    epoch = 0
    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in range(n_train_batches):
            train_model(minibatch_index)
            # Global count of minibatches processed before this one
            iteration = (epoch - 1) * n_train_batches + minibatch_index
            if (iteration + 1) % validation_frequency == 0:
                # Mean error over all validation minibatches
                validation_losses = [validate_model(i)
                                     for i in range(n_valid_batches)]
                this_validation_loss = np.mean(validation_losses)
                print('epoch %i, minibatch %i/%i, validation error %f %%'
                      % (epoch, minibatch_index + 1, n_train_batches,
                         this_validation_loss * 100.))
                if this_validation_loss < best_validation_loss:
                    # Extend the patience only if the improvement is large enough
                    if this_validation_loss < best_validation_loss * improvement_threshold:
                        patience = max(patience, iteration * patience_increase)
                    best_validation_loss = this_validation_loss
                    # Measure the new best model on the test split
                    test_losses = [test_model(i)
                                   for i in range(n_test_batches)]
                    test_score = np.mean(test_losses)
                    print('epoch %i, minibatch %i/%i, test error of best model %f %%'
                          % (epoch, minibatch_index + 1, n_train_batches,
                             test_score * 100.))
            # Stop the loop if we have exhausted our patience
            if patience <= iteration:
                done_looping = True
                break

    elapsed = timeit.default_timer() - start_time
    # Avoid a ZeroDivisionError when the timer resolution rounds the run to 0
    epochs_per_sec = 1. * epoch / elapsed if elapsed > 0 else float('inf')

    # Single-argument print(...) calls behave identically on Python 2 and 3
    print('Optimization complete with best validation score of %f %%, with test performance %f %%'
          % (best_validation_loss * 100., test_score * 100.))
    print('The code ran for %d epochs with %f epochs/sec'
          % (epoch, epochs_per_sec))
    sys.stderr.write('The code for file ' + os.path.split(__file__)[1] +
                     ' ran for %.1fs' % elapsed + '\n')
# Example #2
# 0
def test_mlp(learning_rate=0.01, L1_reg=0.00, L2_reg=0.0001, n_epochs=1000,
             dataset='mnist.pkl.gz', batch_size=20, n_hidden=500):

    """
    Run MLP SGD on MNIST

    Trains the model with minibatch gradient descent, validates it every
    ``validation_frequency`` minibatches, measures the test error whenever
    the validation error improves, and stops early once the patience is
    exhausted. Progress goes to stdout, the run time to stderr; nothing is
    returned.

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
    gradient)

    :type L1_reg: float
    :param L1_reg: L1-norm's weight when added to the cost (see
    regularization)

    :type L2_reg: float
    :param L2_reg: L2-norm's weight when added to the cost (see
    regularization)

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type dataset: string
    :param dataset: the path of the MNIST dataset file from
                 http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz

    :type batch_size: int
    :param batch_size: number of rows per minibatch; a trailing partial
    minibatch is dropped by the floor division below

    :type n_hidden: int
    :param n_hidden: forwarded to ``MLP`` as ``n_hidden``

    """

    datasets = load_data(dataset)

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')  # the data is presented as rasterized images
    y = T.ivector('y')  # the labels are presented as 1D vector of
                        # [int] labels

    rng = numpy.random.RandomState(1234)

    classifier = MLP(
        rng=rng,
        input=x,
        n_in=28 * 28,  # MNIST specific
        n_hidden=n_hidden,
        n_out=10
    )

    # training cost: negative log-likelihood plus both regularization terms
    cost = (
        classifier.negative_log_likelihood(y)
        + L1_reg * classifier.L1
        + L2_reg * classifier.L2_sqr
    )

    # rows belonging to minibatch `index`; shared by all compiled functions
    batch = slice(index * batch_size, (index + 1) * batch_size)

    # compiled functions returning classifier.errors(y) on one minibatch
    test_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: test_set_x[batch],
            y: test_set_y[batch]
        }
    )

    validate_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: valid_set_x[batch],
            y: valid_set_y[batch]
        }
    )

    # compute gradient of cost with respect to all params
    gparams = [T.grad(cost, param) for param in classifier.params]

    # plain gradient-descent step as (variable, update expression) pairs
    updates = [
        (param, param - learning_rate * gparam)
        for param, gparam in zip(classifier.params, gparams)
    ]

    # compiled function returning the cost on one training minibatch and
    # applying the parameter updates
    train_model = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens={
            x: train_set_x[batch],
            y: train_set_y[batch]
        }
    )

    ###############
    # TRAIN MODEL #
    ###############
    print('... training')

    # early-stopping parameters
    patience = 10000  # stop once this many minibatch iterations have run...
    patience_increase = 2  # ...unless extended to this multiple of the
    # current iteration when a significantly better model is found
    improvement_threshold = 0.995  # a relative improvement of this much is
    # considered significant
    validation_frequency = min(n_train_batches, patience // 2)
    # go through this many
    # minibatches before checking the network
    # on the validation set

    best_validation_loss = numpy.inf
    best_iter = 0
    test_score = 0.
    start_time = timeit.default_timer()

    epoch = 0
    done_looping = False

    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in range(n_train_batches):
            train_model(minibatch_index)

            # global count of minibatches processed before this one
            iteration = (epoch - 1) * n_train_batches + minibatch_index

            # modulo (not bitwise AND) so that validation runs once every
            # `validation_frequency` minibatches
            if (iteration + 1) % validation_frequency == 0:
                validation_losses = [validate_model(i) for i
                                     in range(n_valid_batches)]
                this_validation_loss = numpy.mean(validation_losses)

                print(
                    'epoch %i, minibatch %i/%i, validation error %f %%' %
                    (
                        epoch,
                        minibatch_index + 1,
                        n_train_batches,
                        this_validation_loss * 100.
                    )
                )

                if this_validation_loss < best_validation_loss:
                    # extend the patience only if the improvement is
                    # large enough
                    if (
                        this_validation_loss <
                        best_validation_loss * improvement_threshold
                    ):
                        patience = max(patience,
                                       iteration * patience_increase)

                    best_validation_loss = this_validation_loss
                    best_iter = iteration

                    # test on test set
                    test_losses = [test_model(i) for i
                                   in range(n_test_batches)]
                    test_score = numpy.mean(test_losses)

                    print(('     epoch %i, minibatch %i/%i, test error of '
                           'best model %f %%') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))

            # checked after every minibatch so training stops as soon as the
            # patience is exhausted, not only at the next validation pass
            if patience <= iteration:
                done_looping = True
                break

    # summary is emitted once, after the training loop has finished
    end_time = timeit.default_timer()

    print(('Optimization complete. Best validation score of %f %% '
           'obtained at iteration %i, with test performance %f %%') %
          (best_validation_loss * 100., best_iter + 1, test_score * 100.))
    print(('The code for file ' +
           os.path.split(__file__)[1] +
           ' ran for %.2fm' % ((end_time - start_time) / 60.)), file=sys.stderr)
def test_MLP_model_mnist(dataset_name='mnist.pkl.gz',
                         learning_rate=0.01,
                         L1_reg=0.00,
                         L2_reg=0.0001,
                         n_epochs=1000,
                         batch_size=20,
                         n_hidden=500):
    """Train an MLP classifier with minibatch SGD and early stopping.

    Builds an ``MLP`` over 28x28 rasterized images with 10 output classes,
    trains it on the training split and periodically evaluates it on the
    validation split. Whenever the validation error improves, the test error
    is measured as well. Progress is printed to stdout; the total run time is
    written to stderr. Nothing is returned.

    :param dataset_name: value handed to ``load_data`` to obtain the dataset
    :param learning_rate: step size of the gradient-descent parameter update
    :param L1_reg: weight of ``classifier.L1`` in the training cost
    :param L2_reg: weight of ``classifier.L2_sqr`` in the training cost
    :param n_epochs: maximum number of passes over the training minibatches
    :param batch_size: number of rows per minibatch; a trailing partial
        minibatch is dropped by the floor division below
    :param n_hidden: forwarded to ``MLP`` as ``n_hidden``
    """
    # Local import: ``time.clock`` no longer exists on Python >= 3.8, and
    # ``timeit.default_timer`` is available on both Python 2 and 3.
    import timeit

    dataset = load_data(dataset_name)
    # NOTE(review): index 1 is taken as the validation split and index 2 as
    # the test split, matching how ``test_mlp`` unpacks ``load_data``. The
    # previous code had the two swapped, which made early stopping select the
    # model on the test split. Confirm against ``load_data``.
    train_data, train_labels = dataset[0]
    validation_data, validation_labels = dataset[1]
    test_data, test_labels = dataset[2]

    # Floor division keeps the batch counts integral on Python 3 as well, so
    # they can be fed to ``range``.
    n_train_batches = train_data.get_value(borrow=True).shape[0] // batch_size
    n_valid_batches = validation_data.get_value(
        borrow=True).shape[0] // batch_size
    n_test_batches = test_data.get_value(borrow=True).shape[0] // batch_size
    data_dim = (28, 28)  # The dimension of each image in the dataset
    data_classes = 10  # The number of classes within the data

    # Build the model
    # ---------------

    # Allocate symbolic variables for data
    index = T.lscalar()  # This is the index to a minibatch
    x = T.matrix('x')  # Data (rasterized images)
    y = T.ivector('y')  # Labels (1d vector of ints)

    rng = np.random.RandomState(1234)

    classifier = MLP(rng=rng,
                     input=x,
                     n_in=data_dim[0] * data_dim[1],
                     n_hidden=n_hidden,
                     n_out=data_classes)

    # Cost to minimize during training, including both regularization terms
    cost = (classifier.negative_log_likelihood(y) + L1_reg * classifier.L1 +
            L2_reg * classifier.L2_sqr)

    # Rows belonging to minibatch ``index``; shared by all compiled functions
    batch = slice(index * batch_size, (index + 1) * batch_size)

    # Compiled functions returning ``classifier.errors(y)`` on one minibatch
    test_model = th.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: test_data[batch],
            y: test_labels[batch]
        })
    validate_model = th.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: validation_data[batch],
            y: validation_labels[batch]
        })

    # Gradient of the cost with respect to every model parameter
    grad_params = [T.grad(cost, param) for param in classifier.params]

    # Plain gradient-descent step as (variable, update expression) pairs
    updates = [(param, param - learning_rate * grad_param)
               for param, grad_param in zip(classifier.params, grad_params)]

    # Compiled function that returns the cost on one training minibatch and
    # applies the parameter updates
    train_model = th.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens={
            x: train_data[batch],
            y: train_labels[batch]
        })

    # Train the model
    # ---------------

    # Early-stopping parameters
    patience = 10000  # Stop once this many minibatch iterations have run...
    patience_increase = 2  # ...unless extended to this multiple of the current iteration
    improvement_threshold = 0.995  # Relative improvement that counts as significant
    # Number of minibatches between two validation passes
    validation_frequency = min(n_train_batches, patience // 2)
    best_validation_loss = np.inf
    test_score = 0
    start_time = timeit.default_timer()

    done_looping = False
    epoch = 0
    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in range(n_train_batches):
            train_model(minibatch_index)
            # Global count of minibatches processed before this one
            iteration = (epoch - 1) * n_train_batches + minibatch_index
            if (iteration + 1) % validation_frequency == 0:
                # Mean error over all validation minibatches
                validation_losses = [
                    validate_model(i) for i in range(n_valid_batches)
                ]
                this_validation_loss = np.mean(validation_losses)
                print('epoch %i, minibatch %i/%i, validation error %f %%' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss * 100.))
                if this_validation_loss < best_validation_loss:
                    # Extend the patience only if the improvement is large enough
                    if this_validation_loss < best_validation_loss * improvement_threshold:
                        patience = max(patience, iteration * patience_increase)
                    best_validation_loss = this_validation_loss
                    # Measure the new best model on the test split
                    test_losses = [
                        test_model(i) for i in range(n_test_batches)
                    ]
                    test_score = np.mean(test_losses)
                    print(
                        'epoch %i, minibatch %i/%i, test error of best model %f %%'
                        % (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))
            # Stop the loop if we have exhausted our patience
            if patience <= iteration:
                done_looping = True
                break

    elapsed = timeit.default_timer() - start_time
    # Avoid a ZeroDivisionError when the timer resolution rounds the run to 0
    epochs_per_sec = 1. * epoch / elapsed if elapsed > 0 else float('inf')

    # Single-argument print(...) calls behave identically on Python 2 and 3
    print(
        'Optimization complete with best validation score of %f %%, with test performance %f %%'
        % (best_validation_loss * 100., test_score * 100.))
    print('The code ran for %d epochs with %f epochs/sec' %
          (epoch, epochs_per_sec))
    sys.stderr.write('The code for file ' + os.path.split(__file__)[1] +
                     ' ran for %.1fs' % elapsed + '\n')