def test_MLP_model_mnist(dataset_name='mnist.pkl.gz', learning_rate=0.01,
                         L1_reg=0.00, L2_reg=0.0001, n_epochs=1000,
                         batch_size=20, n_hidden=500):
    """Train an MLP with minibatch SGD and early stopping, printing progress.

    :type dataset_name: string
    :param dataset_name: dataset path handed to ``load_data``

    :type learning_rate: float
    :param learning_rate: step size of the gradient-descent parameter update

    :type L1_reg: float
    :param L1_reg: weight of ``classifier.L1`` in the cost

    :type L2_reg: float
    :param L2_reg: weight of ``classifier.L2_sqr`` in the cost

    :type n_epochs: int
    :param n_epochs: maximum number of passes over the training batches

    :type batch_size: int
    :param batch_size: number of rows sliced out for each minibatch

    :type n_hidden: int
    :param n_hidden: forwarded to ``MLP`` as ``n_hidden``

    The function returns nothing; results are reported on stdout and the
    total run time on stderr.
    """
    # Local import: wall-clock timer that exists on both Python 2 and 3
    # (time.clock() was removed in Python 3.8).
    import timeit

    # Set up the dataset
    dataset = load_data(dataset_name)

    # Split the data into a training, validation and test set.
    # NOTE(review): assumes load_data returns (train, validation, test) in
    # that order, matching how test_mlp unpacks it -- confirm against
    # load_data. The previous code bound index 1 to the test split and
    # index 2 to the validation split.
    train_data, train_labels = dataset[0]
    validation_data, validation_labels = dataset[1]
    test_data, test_labels = dataset[2]

    # Number of whole minibatches in each set. Floor division keeps these
    # integers on Python 3 as well, as required by range() below.
    n_train_batches = train_data.get_value(borrow=True).shape[0] // batch_size
    n_valid_batches = validation_data.get_value(borrow=True).shape[0] // batch_size
    n_test_batches = test_data.get_value(borrow=True).shape[0] // batch_size

    data_dim = (28, 28)  # The dimension of each image in the dataset
    data_classes = 10    # The number of classes within the data

    # Build the model
    # ---------------

    # Allocate symbolic variables for data
    index = T.lscalar()  # This is the index to a minibatch
    x = T.matrix('x')    # Data (rasterized images)
    y = T.ivector('y')   # Labels (1d vector of ints)

    rng = np.random.RandomState(1234)

    # Construct MLP class
    classifier = MLP(rng=rng,
                     input=x,
                     n_in=data_dim[0] * data_dim[1],
                     n_hidden=n_hidden,
                     n_out=data_classes)

    # Cost to minimize during training, including the regularization terms
    cost = (classifier.negative_log_likelihood(y)
            + L1_reg * classifier.L1
            + L2_reg * classifier.L2_sqr)

    # Rows belonging to minibatch `index`; the same slice is applied to
    # every data/label variable below.
    batch = slice(index * batch_size, (index + 1) * batch_size)

    # Compile Theano functions that compute the mistakes made by the model
    # on one minibatch of the test / validation data.
    test_model = th.function(inputs=[index],
                             outputs=classifier.errors(y),
                             givens={x: test_data[batch],
                                     y: test_labels[batch]})

    validate_model = th.function(inputs=[index],
                                 outputs=classifier.errors(y),
                                 givens={x: validation_data[batch],
                                         y: validation_labels[batch]})

    # Compute the gradient of cost with respect to each parameter
    grad_params = [T.grad(cost, param) for param in classifier.params]

    # Update rules as a list of (variable, update expression) pairs
    updates = [(param, param - learning_rate * grad_param)
               for param, grad_param in zip(classifier.params, grad_params)]

    # Compile Theano function that returns the cost and updates the
    # parameters of the model based on the update rules.
    train_model = th.function(inputs=[index],
                              outputs=cost,
                              updates=updates,
                              givens={x: train_data[batch],
                                      y: train_labels[batch]})

    # Train the model
    # ---------------

    # Early-stopping parameters
    patience = 10000               # Minimum number of minibatch iterations to run
    patience_increase = 2          # How much longer to wait once a new best is found
    improvement_threshold = 0.995  # Value of a significant relative improvement
    # Number of minibatches between two validation passes
    validation_frequency = min(n_train_batches, patience // 2)

    best_validation_loss = np.inf
    test_score = 0
    start_time = timeit.default_timer()

    # Training loop
    done_looping = False
    epoch = 0
    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in range(n_train_batches):
            # Called for its side effect (parameter update); the returned
            # cost is not used.
            train_model(minibatch_index)

            # Global iteration number across epochs
            iteration = (epoch - 1) * n_train_batches + minibatch_index

            if (iteration + 1) % validation_frequency == 0:
                # Compute the zero-one loss on the validation set
                validation_losses = [validate_model(i)
                                     for i in range(n_valid_batches)]
                this_validation_loss = np.mean(validation_losses)
                print('epoch %i, minibatch %i/%i, validation error %f %%' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss * 100.))

                # Check if current validation score is the best
                if this_validation_loss < best_validation_loss:
                    # Extend the patience if the improvement is good enough
                    if this_validation_loss < best_validation_loss * improvement_threshold:
                        patience = max(patience, iteration * patience_increase)

                    best_validation_loss = this_validation_loss

                    # Test on test set
                    test_losses = [test_model(i)
                                   for i in range(n_test_batches)]
                    test_score = np.mean(test_losses)
                    print('epoch %i, minibatch %i/%i, test error of best model %f %%' %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))

            # Stop the loop if we have exhausted our patience
            if patience <= iteration:
                done_looping = True
                break

    # The loop has ended so record the time it took
    end_time = timeit.default_timer()
    elapsed = end_time - start_time

    # Print out results and timing information
    print('Optimization complete with best validation score of %f %%, with test performance %f %%' %
          (best_validation_loss * 100., test_score * 100.))
    print('The code ran for %d epochs with %f epochs/sec' %
          (epoch, 1. * epoch / elapsed))
    # Written directly to stderr so it works the same on Python 2 and 3.
    sys.stderr.write('The code for file ' + os.path.split(__file__)[1] +
                     ' ran for %.1fs' % elapsed + '\n')
def test_mlp(learning_rate=0.01, L1_reg=0.00, L2_reg=0.0001, n_epochs=1000,
             dataset='mnist.pkl.gz', batch_size=20, n_hidden=500):
    """
    Run MLP SGD on MNIST

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
    gradient)

    :type L1_reg: float
    :param L1_reg: L1-norm's weight when added to the cost (see
    regularization)

    :type L2_reg: float
    :param L2_reg: L2-norm's weight when added to the cost (see
    regularization)

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type dataset: string
    :param dataset: the path of the MNIST dataset file from
                 http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz

    :type batch_size: int
    :param batch_size: number of rows sliced out for each minibatch

    :type n_hidden: int
    :param n_hidden: forwarded to ``MLP`` as ``n_hidden``

    The function returns nothing; progress goes to stdout and the total
    run time to stderr.
    """
    datasets = load_data(dataset)

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # number of whole minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')    # the data is presented as rasterized images
    y = T.ivector('y')   # the labels are presented as 1D vector of
                         # [int] labels

    rng = numpy.random.RandomState(1234)

    classifier = MLP(
        rng=rng,
        input=x,
        n_in=28 * 28,  # MNIST specific
        n_hidden=n_hidden,
        n_out=10
    )

    # cost to minimize: negative log likelihood plus the weighted
    # L1 and squared-L2 regularization terms
    cost = (
        classifier.negative_log_likelihood(y)
        + L1_reg * classifier.L1
        + L2_reg * classifier.L2_sqr
    )

    # classification errors on one minibatch of the test set
    test_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size]
        }
    )

    # classification errors on one minibatch of the validation set
    validate_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]
        }
    )

    # compute gradient of cost with respect to all params
    gparams = [T.grad(cost, param) for param in classifier.params]

    # plain gradient-descent step for every parameter
    updates = [
        (param, param - learning_rate * gparam)
        for param, gparam in zip(classifier.params, gparams)
    ]

    # returns the cost and, as a side effect, applies `updates`
    train_model = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size: (index + 1) * batch_size],
            y: train_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    ###############
    # TRAIN MODEL #
    ###############
    print('... training')

    # early-stopping parameters
    patience = 10000  # run at least this many minibatch iterations
    patience_increase = 2  # wait this much longer when a new best is
                           # found
    improvement_threshold = 0.995  # a relative improvement of this much is
                                   # considered significant
    validation_frequency = min(n_train_batches, patience // 2)
                                  # go through this many
                                  # minibatches before checking the network
                                  # on the validation set

    best_validation_loss = numpy.inf
    best_iter = 0
    test_score = 0.
    start_time = timeit.default_timer()

    epoch = 0
    done_looping = False

    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in range(n_train_batches):
            # called for its side effect (parameter update); the returned
            # cost is not used
            train_model(minibatch_index)

            # global iteration number across epochs
            iteration = (epoch - 1) * n_train_batches + minibatch_index

            # Validate once every `validation_frequency` iterations. This
            # must be a modulo test; the previous bitwise `&` fired on
            # unrelated iterations.
            if (iteration + 1) % validation_frequency == 0:
                validation_losses = [validate_model(i)
                                     for i in range(n_valid_batches)]
                this_validation_loss = numpy.mean(validation_losses)

                print(
                    'epoch %i, minibatch %i/%i, validation error %f %%' %
                    (
                        epoch,
                        minibatch_index + 1,
                        n_train_batches,
                        this_validation_loss * 100.
                    )
                )

                # new best validation score
                if this_validation_loss < best_validation_loss:
                    # extend patience only when the improvement is
                    # significant
                    if (
                        this_validation_loss < best_validation_loss *
                        improvement_threshold
                    ):
                        patience = max(patience, iteration * patience_increase)

                    best_validation_loss = this_validation_loss
                    best_iter = iteration

                    # test on test set
                    test_losses = [test_model(i)
                                   for i in range(n_test_batches)]
                    test_score = numpy.mean(test_losses)

                    print((' epoch %i, minibatch %i/%i, test error of '
                           'best model %f %%') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))

            # stop once patience is exhausted
            if patience <= iteration:
                done_looping = True
                break

    end_time = timeit.default_timer()
    print(('Optimization complete. Best validation score of %f %% '
           'obtained at iteration %i, with test performance %f %%') %
          (best_validation_loss * 100., best_iter + 1, test_score * 100.))
    print(('The code for file ' +
           os.path.split(__file__)[1] +
           ' ran for %.2fm' % ((end_time - start_time) / 60.)), file=sys.stderr)
def test_MLP_model_mnist(dataset_name='mnist.pkl.gz',
                         learning_rate=0.01,
                         L1_reg=0.00,
                         L2_reg=0.0001,
                         n_epochs=1000,
                         batch_size=20,
                         n_hidden=500):
    """Train an MLP with minibatch SGD and early stopping, printing progress.

    :type dataset_name: string
    :param dataset_name: dataset path handed to ``load_data``

    :type learning_rate: float
    :param learning_rate: step size of the gradient-descent parameter update

    :type L1_reg: float
    :param L1_reg: weight of ``classifier.L1`` in the cost

    :type L2_reg: float
    :param L2_reg: weight of ``classifier.L2_sqr`` in the cost

    :type n_epochs: int
    :param n_epochs: maximum number of passes over the training batches

    :type batch_size: int
    :param batch_size: number of rows sliced out for each minibatch

    :type n_hidden: int
    :param n_hidden: forwarded to ``MLP`` as ``n_hidden``

    The function returns nothing; results are reported on stdout and the
    total run time on stderr.
    """
    # Local import: wall-clock timer that exists on both Python 2 and 3
    # (time.clock() was removed in Python 3.8).
    import timeit

    # Set up the dataset
    dataset = load_data(dataset_name)

    # Split the data into a training, validation and test set.
    # NOTE(review): assumes load_data returns (train, validation, test) in
    # that order, matching how test_mlp unpacks it -- confirm against
    # load_data. The previous code bound index 1 to the test split and
    # index 2 to the validation split.
    train_data, train_labels = dataset[0]
    validation_data, validation_labels = dataset[1]
    test_data, test_labels = dataset[2]

    # Number of whole minibatches in each set. Floor division keeps these
    # integers on Python 3 as well, as required by range() below.
    n_train_batches = train_data.get_value(borrow=True).shape[0] // batch_size
    n_valid_batches = validation_data.get_value(
        borrow=True).shape[0] // batch_size
    n_test_batches = test_data.get_value(borrow=True).shape[0] // batch_size

    data_dim = (28, 28)  # The dimension of each image in the dataset
    data_classes = 10  # The number of classes within the data

    # Build the model
    # ---------------

    # Allocate symbolic variables for data
    index = T.lscalar()  # This is the index to a minibatch
    x = T.matrix('x')  # Data (rasterized images)
    y = T.ivector('y')  # Labels (1d vector of ints)

    rng = np.random.RandomState(1234)

    # Construct MLP class
    classifier = MLP(
        rng=rng,
        input=x,
        n_in=data_dim[0] * data_dim[1],
        n_hidden=n_hidden,
        n_out=data_classes)

    # Cost to minimize during training, including the regularization terms
    cost = (classifier.negative_log_likelihood(y) + L1_reg * classifier.L1 +
            L2_reg * classifier.L2_sqr)

    # Rows belonging to minibatch `index`; the same slice is applied to
    # every data/label variable below.
    batch = slice(index * batch_size, (index + 1) * batch_size)

    # Compile Theano functions that compute the mistakes made by the model
    # on one minibatch of the test / validation data.
    test_model = th.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: test_data[batch],
            y: test_labels[batch]
        })

    validate_model = th.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: validation_data[batch],
            y: validation_labels[batch]
        })

    # Compute the gradient of cost with respect to each parameter
    grad_params = [T.grad(cost, param) for param in classifier.params]

    # Update rules as a list of (variable, update expression) pairs
    updates = [(param, param - learning_rate * grad_param)
               for param, grad_param in zip(classifier.params, grad_params)]

    # Compile Theano function that returns the cost and updates the
    # parameters of the model based on the update rules.
    train_model = th.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens={
            x: train_data[batch],
            y: train_labels[batch]
        })

    # Train the model
    # ---------------

    # Early-stopping parameters
    patience = 10000  # Minimum number of minibatch iterations to run
    patience_increase = 2  # How much longer to wait once a new best is found
    improvement_threshold = 0.995  # Value of a significant relative improvement
    # Number of minibatches between two validation passes
    validation_frequency = min(n_train_batches, patience // 2)

    best_validation_loss = np.inf
    test_score = 0
    start_time = timeit.default_timer()

    # Training loop
    done_looping = False
    epoch = 0
    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in range(n_train_batches):
            # Called for its side effect (parameter update); the returned
            # cost is not used.
            train_model(minibatch_index)

            # Global iteration number across epochs
            iteration = (epoch - 1) * n_train_batches + minibatch_index

            if (iteration + 1) % validation_frequency == 0:
                # Compute the zero-one loss on the validation set
                validation_losses = [
                    validate_model(i) for i in range(n_valid_batches)
                ]
                this_validation_loss = np.mean(validation_losses)
                print('epoch %i, minibatch %i/%i, validation error %f %%' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss * 100.))

                # Check if current validation score is the best
                if this_validation_loss < best_validation_loss:
                    # Extend the patience if the improvement is good enough
                    if this_validation_loss < best_validation_loss * improvement_threshold:
                        patience = max(patience,
                                       iteration * patience_increase)

                    best_validation_loss = this_validation_loss

                    # Test on test set
                    test_losses = [
                        test_model(i) for i in range(n_test_batches)
                    ]
                    test_score = np.mean(test_losses)
                    print(
                        'epoch %i, minibatch %i/%i, test error of best model %f %%'
                        % (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))

            # Stop the loop if we have exhausted our patience
            if patience <= iteration:
                done_looping = True
                break

    # The loop has ended so record the time it took
    end_time = timeit.default_timer()
    elapsed = end_time - start_time

    # Print out results and timing information
    print(
        'Optimization complete with best validation score of %f %%, with test performance %f %%'
        % (best_validation_loss * 100., test_score * 100.))
    print('The code ran for %d epochs with %f epochs/sec' %
          (epoch, 1. * epoch / elapsed))
    # Written directly to stderr so it works the same on Python 2 and 3.
    sys.stderr.write('The code for file ' + os.path.split(__file__)[1] +
                     ' ran for %.1fs' % elapsed + '\n')