# Consolidated imports for the training scripts below. The model classes
# (RBM, DenoisingAutoencoder, StackedDenoisingAutoencoder,
# MultiLayerPerceptron, RestrictedBolzmanMachine, GeneralLayer) are defined
# elsewhere in this repository; load_data and tile_raster_images are the
# helpers from the Theano deep learning tutorials (the module names in the
# last two imports are assumptions).
import os
import sys
import timeit
from collections import OrderedDict

import numpy
import numpy as np  # both spellings appear in the code below
import theano
import theano.tensor as T
from theano.tensor.shared_randomstreams import RandomStreams

try:
    import PIL.Image as Image
except ImportError:
    import Image

from logistic_sgd import load_data       # assumed helper module
from utils import tile_raster_images     # assumed helper module


def test_rbm(learning_rate=0.1, training_epochs=15, dataset='mnist.pkl.gz',
             batch_size=20, n_chains=20, n_samples=10,
             output_folder='rbm_plots', n_hidden=500):
    """Demonstrate how to train an RBM and afterwards sample from it,
    using Theano. This is demonstrated on MNIST.

    :param learning_rate: learning rate used for training the RBM
    :param training_epochs: number of epochs used for training
    :param dataset: path to the pickled dataset
    :param batch_size: size of a batch used to train the RBM
    :param n_chains: number of parallel Gibbs chains to be used for sampling
    :param n_samples: number of samples to plot for each chain
    :param output_folder: directory in which the plots are saved
    :param n_hidden: number of hidden units of the RBM
    """
    datasets = load_data(dataset)

    train_set_x, train_set_y = datasets[0]
    test_set_x, test_set_y = datasets[2]

    # compute number of minibatches for training
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size

    # allocate symbolic variables for the data
    index = T.lscalar()    # index to a [mini]batch
    x = T.matrix('x')      # the data is presented as rasterized images

    rng = numpy.random.RandomState(123)
    theano_rng = RandomStreams(rng.randint(2 ** 30))

    # initialize storage for the persistent chain
    # (state = hidden layer of chain)
    persistent_chain = theano.shared(
        numpy.zeros((batch_size, n_hidden), dtype=theano.config.floatX),
        borrow=True
    )

    # construct the RBM class
    rbm = RBM(input=x, n_visible=28 * 28, n_hidden=n_hidden,
              numpy_rng=rng, theano_rng=theano_rng)

    # get the cost and the updates corresponding to one step of PCD-15
    # (persistent contrastive divergence with 15 Gibbs steps)
    cost, updates = rbm.get_cost_updates(lr=learning_rate,
                                         persistent=persistent_chain, k=15)

    #################################
    #     Training the RBM          #
    #################################
    if not os.path.isdir(output_folder):
        os.makedirs(output_folder)
    os.chdir(output_folder)

    # it is OK for a Theano function to have no output;
    # the purpose of train_rbm is solely to update the RBM parameters
    train_rbm = theano.function(
        [index],
        cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size: (index + 1) * batch_size]
        },
        name='train_rbm'
    )

    plotting_time = 0.
    start_time = timeit.default_timer()

    # go through training epochs
    for epoch in xrange(training_epochs):

        # go through the training set
        mean_cost = []
        for batch_index in xrange(n_train_batches):
            mean_cost += [train_rbm(batch_index)]

        print 'Training epoch %d, cost is ' % epoch, numpy.mean(mean_cost)

        # plot filters after each training epoch
        plotting_start = timeit.default_timer()
        # construct image from the weight matrix
        image = Image.fromarray(
            tile_raster_images(
                X=rbm.W.get_value(borrow=True).T,
                img_shape=(28, 28),
                tile_shape=(10, 10),
                tile_spacing=(1, 1)
            )
        )
        image.save('filters_at_epoch_%i.png' % epoch)
        plotting_stop = timeit.default_timer()
        plotting_time += (plotting_stop - plotting_start)

    end_time = timeit.default_timer()
    pretraining_time = (end_time - start_time) - plotting_time
    print ('Training took %f minutes' % (pretraining_time / 60.))

    #################################
    #     Sampling from the RBM     #
    #################################
    # find out the number of test samples
    number_of_test_samples = test_set_x.get_value(borrow=True).shape[0]

    # pick random test examples with which to initialize the persistent chain
    test_idx = rng.randint(number_of_test_samples - n_chains)
    persistent_vis_chain = theano.shared(
        numpy.asarray(
            test_set_x.get_value(borrow=True)[test_idx:test_idx + n_chains],
            dtype=theano.config.floatX
        )
    )

    plot_every = 1000
    # define one step of Gibbs sampling (mf = mean-field); define a
    # function that does `plot_every` steps before returning the
    # sample for plotting
    (
        [
            presig_hids,
            hid_mfs,
            hid_samples,
            presig_vis,
            vis_mfs,
            vis_samples
        ],
        updates
    ) = theano.scan(
        rbm.gibbs_vhv,
        outputs_info=[None, None, None, None, None, persistent_vis_chain],
        n_steps=plot_every
    )

    # add to updates the shared variable that takes care of our persistent
    # chain
    updates.update({persistent_vis_chain: vis_samples[-1]})

    # construct the function that implements our persistent chain:
    # we generate the "mean field" activations for plotting and the actual
    # samples for reinitializing the state of our persistent chain
    sample_fn = theano.function(
        [],
        [
            vis_mfs[-1],
            vis_samples[-1]
        ],
        updates=updates,
        name='sample_fn'
    )

    # create a space to store the image for plotting (we need to leave
    # room for the tile_spacing as well)
    image_data = numpy.zeros(
        (29 * n_samples + 1, 29 * n_chains - 1),
        dtype='uint8'
    )
    for idx in xrange(n_samples):
        # generate `plot_every` intermediate samples that we discard,
        # because successive samples in the chain are too correlated
        vis_mf, vis_sample = sample_fn()
        print ' ... plotting sample ', idx
        image_data[29 * idx:29 * idx + 28, :] = tile_raster_images(
            X=vis_mf,
            img_shape=(28, 28),
            tile_shape=(1, n_chains),
            tile_spacing=(1, 1)
        )

    # construct image
    image = Image.fromarray(image_data)
    image.save('samples.png')
    os.chdir('../')
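# To make the six outputs of the theano.scan above concrete, here is a
# minimal numpy sketch of one `gibbs_vhv` step for binary units. This is an
# illustrative assumption about the RBM class (which is defined elsewhere);
# what the scan relies on is only the output ordering, matching
# (presig_hids, hid_mfs, hid_samples, presig_vis, vis_mfs, vis_samples).
def numpy_gibbs_vhv_sketch(v0, W, hbias, vbias, rng):
    """One full Gibbs step v -> h -> v, mirroring rbm.gibbs_vhv's outputs."""
    sigmoid = lambda a: 1. / (1. + numpy.exp(-a))
    pre_h = numpy.dot(v0, W) + hbias          # pre-sigmoid hidden activations
    h_mf = sigmoid(pre_h)                     # hidden mean-field probabilities
    h_sample = rng.binomial(n=1, p=h_mf)      # sampled binary hidden state
    pre_v = numpy.dot(h_sample, W.T) + vbias  # pre-sigmoid visible activations
    v_mf = sigmoid(pre_v)                     # visible mean-field probabilities
    v_sample = rng.binomial(n=1, p=v_mf)      # sampled binary visible state
    return [pre_h, h_mf, h_sample, pre_v, v_mf, v_sample]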
def main(learning_rate=0.1, training_epochs=15, dataset='mnist.pkl.gz',
         batch_size=20, output_folder='dA_plots'):
    datasets = load_data(dataset)
    train_set_x, train_set_y = datasets[0]

    n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size

    if not os.path.isdir(output_folder):
        os.makedirs(output_folder)
    os.chdir(output_folder)

    index = T.lscalar()
    x = T.matrix(name='x')

    rng = np.random.RandomState(123)
    theano_rng = RandomStreams(rng.randint(2 ** 30))

    #####################################
    # First model: no corruption        #
    #####################################
    dae = DenoisingAutoencoder(
        numpy_rng=rng,
        theano_rng=theano_rng,
        input=x,
        n_visible=28 * 28,
        n_hidden=500
    )
    # corruption_level=0. matches the "0% corruption" message and the
    # filters_corruption_0.png filename below (the original code passed 0.3
    # here, contradicting both)
    cost, updates = dae.get_cost_updates(
        corruption_level=0.,
        learning_rate=learning_rate
    )
    train_autoencoder = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size: (index + 1) * batch_size],
        }
    )

    start_time = timeit.default_timer()
    for epoch in xrange(training_epochs):
        costs = []
        for batch_index in xrange(n_train_batches):
            costs.append(train_autoencoder(batch_index))
        print 'Training epoch %d, mean cost ' % epoch, np.mean(costs)
    end_time = timeit.default_timer()
    training_time = end_time - start_time

    print ('The 0% corruption code for file ' +
           os.path.split(__file__)[1] +
           ' ran for %.2fm' % (training_time / 60.))

    image = Image.fromarray(
        tile_raster_images(
            X=dae.W.get_value(borrow=True).T,
            img_shape=(28, 28),
            tile_shape=(10, 10),
            tile_spacing=(1, 1)
        )
    )
    image.save('filters_corruption_0.png')

    ######################################################
    # Second model: 30% corruption, untied weights       #
    ######################################################
    dae = DenoisingAutoencoder(
        numpy_rng=rng,
        theano_rng=theano_rng,
        input=x,
        n_visible=28 * 28,
        n_hidden=500,
        tied=False
    )
    cost, updates = dae.get_cost_updates(
        corruption_level=0.3,
        learning_rate=learning_rate
    )
    train_autoencoder = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size: (index + 1) * batch_size],
        }
    )

    start_time = timeit.default_timer()
    for epoch in xrange(training_epochs):
        costs = []
        for batch_index in xrange(n_train_batches):
            costs.append(train_autoencoder(batch_index))
        print 'Training epoch %d, mean cost ' % epoch, np.mean(costs)
    end_time = timeit.default_timer()
    training_time = end_time - start_time

    print ('The 30% corruption code for file ' +
           os.path.split(__file__)[1] +
           ' ran for %.2fm' % (training_time / 60.))

    image = Image.fromarray(
        tile_raster_images(
            X=dae.W.get_value(borrow=True).T,
            img_shape=(28, 28),
            tile_shape=(10, 10),
            tile_spacing=(1, 1)
        )
    )
    image.save('filters_corruption_30.png')
    os.chdir('../')
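# Sketch of the symbolic cost that DenoisingAutoencoder.get_cost_updates is
# expected to build, modeled on the standard denoising autoencoder recipe.
# This is an assumption about the class used above (which presumably keeps a
# separate decoder matrix when tied=False): corrupt the input with a binomial
# mask, encode, decode, and take the mean reconstruction cross-entropy.
def dA_cost_sketch(x, W, b, b_prime, theano_rng, corruption_level):
    """Symbolic dA cost with tied weights (the decoder uses W.T)."""
    # zero out each input component with probability `corruption_level`
    x_tilde = theano_rng.binomial(size=x.shape, n=1, p=1 - corruption_level,
                                  dtype=theano.config.floatX) * x
    y_hidden = T.nnet.sigmoid(T.dot(x_tilde, W) + b)          # encode
    z_recon = T.nnet.sigmoid(T.dot(y_hidden, W.T) + b_prime)  # decode
    # cross-entropy between the uncorrupted input and its reconstruction
    L = -T.sum(x * T.log(z_recon) + (1 - x) * T.log(1 - z_recon), axis=1)
    return T.mean(L)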
def init_mlp(learning_rate, L1_reg, L2_reg, dataset, batch_size, n_hidden):
    datasets = load_data(dataset)
    train_set_x, train_set_y = datasets[0]
    validate_set_x, validate_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size
    n_validate_batches = (validate_set_x.get_value(borrow=True).shape[0]
                          / batch_size)
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] / batch_size
    batches = (n_train_batches, n_validate_batches, n_test_batches)

    print '... building the model'

    index = T.lscalar()
    x = T.matrix('x')
    y = T.ivector('y')

    rng = np.random.RandomState(1234)

    classifier = MultiLayerPerceptron(
        rng=rng,
        input=x,
        n_in=28 * 28,
        n_hidden=n_hidden,
        n_out=10
    )

    # the cost to minimize: negative log-likelihood plus L1/L2 penalties
    cost = (
        classifier.negative_log_likelihood(y)
        + L1_reg * classifier.L1_norm
        + L2_reg * classifier.L2_norm
    )

    test_model = theano.function(
        inputs=[index],
        outputs=[classifier.errors(y)],
        givens={
            x: test_set_x[index * batch_size: (index + 1) * batch_size],
            y: test_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )
    validate_model = theano.function(
        inputs=[index],
        outputs=[classifier.errors(y)],
        givens={
            x: validate_set_x[index * batch_size: (index + 1) * batch_size],
            y: validate_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    # gradient of the cost with respect to each parameter, and the
    # corresponding gradient-descent updates
    gparams = [T.grad(cost, param) for param in classifier.params]
    updates = [
        (param, param - learning_rate * gparam)
        for param, gparam in zip(classifier.params, gparams)
    ]

    train_model = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size: (index + 1) * batch_size],
            y: train_set_y[index * batch_size: (index + 1) * batch_size],
        }
    )

    return train_model, validate_model, test_model, batches
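# A minimal sketch of how init_mlp's return values would be consumed; the
# hyperparameter values here are illustrative assumptions, not ones taken
# from this repository.
def train_mlp_sketch():
    train_model, validate_model, test_model, batches = init_mlp(
        learning_rate=0.01, L1_reg=0.00, L2_reg=0.0001,
        dataset='mnist.pkl.gz', batch_size=20, n_hidden=500)
    n_train_batches, n_validate_batches, n_test_batches = batches
    for epoch in xrange(10):
        for minibatch_index in xrange(n_train_batches):
            train_model(minibatch_index)
        validation_loss = np.mean(
            [validate_model(i) for i in xrange(n_validate_batches)])
        print 'epoch %i, validation error %f %%' % (epoch + 1,
                                                    validation_loss * 100.)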
def main(
    finetune_learning_rate=0.1,
    pretrain_epochs=15,
    pretrain_learning_rate=0.001,
    training_epochs=1000,
    dataset='mnist.pkl.gz',
    batch_size=1,
    output_folder='SdA_plots/'
):
    datasets = load_data(dataset)

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    n_train_batches = train_set_x.get_value(borrow=True).shape[0]
    n_train_batches /= batch_size

    numpy_rng = np.random.RandomState(123)

    if not os.path.isdir(output_folder):
        os.makedirs(output_folder)
    os.chdir(output_folder)

    print '... building the model'
    # hidden layers of 529 = 23 * 23 units, so the deeper-layer filters can
    # be plotted as 23x23 tiles below
    sda = StackedDenoisingAutoencoder(
        numpy_rng=numpy_rng,
        n_ins=28 * 28,
        hidden_layers_sizes=[529, 529],
        n_outs=10
    )

    print '... building the pretraining functions'
    pretrain_fns = sda.create_pretraining_functions(
        train_set_x=train_set_x,
        batch_size=batch_size
    )

    print '... pre-training the model'
    start_time = timeit.default_timer()
    corruption_levels = [.1, .2, .3]
    for i in xrange(sda.n_layers):
        for epoch in xrange(pretrain_epochs):
            costs = []
            for batch_index in xrange(n_train_batches):
                costs.append(pretrain_fns[i](
                    index=batch_index,
                    corruption=corruption_levels[i],
                    learning_rate=pretrain_learning_rate)
                )
            print 'Pre-training layer %i, epoch %d, cost ' % (i, epoch), \
                np.mean(costs)
    end_time = timeit.default_timer()

    print >> sys.stderr, ('The pretraining code for file ' +
                          os.path.split(__file__)[1] +
                          ' ran for %.2fm' % ((end_time - start_time) / 60.))

    # plot the pretrained filters; the first layer sees 28x28 inputs, the
    # deeper layers see 529 = 23x23 inputs
    for i, layer in enumerate(sda.dA_layers):
        img_shape = (28, 28) if i == 0 else (23, 23)
        image = Image.fromarray(
            tile_raster_images(
                X=layer.W.get_value(borrow=True).T,
                img_shape=img_shape,
                tile_shape=(10, 10),
                tile_spacing=(1, 1)
            )
        )
        image.save('dAE_layer_pretrain_%d.png' % i)

    print '... building the finetuning functions'
    train_fn, validate_model, test_model = sda.build_finetune_functions(
        datasets=datasets,
        batch_size=batch_size,
        learning_rate=finetune_learning_rate
    )

    print '... fine-tuning the model'
    # early-stopping parameters
    patience = 10 * n_train_batches  # look at this many batches regardless
    patience_increase = 2.           # wait this much longer on a new best
    improvement_threshold = 0.995    # relative improvement counted as significant
    validation_frequency = min(n_train_batches, patience / 2)

    best_validation_loss = np.inf
    best_iter = 0
    test_score = 0.
    start_time = timeit.default_timer()

    done_looping = False
    epoch = 0
    while (epoch < training_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in xrange(n_train_batches):
            minibatch_avg_cost = train_fn(minibatch_index)
            iter = (epoch - 1) * n_train_batches + minibatch_index

            if (iter + 1) % validation_frequency == 0:
                validation_losses = validate_model()
                this_validation_loss = np.mean(validation_losses)
                print ('epoch %i, minibatch %i/%i, validation error %f %%' %
                       (epoch, minibatch_index + 1, n_train_batches,
                        this_validation_loss * 100.))

                if this_validation_loss < best_validation_loss:
                    # improve patience if the improvement is good enough
                    if (this_validation_loss <
                            best_validation_loss * improvement_threshold):
                        patience = max(patience, iter * patience_increase)

                    best_validation_loss = this_validation_loss
                    best_iter = iter

                    test_losses = test_model()
                    test_score = np.mean(test_losses)
                    print (('     epoch %i, minibatch %i/%i, test error of '
                            'best model %f %%') %
                           (epoch, minibatch_index + 1, n_train_batches,
                            test_score * 100.))

            if patience <= iter:
                done_looping = True
                break
    end_time = timeit.default_timer()

    print (
        (
            'Optimization complete with best validation score of %f %%, '
            'on iteration %i, '
            'with test performance %f %%'
        )
        % (best_validation_loss * 100., best_iter + 1, test_score * 100.)
    )
    print >> sys.stderr, ('The training code for file ' +
                          os.path.split(__file__)[1] +
                          ' ran for %.2fm' % ((end_time - start_time) / 60.))

    for i, layer in enumerate(sda.dA_layers):
        img_shape = (28, 28) if i == 0 else (23, 23)
        image = Image.fromarray(
            tile_raster_images(
                X=layer.W.get_value(borrow=True).T,
                img_shape=img_shape,
                tile_shape=(10, 10),
                tile_spacing=(1, 1)
            )
        )
        image.save('dAE_layer_finetune_%d.png' % i)

    os.chdir('../')
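# For orientation, a sketch of what create_pretraining_functions presumably
# compiles for each dA layer, modeled on the Theano SdA tutorial: one
# function per layer taking a minibatch index, with `corruption` and
# `learning_rate` exposed as optional keyword arguments via theano.In,
# matching how pretrain_fns[i] is called in the pre-training loop above.
# The attribute names (get_cost_updates, dA_layer.x) are assumptions about
# the classes defined elsewhere in this repository.
def pretraining_function_sketch(dA_layer, train_set_x, batch_size):
    index = T.lscalar('index')
    corruption_level = T.scalar('corruption')
    learning_rate = T.scalar('lr')
    cost, updates = dA_layer.get_cost_updates(corruption_level, learning_rate)
    return theano.function(
        inputs=[index,
                theano.In(corruption_level, value=0.2),
                theano.In(learning_rate, value=0.1)],
        outputs=cost,
        updates=updates,
        givens={dA_layer.x: train_set_x[index * batch_size:
                                        (index + 1) * batch_size]}
    )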
def main(learning_rate=0.1, training_epochs=15, dataset='mnist.pkl.gz',
         batch_size=20, n_chains=20, n_samples=10, output_folder='rbm_plots',
         n_hidden=500):
    datasets = load_data(dataset)
    train_set_x, train_set_y = datasets[0]
    test_set_x, test_set_y = datasets[2]

    n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size

    index = T.lscalar()
    x = T.matrix('x')

    rng = np.random.RandomState(123)
    theano_rng = RandomStreams(rng.randint(2 ** 30))

    persistent_chain = theano.shared(
        np.zeros((batch_size, n_hidden), dtype=theano.config.floatX),
        borrow=True
    )

    rbm = RestrictedBolzmanMachine(
        input=x,
        n_visible=28 * 28,
        n_hidden=n_hidden,
        numpy_rng=rng,
        theano_rng=theano_rng
    )
    cost, updates = rbm.get_cost_updates(learning_rate=learning_rate,
                                         persistent=persistent_chain, k=15)

    if not os.path.isdir(output_folder):
        os.makedirs(output_folder)
    os.chdir(output_folder)

    train_rbm = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size: (index + 1) * batch_size]
        },
        name='train_rbm'
    )

    plotting_time = 0.
    start_time = timeit.default_timer()
    for epoch in xrange(training_epochs):
        mean_cost = []
        for batch_index in xrange(n_train_batches):
            mean_cost += [train_rbm(batch_index)]
        print 'Training epoch %d, cost is ' % epoch, np.mean(mean_cost)

        plotting_start = timeit.default_timer()
        image = Image.fromarray(
            tile_raster_images(
                X=rbm.W.get_value(borrow=True).T,
                img_shape=(28, 28),
                tile_shape=(10, 10),
                tile_spacing=(1, 1)
            )
        )
        image.save('filters_at_epoch_%i.png' % epoch)
        plotting_stop = timeit.default_timer()
        plotting_time += (plotting_stop - plotting_start)
    end_time = timeit.default_timer()

    pretraining_time = (end_time - start_time) - plotting_time
    print ('Training took %f minutes' % (pretraining_time / 60.))

    number_of_test_samples = test_set_x.get_value(borrow=True).shape[0]
    test_idx = rng.randint(number_of_test_samples - n_chains)
    persistent_vis_chain = theano.shared(
        np.asarray(
            test_set_x.get_value(borrow=True)[test_idx: test_idx + n_chains],
            dtype=theano.config.floatX
        )
    )

    plot_every = 1000
    (
        [
            presig_hids,
            hid_mfs,
            hid_samples,
            presig_vis,
            vis_mfs,
            vis_samples
        ],
        updates
    ) = theano.scan(
        fn=rbm.gibbs_vhv,
        outputs_info=[None, None, None, None, None, persistent_vis_chain],
        n_steps=plot_every
    )
    updates.update({persistent_vis_chain: vis_samples[-1]})

    sample_fn = theano.function(
        inputs=[],
        outputs=[
            vis_mfs[-1],
            vis_samples[-1]
        ],
        updates=updates,
        name='sample_fn'
    )

    image_data = np.zeros(
        (29 * n_samples + 1, 29 * n_chains - 1),
        dtype='uint8'
    )
    for idx in xrange(n_samples):
        vis_mf, vis_sample = sample_fn()
        print '... plotting sample', idx
        image_data[29 * idx: 29 * idx + 28, :] = tile_raster_images(
            X=vis_mf,
            img_shape=(28, 28),
            tile_shape=(1, n_chains),
            tile_spacing=(1, 1)
        )

    image = Image.fromarray(image_data)
    image.save('sample.png')
    os.chdir('../')
def main():
    datasets = load_data('mnist.pkl.gz')
    batch_size = 20

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] / batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] / batch_size

    print '... building model'

    index = T.lscalar()
    x = T.matrix('x')
    t = T.ivector('t')

    layers = [
        GeneralLayer(28 * 28, 500),
        GeneralLayer(500, 500),
        GeneralLayer(500, 500),
        GeneralLayer(500, 10, T.nnet.softmax)
    ]

    # chain the layers: the output of each layer feeds the next
    params = []
    for i, layer in enumerate(layers):
        params += layer.params
        if i == 0:
            layer_out = layer.forward_prop(x)
        else:
            layer_out = layer.forward_prop(layer_out)
    y = layers[-1].h

    L1_norm = sum([layer.L1_norm for layer in layers])
    L2_norm = sum([layer.L2_norm for layer in layers])

    # negative log-likelihood of the targets under the softmax output, plus
    # regularization (the previously active squared-error variant broadcast
    # integer labels against the softmax matrix and negated the penalty, so
    # it could not train)
    cost = (-T.mean(T.log(y)[T.arange(x.shape[0]), t])
            + 0.000 * L1_norm + 0.0001 * L2_norm)
    error = T.mean(T.neq(T.argmax(y, axis=1), t))

    gparams = T.grad(cost, params)

    # momentum buffers, one per parameter
    gmomentums = [
        theano.shared(np.asarray(
            np.zeros_like(param.get_value(borrow=True)),
            dtype=theano.config.floatX)
        )
        for param in params
    ]

    updates = OrderedDict()
    learning_rate = 0.01
    momentum = 0.9
    # classical momentum; plain SGD would instead be
    #   updates[param] = param - learning_rate * gparam
    for param, gparam, gmomentum in zip(params, gparams, gmomentums):
        updates[gmomentum] = momentum * gmomentum - learning_rate * gparam
        updates[param] = param + updates[gmomentum]

    train = theano.function(
        inputs=[index],
        outputs=[cost],
        updates=updates,
        givens={
            x: train_set_x[index * batch_size: (index + 1) * batch_size],
            t: train_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )
    valid = theano.function(
        inputs=[index],
        outputs=[error],
        givens={
            x: valid_set_x[index * batch_size: (index + 1) * batch_size],
            t: valid_set_y[index * batch_size: (index + 1) * batch_size],
        }
    )

    for epoch in xrange(500):
        for minibatch_index in xrange(n_train_batches):
            train(minibatch_index)
            iter = epoch * n_train_batches + minibatch_index
            if (iter + 1) % n_train_batches == 0:
                validation_losses = [valid(i) for i in xrange(n_valid_batches)]
                this_validation_loss = np.mean(validation_losses)
                print ("EPOCH:: %i, Validation error: %f" %
                       (epoch + 1, this_validation_loss))
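# GeneralLayer is used above but not defined in this section. A minimal
# sketch consistent with that usage (forward_prop, params, L1_norm, L2_norm,
# and the post-activation output cached in .h); the tanh default and the
# uniform initialization range are illustrative assumptions.
class GeneralLayerSketch(object):
    def __init__(self, n_in, n_out, activation=T.tanh):
        rng = np.random.RandomState(1234)
        bound = np.sqrt(6. / (n_in + n_out))
        W_values = np.asarray(
            rng.uniform(low=-bound, high=bound, size=(n_in, n_out)),
            dtype=theano.config.floatX)
        self.W = theano.shared(W_values, name='W', borrow=True)
        self.b = theano.shared(np.zeros((n_out,), dtype=theano.config.floatX),
                               name='b', borrow=True)
        self.activation = activation
        self.params = [self.W, self.b]
        self.L1_norm = abs(self.W).sum()
        self.L2_norm = (self.W ** 2).sum()
        self.h = None  # set by forward_prop

    def forward_prop(self, input):
        # cache the symbolic output so callers can read it back via .h
        self.h = self.activation(T.dot(input, self.W) + self.b)
        return self.h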