Example #1
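# Assumed imports for running this snippet (Python 2 / Theano tutorial code);
# `load_data` and the `DBN` class are defined in the accompanying Deep Learning
# Tutorials modules -- adjust these imports to the local file layout.
import os
import sys
import time

import numpy

from logistic_sgd import load_data
from DBN import DBN
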
def test_DBN(finetune_lr=0.1, pretraining_epochs=100,
			 pretrain_lr=0.01, k=1, training_epochs=1000,
			 dataset='mnist.pkl.gz', batch_size=10):
	"""
	Demonstrates how to train and test a Deep Belief Network.

	This is demonstrated on MNIST.

	:type finetune_lr: float
	:param finetune_lr: learning rate used in the finetune stage
	:type pretraining_epochs: int
	:param pretraining_epochs: number of epochs to do pretraining
	:type pretrain_lr: float
	:param pretrain_lr: learning rate to be used during pre-training
	:type k: int
	:param k: number of Gibbs steps in CD/PCD
	:type training_epochs: int
	:param training_epochs: maximal number of iterations to run the optimizer
	:type dataset: string
	:param dataset: path to the pickled dataset
	:type batch_size: int
	:param batch_size: the size of a minibatch
	"""

	datasets = load_data(dataset)

	train_set_x, train_set_y = datasets[0]
	valid_set_x, valid_set_y = datasets[1]
	test_set_x, test_set_y = datasets[2]

	# compute number of minibatches for training
	n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size

	# numpy random generator
	numpy_rng = numpy.random.RandomState(123)
	print '... building the model'
	# construct the Deep Belief Network
	dbn = DBN(numpy_rng=numpy_rng, n_ins=28 * 28,
			  hidden_layers_sizes=[1200, 1200, 1200],
			  n_outs=10)

	#########################
	# PRETRAINING THE MODEL #
	#########################
	print '... getting the pretraining functions'
	pretraining_fns = dbn.pretraining_functions(train_set_x=train_set_x,
												batch_size=batch_size,
												k=k)

	print '... pre-training the model'
	start_time = time.clock()
	## Pre-train layer-wise
	for i in xrange(dbn.n_layers):
		# go through pretraining epochs
		for epoch in xrange(pretraining_epochs):
			# go through the training set
			c = []
			for batch_index in xrange(n_train_batches):
				c.append(pretraining_fns[i](index=batch_index,
											lr=pretrain_lr))
			print 'Pre-training layer %i, epoch %d, cost ' % (i, epoch),
			print numpy.mean(c)

	end_time = time.clock()
	print >> sys.stderr, ('The pretraining code for file ' +
						  os.path.split(__file__)[1] +
						  ' ran for %.2fm' % ((end_time - start_time) / 60.))

	########################
	# FINETUNING THE MODEL #
	########################

	# get the training, validation and testing function for the model
	print '... getting the finetuning functions'
	train_fn, validate_model, test_model = dbn.build_finetune_functions(
				datasets=datasets, batch_size=batch_size,
				learning_rate=finetune_lr)

	print '... fine-tuning the model'
	# early-stopping parameters
	patience = 4 * n_train_batches  # look at this many examples regardless
	patience_increase = 2.	# wait this much longer when a new best is
							  # found
	improvement_threshold = 0.995  # a relative improvement of this much is
								   # considered significant
	validation_frequency = min(n_train_batches, patience // 2)
								  # go through this many
								  # minibatches before checking the network
								  # on the validation set; in this case we
								  # check every epoch

	best_params = None
	best_validation_loss = numpy.inf
	test_score = 0.
	start_time = time.clock()

	done_looping = False
	epoch = 0

	while (epoch < training_epochs) and (not done_looping):
		epoch = epoch + 1
		for minibatch_index in xrange(n_train_batches):

			minibatch_avg_cost = train_fn(minibatch_index)
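			# iteration number: count of minibatches processed since the start of training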
			iter = (epoch - 1) * n_train_batches + minibatch_index

			if (iter + 1) % validation_frequency == 0:

				validation_losses = validate_model()
				this_validation_loss = numpy.mean(validation_losses)
				print('epoch %i, minibatch %i/%i, validation error %f %%' % \
					  (epoch, minibatch_index + 1, n_train_batches,
					   this_validation_loss * 100.))

				# if we got the best validation score until now
				if this_validation_loss < best_validation_loss:

					#improve patience if loss improvement is good enough
					if (this_validation_loss < best_validation_loss *
						improvement_threshold):
						patience = max(patience, iter * patience_increase)

					# save best validation score and iteration number
					best_validation_loss = this_validation_loss
					best_iter = iter

					# test it on the test set
					test_losses = test_model()
					test_score = numpy.mean(test_losses)
					print(('	 epoch %i, minibatch %i/%i, test error of '
						   'best model %f %%') %
						  (epoch, minibatch_index + 1, n_train_batches,
						   test_score * 100.))

			if patience <= iter:
				done_looping = True
				break

	end_time = time.clock()
	print(('Optimization complete with best validation score of %f %%, '
		   'with test performance %f %%') %
				 (best_validation_loss * 100., test_score * 100.))
	print >> sys.stderr, ('The fine tuning code for file ' +
						  os.path.split(__file__)[1] +
						  ' ran for %.2fm' % ((end_time - start_time)
											  / 60.))
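
A minimal way to exercise this example, assuming test_DBN and its helpers are importable, is to call it with reduced epoch counts; the values below are illustrative, not tuned settings.

if __name__ == '__main__':
	# quick smoke test: one pretraining epoch per layer and two finetuning
	# epochs, just to confirm the whole pipeline runs end to end
	test_DBN(pretraining_epochs=1, training_epochs=2)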
Example #2
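# Assumed imports for running this snippet; `load_data`, `tile_raster_images`
# and the `RBM` class come from the accompanying Deep Learning Tutorials
# modules -- adjust these imports to the local file layout.
import os
import time

import numpy
import PIL.Image
import theano
import theano.tensor as T
from theano.tensor.shared_randomstreams import RandomStreams

from logistic_sgd import load_data
from utils import tile_raster_images
from rbm import RBM
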
def test_rbm(learning_rate=0.1, training_epochs=15,
			 dataset='mnist.pkl.gz', batch_size=20,
			 n_chains=20, n_samples=10, output_folder='rbm_plots',
			 n_hidden=500):
	"""
	Demonstrates how to train an RBM and afterwards sample from it using Theano.

	This is demonstrated on MNIST.

	:param learning_rate: learning rate used for training the RBM

	:param training_epochs: number of epochs used for training

	:param dataset: path to the pickled dataset

	:param batch_size: size of a batch used to train the RBM

	:param n_chains: number of parallel Gibbs chains to be used for sampling

	:param n_samples: number of samples to plot for each chain

	:param output_folder: directory in which the filter and sample plots are saved

	:param n_hidden: number of hidden units of the RBM
	"""
	datasets = load_data(dataset)

	train_set_x, train_set_y = datasets[0]
	test_set_x, test_set_y = datasets[2]

	# compute number of minibatches for training
	n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size

	# allocate symbolic variables for the data
	index = T.lscalar()	# index to a [mini]batch
	x = T.matrix('x')  # the data is presented as rasterized images

	rng = numpy.random.RandomState(123)
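	# symbolic random stream for the stochastic hidden/visible sampling,
	# seeded from the numpy RNG for reproducibility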
	theano_rng = RandomStreams(rng.randint(2 ** 30))

	# initialize storage for the persistent chain (state = hidden
	# layer of chain)
	persistent_chain = theano.shared(numpy.zeros((batch_size, n_hidden),
												 dtype=theano.config.floatX),
									 borrow=True)

	# construct the RBM class
	rbm = RBM(input=x, n_visible=28 * 28,
			  n_hidden=n_hidden, numpy_rng=rng, theano_rng=theano_rng)

	# get the cost and the parameter updates corresponding to one step of PCD-15
	# (persistent contrastive divergence with 15 Gibbs steps)
	cost, updates = rbm.get_cost_updates(lr=learning_rate,
										 persistent=persistent_chain, k=15)

	#################################
	#       Training the RBM        #
	#################################
	if not os.path.isdir(output_folder):
		os.makedirs(output_folder)
	os.chdir(output_folder)

	# it is ok for a theano function to have no output
	# the purpose of train_rbm is solely to update the RBM parameters
	train_rbm = theano.function([index], cost,
		   updates=updates,
		   givens={x: train_set_x[index * batch_size:
								  (index + 1) * batch_size]},
		   name='train_rbm')

	plotting_time = 0.
	start_time = time.clock()

	# go through training epochs
	for epoch in xrange(training_epochs):

		# go through the training set
		mean_cost = []
		for batch_index in xrange(n_train_batches):
			mean_cost += [train_rbm(batch_index)]

		print 'Training epoch %d, cost is ' % epoch, numpy.mean(mean_cost)

		# Plot filters after each training epoch
		plotting_start = time.clock()
		# Construct image from the weight matrix
		image = PIL.Image.fromarray(tile_raster_images(
				 X=rbm.W.get_value(borrow=True).T,
				 img_shape=(28, 28), tile_shape=(10, 10),
				 tile_spacing=(1, 1)))
		image.save('filters_at_epoch_%i.png' % epoch)
		plotting_stop = time.clock()
		plotting_time += (plotting_stop - plotting_start)

	end_time = time.clock()

	pretraining_time = (end_time - start_time) - plotting_time

	print ('Training took %f minutes' % (pretraining_time / 60.))

	#################################
	#     Sampling from the RBM     #
	#################################
	# find out the number of test samples
	number_of_test_samples = test_set_x.get_value(borrow=True).shape[0]

	# pick random test examples, with which to initialize the persistent chain
	test_idx = rng.randint(number_of_test_samples - n_chains)
	persistent_vis_chain = theano.shared(numpy.asarray(
			test_set_x.get_value(borrow=True)[test_idx:test_idx + n_chains],
			dtype=theano.config.floatX))

	plot_every = 1000
	# define one step of Gibbs sampling (mf = mean-field) define a
	# function that does `plot_every` steps before returning the
	# sample for plotting
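	# only the visible sample (the last output of gibbs_vhv) is seeded via
	# outputs_info; the other five outputs are not fed back between steps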
	[presig_hids, hid_mfs, hid_samples, presig_vis,
	 vis_mfs, vis_samples], updates =  \
						theano.scan(rbm.gibbs_vhv,
								outputs_info=[None,  None, None, None,
											  None, persistent_vis_chain],
								n_steps=plot_every)

	# add to updates the shared variable that takes care of our persistent
	# chain
	updates.update({persistent_vis_chain: vis_samples[-1]})
	# construct the function that implements our persistent chain.
	# we generate the "mean field" activations for plotting and the actual
	# samples for reinitializing the state of our persistent chain
	sample_fn = theano.function([], [vis_mfs[-1], vis_samples[-1]],
								updates=updates,
								name='sample_fn')

	# create a space to store the image for plotting (we need to leave
	# room for the tile_spacing as well)
	image_data = numpy.zeros((29 * n_samples + 1, 29 * n_chains - 1),
							 dtype='uint8')
	for idx in xrange(n_samples):
		# generate `plot_every` intermediate samples that we discard,
		# because successive samples in the chain are too correlated
		vis_mf, vis_sample = sample_fn()
		print ' ... plotting sample ', idx
		image_data[29 * idx:29 * idx + 28, :] = tile_raster_images(
				X=vis_mf,
				img_shape=(28, 28),
				tile_shape=(1, n_chains),
				tile_spacing=(1, 1))
		# construct image

	image = PIL.Image.fromarray(image_data)
	image.save('samples.png')
	os.chdir('../')
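
Similarly, a reduced invocation (illustrative values only; the output folder name is arbitrary) can be used to check that training, filter plotting and sampling all run:

if __name__ == '__main__':
	# short run: one training epoch, a few chains and samples -- not enough
	# to learn good filters, but enough to verify the code path
	test_rbm(training_epochs=1, n_chains=10, n_samples=5,
			 output_folder='rbm_plots_quick')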