# Example #1
# 0
def test_SdA(finetune_lr=0.1, pretraining_epochs=20, ## originally 15
			 pretrain_lr=0.001, training_epochs=1000,
			 dataset='cifar-10-batches-py', batch_size=1, mode='train', amount='full'):
	"""
	Train or apply a stacked denoising autoencoder (SdA) classifier.

	The network is built for 32*32 rasterized inputs, three hidden layers
	of 1300 units each and 10 output classes.

	With ``mode == 'train'`` every layer is pre-trained in turn, then the
	whole network is fine-tuned with patience-based early stopping.  The
	current parameters are handed to ``save_parameters(..., 'SdA')`` after
	every fine-tuning minibatch.

	With ``mode == 'test'`` the parameters are read from ``params/SdA.pkl``,
	labels are predicted for the loaded set and written to
	``result/SdA.csv`` as ``[row number, label]`` rows (row numbers start
	at 1).

	:type finetune_lr: float
	:param finetune_lr: learning rate used in the finetune stage
	(factor for the stochastic gradient)

	:type pretraining_epochs: int
	:param pretraining_epochs: number of epochs of pretraining per layer

	:type pretrain_lr: float
	:param pretrain_lr: learning rate to be used during pre-training

	:type training_epochs: int
	:param training_epochs: maximal number of fine-tuning epochs

	:type dataset: string
	:param dataset: dataset location, passed as-is to ``load_data``

	:type batch_size: int
	:param batch_size: number of examples per minibatch

	:type mode: string
	:param mode: 'train' to pretrain and finetune, 'test' to predict labels

	:type amount: string
	:param amount: passed as-is to ``load_data``
	"""
	is_training = (mode == 'train')
	is_testing = (mode == 'test')

	datasets = load_data(dataset, mode=mode, amount=amount)

	# Every mode other than 'train' reads a single set from datasets[0].
	# `//` keeps the batch counts integral (same result as `/` on ints in
	# Python 2).
	if is_training:
		train_set_x, train_set_y = datasets[0]
		valid_set_x, valid_set_y = datasets[1]
		n_train_batches = train_set_x.get_value(borrow=True).shape[0]
		n_train_batches //= batch_size
	else:
		test_set_x, test_set_y = datasets[0]
		n_test_batches = test_set_x.get_value(borrow=True).shape[0]
		n_test_batches //= batch_size

	# numpy random generator
	numpy_rng = numpy.random.RandomState(89677)

	# allocate symbolic variables for the data
	index = T.lscalar()  # index to a [mini]batch
	x = T.matrix('x')   # the data is presented as rasterized images
	y = T.ivector('y')  # the labels are presented as 1D vector of [int]

	print('... building the model')
	# construct the stacked denoising autoencoder class
	sda = SdA(numpy_rng=numpy_rng, n_ins=32 * 32,
			  hidden_layers_sizes=[1300, 1300, 1300],
			  n_outs=10)

	## load the saved parameters
	if is_testing:
		learned_params = unpickle('params/SdA.pkl')

	#########################
	# PRETRAINING THE MODEL #
	#########################

	print('... getting the pretraining functions')
	if is_training:
		pretraining_fns = sda.pretraining_functions(train_set_x=train_set_x,
													batch_size=batch_size)

		print('... pre-training the model')
		start_time = time.clock()
		## Pre-train layer-wise, one corruption level per layer
		corruption_levels = [.1, .2, .3]
		for i in xrange(sda.n_layers):
			for epoch in xrange(pretraining_epochs):
				# one pass over the training set for layer i
				c = [pretraining_fns[i](index=batch_index,
										corruption=corruption_levels[i],
										lr=pretrain_lr)
					 for batch_index in xrange(n_train_batches)]
				# the double space before the cost matches the previous
				# two-statement print output
				print('Pre-training layer %i, epoch %d / %d, cost  %s' %
					  (i, epoch + 1, pretraining_epochs, numpy.mean(c)))

		end_time = time.clock()

		sys.stderr.write('The pretraining code for file ' +
						 os.path.split(__file__)[1] +
						 ' ran for %.2fm' % ((end_time - start_time) / 60.) +
						 '\n')

	########################
	# FINETUNING THE MODEL #
	########################

	# get the training and validation functions for the model
	print('... getting the finetuning functions')
	if is_training:
		train_fn, validate_model = sda.build_finetune_functions(
					datasets=datasets, batch_size=batch_size,
					learning_rate=finetune_lr)

	print('... finetunning the model')
	if is_training:
		# early-stopping parameters
		patience = 10 * n_train_batches  # look as this many examples regardless
		patience_increase = 2.  # wait this much longer when a new best is
								# found
		improvement_threshold = 0.995  # a relative improvement of this much is
									   # considered significant
		# go through this many minibatches before checking the network on
		# the validation set; in this case we check every epoch
		validation_frequency = min(n_train_batches, patience // 2)

		# Snapshot function for the parameters that get saved.  It is
		# compiled once here instead of once per minibatch: the graph does
		# not change between iterations.
		get_params = theano.function(inputs=[],
				outputs=[sda.sigmoid_layers[0].W, sda.sigmoid_layers[0].b,
					sda.sigmoid_layers[1].W, sda.sigmoid_layers[1].b,
					sda.sigmoid_layers[2].W, sda.sigmoid_layers[2].b,
					sda.logLayer.W, sda.logLayer.b])

	# create a function to get the labels predicted by the model, with the
	# layer parameters replaced by the loaded ones
	if is_testing:
		# NOTE(review): `x` is the local symbolic matrix created above and
		# is never passed to SdA(); confirm that the SdA graph really
		# depends on it (otherwise the input variable of `sda` is needed).
		get_test_labels = theano.function([index], sda.logLayer.y_pred,
				givens={
					x: test_set_x[index * batch_size: (index + 1) * batch_size],
					sda.sigmoid_layers[0].W: learned_params[0],
					sda.sigmoid_layers[0].b: learned_params[1],
					sda.sigmoid_layers[1].W: learned_params[2],
					sda.sigmoid_layers[1].b: learned_params[3],
					sda.sigmoid_layers[2].W: learned_params[4],
					sda.sigmoid_layers[2].b: learned_params[5],
					sda.logLayer.W: learned_params[6],
					sda.logLayer.b: learned_params[7]})

	best_validation_loss = numpy.inf
	test_score = 0.  # never updated here; reported as-is in the summary
	start_time = time.clock()

	# the fine-tuning loop only runs in train mode
	done_looping = not is_training
	epoch = 0

	while (epoch < training_epochs) and (not done_looping):
		epoch = epoch + 1
		for minibatch_index in xrange(n_train_batches):
			train_fn(minibatch_index)

			## save the parameters after every minibatch
			save_parameters(get_params(), 'SdA')

			iteration = (epoch - 1) * n_train_batches + minibatch_index

			if (iteration + 1) % validation_frequency == 0:
				validation_losses = validate_model()
				this_validation_loss = numpy.mean(validation_losses)
				print('epoch %i, minibatch %i/%i, validation error %f %%' %
					  (epoch, minibatch_index + 1, n_train_batches,
					   this_validation_loss * 100.))

				# if we got the best validation score until now
				if this_validation_loss < best_validation_loss:

					# improve patience if loss improvement is good enough
					if (this_validation_loss < best_validation_loss *
						improvement_threshold):
						patience = max(patience, iteration * patience_increase)

					best_validation_loss = this_validation_loss

			if patience <= iteration:
				done_looping = True
				break

	if is_testing:
		print('predicting the labels...')
		pred_labels = []
		for i in xrange(n_test_batches):
			print('%s / %s' % (i + 1, n_test_batches))
			pred_labels.append(get_test_labels(i))

		# the with-block guarantees the CSV is flushed and closed
		with open('result/SdA.csv', 'w') as csv_file:
			writer = csv.writer(csv_file)
			row = 1

			print('output test labels...')
			for i, batch_labels in enumerate(pred_labels):
				print('%s / %s' % (i + 1, len(pred_labels)))
				for label in batch_labels:
					writer.writerow([row, label])
					row += 1

	end_time = time.clock()
	print(('Optimization complete with best validation score of %f %%,'
		   'with test performance %f %%') %
				 (best_validation_loss * 100., test_score * 100.))
	sys.stderr.write('The training code for file ' +
					 os.path.split(__file__)[1] +
					 ' ran for %.2fm' % ((end_time - start_time) / 60.) +
					 '\n')
def evaluate_lenet5(learning_rate=0.1, learning_rate2=0.05, learning_rate3=0.01, n_epochs=200,
					dataset='cifar-10-batches-py',
					nkerns=[6, 16], batch_size=20, mode='train', amount='full'): # nkerns could be ok with [10, 50]
	""" Train or apply a LeNet-style convolutional network on 32x32 images.

	The model is two convolution + max-pooling layers, a 500-unit tanh
	hidden layer and a 10-way logistic regression.

	With ``mode == 'train'`` the network is trained by minibatch SGD for
	``n_epochs`` epochs.  The step size is picked per minibatch from the
	latest validation error ``e`` (a fraction, not a percentage):
	``learning_rate`` during the first epoch and while ``e >= 0.45``,
	``learning_rate2`` while ``0.35 <= e < 0.45`` and ``learning_rate3``
	while ``e < 0.35``.  The parameters are handed to
	``save_parameters(..., 'convolutional_mlp_gray')`` after every
	minibatch.  The patience-based early-stopping check is disabled, so all
	``n_epochs`` epochs are run.

	With ``mode == 'test'`` the parameters are read from
	``params/convolutional_mlp_gray.pkl``, labels are predicted for the
	loaded set and written to ``result/convolutional_mlp_gray.csv`` as
	``[row number, label]`` rows (row numbers start at 1).

	:type learning_rate: float
	:param learning_rate: learning rate used (factor for the stochastic
						  gradient) in the first epoch and for errors of
						  0.45 or more

	:type learning_rate2: float
	:param learning_rate2: learning rate for validation errors in
						   [0.35, 0.45)

	:type learning_rate3: float
	:param learning_rate3: learning rate for validation errors below 0.35

	:type n_epochs: int
	:param n_epochs: maximal number of epochs to run the optimizer

	:type dataset: string
	:param dataset: dataset location, passed as-is to ``load_data``

	:type nkerns: list of ints
	:param nkerns: number of kernels on each layer

	:type batch_size: int
	:param batch_size: number of examples per minibatch

	:type mode: string
	:param mode: 'train' to fit the model, 'test' to predict labels

	:type amount: string
	:param amount: passed as-is to ``load_data``
	"""

	rng = numpy.random.RandomState(23455)

	datasets = load_data(dataset, mode=mode, amount=amount)

	# Every mode other than 'train' reads a single set from datasets[0].
	# `//` keeps the batch counts integral (same result as `/` on ints in
	# Python 2).
	if mode == 'train':
		train_set_x, train_set_y = datasets[0]
		valid_set_x, valid_set_y = datasets[1]
		n_train_batches = train_set_x.get_value(borrow=True).shape[0]
		n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
		n_train_batches //= batch_size
		n_valid_batches //= batch_size
	else:
		test_set_x, test_set_y = datasets[0]
		n_test_batches = test_set_x.get_value(borrow=True).shape[0]
		n_test_batches //= batch_size

	# allocate symbolic variables for the data
	index = T.lscalar()  # index to a [mini]batch
	x = T.matrix('x')   # the data is presented as rasterized images
	y = T.ivector('y')  # the labels are presented as 1D vector of
						# [int] labels

	######################
	# BUILD ACTUAL MODEL #
	######################
	print('... building the model')

	# Reshape matrix of rasterized images of shape (batch_size,32*32)
	# to a 4D tensor, compatible with our LeNetConvPoolLayer
	layer0_input = x.reshape((batch_size, 1, 32, 32))

	# Construct the first convolutional pooling layer:
	# filtering reduces the image size to (32-5+1,32-5+1)=(28,28)
	# maxpooling reduces this further to (28/2,28/2) = (14,14)
	# 4D output tensor is thus of shape (batch_size,nkerns[0],14,14)
	layer0 = LeNetConvPoolLayer(rng, input=layer0_input,
			image_shape=(batch_size, 1, 32, 32),
			filter_shape=(nkerns[0], 1, 5, 5), poolsize=(2, 2))

	# Construct the second convolutional pooling layer
	# filtering reduces the image size to (14-5+1,14-5+1)=(10,10)
	# maxpooling reduces this further to (10/2,10/2) = (5,5)
	# 4D output tensor is thus of shape (batch_size,nkerns[1],5,5)
	layer1 = LeNetConvPoolLayer(rng, input=layer0.output,
			image_shape=(batch_size, nkerns[0], 14, 14),
			filter_shape=(nkerns[1], nkerns[0], 5, 5), poolsize=(2, 2))

	# the HiddenLayer being fully-connected, it operates on 2D matrices of
	# shape (batch_size,num_pixels) (i.e matrix of rasterized images).
	# This will generate a matrix of shape (batch_size, nkerns[1]*5*5)
	layer2_input = layer1.output.flatten(2)

	# construct a fully-connected tanh layer
	layer2 = HiddenLayer(rng, input=layer2_input, n_in=nkerns[1] * 5 * 5,
						 n_out=500, activation=T.tanh)

	# classify the values of the fully-connected layer
	layer3 = LogisticRegression(input=layer2.output, n_in=500, n_out=10)

	## load the saved parameters
	if mode == 'test':
		learned_params = unpickle('params/convolutional_mlp_gray.pkl')

	# the cost we minimize during training is the NLL of the model
	cost = layer3.negative_log_likelihood(y)

	# create a function to compute the mistakes that are made by the model
	if mode == 'test':
		# only referenced by the disabled test-set evaluation further down
		test_model = theano.function([index], layer3.errors(y),
				givens={
					x: test_set_x[index * batch_size: (index + 1) * batch_size],
					y: test_set_y[index * batch_size: (index + 1) * batch_size]})
	else:
		validate_model = theano.function([index], layer3.errors(y),
				givens={
					x: valid_set_x[index * batch_size: (index + 1) * batch_size],
					y: valid_set_y[index * batch_size: (index + 1) * batch_size]})

		# only referenced by the disabled prediction dump in the train loop
		check_label = theano.function(inputs=[index],
				outputs=layer3.y_pair(y),
					givens={
						x: train_set_x[index * batch_size: (index + 1) * batch_size],
						y: train_set_y[index * batch_size: (index + 1) * batch_size]})

	# create a function to get the labels predicted by the model, with the
	# layer parameters replaced by the loaded ones
	if mode == 'test':
		get_test_labels = theano.function([index], layer3.y_pred,
				givens={
					x: test_set_x[index * batch_size: (index + 1) * batch_size],
					layer0.W: learned_params[0],
					layer0.b: learned_params[1],
					layer1.W: learned_params[2],
					layer1.b: learned_params[3],
					layer2.W: learned_params[4],
					layer2.b: learned_params[5],
					layer3.W: learned_params[6],
					layer3.b: learned_params[7]})

	if mode == 'train':
		# create a list of all model parameters to be fit by gradient descent
		params = layer3.params + layer2.params + layer1.params + layer0.params

		# create a list of gradients for all model parameters
		grads = T.grad(cost, params)

		def compile_sgd_step(rate):
			# Compile a function doing one SGD step on a training minibatch
			# with a fixed step size, pairing every parameter with its
			# gradient to build the update list.
			sgd_updates = [(param_i, param_i - rate * grad_i)
						   for param_i, grad_i in zip(params, grads)]
			return theano.function([index], cost, updates=sgd_updates,
				  givens={
					x: train_set_x[index * batch_size: (index + 1) * batch_size],
					y: train_set_y[index * batch_size: (index + 1) * batch_size]})

		# one compiled step per learning rate of the schedule
		train_model = compile_sgd_step(learning_rate)
		train_model2 = compile_sgd_step(learning_rate2)
		train_model3 = compile_sgd_step(learning_rate3)

		# Snapshot function for the parameters that get saved.  It is
		# compiled once here instead of once per minibatch: the graph does
		# not change between iterations.
		get_params = theano.function(inputs=[],
				outputs=[layer0.W, layer0.b, layer1.W, layer1.b,
						 layer2.W, layer2.b, layer3.W, layer3.b])

	###############
	# TRAIN MODEL #
	###############
	print('... training the model')
	# early-stopping parameters
	if mode == 'train':
		patience = 10000  # look as this many examples regardless
		patience_increase = 2  # wait this much longer when a new best is
							   # found
		improvement_threshold = 0.999  # a relative improvement of this much is
									   # considered significant
		# go through this many minibatches before checking the network on
		# the validation set; in this case we check every epoch
		validation_frequency = min(n_train_batches, patience // 2)

	start_time = time.clock()

	if mode == 'train':
		best_validation_loss = numpy.inf
		best_iter = 0
		test_score = 0.  # never updated here; reported as-is in the summary
		# latest validation error; consulted from the second epoch on
		this_validation_loss = numpy.inf
		done_looping = False
	else:
		# the training loop only runs in train mode
		done_looping = True

	epoch = 0

	while (epoch < n_epochs) and (not done_looping):
		epoch = epoch + 1
		for minibatch_index in xrange(n_train_batches):

			iteration = (epoch - 1) * n_train_batches + minibatch_index

			if iteration % 100 == 0:
				# the double space matches the previous two-item print output
				print('training @ iter =  %s' % iteration)

			# Learning-rate schedule.  `not (loss < 0.45)` rather than
			# `loss >= 0.45` so that a NaN loss keeps the base rate.  A loss
			# of exactly 0.35 now takes the middle rate; it used to fall
			# through to the largest one.
			if epoch == 1 or not (this_validation_loss < 0.45):
				sgd_step = train_model
			elif this_validation_loss >= 0.35:
				sgd_step = train_model2
			else:
				sgd_step = train_model3
			sgd_step(minibatch_index)

			## check the contents of predictions occasionally (disabled)
			# if iteration % 100 == 0:
			# 	[prediction, true_label] = check_label(minibatch_index)
			# 	print 'prediction:'
			# 	print prediction
			# 	print 'true_label:'
			# 	print true_label

			## save the parameters after every minibatch
			save_parameters(get_params(), 'convolutional_mlp_gray')

			if (iteration + 1) % validation_frequency == 0:

				# compute zero-one loss on validation set
				validation_losses = [validate_model(i) for i
									 in xrange(n_valid_batches)]
				this_validation_loss = numpy.mean(validation_losses)
				print('epoch %i, minibatch %i/%i, validation error %f %%' %
					  (epoch, minibatch_index + 1, n_train_batches,
					   this_validation_loss * 100.))

				# if we got the best validation score until now
				if this_validation_loss < best_validation_loss:

					# improve patience if loss improvement is good enough
					if (this_validation_loss <
							best_validation_loss * improvement_threshold):
						patience = max(patience, iteration * patience_increase)

					# save best validation score and iteration number
					best_validation_loss = this_validation_loss
					best_iter = iteration

					# test-set evaluation of the best model (disabled; no
					# test set is loaded in train mode)
					# test_losses = [test_model(i) for i in xrange(n_test_batches)]
					# test_score = numpy.mean(test_losses)
					# print(('	 epoch %i, minibatch %i/%i, test error of best '
					# 	   'model %f %%') %
					# 	  (epoch, minibatch_index + 1, n_train_batches,
					# 	   test_score * 100.))

			# early stopping on `patience` (disabled, so every epoch runs)
			# if patience <= iteration:
			# 	done_looping = True
			# 	break

	if mode == 'test':
		print('predicting the labels...')
		pred_labels = []
		for i in xrange(n_test_batches):
			print('%s / %s' % (i + 1, n_test_batches))
			pred_labels.append(get_test_labels(i))

		# the with-block guarantees the CSV is flushed and closed
		with open('result/convolutional_mlp_gray.csv', 'w') as csv_file:
			writer = csv.writer(csv_file)
			row = 1

			print('output test labels...')
			for i, batch_labels in enumerate(pred_labels):
				print('%s / %s' % (i + 1, len(pred_labels)))
				for label in batch_labels:
					writer.writerow([row, label])
					row += 1

	end_time = time.clock()
	if mode == 'train':
		print('Optimization complete.')
		print('Best validation score of %f %% obtained at iteration %i,'
			  'with test performance %f %%' %
			  (best_validation_loss * 100., best_iter + 1, test_score * 100.))
	sys.stderr.write('The code for file ' +
					 os.path.split(__file__)[1] +
					 ' ran for %.2fm' % ((end_time - start_time) / 60.) +
					 '\n')
# Example #3
# 0
def test_mlp(learning_rate=0.01, L1_reg=0.00, L2_reg=0.0001, n_epochs=1000,
			 dataset='cifar-10-batches-py', batch_size=20, n_hidden=500):
	"""
	Fit a multilayer perceptron by minibatch stochastic gradient descent.

	The classifier takes 32*32 rasterized inputs, has ``n_hidden`` hidden
	units and 10 outputs.  Training uses patience-based early stopping on
	the validation error; whenever a new best validation error is reached
	the test error is measured as well.  A summary is printed at the end
	and the running time is written to stderr.

	:type learning_rate: float
	:param learning_rate: learning rate used (factor for the stochastic
	gradient)

	:type L1_reg: float
	:param L1_reg: L1-norm's weight when added to the cost (see
	regularization)

	:type L2_reg: float
	:param L2_reg: L2-norm's weight when added to the cost (see
	regularization)

	:type n_epochs: int
	:param n_epochs: maximal number of epochs to run the optimizer

	:type dataset: string
	:param dataset: dataset location, passed as-is to ``load_data``

	:type batch_size: int
	:param batch_size: number of examples per minibatch

	:type n_hidden: int
	:param n_hidden: number of hidden units of the MLP
	"""
	datasets = load_data(dataset)

	train_set_x, train_set_y = datasets[0]
	valid_set_x, valid_set_y = datasets[1]
	test_set_x, test_set_y = datasets[2]

	def count_batches(shared_x):
		# number of minibatches held by a shared dataset
		return shared_x.get_value(borrow=True).shape[0] / batch_size

	n_train_batches = count_batches(train_set_x)
	n_valid_batches = count_batches(valid_set_x)
	n_test_batches = count_batches(test_set_x)

	######################
	# BUILD ACTUAL MODEL #
	######################
	print('... building the model')

	# symbolic inputs: minibatch index, rasterized images, int labels
	index = T.lscalar()
	x = T.matrix('x')
	y = T.ivector('y')

	rng = numpy.random.RandomState(1234)

	classifier = MLP(rng=rng, input=x, n_in=32 * 32,
					 n_hidden=n_hidden, n_out=10)

	# symbolic cost: negative log likelihood plus the L1 and L2 penalties
	cost = (classifier.negative_log_likelihood(y)
			+ L1_reg * classifier.L1
			+ L2_reg * classifier.L2_sqr)

	def minibatch_of(set_x, set_y):
		# substitutions feeding minibatch `index` of a dataset to x and y
		begin = index * batch_size
		end = (index + 1) * batch_size
		return {x: set_x[begin:end], y: set_y[begin:end]}

	# functions returning the classification errors on one minibatch
	test_model = theano.function(inputs=[index],
			outputs=classifier.errors(y),
			givens=minibatch_of(test_set_x, test_set_y))

	validate_model = theano.function(inputs=[index],
			outputs=classifier.errors(y),
			givens=minibatch_of(valid_set_x, valid_set_y))

	# gradient of the cost with respect to each parameter, in the order of
	# classifier.params
	gparams = [T.grad(cost, param) for param in classifier.params]

	# one (variable, update expression) pair per parameter: a plain SGD step
	updates = [(param, param - learning_rate * gparam)
			   for param, gparam in zip(classifier.params, gparams)]

	# `train_model` returns the cost of a training minibatch and applies
	# `updates` as a side effect
	train_model = theano.function(inputs=[index], outputs=cost,
			updates=updates,
			givens=minibatch_of(train_set_x, train_set_y))

	###############
	# TRAIN MODEL #
	###############
	print('... training')

	# early-stopping parameters
	patience = 10000  # look as this many examples regardless
	patience_increase = 2  # wait this much longer when a new best is found
	improvement_threshold = 0.999  # relative improvement counted as
								   # significant
	# number of minibatches between two validation passes; with these
	# settings the network is checked at most once per epoch
	validation_frequency = min(n_train_batches, patience / 2)

	best_validation_loss = numpy.inf
	best_iter = 0
	test_score = 0.
	start_time = time.clock()

	epoch = 0
	done_looping = False

	while (epoch < n_epochs) and (not done_looping):
		epoch += 1
		epoch_offset = (epoch - 1) * n_train_batches
		for minibatch_index in xrange(n_train_batches):

			train_model(minibatch_index)
			# global iteration number, counted from zero
			iteration = epoch_offset + minibatch_index

			if (iteration + 1) % validation_frequency == 0:
				# zero-one loss averaged over the validation minibatches
				this_validation_loss = numpy.mean(
					[validate_model(i) for i in xrange(n_valid_batches)])

				print('epoch %i, minibatch %i/%i, validation error %f %%' %
					  (epoch, minibatch_index + 1, n_train_batches,
					   this_validation_loss * 100.))

				if this_validation_loss < best_validation_loss:
					# a significant improvement extends the patience
					significant = (best_validation_loss *
								   improvement_threshold)
					if this_validation_loss < significant:
						patience = max(patience,
									   iteration * patience_increase)

					best_validation_loss = this_validation_loss
					best_iter = iteration

					# measure the new best model on the test set
					test_score = numpy.mean(
						[test_model(i) for i in xrange(n_test_batches)])

					print(('	 epoch %i, minibatch %i/%i, test error of '
						   'best model %f %%') %
						  (epoch, minibatch_index + 1, n_train_batches,
						   test_score * 100.))

			if patience <= iteration:
				done_looping = True
				break

	end_time = time.clock()
	print(('Optimization complete. Best validation score of %f %% '
		   'obtained at iteration %i, with test performance %f %%') %
		  (best_validation_loss * 100., best_iter + 1, test_score * 100.))
	elapsed_minutes = (end_time - start_time) / 60.
	sys.stderr.write('The code for file ' +
					 os.path.split(__file__)[1] +
					 ' ran for %.2fm' % elapsed_minutes +
					 '\n')