def evaluate_lenet5(learning_rate=0.1, n_epochs=200,
					dataset='cifar-10-batches-py',
					nkerns=[20, 50], batch_size=500, mode='train', amount='full'):
	""" Demonstrates lenet on MNIST dataset

	:type learning_rate: float
	:param learning_rate: learning rate used (factor for the stochastic
						  gradient)

	:type n_epochs: int
	:param n_epochs: maximal number of epochs to run the optimizer

	:type dataset: string
	:param dataset: path to the dataset used for training / testing (CIFAR-10 here)

	:type nkerns: list of ints
	:param nkerns: number of kernels on each layer

	:type batch_size: int
	:param batch_size: number of examples in a minibatch

	:type mode: string
	:param mode: 'train' to fit the model, 'test' to predict with saved parameters

	:type amount: string
	:param amount: how much of the dataset load_data should use ('full' for all of it)
	"""

	rng = numpy.random.RandomState(23455)

	datasets = load_data(dataset, mode=mode, amount=amount)

	if mode == 'train':
		train_set_x, train_set_y = datasets[0]
		valid_set_x, valid_set_y = datasets[1]
	else:
		test_set_x, test_set_y = datasets[0]

	# compute number of minibatches for training, validation and testing
	if mode == 'train':
		n_train_batches = train_set_x.get_value(borrow=True).shape[0]
		n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
		n_train_batches /= batch_size
		n_valid_batches /= batch_size
	else:
		n_test_batches = test_set_x.get_value(borrow=True).shape[0]
		n_test_batches /= batch_size

	# allocate symbolic variables for the data
	index = T.lscalar()  # index to a [mini]batch
	x = T.matrix('x')   # the data is presented as rasterized images
	y = T.ivector('y')  # the labels are presented as 1D vector of
						# [int] labels
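	# x and y are purely symbolic here: they get bound to actual minibatch
	# slices of the shared dataset variables through the `givens` argument of
	# the Theano functions compiled below, with `index` selecting the slice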

	ishape = (3, 32, 32)  # this is the size of CIFAR-10 images (3 RGB channels of 32x32 pixels)

	######################
	# BUILD ACTUAL MODEL #
	######################
	print '... building the model'

	# Reshape matrix of rasterized images of shape (batch_size, 3*32*32)
	# to a 4D tensor, compatible with our LeNetConvPoolLayer
	layer0_input = x.reshape((batch_size, 3, 32, 32))  # 3 channels here (colour images) instead of the 1 used for MNIST

	# Construct the first convolutional pooling layer:
	# filtering reduces the image size to (32-5+1,32-5+1)=(28,28)
	# maxpooling reduces this further to (28/2,28/2) = (14,14)
	# 4D output tensor is thus of shape (batch_size,nkerns[0],14,14)
	layer0 = LeNetConvPoolLayer(rng, input=layer0_input,
			image_shape=(batch_size, 3, 32, 32),
			filter_shape=(nkerns[0], 3, 5, 5), poolsize=(2, 2))

	# Construct the second convolutional pooling layer
	# filtering reduces the image size to (14-5+1,14-5+1)=(10,10)
	# maxpooling reduces this further to (10/2,10/2) = (5,5)
	# 4D output tensor is thus of shape (batch_size,nkerns[1],5,5)
	layer1 = LeNetConvPoolLayer(rng, input=layer0.output,
			image_shape=(batch_size, nkerns[0], 14, 14),
			filter_shape=(nkerns[1], nkerns[0], 5, 5), poolsize=(2, 2))

	# the HiddenLayer being fully-connected, it operates on 2D matrices of
	# shape (batch_size,num_pixels) (i.e. matrix of rasterized images).
	# This will generate a matrix of shape (batch_size,nkerns[1]*5*5) = (500,1250)
	layer2_input = layer1.output.flatten(2)

	# construct a fully-connected layer with a tanh activation
	layer2 = HiddenLayer(rng, input=layer2_input, n_in=nkerns[1] * 5 * 5,
						 n_out=500, activation=T.tanh)

	# classify the values of the fully-connected layer
	layer3 = LogisticRegression(input=layer2.output, n_in=500, n_out=10)

	## load the saved parameters
	if mode == 'test':
		learned_params = unpickle('params/convolutional_mlp_color.pkl')

	# the cost we minimize during training is the NLL of the model
	cost = layer3.negative_log_likelihood(y)
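	# for reference, this is the usual multiclass negative log-likelihood,
	#   NLL(theta, D) = -(1/|D|) * sum_i log P(Y = y_i | x_i, theta),
	# typically averaged over the current minibatch inside LogisticRegression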

	# create a function to compute the mistakes that are made by the model
	if mode == 'test':
		test_model = theano.function([index], layer3.errors(y),
				 givens={
					x: test_set_x[index * batch_size: (index + 1) * batch_size],
					y: test_set_y[index * batch_size: (index + 1) * batch_size]})
	else:
		validate_model = theano.function([index], layer3.errors(y),
				givens={
					x: valid_set_x[index * batch_size: (index + 1) * batch_size],
					y: valid_set_y[index * batch_size: (index + 1) * batch_size]})

	# create a function to get the labels predicted by the model
	if mode == 'test':
		get_test_labels = theano.function([index], layer3.y_pred,
				givens={
					x: test_set_x[index * batch_size: (index + 1) * batch_size],
					layer0.W: learned_params[0],
					layer0.b: learned_params[1],
					layer1.W: learned_params[2],
					layer1.b: learned_params[3],
					layer2.W: learned_params[4],
					layer2.b: learned_params[5],
					layer3.W: learned_params[6],
					layer3.b: learned_params[7]})
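		# `givens` substitutes the pickled arrays for the layers' shared weight
		# and bias variables at call time, so prediction uses the learned
		# parameters rather than the random values the layers were built with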

	if mode == 'train':
		# create a list of all model parameters to be fit by gradient descent
		params = layer3.params + layer2.params + layer1.params + layer0.params
	
		# create a list of gradients for all model parameters
		grads = T.grad(cost, params)

	# train_model is a function that updates the model parameters by
	# SGD. Since this model has many parameters, it would be tedious to
	# manually create an update rule for each model parameter. We thus
	# create the updates list by automatically looping over all
	# (params[i], grads[i]) pairs.
	if mode == 'train':
		updates = []
		for param_i, grad_i in zip(params, grads):
			updates.append((param_i, param_i - learning_rate * grad_i))
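		# each (shared_variable, update_expression) pair implements plain SGD:
		# param <- param - learning_rate * d(cost)/d(param), applied once per
		# call to train_model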
	
		train_model = theano.function([index], cost, updates=updates,
			  givens={
				x: train_set_x[index * batch_size: (index + 1) * batch_size],
				y: train_set_y[index * batch_size: (index + 1) * batch_size]})
	
	###############
	# TRAIN MODEL #
	###############
	if mode == 'train':
		print '... training'
	# early-stopping parameters
	if mode == 'train':
		patience = 10000  # look at this many minibatch iterations regardless
		patience_increase = 2  # wait this much longer when a new best is
							   # found
		improvement_threshold = 0.995  # a relative improvement of this much is
									   # considered significant
		validation_frequency = min(n_train_batches, patience / 2)
		# go through this many minibatches before checking the network
		# on the validation set; in this case we check every epoch
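		# for example, if load_data places 40,000 images in the training split
		# (an assumption; the split is up to load_data), n_train_batches is
		# 40000 / 500 = 80, well below patience / 2 = 5000, so the network is
		# validated once per epoch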

	start_time = time.clock()
	if mode == 'train':
		best_params = None
		best_validation_loss = numpy.inf
		best_iter = 0
		test_score = 0.
		done_looping = False
	else:
		done_looping = True 

	epoch = 0

	while (epoch < n_epochs) and (not done_looping):
		epoch = epoch + 1
		for minibatch_index in xrange(n_train_batches):

			iter = (epoch - 1) * n_train_batches + minibatch_index

			if iter % 100 == 0:
				print 'training @ iter = ', iter
			cost_ij = train_model(minibatch_index)

			## save the parameters
			if mode == 'train':
				get_params = theano.function(inputs=[], outputs=[layer0.W, layer0.b, layer1.W, layer1.b, layer2.W, layer2.b, layer3.W, layer3.b])
				save_parameters(get_params(), 'convolutional_mlp_color')
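				# NB: this recompiles get_params and rewrites the pickle after
				# every minibatch, which is slow; saving only when the validation
				# error improves (as the MLP example below does) is cheaper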

			if (iter + 1) % validation_frequency == 0:

				# compute zero-one loss on validation set
				validation_losses = [validate_model(i) for i
									 in xrange(n_valid_batches)]
				this_validation_loss = numpy.mean(validation_losses)
				print('epoch %i, minibatch %i/%i, validation error %f %%' % \
					  (epoch, minibatch_index + 1, n_train_batches, \
					   this_validation_loss * 100.))

				# if we got the best validation score until now
				if this_validation_loss < best_validation_loss:

					#improve patience if loss improvement is good enough
					if this_validation_loss < best_validation_loss *  \
					   improvement_threshold:
						patience = max(patience, iter * patience_increase)

					# save best validation score and iteration number
					best_validation_loss = this_validation_loss
					best_iter = iter


			'''
			if patience <= iter:
				done_looping = True
				break
			'''

	if mode == 'test':
		print 'predicting the labels...'
		pred_labels = [[0 for j in xrange(batch_size)] for i in xrange(n_test_batches)]
		for i in xrange(n_test_batches):
			print str(i+1), '/', str(n_test_batches)
			pred_labels[i] = get_test_labels(i)

		writer = csv.writer(file('result/convolutional_mlp_color.csv', 'w'))
		row = 1

		print 'output test labels...'
		for i in xrange(len(pred_labels)): # TBF: hard code
			print str(i+1), '/', str(len(pred_labels))
			for j in xrange(len(pred_labels[i])):
				writer.writerow([row, pred_labels[i][j]])
				row += 1


	end_time = time.clock()

	if mode == 'train':
		print('Optimization complete.')
		print('Best validation score of %f %% obtained at iteration %i' %
			  (best_validation_loss * 100., best_iter + 1))

	print >> sys.stderr, ('The code for file ' +
						  os.path.split(__file__)[1] +
						  ' ran for %.2fm' % ((end_time - start_time) / 60.))
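
# Both examples rely on helpers defined elsewhere in this repository
# (load_data, unpickle, save_parameters).  Below is a minimal sketch of the two
# pickling helpers, assuming they simply persist a list of numpy parameter
# arrays with cPickle; the names, docstrings and the 'params/' directory are
# illustrative, not the project's actual implementation.
import os
import cPickle


def unpickle(path):
	"""Load a pickled object (e.g. a list of parameter arrays) from `path`."""
	with open(path, 'rb') as f:
		return cPickle.load(f)


def save_parameters(param_values, name):
	"""Pickle a list of parameter arrays to params/<name>.pkl."""
	if not os.path.isdir('params'):
		os.mkdir('params')
	with open(os.path.join('params', name + '.pkl'), 'wb') as f:
		cPickle.dump(param_values, f, protocol=cPickle.HIGHEST_PROTOCOL)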

# Example 2

def test_mlp(learning_rate=0.01, L1_reg=0.00, L2_reg=0.0001, n_epochs=100000,
             dataset='cifar-10-batches-py', batch_size=32, test_batch_size=32, n_hidden_1=500, n_hidden_2=500, mode='train',
             amount='full', valid_num=10000):  #batch_size: 32

    datasets = load_data(dataset, mode, amount, valid_num)

    if mode == 'train':
        train_set_x, train_set_y = datasets[0]
        valid_set_x, valid_set_y = datasets[1]
    else:
        test_set_x, test_set_y = datasets[0]

    # compute number of minibatches for training, validation and testing
    if mode == 'train':
        n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size
        n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] / batch_size
    else:
        n_test_batches = test_set_x.get_value(borrow=True).shape[0] / test_batch_size

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print '... building the model'

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')  # the data is presented as rasterized images
    y = T.ivector('y')  # the labels are presented as 1D vector of [int] labels

    rng = numpy.random.RandomState(1234)

    # construct the MLP class
    classifier = MLP(rng=rng, input=x, n_in=769,
                     n_hidden_1=n_hidden_1, n_hidden_2=n_hidden_2, n_out=2)

    ## load the saved parameters
    if mode == 'test':
        learned_params = unpickle('params/mlp.pkl')


    # the cost we minimize during training is the negative log likelihood of
    # the model plus the regularization terms (L1 and L2); cost is expressed
    # here symbolically
    cost = classifier.negative_log_likelihood(y) \
           + L1_reg * classifier.L1 \
           + L2_reg * classifier.L2_sqr
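    # i.e. cost = NLL + L1_reg * L1 + L2_reg * L2_sqr, where (in the usual MLP
    # formulation) L1 is the sum of absolute values and L2_sqr the sum of
    # squares of the hidden and output layer weight matrices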

    # compiling a Theano function that computes the mistakes that are made
    # by the model on a minibatch
    if mode == 'test':
        test_model = theano.function(inputs=[index],
                                     outputs=classifier.errors(y),
                                     givens={
                                     x: test_set_x[index * test_batch_size: (index + 1) * test_batch_size],
                                     y: test_set_y[index * test_batch_size: (index + 1) * test_batch_size]})
    else:
        validate_model = theano.function(inputs=[index],
                                         outputs=classifier.errors(y),
                                         givens={
                                         x: valid_set_x[index * batch_size:(index + 1) * batch_size],
                                         y: valid_set_y[index * batch_size:(index + 1) * batch_size]})

        train_error_model = theano.function(inputs=[index],
                                            outputs=classifier.errors(y),
                                            givens={
                                            x: train_set_x[index * batch_size:(index + 1) * batch_size],
                                            y: train_set_y[index * batch_size:(index + 1) * batch_size]})

        get_train_labels = theano.function([index], classifier.log_regression_layer.ex_y,
                                           givens={
                                           x: train_set_x[index * batch_size: (index + 1) * batch_size]})

    if mode == 'test':
        get_test_labels = theano.function([index], classifier.log_regression_layer.y_pred,
                                          givens={
                                          x: test_set_x[index * test_batch_size: (index + 1) * test_batch_size],
                                          classifier.hidden_layer_1.W: learned_params[0],
                                          classifier.hidden_layer_1.b: learned_params[1],
                                          classifier.log_regression_layer.W: learned_params[2],
                                          classifier.log_regression_layer.b: learned_params[3]})
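        # as in the convolutional example above, `givens` swaps the saved
        # parameter arrays in for the classifier's shared variables at
        # prediction time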

    # compute the gradient of cost with respect to theta (stored in params)
    # the resulting gradients will be stored in a list gparams
    if mode == 'train':
        gparams = []
        for param in classifier.params:
            gparam = T.grad(cost, param)
            gparams.append(gparam)

        # specify how to update the parameters of the model as a list of
        # (variable, update expression) pairs
        updates = []
        # given two lists of the same length, A = [a1, a2, a3, a4] and
        # B = [b1, b2, b3, b4], zip generates a list C of the same size, where
        # each element is a pair formed from the two lists:
        #   C = [(a1, b1), (a2, b2), (a3, b3), (a4, b4)]
        for param, gparam in zip(classifier.params, gparams):
            updates.append((param, param - learning_rate * gparam))

        # compiling a Theano function `train_model` that returns the cost, but
        # in the same time updates the parameter of the model based on the rules
        # defined in `updates`
        train_model = theano.function(inputs=[index], outputs=cost,
                                      updates=updates,
                                      givens={
                                      x: train_set_x[index * batch_size:(index + 1) * batch_size],
                                      y: train_set_y[index * batch_size:(index + 1) * batch_size]})

    #init_bias = [-1. for i in xrange(101)]
    ##init_bias = numpy.asarray(init_bias, dtype=numpy.float64)
    #init_bias[0] = 100.
    #initialize_bias = theano.function(inputs=[], outputs=classifier.logRegressionLayer.b,
    #		updates={classifier.logRegressionLayer.b: init_bias},
    #		givens={classifier.logRegressionLayer.b: init_bias})

    #bias = initialize_bias()
    #print bias


    ###############
    # TRAIN MODEL #
    ###############
    print '... training'

    # early-stopping parameters
    patience = 1000000  # look at this many minibatch iterations regardless
    patience_increase = 2  # wait this much longer when a new best is found
    improvement_threshold = 0.999  # a relative improvement of this much is
                                   # considered significant
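    # patience is counted in minibatch iterations (the `iter` counter in the
    # training loop): each time the validation error improves by more than
    # improvement_threshold, patience is extended to iter * patience_increase,
    # and training stops once iter exceeds patience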
    if mode == 'train':
        validation_frequency = min(n_train_batches, patience / 2)
        # go through this many minibatches before checking the network
        # on the validation set; in this case we check every epoch

    best_params = None
    best_validation_loss = numpy.inf
    best_iter = 0
    test_score = 0.
    start_time = time.clock()

    epoch = 0
    if mode == 'train':
        done_looping = False
    else:
        done_looping = True

    while (epoch < n_epochs) and (not done_looping):
        epoch += 1
        for minibatch_index in xrange(n_train_batches):

            minibatch_avg_cost = train_model(minibatch_index)
            # iteration number
            iter = (epoch - 1) * n_train_batches + minibatch_index

            if (iter + 1) % validation_frequency == 0:
                # compute zero-one loss on validation set
                validation_losses = [validate_model(i) for i
                                     in xrange(n_valid_batches)]
                this_validation_loss = numpy.mean(validation_losses)

                train_losses = [train_error_model(i)
                                for i in xrange(n_train_batches)]
                this_train_loss = numpy.mean(train_losses)

                # allocate the prediction buffer once and reuse it on later validation checks
                try:
                    pred_labels = pred_labels
                except NameError:
                    pred_labels = [[0 for j in xrange(batch_size)] for i in xrange(n_train_batches)]

                #params = get_params()
                #print 'W[0:10]:', params[0][0:10], 'b[0:10]:', params[1][0:10]

                if mode == 'train':
                    for i in xrange(n_train_batches):
                        pred_labels[i] = get_train_labels(i)

                    #print 'max predicted labels:',
                    #for i in xrange(len(pred_labels)):
                    #	print max(pred_labels[i]),
                    #print

                print('epoch %i, minibatch %i/%i, validation error (MAE) %f' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss))

                print('epoch %i, minibatch %i/%i, training error (MAE) %f' % \
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_train_loss))

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:
                    ## save the parameters
                    get_params = theano.function(inputs=[], outputs=[classifier.hidden_layer_1.W, classifier.hidden_layer_1.b,
                                                                     classifier.log_regression_layer.W,
                                                                     classifier.log_regression_layer.b])
                    save_parameters(get_params(), 'mlp')

                    #improve patience if loss improvement is good enough
                    if this_validation_loss < best_validation_loss * \
                            improvement_threshold:
                        patience = max(patience, iter * patience_increase)

                    best_validation_loss = this_validation_loss
                    best_iter = iter

            if patience <= iter:
                done_looping = True
                break

    if mode == 'train':
        for i in xrange(n_train_batches):
            pred_labels[i] = get_train_labels(i)

        print 'max predicted labels:',
        for i in xrange(len(pred_labels)):
            print max(pred_labels[i]),
        print

    if mode == 'test':
        print 'predicting the labels...'
        pred_labels = [[0 for j in xrange(batch_size)] for i in xrange(n_test_batches)]
        for i in xrange(n_test_batches):
            print str(i + 1), '/', str(n_test_batches)
            pred_labels[i] = get_test_labels(i)

        writer = csv.writer(file('result/mlp.csv', 'w'))
        writer.writerow(['id', 'loss'])
        row = 105472  # first ID of test data

        print 'output test labels...'
        for i in xrange(len(pred_labels)):
            print str(i + 1), '/', str(len(pred_labels))
            for j in xrange(len(pred_labels[i])):
                writer.writerow([row, pred_labels[i][j]])
                row += 1

    end_time = time.clock()
    if mode == 'train':
        print(('Optimization complete. Best validation score of %f '
               'obtained at iteration %i') %
              (best_validation_loss, best_iter + 1))
    print >> sys.stderr, ('The code for file ' +
                          os.path.split(__file__)[1] +
                          ' ran for %.2fm' % ((end_time - start_time) / 60.))