def evaluate_lenet5(learning_rate=0.1, n_epochs=200, dataset='cifar-10-batches-py',
                    nkerns=[20, 50], batch_size=500, mode='train', amount='full'):
    """ Demonstrates LeNet-5 on the CIFAR-10 dataset

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
                          gradient)

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type dataset: string
    :param dataset: path to the dataset used for training/testing
                    (CIFAR-10 here)

    :type nkerns: list of ints
    :param nkerns: number of kernels on each layer
    """

    rng = numpy.random.RandomState(23455)

    datasets = load_data(dataset, mode=mode, amount=amount)

    if mode == 'train':
        train_set_x, train_set_y = datasets[0]
        valid_set_x, valid_set_y = datasets[1]
    else:
        test_set_x, test_set_y = datasets[0]

    # compute number of minibatches for training, validation and testing
    if mode == 'train':
        n_train_batches = train_set_x.get_value(borrow=True).shape[0]
        n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
        n_train_batches /= batch_size
        n_valid_batches /= batch_size
    else:
        n_test_batches = test_set_x.get_value(borrow=True).shape[0]
        n_test_batches /= batch_size

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')    # the data is presented as rasterized images
    y = T.ivector('y')   # the labels are presented as a 1D vector of
                         # [int] labels

    ishape = (3, 32, 32)  # this is the size of CIFAR-10 images (RGB)

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print '... building the model'

    # Reshape matrix of rasterized images of shape (batch_size, 3*32*32)
    # to a 4D tensor, compatible with our LeNetConvPoolLayer
    layer0_input = x.reshape((batch_size, 3, 32, 32))  # the channel dimension is 3 here because the images are color

    # Construct the first convolutional pooling layer:
    # filtering reduces the image size to (32-5+1, 32-5+1) = (28, 28)
    # maxpooling reduces this further to (28/2, 28/2) = (14, 14)
    # 4D output tensor is thus of shape (batch_size, nkerns[0], 14, 14)
    layer0 = LeNetConvPoolLayer(rng, input=layer0_input,
            image_shape=(batch_size, 3, 32, 32),
            filter_shape=(nkerns[0], 3, 5, 5), poolsize=(2, 2))

    # Construct the second convolutional pooling layer
    # filtering reduces the image size to (14-5+1, 14-5+1) = (10, 10)
    # maxpooling reduces this further to (10/2, 10/2) = (5, 5)
    # 4D output tensor is thus of shape (batch_size, nkerns[1], 5, 5)
    layer1 = LeNetConvPoolLayer(rng, input=layer0.output,
            image_shape=(batch_size, nkerns[0], 14, 14),
            filter_shape=(nkerns[1], nkerns[0], 5, 5), poolsize=(2, 2))

    # the HiddenLayer being fully-connected, it operates on 2D matrices of
    # shape (batch_size, num_pixels) (i.e. a matrix of rasterized images).
    # This will generate a matrix of shape
    # (batch_size, nkerns[1] * 5 * 5) = (500, 1250)
    layer2_input = layer1.output.flatten(2)

    # construct a fully-connected sigmoidal layer
    layer2 = HiddenLayer(rng, input=layer2_input, n_in=nkerns[1] * 5 * 5,
                         n_out=500, activation=T.tanh)

    # classify the values of the fully-connected sigmoidal layer
    layer3 = LogisticRegression(input=layer2.output, n_in=500, n_out=10)

    ## load the saved parameters
    if mode == 'test':
        learned_params = unpickle('params/convolutional_mlp_color.pkl')

    # the cost we minimize during training is the NLL of the model
    cost = layer3.negative_log_likelihood(y)

    # create a function to compute the mistakes that are made by the model
    if mode == 'test':
        test_model = theano.function([index], layer3.errors(y),
                givens={
                    x: test_set_x[index * batch_size: (index + 1) * batch_size],
                    y: test_set_y[index * batch_size: (index + 1) * batch_size]})
    else:
        validate_model = theano.function([index], layer3.errors(y),
                givens={
                    x: valid_set_x[index * batch_size: (index + 1) * batch_size],
                    y: valid_set_y[index * batch_size: (index + 1) * batch_size]})

    # create a function to get the labels predicted by the model
    if mode == 'test':
        get_test_labels = theano.function([index], layer3.y_pred,
                givens={
                    x: test_set_x[index * batch_size: (index + 1) * batch_size],
                    layer0.W: learned_params[0],
                    layer0.b: learned_params[1],
                    layer1.W: learned_params[2],
                    layer1.b: learned_params[3],
                    layer2.W: learned_params[4],
                    layer2.b: learned_params[5],
                    layer3.W: learned_params[6],
                    layer3.b: learned_params[7]})

    if mode == 'train':
        # create a list of all model parameters to be fit by gradient descent
        params = layer3.params + layer2.params + layer1.params + layer0.params

        # create a list of gradients for all model parameters
        grads = T.grad(cost, params)

    # train_model is a function that updates the model parameters by SGD.
    # Since this model has many parameters, it would be tedious to manually
    # create an update rule for each model parameter. We thus create the
    # updates list by automatically looping over all (params[i], grads[i])
    # pairs.
    if mode == 'train':
        updates = []
        for param_i, grad_i in zip(params, grads):
            updates.append((param_i, param_i - learning_rate * grad_i))

        train_model = theano.function([index], cost, updates=updates,
                givens={
                    x: train_set_x[index * batch_size: (index + 1) * batch_size],
                    y: train_set_y[index * batch_size: (index + 1) * batch_size]})

    ###############
    # TRAIN MODEL #
    ###############
    print '... training'

    # early-stopping parameters
    if mode == 'train':
        patience = 10000  # look at this many examples regardless
        patience_increase = 2  # wait this much longer when a new best is
                               # found
        improvement_threshold = 0.995  # a relative improvement of this much
                                       # is considered significant
        validation_frequency = min(n_train_batches, patience / 2)
                                       # go through this many minibatches
                                       # before checking the network on the
                                       # validation set; in this case we
                                       # check every epoch

    start_time = time.clock()

    if mode == 'train':
        best_params = None
        best_validation_loss = numpy.inf
        best_iter = 0
        test_score = 0.
        done_looping = False
    else:
        done_looping = True

    epoch = 0
    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in xrange(n_train_batches):

            iter = (epoch - 1) * n_train_batches + minibatch_index

            if iter % 100 == 0:
                print 'training @ iter = ', iter
            cost_ij = train_model(minibatch_index)

            ## save the parameters
            if mode == 'train':
                get_params = theano.function(inputs=[],
                        outputs=[layer0.W, layer0.b, layer1.W, layer1.b,
                                 layer2.W, layer2.b, layer3.W, layer3.b])
                save_parameters(get_params(), 'convolutional_mlp_color')

            if (iter + 1) % validation_frequency == 0:

                # compute zero-one loss on validation set
                validation_losses = [validate_model(i) for i
                                     in xrange(n_valid_batches)]
                this_validation_loss = numpy.mean(validation_losses)
                print('epoch %i, minibatch %i/%i, validation error %f %%' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss * 100.))

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:

                    # improve patience if loss improvement is good enough
                    if this_validation_loss < best_validation_loss * \
                       improvement_threshold:
                        patience = max(patience, iter * patience_increase)

                    # save best validation score and iteration number
                    best_validation_loss = this_validation_loss
                    best_iter = iter

            '''
            if patience <= iter:
                done_looping = True
                break
            '''

    if mode == 'test':
        print 'predicting the labels...'
        pred_labels = [[0 for j in xrange(batch_size)]
                       for i in xrange(n_test_batches)]
        for i in xrange(n_test_batches):
            print str(i + 1), '/', str(n_test_batches)
            pred_labels[i] = get_test_labels(i)

        writer = csv.writer(file('result/convolutional_mlp_color.csv', 'w'))
        row = 1
        print 'output test labels...'
        for i in xrange(len(pred_labels)):  # TBF: hard code
            print str(i + 1), '/', str(len(pred_labels))
            for j in xrange(len(pred_labels[i])):
                writer.writerow([row, pred_labels[i][j]])
                row += 1

    end_time = time.clock()
    if mode == 'train':
        print('Optimization complete.')
        print('Best validation score of %f %% obtained at iteration %i, '
              'with test performance %f %%' %
              (best_validation_loss * 100., best_iter + 1, test_score * 100.))
    print >> sys.stderr, ('The code for file ' +
                          os.path.split(__file__)[1] +
                          ' ran for %.2fm' % ((end_time - start_time) / 60.))
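
# NOTE: the two entry points in this file rely on helpers defined elsewhere in
# the repository (load_data, save_parameters, unpickle, and the layer
# classes). Purely as a hedged reference sketch, the functions below show
# roughly what the two pickling helpers are assumed to do: save_parameters()
# takes the list of parameter values returned by get_params() and writes it
# under params/, and unpickle() reads such a file back. The *_sketch names are
# hypothetical and are not called by the code in this file.
def save_parameters_sketch(params, name):
    # dump the list of parameter arrays to params/<name>.pkl
    import cPickle
    with open(os.path.join('params', name + '.pkl'), 'wb') as f:
        cPickle.dump(params, f, protocol=cPickle.HIGHEST_PROTOCOL)


def unpickle_sketch(path):
    # load a parameter list written by save_parameters_sketch()
    import cPickle
    with open(path, 'rb') as f:
        return cPickle.load(f)
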
def test_mlp(learning_rate=0.01, L1_reg=0.00, L2_reg=0.0001, n_epochs=100000,
             dataset='cifar-10-batches-py', batch_size=32, test_batch_size=32,
             n_hidden_1=500, n_hidden_2=500, mode='train', amount='full',
             valid_num=10000):
    # batch_size: 32

    datasets = load_data(dataset, mode, amount, valid_num)

    if mode == 'train':
        train_set_x, train_set_y = datasets[0]
        valid_set_x, valid_set_y = datasets[1]
    else:
        test_set_x, test_set_y = datasets[0]

    # compute number of minibatches for training, validation and testing
    if mode == 'train':
        n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size
        n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] / batch_size
    else:
        n_test_batches = test_set_x.get_value(borrow=True).shape[0] / test_batch_size

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print '... building the model'

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')    # the data is presented as rasterized images
    y = T.ivector('y')   # the labels are presented as a 1D vector of
                         # [int] labels

    rng = numpy.random.RandomState(1234)

    # construct the MLP class
    classifier = MLP(rng=rng, input=x, n_in=769,
                     n_hidden_1=n_hidden_1, n_hidden_2=n_hidden_2, n_out=2)

    ## load the saved parameters
    if mode == 'test':
        learned_params = unpickle('params/mlp.pkl')

    # the cost we minimize during training is the negative log likelihood of
    # the model plus the regularization terms (L1 and L2); cost is expressed
    # here symbolically
    cost = classifier.negative_log_likelihood(y) \
         + L1_reg * classifier.L1 \
         + L2_reg * classifier.L2_sqr

    # compiling a Theano function that computes the mistakes that are made
    # by the model on a minibatch
    if mode == 'test':
        test_model = theano.function(inputs=[index],
                outputs=classifier.errors(y),
                givens={
                    x: test_set_x[index * test_batch_size: (index + 1) * test_batch_size],
                    y: test_set_y[index * test_batch_size: (index + 1) * test_batch_size]})
    else:
        validate_model = theano.function(inputs=[index],
                outputs=classifier.errors(y),
                givens={
                    x: valid_set_x[index * batch_size: (index + 1) * batch_size],
                    y: valid_set_y[index * batch_size: (index + 1) * batch_size]})

        train_error_model = theano.function(inputs=[index],
                outputs=classifier.errors(y),
                givens={
                    x: train_set_x[index * batch_size: (index + 1) * batch_size],
                    y: train_set_y[index * batch_size: (index + 1) * batch_size]})

        get_train_labels = theano.function([index],
                classifier.log_regression_layer.ex_y,
                givens={
                    x: train_set_x[index * batch_size: (index + 1) * batch_size]})

    if mode == 'test':
        get_test_labels = theano.function([index],
                classifier.log_regression_layer.y_pred,
                givens={
                    x: test_set_x[index * test_batch_size: (index + 1) * test_batch_size],
                    classifier.hidden_layer_1.W: learned_params[0],
                    classifier.hidden_layer_1.b: learned_params[1],
                    classifier.log_regression_layer.W: learned_params[2],
                    classifier.log_regression_layer.b: learned_params[3]})

    # compute the gradient of cost with respect to theta (stored in params);
    # the resulting gradients will be stored in a list gparams
    if mode == 'train':
        gparams = []
        for param in classifier.params:
            gparam = T.grad(cost, param)
            gparams.append(gparam)

        # specify how to update the parameters of the model as a list of
        # (variable, update expression) pairs
        updates = []
        # given two lists of the same length, A = [a1, a2, a3, a4] and
        # B = [b1, b2, b3, b4], zip generates a list C of the same size,
        # where each element is a pair formed from the two lists:
        # C = [(a1, b1), (a2, b2), (a3, b3), (a4, b4)]
        for param, gparam in zip(classifier.params, gparams):
            updates.append((param, param - learning_rate * gparam))

        # compiling a Theano function `train_model` that returns the cost and
        # at the same time updates the parameters of the model based on the
        # rules defined in `updates`
        train_model = theano.function(inputs=[index], outputs=cost,
                updates=updates,
                givens={
                    x: train_set_x[index * batch_size: (index + 1) * batch_size],
                    y: train_set_y[index * batch_size: (index + 1) * batch_size]})

    #init_bias = [-1. for i in xrange(101)]
    ##init_bias = numpy.asarray(init_bias, dtype=numpy.float64)
    #init_bias[0] = 100.
    #initialize_bias = theano.function(inputs=[], outputs=classifier.logRegressionLayer.b,
    #        updates={classifier.logRegressionLayer.b: init_bias},
    #        givens={classifier.logRegressionLayer.b: init_bias})
    #bias = initialize_bias()
    #print bias

    ###############
    # TRAIN MODEL #
    ###############
    print '... training'

    # early-stopping parameters
    patience = 1000000  # look at this many examples regardless
    patience_increase = 2  # wait this much longer when a new best is
                           # found
    improvement_threshold = 0.999  # a relative improvement of this much is
                                   # considered significant
    if mode == 'train':
        validation_frequency = min(n_train_batches, patience / 2)
                                       # go through this many minibatches
                                       # before checking the network on the
                                       # validation set; in this case we
                                       # check every epoch

        best_params = None
        best_validation_loss = numpy.inf
        best_iter = 0
        test_score = 0.

    start_time = time.clock()

    epoch = 0
    if mode == 'train':
        done_looping = False
    else:
        done_looping = True

    while (epoch < n_epochs) and (not done_looping):
        epoch += 1
        for minibatch_index in xrange(n_train_batches):

            minibatch_avg_cost = train_model(minibatch_index)
            # iteration number
            iter = (epoch - 1) * n_train_batches + minibatch_index

            if (iter + 1) % validation_frequency == 0:
                # compute zero-one loss on validation set
                validation_losses = [validate_model(i)
                                     for i in xrange(n_valid_batches)]
                this_validation_loss = numpy.mean(validation_losses)

                train_losses = [train_error_model(i)
                                for i in xrange(n_train_batches)]
                this_train_loss = numpy.mean(train_losses)

                try:
                    pred_labels = pred_labels
                except NameError:
                    pred_labels = [[0 for j in xrange(batch_size)]
                                   for i in xrange(n_train_batches)]

                #params = get_params()
                #print 'W[0:10]:', params[0][0:10], 'b[0:10]:', params[1][0:10]
                if mode == 'train':
                    for i in xrange(n_train_batches):
                        pred_labels[i] = get_train_labels(i)
                    #print 'max predicted labels:',
                    #for i in xrange(len(pred_labels)):
                    #    print max(pred_labels[i]),
                    #print

                print('epoch %i, minibatch %i/%i, validation error (MAE) %f' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss))
                print('epoch %i, minibatch %i/%i, training error (MAE) %f' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_train_loss))

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:
                    ## save the parameters
                    get_params = theano.function(inputs=[],
                            outputs=[classifier.hidden_layer_1.W,
                                     classifier.hidden_layer_1.b,
                                     classifier.log_regression_layer.W,
                                     classifier.log_regression_layer.b])
                    save_parameters(get_params(), 'mlp')

                    # improve patience if loss improvement is good enough
                    if this_validation_loss < best_validation_loss * \
                       improvement_threshold:
                        patience = max(patience, iter * patience_increase)

                    best_validation_loss = this_validation_loss
                    best_iter = iter

            if patience <= iter:
                done_looping = True
                break

    if mode == 'train':
        for i in xrange(n_train_batches):
            pred_labels[i] = get_train_labels(i)
        print 'max predicted labels:',
        for i in xrange(len(pred_labels)):
            print max(pred_labels[i]),
        print

    if mode == 'test':
        print 'predicting the labels...'
        pred_labels = [[0 for j in xrange(batch_size)]
                       for i in xrange(n_test_batches)]
        for i in xrange(n_test_batches):
            print str(i + 1), '/', str(n_test_batches)
            pred_labels[i] = get_test_labels(i)

        writer = csv.writer(file('result/mlp.csv', 'w'))
        writer.writerow(['id', 'loss'])
        row = 105472  # first ID of the test data
        print 'output test labels...'
        for i in xrange(len(pred_labels)):
            print str(i + 1), '/', str(len(pred_labels))
            for j in xrange(len(pred_labels[i])):
                writer.writerow([row, pred_labels[i][j]])
                row += 1

    end_time = time.clock()
    if mode == 'train':
        print(('Optimization complete. Best validation score of %f '
               'obtained at iteration %i') %
              (best_validation_loss, best_iter + 1))
    print >> sys.stderr, ('The code for file ' +
                          os.path.split(__file__)[1] +
                          ' ran for %.2fm' % ((end_time - start_time) / 60.))
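
# A minimal usage sketch (an assumption, not part of the original script): it
# supposes this module is executed directly and that load_data can find the
# 'cifar-10-batches-py' directory. The argument values are illustrative only;
# test_mlp can be driven the same way once its dataset is in place.
if __name__ == '__main__':
    # train the convolutional network and pickle its parameters ...
    evaluate_lenet5(n_epochs=200, mode='train', amount='full')
    # ... then reload them and write result/convolutional_mlp_color.csv
    evaluate_lenet5(mode='test', amount='full')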