def test_params(learning_rate, n_epochs, window_size, datasets, output_folder, base_folder):
    """Demonstrate stochastic gradient descent optimization of a log-linear model

    This is demonstrated on ICHI.

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
                          gradient)

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer
    """
    # split the datasets
    (train_set_x, train_set_y) = datasets[0]
    (valid_set_x, valid_set_y) = datasets[1]
    (test_set_x, test_set_y) = datasets[2]

    # compute number of examples given in datasets
    n_train_samples = train_set_x.get_value(borrow=True).shape[0] - window_size + 1
    n_valid_samples = valid_set_x.get_value(borrow=True).shape[0] - window_size + 1
    n_test_samples = test_set_x.get_value(borrow=True).shape[0] - window_size + 1

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print '... building the model'

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch

    # generate symbolic variables for input (x and y represent a minibatch)
    x = T.matrix('x')   # data, presented as a window with x, y, z for each sample
    y = T.iscalar('y')  # labels, presented as int label

    # construct the logistic regression class
    # Each ICHI input has size window_size*3
    classifier = LogisticRegression(input=x, n_in=window_size * 3, n_out=7)
    classifier.print_log_reg_types()

    # the cost we minimize during training is the negative log likelihood of
    # the model in symbolic format
    cost = classifier.negative_log_likelihood(y)
    predict = classifier.predict()

    # compiling a Theano function that computes the mistakes that are made by
    # the model on a row
    test_model = theano.function(
        inputs=[index],
        outputs=[classifier.errors(y), predict, y],
        givens={
            x: test_set_x[index: index + window_size],
            y: test_set_y[index + window_size - 1]
        }
    )

    validate_model = theano.function(
        inputs=[index],
        outputs=[classifier.errors(y), predict, y],
        givens={
            x: valid_set_x[index: index + window_size],
            y: valid_set_y[index + window_size - 1]
        }
    )

    # compute the gradient of cost with respect to theta = (W,b)
    g_W = T.grad(cost=cost, wrt=classifier.W)
    g_b = T.grad(cost=cost, wrt=classifier.b)

    # start-snippet-3
    # specify how to update the parameters of the model as a list of
    # (variable, update expression) pairs.
    updates = [(classifier.W, classifier.W - learning_rate * g_W),
               (classifier.b, classifier.b - learning_rate * g_b)]

    # compiling a Theano function `train_model` that returns the cost, and at
    # the same time updates the parameters of the model based on the rules
    # defined in `updates`
    train_model = theano.function(
        inputs=[index],
        outputs=[cost, classifier.errors(y), predict, y],
        updates=updates,
        givens={
            x: train_set_x[index: index + window_size],
            y: train_set_y[index + window_size - 1]
        }
    )
    # end-snippet-3

    ###############
    # TRAIN MODEL #
    ###############
    print '... training the model'
    # early-stopping parameters
    patience = n_train_samples * 2  # look at this many examples regardless
    patience_increase = 25          # wait this much longer when a new best is
                                    # found
    improvement_threshold = 0.995   # a relative improvement of this much is
                                    # considered significant
    validation_frequency = patience / 4

    best_validation_loss = numpy.inf
    start_time = time.clock()

    done_looping = False
    epoch = 0
    iter = 0
    train_cost_array = []
    train_error_array = []
    valid_error_array = []
    test_error_array = []
    cur_train_cost = []
    cur_train_error = []
    train_confusion_matrix = numpy.zeros((7, 7))
    valid_confusion_matrix = numpy.zeros((7, 7))
    print(n_train_samples, 'train_samples')

    while (epoch < n_epochs) and (not done_looping):
        train_confusion_matrix = zero_in_array(train_confusion_matrix)
        for index in xrange(n_train_samples):
            sample_cost, sample_error, cur_pred, cur_actual = train_model(index)
            # iteration number
            iter = epoch * n_train_samples + index

            cur_train_cost.append(sample_cost)
            cur_train_error.append(sample_error)
            train_confusion_matrix[cur_actual][cur_pred] += 1

            if (iter + 1) % validation_frequency == 0:
                valid_confusion_matrix = zero_in_array(valid_confusion_matrix)
                # compute zero-one loss on validation set
                validation_losses = []
                for i in xrange(n_valid_samples):
                    validation_loss, cur_pred, cur_actual = validate_model(i)
                    validation_losses.append(validation_loss)
                    valid_confusion_matrix[cur_actual][cur_pred] += 1

                this_validation_loss = float(numpy.mean(validation_losses)) * 100
                valid_error_array.append([])
                valid_error_array[-1].append(float(iter) / n_train_samples)
                valid_error_array[-1].append(this_validation_loss)

                print(
                    'epoch %i, iter %i/%i, validation error %f %%' %
                    (
                        epoch,
                        index + 1,
                        n_train_samples,
                        this_validation_loss
                    )
                )

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:
                    # improve patience if loss improvement is good enough
                    if this_validation_loss < best_validation_loss * \
                            improvement_threshold:
                        patience = max(patience, iter * patience_increase)

                    best_validation_loss = this_validation_loss
                    # test it on the test set
                    test_result = [test_model(i) for i in xrange(n_test_samples)]
                    test_result = numpy.asarray(test_result)
                    test_losses = test_result[:, 0]
                    test_score = float(numpy.mean(test_losses)) * 100
                    test_error_array.append([])
                    test_error_array[-1].append(float(iter) / n_train_samples)
                    test_error_array[-1].append(test_score)

                    print(
                        (
                            ' epoch %i, iter %i/%i, test error of'
                            ' best model %f %%'
                        ) %
                        (
                            epoch,
                            index + 1,
                            n_train_samples,
                            test_score
                        )
                    )

            if patience * 4 <= iter:
                done_looping = True
                print('Done looping')
                break

        train_cost_array.append([])
        train_cost_array[-1].append(float(iter) / n_train_samples)
        train_cost_array[-1].append(float(numpy.mean(cur_train_cost)))
        cur_train_cost = []

        train_error_array.append([])
        train_error_array[-1].append(float(iter) / n_train_samples)
        train_error_array[-1].append(float(numpy.mean(cur_train_error) * 100))
        cur_train_error = []

        epoch = epoch + 1
        gc.collect()

    test_confusion_matrix = zero_in_array(numpy.zeros((7, 7)))
    test_losses = []
    for i in xrange(n_test_samples):
        test_loss, cur_pred, cur_actual = test_model(i)
        test_losses.append(test_loss)
        test_confusion_matrix[cur_actual][cur_pred] += 1

    test_score = numpy.mean(test_losses) * 100
    test_error_array.append([])
    test_error_array[-1].append(float(iter) / n_train_samples)
    test_error_array[-1].append(test_score)

    visualize_logistic(train_cost=train_cost_array,
                       train_error=train_error_array,
                       valid_error=valid_error_array,
                       test_error=test_error_array,
                       window_size=window_size,
                       learning_rate=learning_rate,
                       output_folder=output_folder,
                       base_folder=base_folder)

    end_time = time.clock()
    print(
        (
            'Optimization complete with best validation score of %f %%, '
            'with test performance %f %%'
        )
        % (best_validation_loss, test_score)
    )
    print 'The code ran for %d epochs, with %f epochs/sec' % (
        epoch, 1. * epoch / (end_time - start_time))
    print >> sys.stderr, ('The code for file ' +
                          os.path.split(__file__)[1] +
                          ' ran for %.1fs' % ((end_time - start_time)))
    print(train_confusion_matrix, 'train_confusion_matrix')
    print(valid_confusion_matrix, 'valid_confusion_matrix')
    print(test_confusion_matrix, 'test_confusion_matrix')
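

# --- Usage sketch (illustrative addition, not part of the original code) ---
# A minimal example of how the SGD routine above might be driven. It builds
# tiny random shared datasets shaped the way test_params slices them: a float
# matrix with 3 channels per time step and one integer label (0..6) per row.
# All sizes, hyper-parameter values and folder names below are placeholders,
# and running it still requires the module's own helpers (LogisticRegression,
# visualize_logistic, zero_in_array) to be available. Note that the call
# assumes the SGD variant's signature is the one bound to the name
# `test_params` in scope; a later definition with the same name would shadow it.
def _demo_sgd():
    rng = numpy.random.RandomState(0)

    def make_shared_set(n_rows):
        # a (n_rows, 3) float matrix and one int label per row, as Theano
        # shared variables, matching how the givens above index the data
        data = numpy.asarray(rng.rand(n_rows, 3), dtype=theano.config.floatX)
        labels = rng.randint(0, 7, size=n_rows).astype('int32')
        return (theano.shared(data, borrow=True),
                theano.shared(labels, borrow=True))

    datasets = [make_shared_set(200), make_shared_set(80), make_shared_set(80)]
    test_params(learning_rate=0.13,     # placeholder value
                n_epochs=2,
                window_size=30,
                datasets=datasets,
                output_folder='plots',  # placeholder path
                base_folder='.')        # placeholder path
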
def test_params(datasets, output_folder, base_folder, window_size, n_epochs=50):
    """Demonstrate conjugate gradient optimization of a log-linear model

    This is demonstrated on ICHI.

    :type n_epochs: int
    :param n_epochs: number of epochs to run the optimizer
    """
    #############
    # LOAD DATA #
    #############
    # split the datasets
    (train_set_x, train_set_y) = datasets[0]
    (valid_set_x, valid_set_y) = datasets[1]
    (test_set_x, test_set_y) = datasets[2]

    # compute number of examples given in datasets
    n_train_samples = train_set_x.get_value(borrow=True).shape[0] - window_size + 1
    n_valid_samples = valid_set_x.get_value(borrow=True).shape[0] - window_size + 1
    n_test_samples = test_set_x.get_value(borrow=True).shape[0] - window_size + 1

    n_in = window_size * 3  # number of input units
    n_out = 7               # number of output units

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print '... building the model'

    # allocate symbolic variables for the data
    index = T.lscalar()

    # generate symbolic variables for input
    x = T.matrix('x')   # data, presented as a window with x, y, z for each sample
    y = T.iscalar('y')  # labels, presented as int label

    # construct the logistic regression class
    classifier = LogisticRegression(input=x, n_in=n_in, n_out=n_out)

    # the cost we minimize during training is the negative log likelihood of
    # the model in symbolic format
    cost = classifier.negative_log_likelihood(y)

    # compile a theano function that computes the mistakes that are made by
    # the model on a minibatch
    test_model = theano.function(
        [index],
        classifier.errors(y),
        givens={
            x: test_set_x[index: index + window_size],
            y: test_set_y[index + window_size - 1]
        },
        name="test"
    )

    validate_model = theano.function(
        [index],
        classifier.errors(y),
        givens={
            x: valid_set_x[index: index + window_size],
            y: valid_set_y[index + window_size - 1]
        },
        name="validate"
    )

    # compile a theano function that returns the cost
    conj_cost = theano.function(
        inputs=[index],
        outputs=[cost, classifier.errors(y), classifier.predict(), y],
        givens={
            x: train_set_x[index: index + window_size],
            y: train_set_y[index + window_size - 1]
        },
        name="conj_cost"
    )

    # compile a theano function that returns the gradient with respect to theta
    conj_grad = theano.function(
        [index],
        T.grad(cost, classifier.theta),
        givens={
            x: train_set_x[index: index + window_size],
            y: train_set_y[index + window_size - 1]
        },
        name="conj_grad"
    )

    classifier.train_cost_array = []
    classifier.train_error_array = []
    train_confusion_matrix = numpy.zeros((7, 7))
    classifier.epoch = 0

    # creates a function that computes the average cost on the training set
    def train_fn(theta_value):
        classifier.theta.set_value(theta_value, borrow=True)
        cur_train_cost = []
        cur_train_error = []
        for i in xrange(n_train_samples):
            sample_cost, sample_error, cur_pred, cur_actual = conj_cost(i)
            cur_train_cost.append(sample_cost)
            cur_train_error.append(sample_error)
            train_confusion_matrix[cur_actual][cur_pred] += 1

        this_train_loss = float(numpy.mean(cur_train_cost))

        classifier.train_cost_array.append([])
        classifier.train_cost_array[-1].append(classifier.epoch)
        classifier.train_cost_array[-1].append(this_train_loss)

        classifier.train_error_array.append([])
        classifier.train_error_array[-1].append(classifier.epoch)
        classifier.train_error_array[-1].append(float(numpy.mean(cur_train_error) * 100))

        classifier.epoch += 1

        return this_train_loss

    # creates a function that computes the average gradient of cost with
    # respect to theta
    def train_fn_grad(theta_value):
        classifier.theta.set_value(theta_value, borrow=True)
        grad = conj_grad(0)
        for i in xrange(1, n_train_samples):
            grad += conj_grad(i)
        return grad / n_train_samples

    classifier.validation_scores = [numpy.inf, 0]
    classifier.valid_error_array = []
    classifier.test_error_array = []

    # creates the validation function
    def callback(theta_value):
        classifier.theta.set_value(theta_value, borrow=True)
        # compute the validation loss
        validation_losses = [validate_model(i)
                             for i in xrange(n_valid_samples)]
        this_validation_loss = float(numpy.mean(validation_losses)) * 100.
        print('validation error %f %%' % (this_validation_loss))
        classifier.valid_error_array.append([])
        classifier.valid_error_array[-1].append(classifier.epoch)
        classifier.valid_error_array[-1].append(this_validation_loss)

        # check if it is better than the best validation score obtained so far
        if this_validation_loss < classifier.validation_scores[0]:
            # if so, replace the old one, and compute the score on the
            # testing dataset
            classifier.validation_scores[0] = this_validation_loss
            test_losses = [test_model(i)
                           for i in xrange(n_test_samples)]
            classifier.validation_scores[1] = float(numpy.mean(test_losses))
            classifier.test_error_array.append([])
            classifier.test_error_array[-1].append(classifier.epoch)
            classifier.test_error_array[-1].append(classifier.validation_scores[1])

    ###############
    # TRAIN MODEL #
    ###############

    # using scipy conjugate gradient optimizer
    import scipy.optimize
    print("Optimizing using scipy.optimize.fmin_cg...")
    start_time = timeit.default_timer()
    best_theta = scipy.optimize.fmin_cg(
        f=train_fn,
        x0=numpy.zeros((n_in + 1) * n_out, dtype=x.dtype),
        fprime=train_fn_grad,
        callback=callback,
        disp=0,
        maxiter=n_epochs
    )
    visualize_logistic(train_cost=classifier.train_cost_array,
                       train_error=classifier.train_error_array,
                       valid_error=classifier.valid_error_array,
                       test_error=classifier.test_error_array,
                       window_size=window_size,
                       learning_rate=0,
                       output_folder=output_folder,
                       base_folder=base_folder)
    end_time = timeit.default_timer()
    print(
        (
            'Optimization complete with best validation score of %f %%, with '
            'test performance %f %%'
        )
        # validation_scores[0] is already stored as a percentage;
        # validation_scores[1] is stored as a fraction, so only it is scaled here
        % (classifier.validation_scores[0],
           classifier.validation_scores[1] * 100.)
    )

    print >> sys.stderr, ('The code for file ' +
                          os.path.split(__file__)[1] +
                          ' ran for %.1fs' % ((end_time - start_time)))
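

# --- Side note (illustrative sketch, not part of the original training code) ---
# The train_fn / train_fn_grad / callback trio above follows the calling
# protocol of scipy.optimize.fmin_cg: the optimizer owns one flat parameter
# vector and repeatedly calls `f` for the loss, `fprime` for the gradient and
# `callback` once per iteration. The toy quadratic below only illustrates that
# protocol; its names and values are placeholders unrelated to the ICHI model.
def _demo_fmin_cg():
    import scipy.optimize

    def f(theta):
        # loss of a simple quadratic bowl centred at 3
        return float(numpy.sum((theta - 3.0) ** 2))

    def fprime(theta):
        # analytic gradient of the quadratic
        return 2.0 * (theta - 3.0)

    def callback(theta):
        # called once per conjugate gradient iteration
        print('current loss %f' % f(theta))

    best_theta = scipy.optimize.fmin_cg(f=f,
                                        x0=numpy.zeros(4),
                                        fprime=fprime,
                                        callback=callback,
                                        disp=0,
                                        maxiter=10)
    return best_theta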