def mlp_train(train_file_name, test_file_name, model_file_name, n_dim, n_label, n_hidden,
              learning_rate, L1_reg=0.00, L2_reg=0.0001, batch_size=20):
    logger.info('loading data')
    train_set_x, train_set_y, test_set_x, test_set_y = load_data(train_file_name, test_file_name, n_dim)
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] / batch_size

    logger.info('building the model')
    idx = T.lscalar()   # minibatch index
    x = T.matrix('x')   # input features
    y = T.ivector('y')  # integer labels
    rng = np.random.RandomState(1234)

    # Resume from a previously saved model if one exists.
    hidden_W = None
    hidden_b = None
    sgd_W = None
    sgd_b = None
    if os.path.isfile(model_file_name):
        logger.debug('load existing model')
        hidden_W, hidden_b, sgd_W, sgd_b = np.load(model_file_name)

    classifier = MLP(rng=rng, input=x, n_in=n_dim, n_hidden=n_hidden, n_out=n_label,
                     hidden_W=hidden_W, hidden_b=hidden_b, sgd_W=sgd_W, sgd_b=sgd_b)

    # Negative log-likelihood plus L1/L2 regularization terms.
    cost = classifier.negative_log_likelihood(y) + L1_reg * classifier.L1 + L2_reg * classifier.L2_sqr

    test_model = theano.function(inputs=[idx],
                                 outputs=classifier.errors(y),
                                 givens={
                                     x: test_set_x[idx * batch_size:(idx + 1) * batch_size],
                                     y: test_set_y[idx * batch_size:(idx + 1) * batch_size]
                                 })

    # One gradient per parameter, then plain SGD updates.
    gparams = []
    for param in classifier.params:
        gparam = T.grad(cost, param)
        gparams.append(gparam)
    updates = []
    for param, gparam in zip(classifier.params, gparams):
        updates.append((param, param - learning_rate * gparam))

    train_model = theano.function(inputs=[idx],
                                  outputs=cost,
                                  updates=updates,
                                  givens={
                                      x: train_set_x[idx * batch_size:(idx + 1) * batch_size],
                                      y: train_set_y[idx * batch_size:(idx + 1) * batch_size]
                                  })

    logger.info('training the model')
    best_test_score = np.inf
    epoch = 0
    start_time = time.clock()
    while True:
        epoch = epoch + 1
        for minibatch_idx in xrange(n_train_batches):
            train_model(minibatch_idx)
            iter = (epoch - 1) * n_train_batches + minibatch_idx
            # Evaluate on the test set once per epoch.
            if (iter + 1) % n_train_batches == 0:
                test_loss = [test_model(i) for i in xrange(n_test_batches)]
                test_score = np.mean(test_loss)
                logger.info(('epoch %i, ran for %.5f hr, minibatch %i/%i, test error %f %%') %
                            (epoch, (time.clock() - start_time) / 3600.0,
                             minibatch_idx + 1, n_train_batches, test_score * 100.))
                # Save the parameters whenever the test error improves.
                if test_score < best_test_score:
                    np.save(model_file_name,
                            (classifier.hiddenLayer.W.get_value(),
                             classifier.hiddenLayer.b.get_value(),
                             classifier.logRegressionLayer.W.get_value(),
                             classifier.logRegressionLayer.b.get_value()))
                    best_test_score = test_score
                    logger.info(('epoch %i, minibatch %i/%i, test error of best model %f %%') %
                                (epoch, minibatch_idx + 1, n_train_batches, best_test_score * 100.))
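
# Usage sketch (illustrative, not from the original source): the file paths and
# hyperparameter values below are assumptions; only the mlp_train signature above
# is taken from the code itself. The call is left commented out because the
# training loop above runs indefinitely.
# mlp_train(train_file_name='train.txt', test_file_name='test.txt',
#           model_file_name='mlp_model.npy', n_dim=784, n_label=10,
#           n_hidden=500, learning_rate=0.01)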
def sgd_train(train_file_name, test_file_name, model_file_name, n_dim, n_label,
              learning_rate, batch_size=600):
    logger.info('loading data')
    train_set_x, train_set_y, test_set_x, test_set_y = load_data(train_file_name, test_file_name, n_dim)
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] / batch_size

    logger.info('building the model')
    idx = T.lscalar()   # minibatch index
    x = T.matrix('x')   # input features
    y = T.ivector('y')  # integer labels

    # Resume from a previously saved model if one exists.
    W = None
    b = None
    if os.path.isfile(model_file_name):
        logger.debug('load existing model')
        W, b = np.load(model_file_name)

    classifier = LogisticRegression(input=x, n_in=n_dim, n_out=n_label, W=W, b=b)
    cost = classifier.negative_log_likelihood(y)

    test_model = theano.function(inputs=[idx],
                                 outputs=classifier.errors(y),
                                 givens={
                                     x: test_set_x[idx * batch_size: (idx + 1) * batch_size],
                                     y: test_set_y[idx * batch_size: (idx + 1) * batch_size]
                                 })

    # Gradients of the cost w.r.t. W and b, used for plain SGD updates.
    g_W = T.grad(cost=cost, wrt=classifier.W)
    g_b = T.grad(cost=cost, wrt=classifier.b)
    updates = [(classifier.W, classifier.W - learning_rate * g_W),
               (classifier.b, classifier.b - learning_rate * g_b)]

    train_model = theano.function(inputs=[idx],
                                  outputs=cost,
                                  updates=updates,
                                  givens={
                                      x: train_set_x[idx * batch_size: (idx + 1) * batch_size],
                                      y: train_set_y[idx * batch_size: (idx + 1) * batch_size]
                                  })

    logger.info('training the model')
    best_test_score = np.inf
    epoch = 0
    start_time = time.clock()
    while True:
        epoch = epoch + 1
        for minibatch_idx in xrange(n_train_batches):
            train_model(minibatch_idx)
            iter = (epoch - 1) * n_train_batches + minibatch_idx
            # Evaluate on the test set once per epoch.
            if (iter + 1) % n_train_batches == 0:
                test_loss = [test_model(i) for i in xrange(n_test_batches)]
                test_score = np.mean(test_loss)
                logger.info(('epoch %i, ran for %.5f hr, minibatch %i/%i, test error %f %%') %
                            (epoch, (time.clock() - start_time) / 3600.0,
                             minibatch_idx + 1, n_train_batches, test_score * 100.))
                # Save the parameters whenever the test error improves.
                if test_score < best_test_score:
                    np.save(model_file_name, (classifier.W.get_value(), classifier.b.get_value()))
                    best_test_score = test_score
                    logger.info(('epoch %i, minibatch %i/%i, test error of best model %f %%') %
                                (epoch, minibatch_idx + 1, n_train_batches, best_test_score * 100.))
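
# Usage sketch (illustrative, not from the original source): the file paths and
# hyperparameter values below are assumptions; only the sgd_train signature above
# is taken from the code itself. Like mlp_train, this loop runs until interrupted,
# so the call is left commented out.
# sgd_train(train_file_name='train.txt', test_file_name='test.txt',
#           model_file_name='logreg_model.npy', n_dim=784, n_label=10,
#           learning_rate=0.13)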