def train_rbm():
    batch_size = 20
    learning_rate = 0.1
    n_training_epochs = 15
    n_visible = 28 * 28
    n_hidden = 500
    n_contrastive_divergence_steps = 15
    persistent_contrastive_divergence = True

    rng = np.random.RandomState(123)
    theano_rng = RandomStreams(rng.randint(2 ** 30))

    train_set, valid_set, test_set = get_dataset('mnist')
    train_set_x, _ = load_dataset(train_set)
    test_set_x, _ = load_dataset(test_set)
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size

    x = T.matrix('x')
    if persistent_contrastive_divergence:
        persistent_chain = theano.shared(
            np.zeros((batch_size, n_hidden), dtype=theano.config.floatX),
            borrow=True)
    else:
        persistent_chain = None

    rbm = RBM.create_with_random_weights(n_visible, n_hidden, rng)
    # persistent contrastive divergence with n_contrastive_divergence_steps steps
    cost, updates = rbm.get_cost_updates(
        x, learning_rate,
        number_of_gibbs_steps=n_contrastive_divergence_steps,
        theano_rng=theano_rng,
        persistent_state=persistent_chain)

    minibatch_index = T.iscalar('minibatch_index')
    train_rbm = theano.function(
        inputs=[minibatch_index],
        outputs=cost,
        updates=updates,
        givens={
            x: train_set_x[minibatch_index*batch_size:(minibatch_index+1)*batch_size],
        }
    )

    start_time = time.time()
    for epoch in range(n_training_epochs):
        epoch_start_time = time.time()
        costs = []
        for batch_index in range(n_train_batches):
            costs.append(train_rbm(batch_index))
        print('Training epoch %d of %d, cost is %f, took %.1fs'
              % (epoch, n_training_epochs, np.mean(costs), time.time() - epoch_start_time))

    filters = tile_raster_images(X=rbm.W.get_value(borrow=True).T, img_shape=(28, 28))
    cv2.imshow('filter', filters)
    cv2.waitKey(-1)
    cv2.destroyWindow('filter')

    print('Training took %d minutes' % ((time.time() - start_time) / 60.))
    return rbm.get_parameter_values()
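# Usage sketch (an assumption, not part of the original code: this presumes
# RBM.get_parameter_values() returns the weight matrix and the hidden/visible
# biases in the order expected by sample_from_trained_rbm, defined further below).
def run_rbm_train_and_sample():
    w, b_hidden, b_visible = train_rbm()
    sample_from_trained_rbm(w, b_hidden, b_visible)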
def test_check_mnist():
    train_set, valid_set, test_set = get_dataset('mnist')
    assert (len(train_set), len(valid_set), len(test_set)) == (2, 2, 2)
    # mnist pictures are 28x28 = 784 float grayscale values
    assert (train_set[0].shape, train_set[1].shape) == ((50000, 784), (50000,))
    assert (valid_set[0].shape, valid_set[1].shape) == ((10000, 784), (10000,))
    assert (test_set[0].shape, test_set[1].shape) == ((10000, 784), (10000,))
    assert (train_set[0].dtype, train_set[1].dtype) == (np.float32, np.int64)
def run_3_denoising_autoencoder(corruption_level=0.3):
    batch_size = 20
    learning_rate = 0.01
    training_epochs = 250
    n_in = 28 * 28
    n_hidden = 500

    rng = np.random.RandomState(123)
    theano_rng = RandomStreams(rng.randint(2 ** 30))

    train_set, _, _ = get_dataset('mnist')
    train_set_x, train_set_y = load_dataset(train_set)
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size

    x = T.matrix('x')
    corrupted_input = theano_rng.binomial(
        size=x.shape, n=1, p=1 - corruption_level,
        dtype=theano.config.floatX) * x
    reconstructed, params = autoencoder(corrupted_input, n_in, n_hidden, rng)
    cost = mean_cross_entropy(reconstructed, x)

    minibatch_index = T.iscalar('minibatch_index')
    train_model = theano.function(
        inputs=[minibatch_index],
        outputs=[cost],
        updates=[[p, p - learning_rate*T.grad(cost, p)] for p in params],
        givens={
            x: train_set_x[minibatch_index*batch_size:(minibatch_index+1)*batch_size],
        },
        profile=False
    )

    start_time = time.time()
    print('Going to run the training with floatX=%s' % (theano.config.floatX))
    for epoch in range(training_epochs):
        costs = []
        epoch_start_time = time.time()
        for minibatch_index in range(n_train_batches):
            costs.append(train_model(minibatch_index))
        print("Mean cost at epoch %d is %f (ran for %.1fs)"
              % (epoch, np.mean(costs), time.time() - epoch_start_time))

    total_time = time.time() - start_time
    print('The training code ran for %.1fs, for %d epochs, at %f epochs/sec'
          % (total_time, epoch, epoch / total_time))

    filters = tile_raster_images(X=params[0].get_value(borrow=True).T,
                                 img_shape=(28, 28),
                                 tile_shape=(23, 22),
                                 tile_spacing=(1, 1))
    filters = cv2.resize(filters, dsize=None, fx=1., fy=1.)
    cv2.imshow('filters', filters)
    cv2.waitKey(-1)
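# Illustrative sketch of the corruption step used in run_3_denoising_autoencoder:
# a NumPy analogue of the symbolic theano_rng.binomial(...) * x expression above.
# corrupt_input_numpy is a hypothetical helper, not used by the training code.
def corrupt_input_numpy(x, corruption_level, rng):
    # zero out roughly corruption_level of the inputs with a Bernoulli mask
    mask = rng.binomial(n=1, p=1 - corruption_level, size=x.shape).astype(x.dtype)
    return mask * x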
def run_rnn_rbm_training(trained_model_filename, reuse_pretrained=False, num_epochs=1):
    batch_size = 100
    train_set_files, valid_set_files, test_set_files = get_dataset('nottingham')

    if reuse_pretrained:
        # pickle files must be opened in binary mode
        with open(trained_model_filename, 'rb') as f:
            trained_model_params = cPickle.load(f)
    else:
        trained_model_params = None

    model = RnnRBM(network_parameters=trained_model_params)
    model.train(train_set_files, batch_size, num_epochs)

    with open(trained_model_filename, 'wb') as f:
        cPickle.dump(model.get_params(), f, cPickle.HIGHEST_PROTOCOL)
def train_dbn():
    batch_size = 10
    finetune_learning_rate = 0.1
    pretrain_learning_rate = 0.01
    n_pretraining_epochs = 100
    n_finetune_training_epochs = 100
    n_in = 28 * 28
    n_out = 10
    hidden_layers_sizes = [1000, 1000, 1000]
    n_contrastive_divergence_steps = 1
    persistent_contrastive_divergence = True

    rng = np.random.RandomState(123)
    theano_rng = RandomStreams(rng.randint(2 ** 30))

    train_set, valid_set, test_set = get_dataset("mnist")
    train_set_x, train_set_y = load_dataset(train_set)
    valid_set_x, valid_set_y = load_dataset(valid_set)
    test_set_x, test_set_y = load_dataset(test_set)
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size
    test_batch_size = valid_set_x.get_value(borrow=True).shape[0]
    n_validation_batches = valid_set_x.get_value(borrow=True).shape[0] / test_batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] / test_batch_size

    x = T.matrix("x")
    y = T.ivector("y")
    minibatch_index = T.iscalar("minibatch_index")

    mlp_output, mlp_params, mlp_layers_description = deep_mlp(x, n_in, n_out, hidden_layers_sizes, rng)

    # one RBM per hidden layer, sharing the layer's weights and hidden biases
    pretrain_functions = []
    for layer_input, (W, b) in mlp_layers_description:
        rbm_layer = RBM(W=W, b_hidden=b)
        if persistent_contrastive_divergence:
            n_hidden = b.get_value(borrow=True).shape[0]
            persistent_chain = theano.shared(
                np.zeros((batch_size, n_hidden), dtype=theano.config.floatX),
                borrow=True)
        else:
            persistent_chain = None
        layer_cost, layer_updates = rbm_layer.get_cost_updates(
            layer_input, pretrain_learning_rate, n_contrastive_divergence_steps,
            theano_rng, persistent_chain
        )
        pretrain_rbm = theano.function(
            inputs=[minibatch_index],
            outputs=layer_cost,
            updates=layer_updates,
            givens={x: train_set_x[minibatch_index * batch_size:(minibatch_index + 1) * batch_size]},
        )
        pretrain_functions.append(pretrain_rbm)

    # PRETRAINING
    start_time = time.time()
    for i, pretrain_function in enumerate(pretrain_functions):
        layer_start_time = time.time()
        for epoch in range(n_pretraining_epochs):
            epoch_start_time = time.time()
            costs = []
            for batch_index in range(n_train_batches):
                costs.append(pretrain_function(batch_index))
            print("Training epoch %d of %d, cost is %f, took %.1fs"
                  % (epoch, n_pretraining_epochs, np.mean(costs), time.time() - epoch_start_time))
        print("Pretraining of layer %d took %d min" % (i, (time.time() - layer_start_time) / 60.0))
    print("Pretraining took %d minutes" % ((time.time() - start_time) / 60.0))

    # FINETUNING
    y_predict = T.argmax(mlp_output, axis=1)
    finetune_cost = negative_log_likelihood_loss(mlp_output, y)
    finetune_train_model = theano.function(
        inputs=[minibatch_index],
        outputs=[finetune_cost],
        updates=[[p, p - finetune_learning_rate * T.grad(finetune_cost, p)] for p in mlp_params],
        givens={
            x: train_set_x[minibatch_index * batch_size:(minibatch_index + 1) * batch_size],
            y: train_set_y[minibatch_index * batch_size:(minibatch_index + 1) * batch_size],
        },
        profile=False,
    )
    validation_model = theano.function(
        inputs=[minibatch_index],
        outputs=one_zero_loss(y_predict, y),
        givens={
            x: valid_set_x[minibatch_index * test_batch_size:(minibatch_index + 1) * test_batch_size],
            y: valid_set_y[minibatch_index * test_batch_size:(minibatch_index + 1) * test_batch_size],
        },
    )
    test_model = theano.function(
        inputs=[minibatch_index],
        outputs=one_zero_loss(y_predict, y),
        givens={
            x: test_set_x[minibatch_index * test_batch_size:(minibatch_index + 1) * test_batch_size],
            y: test_set_y[minibatch_index * test_batch_size:(minibatch_index + 1) * test_batch_size],
        },
    )

    start_time = time.time()

    def main_loop():
        patience = 4 * n_train_batches
        patience_increase = 2
        improvement_threshold = 0.995
        validation_frequency = min(n_train_batches, patience / 2)
        test_score = 0.0
        best_validation_loss = np.inf
        print("Going to run the finetuning training with floatX=%s" % (theano.config.floatX))
        for epoch in range(n_finetune_training_epochs):
            epoch_start = time.time()
            for minibatch_index_value in range(n_train_batches):
                finetune_train_model(minibatch_index_value)
                iteration = epoch * n_train_batches + minibatch_index_value
                if (iteration + 1) % validation_frequency == 0:
                    validation_cost = np.mean([validation_model(i) for i in range(n_validation_batches)])
                    print("epoch %i, validation error %f %%" % (epoch, validation_cost * 100.0))
                    if validation_cost < best_validation_loss:
                        if validation_cost < best_validation_loss * improvement_threshold:
                            patience = max(patience, iteration * patience_increase)
                        best_validation_loss = validation_cost
                        test_score = np.mean([test_model(i) for i in range(n_test_batches)])
                        print(" epoch %i, minibatch test error of best model %f %%" % (epoch, test_score * 100.0))
                if patience <= iteration:
                    return epoch, best_validation_loss, test_score
            print(" - finished epoch %d out of %d in %.1fs"
                  % (epoch, n_finetune_training_epochs, time.time() - epoch_start))
        return epoch, best_validation_loss, test_score

    epoch, best_validation_loss, test_score = main_loop()
    total_time = time.time() - start_time
    print("Optimization complete in %d min with best validation score of %f %%, with test performance %f %%"
          % (total_time / 60, best_validation_loss * 100.0, test_score * 100.0))
    print("The code ran for %d epochs, at %f epochs/sec" % (epoch, epoch / total_time))
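# Worked numbers for the early-stopping schedule in train_dbn (a sketch, assuming the
# 50000-example MNIST training set checked in test_check_mnist): with batch_size = 10,
# n_train_batches = 5000, so the initial patience is 4 * 5000 = 20000 iterations and
# validation runs every min(5000, 20000 / 2) = 5000 iterations, i.e. once per epoch.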
def run_2_lenet_training(n_epochs=200):
    batch_size = 500
    learning_rate = 0.1
    n_hidden = 500
    n_out = 10
    rng = np.random.RandomState(23455)
    number_of_kernels = [20, 50]

    train_set, valid_set, test_set = get_dataset('mnist')
    train_set_x, train_set_y = load_dataset(train_set)
    valid_set_x, valid_set_y = load_dataset(valid_set)
    test_set_x, test_set_y = load_dataset(test_set)
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size
    test_batch_size = batch_size
    n_validation_batches = valid_set_x.get_value(borrow=True).shape[0] / test_batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] / test_batch_size

    x = T.matrix('x')
    y = T.ivector('y')

    layer_0_input = x.reshape((batch_size, 1, 28, 28))
    conv_layer_0_out, conv_layer_0_params = conv_poll_layer(
        layer_0_input,
        feature_maps_count_in=1,
        feature_maps_count_out=number_of_kernels[0],
        filter_shape=(5, 5),
        maxpool_shape=(2, 2),
        image_shape=(batch_size, 1, 28, 28),
        rng=rng
    )
    # filtering in the previous layer reduces the image size to (28-5+1, 28-5+1) = (24, 24);
    # maxpooling in the prev. layer reduces this further to (24/2, 24/2) = (12, 12)
    conv_layer_1_out, conv_layer_1_params = conv_poll_layer(
        conv_layer_0_out,
        feature_maps_count_in=number_of_kernels[0],
        feature_maps_count_out=number_of_kernels[1],
        filter_shape=(5, 5),
        maxpool_shape=(2, 2),
        image_shape=(batch_size, number_of_kernels[0], 12, 12),
        rng=rng
    )
    hidden_layer_output, hidden_layer_params = hidden_layer(
        conv_layer_1_out.flatten(2),
        n_in=number_of_kernels[1] * 4 * 4,  # 4x4 is the output shape of the layer-1 maxpooling
        n_out=n_hidden,
        rng=rng)
    output_layer_output, output_layer_params = logistic_layer(hidden_layer_output, n_hidden, n_out)
    y_predict = T.argmax(output_layer_output, axis=1)
    cost = negative_log_likelihood_loss(output_layer_output, y)

    minibatch_index = T.iscalar('minibatch_index')
    all_parameters = (conv_layer_0_params + conv_layer_1_params +
                      hidden_layer_params + output_layer_params)
    train_model_impl = theano.function(
        inputs=[minibatch_index],
        outputs=[],
        updates=[[p, p - learning_rate*T.grad(cost, p)] for p in all_parameters],
        givens={
            x: train_set_x[minibatch_index*batch_size:(minibatch_index+1)*batch_size],
            y: train_set_y[minibatch_index*batch_size:(minibatch_index+1)*batch_size],
        },
        profile=False
    )

    def train_model(*args):
        return train_model_impl(*args)

    validation_model_impl = theano.function(
        inputs=[minibatch_index],
        outputs=one_zero_loss(y_predict, y),
        givens={
            x: valid_set_x[minibatch_index*test_batch_size:(minibatch_index+1)*test_batch_size],
            y: valid_set_y[minibatch_index*test_batch_size:(minibatch_index+1)*test_batch_size],
        }
    )

    def validation_model(*args):
        return validation_model_impl(*args)

    test_model_impl = theano.function(
        inputs=[minibatch_index],
        outputs=one_zero_loss(y_predict, y),
        givens={
            x: test_set_x[minibatch_index*test_batch_size:(minibatch_index+1)*test_batch_size],
            y: test_set_y[minibatch_index*test_batch_size:(minibatch_index+1)*test_batch_size],
        }
    )

    def test_model(*args):
        return test_model_impl(*args)

    start_time = time.time()

    def main_loop():
        patience = 10000
        patience_increase = 2
        improvement_threshold = 0.995
        validation_frequency = n_train_batches
        test_score = 0.
        best_validation_loss = np.inf
        print('Going to run the training with floatX=%s' % (theano.config.floatX))
        for epoch in range(n_epochs):
            for minibatch_index in range(n_train_batches):
                batch_start = time.time()
                train_model(minibatch_index)
                print('Ran training iteration in %.2fs' % (time.time() - batch_start))
                iteration = epoch*n_train_batches + minibatch_index
                if (iteration + 1) % validation_frequency == 0:
                    validation_cost = np.mean([validation_model(i) for i in range(n_validation_batches)])
                    print('epoch %i, validation error %f %%' % (epoch, validation_cost * 100.))
                    if validation_cost < best_validation_loss:
                        if validation_cost < best_validation_loss*improvement_threshold:
                            patience = max(patience, iteration*patience_increase)
                        best_validation_loss = validation_cost
                        test_score = np.mean([test_model(i) for i in range(n_test_batches)])
                        print(' epoch %i, minibatch test error of best model %f %%' % (epoch, test_score * 100.))
                if patience <= iteration:
                    return epoch, best_validation_loss, test_score
        return epoch, best_validation_loss, test_score

    epoch, best_validation_loss, test_score = main_loop()
    total_time = time.time() - start_time
    print('Optimization complete in %.1fs with best validation score of %f %%, with test performance %f %%'
          % (total_time, best_validation_loss * 100., test_score * 100.))
    print('The code ran for %d epochs, at %.1f sec/epoch' % (epoch + 1, total_time / (epoch + 1)))

    with open('trained_lenet.pkl', 'wb') as f:
        pickle.dump([p.get_value(borrow=True) for p in all_parameters],
                    f, protocol=pickle.HIGHEST_PROTOCOL)
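# Loading the parameters saved at the end of run_2_lenet_training (a sketch using
# only the standard pickle API; load_trained_lenet is a hypothetical helper):
def load_trained_lenet(filename='trained_lenet.pkl'):
    with open(filename, 'rb') as f:
        # a plain list of numpy arrays, in the same order as all_parameters above
        return pickle.load(f)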
def run_1_mlp():
    batch_size = 20
    learning_rate = 0.01
    n_epochs = 10
    L1_reg_coeff = 0.00
    L2_reg_coeff = 0.0001
    n_in = 28 * 28
    n_hidden = 500
    n_out = 10
    rng = np.random.RandomState(1234)

    train_set, valid_set, test_set = get_dataset('mnist')
    train_set_x, train_set_y = load_dataset(train_set)
    valid_set_x, valid_set_y = load_dataset(valid_set)
    test_set_x, test_set_y = load_dataset(test_set)
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size
    # evaluate on the whole validation / test set in a single batch
    test_batch_size = valid_set_x.get_value(borrow=True).shape[0]
    n_validation_batches = valid_set_x.get_value(borrow=True).shape[0] / test_batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] / test_batch_size

    x = T.matrix('x')
    y = T.ivector('y')
    hidden_layer_output, hidden_layer_params = hidden_layer(x, n_in, n_hidden, rng)
    output_layer_output, output_layer_params = logistic_layer(hidden_layer_output, n_hidden, n_out)
    y_predict = T.argmax(output_layer_output, axis=1)

    # weight decay
    L1 = abs(hidden_layer_params[0]).sum() + abs(output_layer_params[0]).sum()
    L2 = T.sqr(hidden_layer_params[0]).sum() + T.sqr(output_layer_params[0]).sum()
    cost = negative_log_likelihood_loss(output_layer_output, y) + L1_reg_coeff*L1 + L2_reg_coeff*L2

    minibatch_index = T.iscalar('minibatch_index')
    train_model = theano.function(
        inputs=[minibatch_index],
        outputs=[],
        updates=[[p, p - learning_rate*T.grad(cost, p)] for p in (output_layer_params + hidden_layer_params)],
        givens={
            x: train_set_x[minibatch_index*batch_size:(minibatch_index+1)*batch_size],
            y: train_set_y[minibatch_index*batch_size:(minibatch_index+1)*batch_size],
        },
        profile=True
    )
    validation_model = theano.function(
        inputs=[minibatch_index],
        outputs=one_zero_loss(y_predict, y),
        givens={
            x: valid_set_x[minibatch_index*test_batch_size:(minibatch_index+1)*test_batch_size],
            y: valid_set_y[minibatch_index*test_batch_size:(minibatch_index+1)*test_batch_size],
        }
    )
    test_model = theano.function(
        inputs=[minibatch_index],
        outputs=one_zero_loss(y_predict, y),
        givens={
            x: test_set_x[minibatch_index*test_batch_size:(minibatch_index+1)*test_batch_size],
            y: test_set_y[minibatch_index*test_batch_size:(minibatch_index+1)*test_batch_size],
        }
    )

    start_time = time.time()

    def main_loop():
        patience = 10000
        patience_increase = 2
        improvement_threshold = 0.995
        validation_frequency = n_train_batches
        test_score = 0.
        best_validation_loss = np.inf
        print('Going to run the training with floatX=%s' % (theano.config.floatX))
        for epoch in range(n_epochs):
            for minibatch_index in range(n_train_batches):
                train_model(minibatch_index)
                iteration = epoch*n_train_batches + minibatch_index
                if (iteration + 1) % validation_frequency == 0:
                    validation_cost = np.mean([validation_model(i) for i in range(n_validation_batches)])
                    print('epoch %i, validation error %f %%' % (epoch, validation_cost * 100.))
                    if validation_cost < best_validation_loss:
                        if validation_cost < best_validation_loss*improvement_threshold:
                            patience = max(patience, iteration*patience_increase)
                        best_validation_loss = validation_cost
                        test_score = np.mean([test_model(i) for i in range(n_test_batches)])
                        print(' epoch %i, minibatch test error of best model %f %%' % (epoch, test_score * 100.))
                if patience <= iteration:
                    return epoch, best_validation_loss, test_score
        return epoch, best_validation_loss, test_score

    epoch, best_validation_loss, test_score = main_loop()
    total_time = time.time() - start_time
    print('Optimization complete in %.1fs with best validation score of %f %%, with test performance %f %%'
          % (total_time, best_validation_loss * 100., test_score * 100.))
    print('The code ran for %d epochs, at %f epochs/sec' % (epoch, epoch / total_time))
def run_7_lstm_training():
    train_set, valid_set, test_set = get_dataset('imdb')

    ydim = 2  # n_out
    dim_proj = 128
    n_words = 10000  # this is implied by the preprocessed imdb dataset
    use_dropout = True
    optimizer = adadelta
    valid_batch_size = 64
    validFreq = 370
    max_epochs = 100
    batch_size = 16
    lrate = 0.0001
    dispFreq = 10
    patience = 10

    W_embedding, W_lstm, U_lstm, b_lstm, W_classifier, b_classifier = init_params(n_words, dim_proj, ydim)

    # use_noise is for dropout
    (use_noise, x, mask, y, f_pred_prob, f_pred, cost) = build_model(
        W_embedding, W_lstm, U_lstm, b_lstm, W_classifier, b_classifier, dim_proj, use_dropout)

    grads = T.grad(cost, wrt=[W_embedding, W_lstm, U_lstm, b_lstm, W_classifier, b_classifier])
    lr = T.scalar(name='learning_rate')
    f_grad_shared, f_update = optimizer(lr, [W_embedding, W_lstm, U_lstm, b_lstm, W_classifier, b_classifier],
                                        grads, x, mask, y, cost)

    print 'Optimization'
    validation_minibatch_indices = get_minibatches_idx(len(valid_set[0]), valid_batch_size)
    test_minibatch_indices = get_minibatches_idx(len(test_set[0]), valid_batch_size)

    history_errs = []
    bad_counter = 0
    uidx = 0  # the number of updates done
    estop = False  # early stop
    start_time = time.time()
    try:
        for eidx in xrange(max_epochs):
            n_samples = 0

            # Get new shuffled indices for the training set.
            training_minibatches_indices = get_minibatches_idx(len(train_set[0]), batch_size, shuffle=True)

            for train_index in training_minibatches_indices:
                uidx += 1
                use_noise.set_value(1.)

                # Select the random examples for this minibatch
                current_x = [train_set[0][t] for t in train_index]
                current_y = [train_set[1][t] for t in train_index]

                # Get the data in numpy.ndarray format.
                # This swaps the axes!
                # Returns something of shape (minibatch maxlen, n samples).
                current_x, current_mask = prepare_data(current_x)
                n_samples += current_x.shape[1]

                cost = f_grad_shared(current_x, current_mask, current_y)
                f_update(lrate)

                if uidx % dispFreq == 0:
                    print 'Epoch ', eidx, 'Update ', uidx, 'Cost ', cost

                if uidx % validFreq == 0:
                    use_noise.set_value(0.)
                    train_err = pred_error(f_pred, train_set, training_minibatches_indices)
                    valid_err = pred_error(f_pred, valid_set, validation_minibatch_indices)
                    test_err = pred_error(f_pred, test_set, test_minibatch_indices)
                    history_errs.append([valid_err, test_err])

                    if valid_err <= np.array(history_errs)[:, 0].min():
                        bad_counter = 0

                    print ('Train ', train_err, 'Valid ', valid_err, 'Test ', test_err)

                    if (len(history_errs) > patience and
                            valid_err >= np.array(history_errs)[:-patience, 0].min()):
                        bad_counter += 1
                        if bad_counter > patience:
                            print 'Early Stop!'
                            estop = True
                            break

            print 'Seen %d samples' % n_samples
            if estop:
                break
    except KeyboardInterrupt:
        print "Training interrupted"

    end_time = time.time()
    use_noise.set_value(0.)
    train_minibatch_indices_sorted = get_minibatches_idx(len(train_set[0]), batch_size)
    train_err = pred_error(f_pred, train_set, train_minibatch_indices_sorted)
    valid_err = pred_error(f_pred, valid_set, validation_minibatch_indices)
    test_err = pred_error(f_pred, test_set, test_minibatch_indices)

    print 'Train ', train_err, 'Valid ', valid_err, 'Test ', test_err
    print 'The code ran for %d epochs, at %f sec/epoch' % (
        (eidx + 1), (end_time - start_time) / (1. * (eidx + 1)))
    print ('Training took %.1fs' % (end_time - start_time))
    return train_err, valid_err, test_err
def run_4_stacked_autoencoder():
    batch_size = 1
    finetune_learning_rate = 0.1
    finetune_training_epochs = 50
    pretrain_learning_rate = 0.001
    pretraining_epochs = 15
    n_in = 28 * 28
    hidden_layers_sizes = [1000, 1000, 1000]
    corruption_levels = [.1, .2, .3]
    n_out = 10

    rng = np.random.RandomState(89677)
    theano_rng = RandomStreams(rng.randint(2 ** 30))

    train_set, valid_set, test_set = get_dataset('mnist')
    train_set_x, train_set_y = load_dataset(train_set)
    valid_set_x, valid_set_y = load_dataset(valid_set)
    test_set_x, test_set_y = load_dataset(test_set)
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size
    test_batch_size = valid_set_x.get_value(borrow=True).shape[0]
    n_validation_batches = valid_set_x.get_value(borrow=True).shape[0] / test_batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] / test_batch_size

    # construct the deep mlp
    x = T.matrix('x')
    y = T.ivector('y')
    mlp_output, mlp_params, layers_description = deep_mlp(
        x, n_in=n_in, n_out=n_out, hidden_layers_sizes=hidden_layers_sizes, rng=rng)

    minibatch_index = T.iscalar('minibatch_index')

    # pretraining models: one denoising autoencoder per hidden layer
    pretraining_models = []
    for i, (layer_input, (W, b_hidden)) in enumerate(layers_description):
        corrupted_input = theano_rng.binomial(
            size=layer_input.shape, n=1, p=1 - corruption_levels[i],
            dtype=theano.config.floatX) * layer_input
        reconstructed_output, autoencoder_params = autoencoder(corrupted_input, W, b_hidden)
        pretraining_cost = mean_cross_entropy(reconstructed_output, layer_input)
        pretraining_model = theano.function(
            inputs=[minibatch_index],
            outputs=[pretraining_cost],
            updates=[[p, p - pretrain_learning_rate*T.grad(pretraining_cost, p)] for p in autoencoder_params],
            givens={
                x: train_set_x[minibatch_index*batch_size:(minibatch_index+1)*batch_size],
            }
        )
        pretraining_models.append(pretraining_model)

    y_predict = T.argmax(mlp_output, axis=1)
    finetune_cost = negative_log_likelihood_loss(mlp_output, y)
    finetune_train_model = theano.function(
        inputs=[minibatch_index],
        outputs=[finetune_cost],
        updates=[[p, p - finetune_learning_rate*T.grad(finetune_cost, p)] for p in mlp_params],
        givens={
            x: train_set_x[minibatch_index*batch_size:(minibatch_index+1)*batch_size],
            y: train_set_y[minibatch_index*batch_size:(minibatch_index+1)*batch_size]
        },
        profile=False
    )
    validation_model = theano.function(
        inputs=[minibatch_index],
        outputs=one_zero_loss(y_predict, y),
        givens={
            x: valid_set_x[minibatch_index*test_batch_size:(minibatch_index+1)*test_batch_size],
            y: valid_set_y[minibatch_index*test_batch_size:(minibatch_index+1)*test_batch_size],
        }
    )
    test_model = theano.function(
        inputs=[minibatch_index],
        outputs=one_zero_loss(y_predict, y),
        givens={
            x: test_set_x[minibatch_index*test_batch_size:(minibatch_index+1)*test_batch_size],
            y: test_set_y[minibatch_index*test_batch_size:(minibatch_index+1)*test_batch_size],
        }
    )

    # layer-wise pretraining
    for i, pretraining_model in enumerate(pretraining_models):
        pretraining_start_time = time.time()
        print('Going to run the pretraining for layer %d with floatX=%s' % (i, theano.config.floatX))
        for epoch in range(pretraining_epochs):
            costs = []
            epoch_start_time = time.time()
            for minibatch_index_value in range(n_train_batches):
                costs.append(pretraining_model(minibatch_index_value))
            print("Layer %d: mean cost at epoch %d is %f (ran for %.1fs)"
                  % (i, epoch, np.mean(costs), time.time() - epoch_start_time))
        total_pretraining_time = time.time() - pretraining_start_time
        print('The pretraining code for layer %d ran for %.1fs, for %d epochs, at %f epochs/sec'
              % (i, total_pretraining_time, epoch, epoch / total_pretraining_time))

    start_time = time.time()

    def main_loop():
        patience = 10 * n_train_batches
        patience_increase = 2
        improvement_threshold = 0.995
        validation_frequency = min(n_train_batches, patience / 2)
        test_score = 0.
        best_validation_loss = np.inf
        print('Going to run the finetuning training with floatX=%s' % (theano.config.floatX))
        for epoch in range(finetune_training_epochs):
            epoch_start = time.time()
            for minibatch_index_value in range(n_train_batches):
                finetune_train_model(minibatch_index_value)
                iteration = epoch*n_train_batches + minibatch_index_value
                if (iteration + 1) % validation_frequency == 0:
                    validation_cost = np.mean([validation_model(i) for i in range(n_validation_batches)])
                    print('epoch %i, validation error %f %%' % (epoch, validation_cost * 100.))
                    if validation_cost < best_validation_loss:
                        if validation_cost < best_validation_loss*improvement_threshold:
                            patience = max(patience, iteration*patience_increase)
                        best_validation_loss = validation_cost
                        test_score = np.mean([test_model(i) for i in range(n_test_batches)])
                        print(' epoch %i, minibatch test error of best model %f %%' % (epoch, test_score * 100.))
                if patience <= iteration:
                    return epoch, best_validation_loss, test_score
            print(' - finished epoch %d out of %d in %.1fs'
                  % (epoch, finetune_training_epochs, time.time() - epoch_start))
        return epoch, best_validation_loss, test_score

    epoch, best_validation_loss, test_score = main_loop()
    total_time = time.time() - start_time
    print('Optimization complete in %.1fs with best validation score of %f %%, with test performance %f %%'
          % (total_time, best_validation_loss * 100., test_score * 100.))
    print('The code ran for %d epochs, at %f epochs/sec' % (epoch, epoch / total_time))
def sample_from_trained_rbm(w_init, b_hidden_init, b_visible_init):
    # for sampling from the trained model
    n_chains = 20
    n_samples = 10

    # load the test set the same way as the other examples in this module
    train_set, valid_set, test_set = get_dataset('mnist')
    test_set_x, _ = load_dataset(test_set)

    # sample from the trained RBM
    number_of_test_samples = test_set_x.get_value(borrow=True).shape[0]

    # pick random test examples with which to initialize the persistent chain
    rng = np.random.RandomState(123)
    test_idx = rng.randint(number_of_test_samples - n_chains)
    persistent_vis_chain = theano.shared(
        np.asarray(
            test_set_x.get_value(borrow=True)[test_idx:test_idx + n_chains],
            dtype=theano.config.floatX
        )
    )
    theano_rng = RandomStreams(rng.randint(2 ** 30))

    plot_every = 1000
    rbm = RBM(w_init, b_hidden_init, b_visible_init)
    (hidden_samples, hidden_activations, hidden_linear_activations,
     visible_samples, visible_activations, linear_visible_activations), sampling_updates = theano.scan(
        fn=lambda x: rbm.gibbs_update_visible_hidden_visible(x, theano_rng),
        outputs_info=[None, None, None, persistent_vis_chain, None, None],
        n_steps=plot_every
    )
    sampling_updates[persistent_vis_chain] = visible_samples[-1]
    sample_fn = theano.function(
        [],
        [
            visible_activations[-1],
            visible_samples[-1]
        ],
        updates=sampling_updates
    )

    image_data = np.zeros(
        (29 * n_samples + 1, 29 * n_chains - 1),
        dtype='uint8'
    )
    for idx in xrange(n_samples):
        # generate `plot_every` intermediate samples that we discard,
        # because successive samples in the chain are too correlated
        vis_activations, vis_sample = sample_fn()
        print ' ... plotting sample ', idx
        image_data[29 * idx:29 * idx + 28, :] = tile_raster_images(
            X=vis_activations,
            img_shape=(28, 28),
            tile_shape=(1, n_chains),
            tile_spacing=(1, 1)
        )

    image_data = cv2.resize(image_data, dsize=None, fx=2., fy=2.)
    cv2.imshow('sampling', image_data)
    cv2.waitKey(-1)
def run_0_logistic_regression():
    batch_size = 600
    learning_rate = 0.13
    n_epochs = 1000

    train_set, valid_set, test_set = get_dataset('mnist')
    train_set_x, train_set_y = load_dataset(train_set)
    valid_set_x, valid_set_y = load_dataset(valid_set)
    test_set_x, test_set_y = load_dataset(test_set)
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size
    n_validation_batches = valid_set_x.get_value(borrow=True).shape[0] / batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] / batch_size

    x = T.matrix('x')
    y = T.ivector('y')
    n_in = 28 * 28
    n_out = 10
    W = theano.shared(
        np.zeros((n_in, n_out), dtype=theano.config.floatX),
        name='W', borrow=True)
    b = theano.shared(
        np.zeros((n_out,), dtype=theano.config.floatX),
        name='b', borrow=True
    )
    py_given_x = T.nnet.softmax(T.dot(x, W) + b)
    y_predict = T.argmax(py_given_x, axis=1)
    cost = negative_log_likelihood_loss(py_given_x, y)

    minibatch_index = T.iscalar('minibatch_index')
    train_model = theano.function(
        inputs=[minibatch_index],
        outputs=[],
        updates=(
            [W, W - learning_rate*T.grad(cost, W)],
            [b, b - learning_rate*T.grad(cost, b)],
        ),
        givens={
            x: train_set_x[minibatch_index*batch_size:(minibatch_index+1)*batch_size],
            y: train_set_y[minibatch_index*batch_size:(minibatch_index+1)*batch_size],
        }
    )
    validation_model = theano.function(
        inputs=[minibatch_index],
        outputs=one_zero_loss(y_predict, y),
        givens={
            x: valid_set_x[minibatch_index*batch_size:(minibatch_index+1)*batch_size],
            y: valid_set_y[minibatch_index*batch_size:(minibatch_index+1)*batch_size],
        }
    )
    test_model = theano.function(
        inputs=[minibatch_index],
        outputs=one_zero_loss(y_predict, y),
        givens={
            x: test_set_x[minibatch_index*batch_size:(minibatch_index+1)*batch_size],
            y: test_set_y[minibatch_index*batch_size:(minibatch_index+1)*batch_size],
        }
    )

    start_time = time.time()

    def main_loop():
        patience = 5000
        patience_increase = 2
        improvement_threshold = 0.995
        validation_frequency = n_train_batches
        test_score = 0.
        best_validation_loss = np.inf
        for epoch in range(n_epochs):
            for minibatch_index in range(n_train_batches):
                train_model(minibatch_index)
                iteration = epoch*n_train_batches + minibatch_index
                if (iteration + 1) % validation_frequency == 0:
                    validation_cost = np.mean([validation_model(i) for i in range(n_validation_batches)])
                    print('epoch %i, validation error %f %%' % (epoch, validation_cost * 100.))
                    if validation_cost < best_validation_loss:
                        if validation_cost < best_validation_loss*improvement_threshold:
                            patience = max(patience, iteration*patience_increase)
                        best_validation_loss = validation_cost
                        test_score = np.mean([test_model(i) for i in range(n_test_batches)])
                        print(' epoch %i, minibatch test error of best model %f %%' % (epoch, test_score * 100.))
                if patience <= iteration:
                    return epoch, best_validation_loss, test_score
        return epoch, best_validation_loss, test_score

    epoch, best_validation_loss, test_score = main_loop()
    total_time = time.time() - start_time
    print('Optimization complete in %.1fs with best validation score of %f %%, with test performance %f %%'
          % (total_time, best_validation_loss * 100., test_score * 100.))
    print('The code ran for %d epochs, at %f epochs/sec' % (epoch, epoch / total_time))

    assert abs(best_validation_loss - 0.075) < 1e-6
    assert abs(test_score - 0.07489583) < 1e-6
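# Minimal entry point (a sketch): run the simplest example when the module is
# executed directly; any other run_* / train_* function above could be substituted.
if __name__ == '__main__':
    run_0_logistic_regression()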