def train_rbm():
    """Train an RBM on the MNIST training set and return its parameters.

    A 784-visible / 500-hidden RBM is created with random weights and
    trained with minibatch contrastive divergence (using a persistent
    chain when ``persistent_contrastive_divergence`` is set) for a fixed
    number of epochs.  The mean cost of every epoch is printed, the learned
    filters are shown in an OpenCV window until a key is pressed, and the
    total elapsed time is printed.

    Returns:
        The value of ``rbm.get_parameter_values()`` for the trained model.
    """
    batch_size = 20
    learning_rate = 0.1
    n_training_epochs = 15
    n_visible = 28 * 28
    n_hidden = 500
    n_contrastive_divergence_steps = 15
    persistent_contrastive_divergence = True

    rng = np.random.RandomState(123)
    theano_rng = RandomStreams(rng.randint(2 ** 30))

    train_set, _, _ = get_dataset('mnist')
    train_set_x, _ = load_dataset(train_set)
    # Floor division keeps the batch count an int under true division
    # (Python 3 / ``from __future__ import division``); a trailing partial
    # batch is dropped.
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size

    x = T.matrix('x')

    if persistent_contrastive_divergence:
        # State of the persistent chain, carried over between minibatches.
        persistent_chain = theano.shared(
            np.zeros((batch_size, n_hidden), dtype=theano.config.floatX),
            borrow=True)
    else:
        persistent_chain = None

    rbm = RBM.create_with_random_weights(n_visible, n_hidden, rng)

    # persistent contrastive divergence with n_contrastive_divergence_steps steps
    cost, updates = rbm.get_cost_updates(
        x,
        learning_rate,
        number_of_gibbs_steps=n_contrastive_divergence_steps,
        theano_rng=theano_rng,
        persistent_state=persistent_chain)

    minibatch_index = T.iscalar('minibatch_index')
    # Named ``train_step`` so the compiled function does not shadow the
    # enclosing ``train_rbm``.
    train_step = theano.function(
        inputs=[minibatch_index],
        outputs=cost,
        updates=updates,
        givens={
            x: train_set_x[minibatch_index * batch_size:
                           (minibatch_index + 1) * batch_size],
        }
    )

    start_time = time.time()
    for epoch in range(n_training_epochs):
        epoch_start_time = time.time()
        costs = [train_step(batch_index)
                 for batch_index in range(n_train_batches)]
        print('Training epoch %d of %d, cost is %f, took %.1fs'
              % (epoch, n_training_epochs, np.mean(costs),
                 time.time() - epoch_start_time))

    # NOTE(review): the original formatting was lost, so it is unclear whether
    # the filter display was meant to run once per epoch; it is shown once
    # after training here -- confirm.
    filters = tile_raster_images(X=rbm.W.get_value(borrow=True).T,
                                 img_shape=(28, 28))
    cv2.imshow('filter', filters)
    cv2.waitKey(-1)  # block until a key is pressed
    cv2.destroyWindow('filter')

    print('Training took %d minutes' % ((time.time() - start_time) / 60.))
    return rbm.get_parameter_values()
def run_3_denoising_autoencoder(corruption_level=0.3):
    """Train a denoising autoencoder on MNIST and display its filters.

    Every input element is multiplied by a 0/1 binomial mask that keeps it
    with probability ``1 - corruption_level``.  The autoencoder is trained
    with plain gradient descent to reconstruct the uncorrupted input under
    a mean cross-entropy cost.  Per-epoch mean cost and overall timing are
    printed, then the first parameter (transposed) is rendered as a grid of
    28x28 tiles and shown in an OpenCV window until a key is pressed.

    Args:
        corruption_level: Probability of zeroing each input element.
    """
    batch_size = 20
    learning_rate = 0.01
    training_epochs = 250
    n_in = 28 * 28
    n_hidden = 500

    rng = np.random.RandomState(123)
    theano_rng = RandomStreams(rng.randint(2 ** 30))

    train_set, _, _ = get_dataset('mnist')
    train_set_x, _ = load_dataset(train_set)
    # Floor division keeps the batch count an int under true division
    # (Python 3 / ``from __future__ import division``); a trailing partial
    # batch is dropped.
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size

    x = T.matrix('x')
    corruption_mask = theano_rng.binomial(
        size=x.shape, n=1, p=1 - corruption_level,
        dtype=theano.config.floatX)
    corrupted_input = corruption_mask * x
    reconstructed, params = autoencoder(corrupted_input, n_in, n_hidden, rng)
    cost = mean_cross_entropy(reconstructed, x)

    minibatch_index = T.iscalar('minibatch_index')
    train_model = theano.function(
        inputs=[minibatch_index],
        outputs=[cost],
        updates=[[p, p - learning_rate * T.grad(cost, p)] for p in params],
        givens={
            x: train_set_x[minibatch_index * batch_size:
                           (minibatch_index + 1) * batch_size],
        },
        profile=False
    )

    start_time = time.time()
    print('Going to run the training with floatX=%s' % (theano.config.floatX))
    for epoch in range(training_epochs):
        epoch_start_time = time.time()
        # The loop variable is deliberately not called ``minibatch_index`` so
        # the symbolic variable above is not rebound to a plain int.
        costs = [train_model(batch_index)
                 for batch_index in range(n_train_batches)]
        print("Mean costs at epoch %d is %f%% (ran for %.1fs)"
              % (epoch, np.mean(costs), time.time() - epoch_start_time))

    total_time = time.time() - start_time
    # Report the number of epochs actually run; the last loop index is one
    # less than that (and undefined when no epoch ran).
    print('The training code run %.1fs, for %d epochs, for with %f epochs/sec'
          % (total_time, training_epochs, training_epochs / total_time))

    filters = tile_raster_images(X=params[0].get_value(borrow=True).T,
                                 img_shape=(28, 28),
                                 tile_shape=(23, 22),
                                 tile_spacing=(1, 1))
    filters = cv2.resize(filters, dsize=None, fx=1., fy=1.)
    cv2.imshow('filters', filters)
    cv2.waitKey(-1)  # block until a key is pressed
def sample_from_trained_rbm(w_init, b_hidden_init, b_visible_init):
    """Draw Gibbs samples from a trained RBM and display them.

    ``n_chains`` parallel chains are initialised from a contiguous run of
    MNIST test examples starting at a random index.  For each of
    ``n_samples`` rows the chains are advanced ``plot_every`` Gibbs steps
    and the final visible activations are tiled into one row of the output
    image, which is shown in an OpenCV window until a key is pressed.

    Args:
        w_init: First positional argument passed to ``RBM(...)``.
        b_hidden_init: Second positional argument passed to ``RBM(...)``.
        b_visible_init: Third positional argument passed to ``RBM(...)``.
    """
    # for sampling from trained model
    n_chains = 20
    n_samples = 10

    # NOTE(review): the other callers in this file unpack get_dataset('mnist')
    # directly as (train, valid, test); this function previously treated the
    # result as a pickle path.  Aligned with the other callers -- confirm
    # against get_dataset.
    _, _, test_set = get_dataset('mnist')
    test_set_x, _ = load_dataset(test_set)

    # sample from trained RBM
    number_of_test_samples = test_set_x.get_value(borrow=True).shape[0]

    # pick random test examples, with which to initialize the persistent chain
    rng = np.random.RandomState(123)
    test_idx = rng.randint(number_of_test_samples - n_chains)
    persistent_vis_chain = theano.shared(
        np.asarray(
            test_set_x.get_value(borrow=True)[test_idx:test_idx + n_chains],
            dtype=theano.config.floatX
        )
    )

    theano_rng = RandomStreams(rng.randint(2 ** 30))
    plot_every = 1000
    rbm = RBM(w_init, b_hidden_init, b_visible_init)

    # Only the visible samples (4th output) are fed back as the recurrent
    # state of the scan; the other outputs are just collected.
    (
        (hidden_samples,
         hidden_activations,
         hidden_linear_activations,
         visible_samples,
         visible_activations,
         linear_visible_activations),
        sampling_updates
    ) = theano.scan(
        fn=lambda x: rbm.gibbs_update_visible_hidden_visible(x, theano_rng),
        outputs_info=[None, None, None, persistent_vis_chain, None, None],
        n_steps=plot_every
    )
    # Carry the chain state over to the next call of sample_fn.
    sampling_updates[persistent_vis_chain] = visible_samples[-1]

    sample_fn = theano.function(
        [],
        [
            visible_activations[-1],
            visible_samples[-1]
        ],
        updates=sampling_updates
    )

    image_data = np.zeros(
        (29 * n_samples + 1, 29 * n_chains - 1),
        dtype='uint8'
    )
    for idx in range(n_samples):
        # generate `plot_every` intermediate samples that we discard,
        # because successive samples in the chain are too correlated
        vis_activations, vis_sample = sample_fn()
        # Formatted as one string so the output is identical under the
        # Python 2 print statement and the Python 3 print function.
        print(' ... plotting sample  %d' % idx)
        image_data[29 * idx:29 * idx + 28, :] = tile_raster_images(
            X=vis_activations,
            img_shape=(28, 28),
            tile_shape=(1, n_chains),
            tile_spacing=(1, 1)
        )

    image_data = cv2.resize(image_data, dsize=None, fx=2., fy=2.)
    cv2.imshow('sampling', image_data)
    cv2.waitKey(-1)  # block until a key is pressed