def reconstruct(autoencoder_in=None, dataset='mnist.pkl.gz', batch_size=8):
    # Load the parameters of a trained model (unless an autoencoder is passed in),
    # take a batch of test cases, run them through the trained autoencoder, and plot
    # the reconstructions next to the originals to see how the autoencoder
    # generalizes to an arbitrary case.
    if not autoencoder_in:
        W, hbias, vbias = load_parameters()
        autoencoder_in = autoencoder(W=W, hbias=hbias, vbias=vbias)
    test_set_x, test_set_y = load_data(dataset)[2]

    x = T.matrix('x')
    index = T.iscalar('index')

    # Symbolic forward pass: compute the pre-activation of the output, apply the
    # activation stored in layer_info[0] to get the reconstruction, and compute the
    # reconstruction error on the test batch.
    pre_output = autoencoder_in.test_prop(input=x, params=autoencoder_in.params)
    reconstruction = autoencoder_in.layer_info[0](pre_output)
    error = autoencoder_in.gradient_reconstruction_error(input=x, phase='test',
                                                         params=autoencoder_in.params)

    sgd_test = theano.function(
        [index],
        [reconstruction, error],
        givens={x: test_set_x[index * batch_size: (index + 1) * batch_size]},
        name='sgd_test'
    )

    # Use batch 1 of the test set (examples 8..15 with the default batch_size of 8).
    original = test_set_x.get_value(borrow=True)[1 * batch_size: 2 * batch_size]
    reconstructed, error = sgd_test(1)
    print 'Reconstruction error is %.3f' % error

    # Stack originals on top of reconstructions, one flattened 28 x 28 image per row.
    images = np.append(original, reconstructed).reshape((batch_size * 2, 28 * 28))
    print images.shape[0], images.shape[1]
    images = Image.fromarray(
        tile_raster_images(
            X=images,
            img_shape=(28, 28),
            tile_shape=(2, batch_size),
            tile_spacing=(2, 2)
        )
    )
    images.save('autoencoder_reconstructed_images.png')
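# load_parameters() above is defined elsewhere in this repo. As a rough sketch of what
# it is assumed to do -- unpickle the three parameter files written by train_RBM() below
# and wrap them in Theano shared variables -- a hypothetical minimal version could look
# like this (illustration only, not the canonical helper):
def load_parameters_sketch():
    values = []
    for fname in ('RBM_weights.save', 'RBM_hbias.save', 'RBM_vbias.save'):
        with open(fname, 'rb') as f:
            values.append(pickle.load(f))
    W, hbias, vbias = [theano.shared(value, borrow=True) for value in values]
    return W, hbias, vbias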
def Sample(dataset="mnist.pkl.gz", random_initialization=True, sample_every=1000, no_samples=1):
    # Draw samples from a trained RBM using a persistent Gibbs chain.
    # random_initialization: start the chain from uniform noise if True,
    #                        otherwise from a randomly chosen test example.
    # sample_every: int, number of Gibbs steps between consecutive samples.
    # no_samples: int, how many samples to take.
    RBMin = RBM(resume=True)
    datasets = load_data(dataset)
    test_set_x, test_set_y = datasets[2]  # choose the test set
    rng = np.random.RandomState()

    if random_initialization:
        chain_start = theano.shared(
            rng.uniform(low=0.0, high=1.0, size=(28 * 28,)).astype("float32")
        )
    else:
        # Start the chain from a randomly picked test example.
        chain_start = theano.shared(
            test_set_x.get_value(borrow=True)[
                np.floor(28 * 28 * rng.uniform(low=0.0, high=5.0)).astype("int32")
            ]
        )

    # Run sample_every rounds of the Gibbs chain v -> h -> v before taking a sample.
    ([h0_pre, h0_mean, h0, v1_pre, v1_mean, v1], updates) = theano.scan(
        RBMin.GS_vhv,
        outputs_info=[None, None, None, None, chain_start, None],
        n_steps=sample_every
    )
    # Make the chain persistent: the next call continues from the last visible state.
    updates[chain_start] = v1_mean[-1]
    GS = theano.function([], [v1_mean[-1], v1[-1]], updates=updates, name="Gibbs Sampler")

    # Plot the samples. Each call to GS() returns the mean-field image and the binary
    # sample; flatten and reshape so that each row is one flattened 28 x 28 image.
    start_time = timeit.default_timer()
    images = np.array(
        [GS() for i in range(no_samples)], "float32"
    ).flatten().reshape((2 * no_samples, 28 * 28))
    images = Image.fromarray(
        tile_raster_images(X=images, img_shape=(28, 28), tile_shape=(no_samples, 2), tile_spacing=(1, 5))
    )
    images.save("RBM_generated_samples.png")
    end_time = timeit.default_timer()
    print "Sampling took %f minutes" % ((end_time - start_time) / 60.0)
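# Illustrative call, shown only as a sketch: it assumes train_RBM() below has already
# been run, so that RBM(resume=True) can reload the pickled parameters.
#
#   Sample(random_initialization=True, sample_every=1000, no_samples=10)
#
# Each of the 10 rows of 'RBM_generated_samples.png' then shows the mean-field image
# and the binary sample taken 1000 Gibbs steps after the previous row, so consecutive
# rows are only weakly correlated.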
def train_RBM(
    RBMin=None,
    lr=0.01,
    lr_decay=0.1,
    momentum=0.9,
    improvement_ratio=0.95,
    batch_size=20,
    dataset="mnist.pkl.gz",
    epochs=15,
    n_hidden=500,
    n_chains=20,
    n_samples=10,
):
    # Train an RBM with CD-k and a simple learning-rate decay schedule.
    # n_chains: int, number of parallel Gibbs chains to use for sampling
    # n_samples: int, number of samples to plot for each chain
    lr_init = lr
    if not RBMin:
        RBMin = RBM()

    datasets = load_data(dataset)
    train_set_x, train_set_y = datasets[0]
    test_set_x, test_set_y = datasets[2]

    index = T.iscalar()
    x = T.matrix()
    xent, updates = CD_k(RBMin, input=x, lr=lr)
    optimize = theano.function(
        [index],
        xent,
        updates=updates,
        givens={x: train_set_x[index * batch_size: (index + 1) * batch_size]},
        name="optimize",
    )

    n_batch = train_set_x.get_value().shape[0] // batch_size
    train_error = np.array([], "float64")
    learning_rate = []

    start_time = timeit.default_timer()
    for epoch in range(epochs):
        print "epoch %d:" % epoch, "\n"
        for iter in range(n_batch):
            error = optimize(iter)
            if iter % 250 == 0:
                # Print the reconstruction error every 250 batches (5,000 examples
                # with batch_size=20) and record it for later plotting.
                print "Reconstruction error at iteration %d:" % (iter * batch_size), error
                train_error = np.append(train_error, error)

        # Check whether the reconstruction error of the last epoch has improved:
        # compare the mean over the last 5 recorded errors (the last 25,000 examples)
        # with the mean over the 5 recorded errors from 50,000 examples earlier. If it
        # has improved, keep the current learning rate; otherwise set lr = lr * lr_decay.
        # Save the learning rate for each epoch.
        print "\n", "learning rate for epoch %d: %f" % (epoch, lr), "\n"
        learning_rate.append(lr)
        if epoch > 0:
            lag = int(50000.0 / (250 * batch_size))
            if train_error[-5:].mean() > improvement_ratio * train_error[-5 - lag: -lag].mean():
                lr = lr * lr_decay
                # Rebuild the CD-k updates and recompile the training function so the
                # new learning rate actually takes effect.
                xent, updates = CD_k(RBMin, input=x, lr=lr)
                optimize = theano.function(
                    [index],
                    xent,
                    updates=updates,
                    givens={x: train_set_x[index * batch_size: (index + 1) * batch_size]},
                    name="optimize",
                )

        # Save the model parameters and the training error after each epoch.
        f = open("RBM_weights.save", "wb")
        pickle.dump(RBMin.W.get_value(borrow=True), f, protocol=pickle.HIGHEST_PROTOCOL)
        f.close()
        f = open("RBM_hbias.save", "wb")
        pickle.dump(RBMin.hbias.get_value(borrow=True), f, protocol=pickle.HIGHEST_PROTOCOL)
        f.close()
        f = open("RBM_vbias.save", "wb")
        pickle.dump(RBMin.vbias.get_value(borrow=True), f, protocol=pickle.HIGHEST_PROTOCOL)
        f.close()
        f = open("train_error.save", "wb")
        pickle.dump(train_error, f, protocol=pickle.HIGHEST_PROTOCOL)
        f.close()
        f = open("learning_rate.save", "wb")
        pickle.dump(learning_rate, f, protocol=pickle.HIGHEST_PROTOCOL)
        f.close()

        # Plot the weights at each epoch.
        image = Image.fromarray(
            tile_raster_images(
                X=RBMin.W.get_value(borrow=True).T,
                img_shape=(28, 28),
                tile_shape=(20, 20),
                tile_spacing=(1, 1),
            )
        )
        image.save("Weights_at_epoch_%d.png" % epoch)

        # Stop training if the learning rate becomes too low;
        # this means the objective is simply not improving.
        if lr <= lr_init * 0.001:
            break

    # Plot the training curve.
    _, axis = pylab.subplots()
    grid = np.arange(len(train_error))
    axis.plot(grid, train_error)
    pylab.show()

    end_time = timeit.default_timer()
    pretraining_time = end_time - start_time
    print "Pretraining took %f minutes" % (pretraining_time / 60.0)
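# A rough end-to-end driver, included only as a sketch of how the routines in this file
# are assumed to fit together (train first so the *.save files exist, then sample and
# reconstruct from the saved parameters):
if __name__ == '__main__':
    train_RBM(lr=0.01, batch_size=20, epochs=15)   # writes RBM_*.save and per-epoch weight images
    Sample(sample_every=1000, no_samples=10)       # writes 'RBM_generated_samples.png'
    reconstruct(batch_size=8)                      # writes 'autoencoder_reconstructed_images.png'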