def test_rbm_mnist(learning_rate=0.01, training_epochs=10, batch_size=20,
                   n_chains=30, n_samples=5, output_folder=None,
                   isPCD=0, n_hidden=500):
    """
    Demonstrate how to train an RBM and afterwards sample from it using
    Theano. This is demonstrated on MNIST.

    Images are written into `output_folder`: one
    ``filters_at_epoch_<epoch>.png`` per training epoch, then
    ``orig_img.png`` and ``samples.png`` after sampling. The process working
    directory is switched to `output_folder` while those files are written
    and is restored afterwards, even if training or sampling raises.

    :param learning_rate: learning rate used for training the RBM

    :param training_epochs: number of epochs used for training

    :param batch_size: size of a batch used to train the RBM

    :param n_chains: number of parallel Gibbs chains to be used for sampling

    :param n_samples: number of samples to plot for each chain

    :param output_folder: directory the images are saved to (required;
        created when it does not exist yet)

    :param isPCD: forwarded unchanged to the RBM constructor
        (presumably toggles persistent contrastive divergence -- confirm
        against ``rbm_variants``)

    :param n_hidden: passed to the RBM constructor as ``n_hidden``

    e.g.
        test_rbm_mnist(output_folder='/home/eric/Desktop/rbm_plots')
    """
    assert output_folder is not None, 'output_folder must be given'

    # Resolve every import before the working directory changes, so that
    # modules located relative to the starting directory are still found.
    import PIL.Image
    from sklearn.datasets import fetch_mldata
    from visualizer import tile_raster_images
    from xylearn.utils.data_util import get_train_test
    from xylearn.utils.data_normalization import rescale
    from rbm_variants import RBM_Orthogonal as RBM
    # Alternative implementation: from rbm import RBM

    #################################
    #     Data Constructing         #
    #################################
    mnist = fetch_mldata('MNIST original')
    data = get_train_test(rescale(mnist.data), mnist.target,
                          useGPU=1, shuffle=True)
    train_x, _ = data['train']

    train_values = train_x.get_value(borrow=True)
    n_vis = train_values.shape[1]
    print(numpy.linalg.matrix_rank(train_values))

    # Floor division: the batch count feeds xrange() and must stay an int
    # even when true division is in effect.
    n_train_batches = train_values.shape[0] // batch_size

    # construct the RBM class
    rbm = RBM(n_visible=n_vis, n_hidden=n_hidden, isPCD=isPCD)
    train_fn = rbm.get_train_fn(train_x, batch_size)

    if not os.path.isdir(output_folder):
        os.makedirs(output_folder)

    # Remember where we started: chdir('../') would only undo the switch for
    # a single-level relative folder, not for nested or absolute paths.
    original_cwd = os.getcwd()
    os.chdir(output_folder)
    try:
        #################################
        #     Training the RBM          #
        #################################
        plotting_time = 0.
        start_time = time.clock()

        # go through training epochs
        for epoch in xrange(training_epochs):
            # go through the training set, one cost value per mini-batch
            costs = [train_fn(index=batch_index, lr=learning_rate)
                     for batch_index in xrange(n_train_batches)]

            # The double space keeps the output identical to the former
            # two-argument print statement.
            print('Training epoch %d, cost is  %s'
                  % (epoch, numpy.mean(costs)))

            # monitor projected rank
            projection = rbm.project(train_x)
            print('rank: ' + str(numpy.linalg.matrix_rank(projection)))

            # Plot filters after each training epoch; the time spent here is
            # excluded from the reported training time.
            plotting_start = time.clock()
            # W is [n_vis, n_hidden] ([784, 500] with the defaults), so each
            # row of W.T is rendered as one 28x28 tile.
            filters = tile_raster_images(
                X=rbm.W.get_value(borrow=True).T,
                img_shape=(28, 28), tile_shape=(20, 20),
                tile_spacing=(1, 1))
            PIL.Image.fromarray(filters).save(
                'filters_at_epoch_%i.png' % epoch)
            plotting_time += time.clock() - plotting_start

        end_time = time.clock()
        pretraining_time = (end_time - start_time) - plotting_time
        print('Training took %f minutes' % (pretraining_time / 60.))

        #################################
        #     Sampling from the RBM     #
        #################################
        test_idx = 1
        test_x, _ = data['test']
        sample_fn = rbm.get_sampling_fn(test_x, test_idx, n_chains)
        print('... begin sampling')

        # plot initial image first
        orig_img = test_x.get_value(borrow=True)[test_idx:test_idx + 1]
        seed_image = tile_raster_images(
            X=orig_img, img_shape=(28, 28), tile_shape=(1, 1),
            tile_spacing=(1, 1))
        PIL.Image.fromarray(seed_image).save('orig_img.png')

        # create a space to store the image for plotting ( we need to leave
        # room for the tile_spacing as well)
        image_data = numpy.zeros((29 * n_samples + 1, 29 * n_chains - 1),
                                 dtype='uint8')
        for idx in xrange(n_samples):
            # sample_fn returns a pair; only the first element is plotted
            # (presumably the mean-field visible activations -- confirm
            # against rbm.get_sampling_fn).
            vis_mf, _ = sample_fn()
            print(' ... plotting sample  %d' % idx)
            # one row of tiles per sample, one tile per chain
            image_data[29 * idx:29 * idx + 28, :] = tile_raster_images(
                X=vis_mf, img_shape=(28, 28), tile_shape=(1, n_chains),
                tile_spacing=(1, 1))

        # construct image
        PIL.Image.fromarray(image_data).save('samples.png')
    finally:
        os.chdir(original_cwd)

    #################################
    #     Projecting from the RBM   #
    #################################
    projection = rbm.project(train_x)
    print(numpy.linalg.matrix_rank(projection))
def toy_test(learning_rate=0.01, training_epochs=100, batch_size=50,
             output_folder=None, isPCD=0, n_hidden=3):
    """
    Train an RBM on a small hard-coded word-count matrix and print its
    projection and reconstruction.

    The projection is printed before and after training. Training stops
    early as soon as the mean cost of an epoch is >= 0. After every
    completed epoch the weights are saved as
    ``filters_at_epoch_<epoch>.png`` inside `output_folder`. The process
    working directory is switched to `output_folder` for the training and
    reporting phase and is restored afterwards, even on error.

    :param learning_rate: learning rate used for training the RBM

    :param training_epochs: maximum number of epochs used for training

    :param batch_size: size of a batch used to train the RBM; clamped to
        the number of rows of the toy data

    :param output_folder: directory the images are saved to (required;
        created when it does not exist yet)

    :param isPCD: forwarded unchanged to the RBM constructor
        (presumably toggles persistent contrastive divergence -- confirm
        against ``rbm_variants``)

    :param n_hidden: passed to the RBM constructor as ``n_hidden``
    """
    assert output_folder is not None, 'output_folder must be given'

    # Resolve every import before the working directory changes, so that
    # modules located relative to the starting directory are still found.
    import PIL.Image
    from visualizer import tile_raster_images
    from rbm_variants import RBM_Orthogonal as RBM
    # Alternative implementations:
    #   from rbm import RBM
    #   from rbm_variants import PoissonRBM as RBM

    # toy_data, word count vector, [num_terms, num_doc].
    # each cell represents the number of times a term occurs.
    # Note that below every row (term) is used as one training example and
    # every column (document) as one visible unit.
    #                          d1  d2  d3  d4  d5
    toy_data = numpy.asarray([[0, 2, 0, 1, 0],
                              [9, 0, 3, 1, 1],
                              [4, 1, 1, 2, 1],
                              [10, 10, 1, 1, 0],
                              [1, 0, 8, 0, 10],
                              [0, 1, 10, 1, 0],
                              [1, 0, 2, 6, 1],
                              [0, 0, 1, 0, 0],
                              [1, 0, 0, 0, 0],
                              [1, 0, 1, 0, 0],
                              [1, 1, 0, 0, 1],
                              [10, 2, 0, 1, 0],
                              [0, 0, 1, 0, 10],
                              [1, 0, 0, 3, 0],
                              [0, 0, 2, 0, 1],
                              [10, 0, 1, 0, 0],
                              [0, 1, 0, 0, 0],
                              [0, 1, 0, 1, 0],
                              [1, 0, 1, 0, 0],
                              [1, 0, 0, 0, 1],
                              [1, 0, 1, 0, 0],
                              [0, 0, 1, 0, 0]])

    train_x = toSharedX(toy_data, name="toy_data")

    data_shape = train_x.get_value(borrow=True).shape
    n_samples = data_shape[0]
    n_vis = data_shape[1]

    # never ask for a batch larger than the data set
    batch_size = min(batch_size, n_samples)
    # Floor division: the batch count feeds xrange() and must stay an int
    # even when true division is in effect.
    n_train_batches = n_samples // batch_size

    # construct the RBM class
    rbm = RBM(n_visible=n_vis, n_hidden=n_hidden, isPCD=isPCD)
    train_fn = rbm.get_train_fn(train_x, batch_size)

    print("... projecting")
    print(rbm.project(train_x, hidSample=1))

    if not os.path.isdir(output_folder):
        os.makedirs(output_folder)

    # Restore the caller's working directory when done instead of leaving
    # the process inside output_folder.
    original_cwd = os.getcwd()
    os.chdir(output_folder)
    try:
        #################################
        #     Training the RBM          #
        #################################
        plotting_time = 0.
        start_time = time.clock()

        # go through training epochs
        for epoch in xrange(training_epochs):
            # go through the training set, one cost value per mini-batch
            costs = [train_fn(index=batch_index, lr=learning_rate)
                     for batch_index in xrange(n_train_batches)]
            epoch_cost = numpy.mean(costs)

            # The double space keeps the output identical to the former
            # two-argument print statement.
            print('Training epoch %d, cost is  %s' % (epoch, epoch_cost))

            # stop as soon as the mean cost is no longer negative
            if epoch_cost >= 0:
                break

            # Plot filters after each training epoch; the time spent here is
            # excluded from the reported training time.
            plotting_start = time.clock()
            # weight is [n_vis, n_hidden], so among the 'n_hidden' rows of
            # W.T each row corresponds to propdown of one hidden unit and is
            # drawn as a 1 x n_vis strip.
            filters = tile_raster_images(
                X=rbm.W.get_value(borrow=True).T,
                img_shape=(1, n_vis), tile_shape=(n_hidden, 1),
                tile_spacing=(1, 1))
            PIL.Image.fromarray(filters).save(
                'filters_at_epoch_%i.png' % epoch)
            plotting_time += time.clock() - plotting_start

        end_time = time.clock()
        pretraining_time = (end_time - start_time) - plotting_time
        print('Training took %f minutes' % (pretraining_time / 60.))

        print("... projecting")
        print(rbm.project(train_x, hidSample=1))

        print("... reconstructing")
        print(rbm.reconstruct(train_x, showSample=1)
              * train_x.get_value(borrow=True))
    finally:
        os.chdir(original_cwd)