def example_mnist_rbm(paysage_path=None, show_plot=False):
    num_hidden_units = 500
    batch_size = 50
    num_epochs = 10
    learning_rate = 0.01
    mc_steps = 1

    (_, _, shuffled_filepath) = util.default_paths(paysage_path)

    # set up the reader to get minibatches
    data = batch.Batch(shuffled_filepath,
                       'train/images',
                       batch_size,
                       transform=batch.binarize_color,
                       train_fraction=0.99)

    # set up the model and initialize the parameters
    vis_layer = layers.BernoulliLayer(data.ncols)
    hid_layer = layers.BernoulliLayer(num_hidden_units)

    rbm = hidden.Model([vis_layer, hid_layer])
    rbm.initialize(data)

    # set up the optimizer and the fit method
    opt = optimizers.ADAM(rbm,
                          stepsize=learning_rate,
                          scheduler=optimizers.PowerLawDecay(0.1))

    sampler = fit.DrivenSequentialMC.from_batch(rbm, data,
                                                method='stochastic')

    cd = fit.PCD(rbm, data, opt, sampler,
                 num_epochs, mcsteps=mc_steps, skip=200,
                 metrics=[M.ReconstructionError(),
                          M.EnergyDistance(),
                          M.EnergyGap(),
                          M.EnergyZscore()])

    # fit the model
    print('training with contrastive divergence')
    cd.train()

    # evaluate the model
    # this will be the same as the final epoch results
    # it is repeated here to be consistent with the sklearn rbm example
    metrics = [M.ReconstructionError(), M.EnergyDistance(),
               M.EnergyGap(), M.EnergyZscore()]
    performance = fit.ProgressMonitor(0, data, metrics=metrics)

    util.show_metrics(rbm, performance)
    util.show_reconstructions(rbm, data.get('validate'), fit, show_plot)
    util.show_fantasy_particles(rbm, data.get('validate'), fit, show_plot)
    util.show_weights(rbm, show_plot)

    # close the HDF5 store
    data.close()

    print("Done")
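# A rough numpy analogue of the binarize_color transform used above, for
# readers unfamiliar with it. The exact threshold is an assumption for
# illustration; the paysage implementation may differ in detail.
import numpy as np

def binarize_color_sketch(tensor):
    """Map 8-bit grayscale pixels to {0, 1} so a BernoulliLayer can model them."""
    return np.float32(tensor > 127)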
def test_exponential_derivatives():
    num_visible_units = 100
    num_hidden_units = 50
    batch_size = 25

    # set a seed for the random number generator
    be.set_seed()

    # set up some layer and model objects
    vis_layer = layers.ExponentialLayer(num_visible_units)
    hid_layer = layers.ExponentialLayer(num_hidden_units)
    rbm = hidden.Model([vis_layer, hid_layer])

    # randomly set the intrinsic model parameters
    # for the exponential layers, we need a > 0, b > 0, and W < 0
    a = be.rand((num_visible_units,))
    b = be.rand((num_hidden_units,))
    W = -be.rand((num_visible_units, num_hidden_units))

    rbm.layers[0].int_params.loc[:] = a
    rbm.layers[1].int_params.loc[:] = b
    rbm.weights[0].int_params.matrix[:] = W

    # generate a random batch of data
    vdata = rbm.layers[0].random((batch_size, num_visible_units))
    vdata_scaled = rbm.layers[0].rescale(vdata)

    # compute the mean of the hidden layer
    rbm.layers[1].update([vdata], [rbm.weights[0].W()])
    hid_mean = rbm.layers[1].mean()
    hid_mean_scaled = rbm.layers[1].rescale(hid_mean)

    # compute the derivatives
    d_visible_loc = be.mean(vdata, axis=0)
    d_hidden_loc = be.mean(hid_mean_scaled, axis=0)
    d_W = -be.batch_outer(vdata, hid_mean_scaled) / len(vdata)

    # compute the derivatives using the layer functions
    vis_derivs = rbm.layers[0].derivatives(vdata,
                                           [hid_mean_scaled],
                                           [rbm.weights[0].W()])
    hid_derivs = rbm.layers[1].derivatives(hid_mean,
                                           [vdata_scaled],
                                           [rbm.weights[0].W_T()])
    weight_derivs = rbm.weights[0].derivatives(vdata, hid_mean_scaled)

    assert be.allclose(d_visible_loc, vis_derivs.loc), \
        "derivative of visible loc wrong in exponential-exponential rbm"

    assert be.allclose(d_hidden_loc, hid_derivs.loc), \
        "derivative of hidden loc wrong in exponential-exponential rbm"

    assert be.allclose(d_W, weight_derivs.matrix), \
        "derivative of weights wrong in exponential-exponential rbm"
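# A short worked note on the test above. The sign conventions are read off
# from the reference gradients in the code, not from paysage documentation:
# with energy E(v, h) = a.v + b.h - v'Wh, the parameter gradients are
#
#     dE/da = v,    dE/db = h,    dE/dW = -vh',
#
# which, averaged over the batch, are exactly d_visible_loc, d_hidden_loc,
# and d_W. The constraints a > 0, b > 0, W < 0 keep the conditional rates
# a - Wh and b - W'v positive, as exponential distributions require.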
def test_bernoulli_derivatives():
    num_visible_units = 100
    num_hidden_units = 50
    batch_size = 25

    # set a seed for the random number generator
    be.set_seed()

    # set up some layer and model objects
    vis_layer = layers.BernoulliLayer(num_visible_units)
    hid_layer = layers.BernoulliLayer(num_hidden_units)
    rbm = hidden.Model([vis_layer, hid_layer])

    # randomly set the intrinsic model parameters
    a = be.randn((num_visible_units,))
    b = be.randn((num_hidden_units,))
    W = be.randn((num_visible_units, num_hidden_units))

    rbm.layers[0].int_params.loc[:] = a
    rbm.layers[1].int_params.loc[:] = b
    rbm.weights[0].int_params.matrix[:] = W

    # generate a random batch of data
    vdata = rbm.layers[0].random((batch_size, num_visible_units))
    vdata_scaled = rbm.layers[0].rescale(vdata)

    # compute the mean of the hidden layer
    rbm.layers[1].update([vdata], [rbm.weights[0].W()])
    hid_mean = rbm.layers[1].mean()
    hid_mean_scaled = rbm.layers[1].rescale(hid_mean)

    # compute the derivatives
    d_visible_loc = -be.mean(vdata, axis=0)
    d_hidden_loc = -be.mean(hid_mean_scaled, axis=0)
    d_W = -be.batch_outer(vdata, hid_mean_scaled) / len(vdata)

    # compute the derivatives using the layer functions
    # (argument and attribute conventions follow the other tests in this
    # module: connected units and weights are passed as lists, and the
    # derivative containers are accessed by attribute)
    vis_derivs = rbm.layers[0].derivatives(vdata,
                                           [hid_mean_scaled],
                                           [rbm.weights[0].W()])
    hid_derivs = rbm.layers[1].derivatives(hid_mean,
                                           [vdata_scaled],
                                           [rbm.weights[0].W_T()])
    weight_derivs = rbm.weights[0].derivatives(vdata, hid_mean_scaled)

    assert be.allclose(d_visible_loc, vis_derivs.loc), \
        "derivative of visible loc wrong in bernoulli-bernoulli rbm"

    assert be.allclose(d_hidden_loc, hid_derivs.loc), \
        "derivative of hidden loc wrong in bernoulli-bernoulli rbm"

    assert be.allclose(d_W, weight_derivs.matrix), \
        "derivative of weights wrong in bernoulli-bernoulli rbm"
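# For the Bernoulli case the signs flip relative to the exponential test:
# with energy E(v, h) = -a.v - b.h - v'Wh, the reference gradients are the
# negative batch means and the negative average outer product. A minimal
# numpy sketch of those reference gradients (illustrative names only; this
# assumes be.batch_outer(v, h) is the batch-summed outer product of v and h):
import numpy as np

def bernoulli_reference_derivs(v, h):
    """Reference gradients for a bernoulli-bernoulli rbm, E = -a.v - b.h - v'Wh."""
    d_loc_visible = -v.mean(axis=0)       # dE/da, averaged over the batch
    d_loc_hidden = -h.mean(axis=0)        # dE/db, averaged over the batch
    d_matrix = -np.dot(v.T, h) / len(v)   # dE/dW, averaged over the batch
    return d_loc_visible, d_loc_hidden, d_matrix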
def test_exponential_update():
    num_visible_units = 100
    num_hidden_units = 50
    batch_size = 25

    # set a seed for the random number generator
    be.set_seed()

    # set up some layer and model objects
    vis_layer = layers.ExponentialLayer(num_visible_units)
    hid_layer = layers.ExponentialLayer(num_hidden_units)
    rbm = hidden.Model([vis_layer, hid_layer])

    # randomly set the intrinsic model parameters
    # for the exponential layers, we need a > 0, b > 0, and W < 0
    a = be.rand((num_visible_units,))
    b = be.rand((num_hidden_units,))
    W = -be.rand((num_visible_units, num_hidden_units))

    rbm.layers[0].int_params.loc[:] = a
    rbm.layers[1].int_params.loc[:] = b
    rbm.weights[0].int_params.matrix[:] = W

    # generate a random batch of data
    vdata = rbm.layers[0].random((batch_size, num_visible_units))
    hdata = rbm.layers[1].random((batch_size, num_hidden_units))

    # compute extrinsic parameters
    hidden_rate = -be.dot(vdata, W)  # (batch_size, num_hidden_units)
    hidden_rate += be.broadcast(b, hidden_rate)

    visible_rate = -be.dot(hdata, be.transpose(W))  # (batch_size, num_visible_units)
    visible_rate += be.broadcast(a, visible_rate)

    # update the extrinsic parameters using the layer functions
    rbm.layers[1].update([vdata], [rbm.weights[0].W()])
    rbm.layers[0].update([hdata], [rbm.weights[0].W_T()])

    assert be.allclose(hidden_rate, rbm.layers[1].ext_params.rate), \
        "hidden rate wrong in exponential-exponential rbm"

    assert be.allclose(visible_rate, rbm.layers[0].ext_params.rate), \
        "visible rate wrong in exponential-exponential rbm"
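# The update test above checks conditional (extrinsic) parameters rather than
# gradients. Under the energy convention noted after
# test_exponential_derivatives, p(h | v) is exponential with rate b - W'v and
# p(v | h) is exponential with rate a - Wh; those are the two rate tensors the
# update() calls are asserted to reproduce.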
def test_bernoulli_update():
    num_visible_units = 100
    num_hidden_units = 50
    batch_size = 25

    # set a seed for the random number generator
    be.set_seed()

    # set up some layer and model objects
    vis_layer = layers.BernoulliLayer(num_visible_units)
    hid_layer = layers.BernoulliLayer(num_hidden_units)
    rbm = hidden.Model([vis_layer, hid_layer])

    # randomly set the intrinsic model parameters
    a = be.randn((num_visible_units,))
    b = be.randn((num_hidden_units,))
    W = be.randn((num_visible_units, num_hidden_units))

    rbm.layers[0].int_params.loc[:] = a
    rbm.layers[1].int_params.loc[:] = b
    rbm.weights[0].int_params.matrix[:] = W

    # generate a random batch of data
    vdata = rbm.layers[0].random((batch_size, num_visible_units))
    hdata = rbm.layers[1].random((batch_size, num_hidden_units))

    # compute extrinsic parameters
    hidden_field = be.dot(vdata, W)  # (batch_size, num_hidden_units)
    hidden_field += be.broadcast(b, hidden_field)

    visible_field = be.dot(hdata, be.transpose(W))  # (batch_size, num_visible_units)
    visible_field += be.broadcast(a, visible_field)

    # update the extrinsic parameters using the layer functions
    rbm.layers[0].update([hdata], [rbm.weights[0].W_T()])
    rbm.layers[1].update([vdata], [rbm.weights[0].W()])

    assert be.allclose(hidden_field, rbm.layers[1].ext_params.field), \
        "hidden field wrong in bernoulli-bernoulli rbm"

    assert be.allclose(visible_field, rbm.layers[0].ext_params.field), \
        "visible field wrong in bernoulli-bernoulli rbm"
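# The Bernoulli "field" is the logit of the conditional distribution:
# p(h_j = 1 | v) = sigmoid(b_j + (W'v)_j), and symmetrically for the visible
# units. A numpy sketch of the fields checked above (the sigmoid step is
# standard RBM algebra, added here for context; it is not part of the test):
import numpy as np

def bernoulli_fields(v, h, a, b, W):
    """Conditional logits for a bernoulli-bernoulli rbm."""
    hidden_field = np.dot(v, W) + b     # logits of p(h = 1 | v)
    visible_field = np.dot(h, W.T) + a  # logits of p(v = 1 | h)
    return hidden_field, visible_field

def sigmoid(field):
    """Turn a field into a conditional mean."""
    return 1.0 / (1.0 + np.exp(-field))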
def test_gaussian_derivatives():
    num_visible_units = 100
    num_hidden_units = 50
    batch_size = 25

    # set a seed for the random number generator
    be.set_seed()

    # set up some layer and model objects
    vis_layer = layers.GaussianLayer(num_visible_units)
    hid_layer = layers.GaussianLayer(num_hidden_units)
    rbm = hidden.Model([vis_layer, hid_layer])

    # randomly set the intrinsic model parameters
    a = be.randn((num_visible_units,))
    b = be.randn((num_hidden_units,))
    log_var_a = 0.1 * be.randn((num_visible_units,))
    log_var_b = 0.1 * be.randn((num_hidden_units,))
    W = be.randn((num_visible_units, num_hidden_units))

    rbm.layers[0].int_params.loc[:] = a
    rbm.layers[1].int_params.loc[:] = b
    rbm.layers[0].int_params.log_var[:] = log_var_a
    rbm.layers[1].int_params.log_var[:] = log_var_b
    rbm.weights[0].int_params.matrix[:] = W

    # generate a random batch of data
    vdata = rbm.layers[0].random((batch_size, num_visible_units))
    visible_var = be.exp(log_var_a)
    vdata_scaled = vdata / be.broadcast(visible_var, vdata)

    # compute the mean of the hidden layer
    rbm.layers[1].update([vdata_scaled], [rbm.weights[0].W()])
    hidden_var = be.exp(log_var_b)
    hid_mean = rbm.layers[1].mean()
    hid_mean_scaled = rbm.layers[1].rescale(hid_mean)

    # compute the derivatives
    d_vis_loc = -be.mean(vdata_scaled, axis=0)

    d_vis_logvar = -0.5 * be.mean(be.square(be.subtract(a, vdata)), axis=0)
    d_vis_logvar += be.batch_dot(hid_mean_scaled,
                                 be.transpose(W),
                                 vdata,
                                 axis=0) / len(vdata)
    d_vis_logvar /= visible_var

    d_hid_loc = -be.mean(hid_mean_scaled, axis=0)

    d_hid_logvar = -0.5 * be.mean(be.square(hid_mean - be.broadcast(b, hid_mean)),
                                  axis=0)
    d_hid_logvar += be.batch_dot(vdata_scaled,
                                 W,
                                 hid_mean,
                                 axis=0) / len(hid_mean)
    d_hid_logvar /= hidden_var

    d_W = -be.batch_outer(vdata_scaled, hid_mean_scaled) / len(vdata_scaled)

    # compute the derivatives using the layer functions
    rbm.layers[1].update([vdata_scaled], [rbm.weights[0].W()])
    rbm.layers[0].update([hid_mean_scaled], [rbm.weights[0].W_T()])

    vis_derivs = rbm.layers[0].derivatives(vdata,
                                           [hid_mean_scaled],
                                           [rbm.weights[0].W()])
    hid_derivs = rbm.layers[1].derivatives(hid_mean,
                                           [vdata_scaled],
                                           [rbm.weights[0].W_T()])
    weight_derivs = rbm.weights[0].derivatives(vdata_scaled, hid_mean_scaled)

    assert be.allclose(d_vis_loc, vis_derivs.loc), \
        "derivative of visible loc wrong in gaussian-gaussian rbm"

    assert be.allclose(d_hid_loc, hid_derivs.loc), \
        "derivative of hidden loc wrong in gaussian-gaussian rbm"

    assert be.allclose(d_vis_logvar, vis_derivs.log_var, rtol=1e-05, atol=1e-01), \
        "derivative of visible log_var wrong in gaussian-gaussian rbm"

    assert be.allclose(d_hid_logvar, hid_derivs.log_var, rtol=1e-05, atol=1e-01), \
        "derivative of hidden log_var wrong in gaussian-gaussian rbm"

    assert be.allclose(d_W, weight_derivs.matrix), \
        "derivative of weights wrong in gaussian-gaussian rbm"
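# Two conventions in the test above are worth spelling out. First, rescale()
# for a Gaussian layer divides by the variance exp(log_var), so vdata_scaled
# is v / sigma_v^2 (this convention is asserted directly in
# test_gaussian_update below). Second, the log_var derivatives combine a
# squared-deviation term with a visible/hidden coupling term, scaled by the
# inverse variance, and are checked with a looser absolute tolerance
# (atol=1e-01) than the loc and weight derivatives.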
def test_gaussian_update():
    num_visible_units = 100
    num_hidden_units = 50
    batch_size = 25

    # set a seed for the random number generator
    be.set_seed()

    # set up some layer and model objects
    vis_layer = layers.GaussianLayer(num_visible_units)
    hid_layer = layers.GaussianLayer(num_hidden_units)
    rbm = hidden.Model([vis_layer, hid_layer])

    # randomly set the intrinsic model parameters
    a = be.randn((num_visible_units,))
    b = be.randn((num_hidden_units,))
    log_var_a = 0.1 * be.randn((num_visible_units,))
    log_var_b = 0.1 * be.randn((num_hidden_units,))
    W = be.randn((num_visible_units, num_hidden_units))

    rbm.layers[0].int_params.loc[:] = a
    rbm.layers[1].int_params.loc[:] = b
    rbm.layers[0].int_params.log_var[:] = log_var_a
    rbm.layers[1].int_params.log_var[:] = log_var_b
    rbm.weights[0].int_params.matrix[:] = W

    # generate a random batch of data
    vdata = rbm.layers[0].random((batch_size, num_visible_units))
    hdata = rbm.layers[1].random((batch_size, num_hidden_units))

    # compute the variance
    visible_var = be.exp(log_var_a)
    hidden_var = be.exp(log_var_b)

    # rescale the data
    vdata_scaled = vdata / be.broadcast(visible_var, vdata)
    hdata_scaled = hdata / be.broadcast(hidden_var, hdata)

    # test rescale
    assert be.allclose(vdata_scaled, rbm.layers[0].rescale(vdata)), \
        "visible rescale wrong in gaussian-gaussian rbm"

    assert be.allclose(hdata_scaled, rbm.layers[1].rescale(hdata)), \
        "hidden rescale wrong in gaussian-gaussian rbm"

    # compute the mean
    hidden_mean = be.dot(vdata_scaled, W)  # (batch_size, num_hidden_units)
    hidden_mean += be.broadcast(b, hidden_mean)

    visible_mean = be.dot(hdata_scaled, be.transpose(W))  # (batch_size, num_visible_units)
    visible_mean += be.broadcast(a, visible_mean)

    # update the extrinsic parameters using the layer functions
    rbm.layers[0].update([hdata_scaled], [rbm.weights[0].W_T()])
    rbm.layers[1].update([vdata_scaled], [rbm.weights[0].W()])

    assert be.allclose(visible_var, rbm.layers[0].ext_params.variance), \
        "visible variance wrong in gaussian-gaussian rbm"

    assert be.allclose(hidden_var, rbm.layers[1].ext_params.variance), \
        "hidden variance wrong in gaussian-gaussian rbm"

    assert be.allclose(visible_mean, rbm.layers[0].ext_params.mean), \
        "visible mean wrong in gaussian-gaussian rbm"

    assert be.allclose(hidden_mean, rbm.layers[1].ext_params.mean), \
        "hidden mean wrong in gaussian-gaussian rbm"
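# A numpy sketch of the conditional parameters verified above (illustrative
# names, using the same rescale-by-variance convention as the test):
# p(h | v) is Gaussian with mean b + W'(v / sigma_v^2) and per-unit variance
# exp(log_var_b), and the visible conditional is symmetric.
import numpy as np

def gaussian_conditionals(v, b, log_var_a, log_var_b, W):
    """Mean and variance of p(h | v) implied by the assertions above."""
    visible_var = np.exp(log_var_a)
    hidden_mean = np.dot(v / visible_var, W) + b
    hidden_var = np.exp(log_var_b)
    return hidden_mean, hidden_var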
def test_rbm(paysage_path=None):
    """TODO : this is just a placeholder; need to clean up & simplify setup.
    Also need to figure out how to deal with consistent random seeding
    throughout the codebase to obtain deterministic, checkable results.
    """
    num_hidden_units = 50
    batch_size = 50
    num_epochs = 1
    learning_rate = 0.01
    mc_steps = 1

    if not paysage_path:
        paysage_path = os.path.dirname(
            os.path.dirname(os.path.abspath(__file__)))
    filepath = os.path.join(paysage_path, 'mnist', 'mnist.h5')

    if not os.path.exists(filepath):
        raise IOError(
            "{} does not exist. run mnist/download_mnist.py to fetch from the web"
            .format(filepath))

    shuffled_filepath = os.path.join(paysage_path, 'mnist', 'shuffled_mnist.h5')

    # shuffle the data
    if not os.path.exists(shuffled_filepath):
        shuffler = batch.DataShuffler(filepath, shuffled_filepath, complevel=0)
        shuffler.shuffle()

    # set a seed for the random number generator
    be.set_seed()

    # set up the reader to get minibatches
    data = batch.Batch(shuffled_filepath,
                       'train/images',
                       batch_size,
                       transform=batch.binarize_color,
                       train_fraction=0.99)

    # set up the model and initialize the parameters
    vis_layer = layers.BernoulliLayer(data.ncols)
    hid_layer = layers.BernoulliLayer(num_hidden_units)

    rbm = hidden.Model([vis_layer, hid_layer])
    rbm.initialize(data)

    # obtain initial estimate of the reconstruction error
    perf = fit.ProgressMonitor(0, data, metrics=[M.ReconstructionError()])
    untrained_performance = perf.check_progress(rbm, 0)

    # set up the optimizer and the fit method
    opt = optimizers.RMSProp(rbm,
                             stepsize=learning_rate,
                             scheduler=optimizers.PowerLawDecay(0.1))

    sampler = fit.DrivenSequentialMC.from_batch(rbm, data,
                                                method='stochastic')

    cd = fit.PCD(rbm, data, opt, sampler,
                 num_epochs, mcsteps=mc_steps, skip=200,
                 metrics=[M.ReconstructionError()])

    # fit the model
    print('training with contrastive divergence')
    cd.train()

    # obtain an estimate of the reconstruction error after 1 epoch
    trained_performance = perf.check_progress(rbm, 0)

    assert (trained_performance['ReconstructionError'] <
            untrained_performance['ReconstructionError']), \
        "Reconstruction error did not decrease"

    # close the HDF5 store
    data.close()
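# Direct-invocation sketch (an assumption about how this file is used; in
# practice a runner such as pytest would collect the test_* functions above):
if __name__ == "__main__":
    test_rbm()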