def example_mnist_tap_machine(paysage_path=None, num_epochs=10, show_plot=False):
    """Fit a Bernoulli-Bernoulli RBM with ``fit.tap`` and display the results.

    Args:
        paysage_path: Passed to ``util.default_paths`` to locate the shuffled
            HDF5 data file.
        num_epochs: Number of epochs handed to ``fit.SGD``.
        show_plot: Passed through to the ``util.show_*`` helpers.

    The batch reader is closed even if training or plotting raises.
    """
    num_hidden_units = 256
    batch_size = 100
    learning_rate = schedules.power_law_decay(initial=0.1, coefficient=0.1)

    (_, _, shuffled_filepath) = util.default_paths(paysage_path)

    # set up the reader to get minibatches
    data = batch.HDFBatch(shuffled_filepath,
                          'train/images',
                          batch_size,
                          transform=batch.binarize_color,
                          train_fraction=0.95)
    try:
        # set up the model and initialize the parameters
        vis_layer = layers.BernoulliLayer(data.ncols)
        hid_layer = layers.BernoulliLayer(num_hidden_units)

        rbm = model.Model([vis_layer, hid_layer])
        rbm.initialize(data, 'glorot_normal')

        perf = fit.ProgressMonitor(
            data,
            metrics=['ReconstructionError', 'EnergyDistance', 'HeatCapacity'])

        opt = optimizers.Gradient(stepsize=learning_rate,
                                  tolerance=1e-4,
                                  ascent=True)

        sampler = fit.DrivenSequentialMC.from_batch(rbm, data)

        sgd = fit.SGD(rbm, data, opt, num_epochs,
                      sampler=sampler, method=fit.tap, monitor=perf)

        # fit the model
        print('Training with stochastic gradient ascent using TAP expansion')
        sgd.train()

        # evaluate the model
        util.show_metrics(rbm, perf)
        valid = data.get('validate')
        util.show_reconstructions(rbm, valid, fit, show_plot,
                                  n_recon=10, vertical=False)
        util.show_fantasy_particles(rbm, valid, fit, show_plot, n_fantasy=25)
        util.show_weights(rbm, show_plot, n_weights=25)
    finally:
        # close the HDF5 store, also when something above failed
        data.close()

    print("Done")
def test_grbm_save():
    """Check that a Bernoulli-visible / Gaussian-hidden model can be saved.

    The model is written to a ``pandas.HDFStore`` backed by a temporary file.
    The store is closed in a ``finally`` clause so a failing ``save`` does not
    leave the handle open.
    """
    vis_layer = layers.BernoulliLayer(num_vis)
    hid_layer = layers.GaussianLayer(num_hid)
    grbm = model.Model([vis_layer, hid_layer])

    with tempfile.NamedTemporaryFile() as file:
        store = pandas.HDFStore(file.name, mode='w')
        try:
            grbm.save(store)
        finally:
            store.close()
def example_mnist_deep_rbm(paysage_path=None, num_epochs=10, show_plot=False):
    """Train a three-layer Bernoulli model layerwise and display the results.

    Args:
        paysage_path: Passed to ``util.default_paths`` to locate the shuffled
            HDF5 data file.
        num_epochs: Number of epochs handed to ``fit.SGD``.
        show_plot: Passed through to the ``util.show_*`` helpers.

    The batch reader is closed even if training or plotting raises.
    """
    num_hidden_units = 500
    batch_size = 100
    learning_rate = schedules.power_law_decay(initial=0.01, coefficient=0.1)
    mc_steps = 1

    (_, _, shuffled_filepath) = util.default_paths(paysage_path)

    # set up the reader to get minibatches
    data = batch.HDFBatch(shuffled_filepath,
                          'train/images',
                          batch_size,
                          transform=batch.binarize_color,
                          train_fraction=0.99)
    try:
        # set up the model and initialize the parameters
        vis_layer = layers.BernoulliLayer(data.ncols)
        hid_1_layer = layers.BernoulliLayer(num_hidden_units)
        hid_2_layer = layers.BernoulliLayer(num_hidden_units)

        rbm = model.Model([vis_layer, hid_1_layer, hid_2_layer])
        rbm.initialize(data)

        metrics = [
            'ReconstructionError',
            'EnergyDistance',
            'EnergyGap',
            'EnergyZscore',
            'HeatCapacity',
        ]
        perf = fit.ProgressMonitor(data, metrics=metrics)

        # set up the optimizer and the fit method
        opt = optimizers.ADAM(stepsize=learning_rate)

        sampler = fit.SequentialMC.from_batch(rbm, data)

        cd = fit.SGD(rbm, data, opt, num_epochs,
                     method=fit.pcd,
                     sampler=sampler,
                     mcsteps=mc_steps,
                     monitor=perf)

        # fit the model
        print('training with contrastive divergence')
        cd.train_layerwise()

        # evaluate the model
        util.show_metrics(rbm, perf)
        valid = data.get('validate')
        util.show_reconstructions(rbm, valid, fit, show_plot)
        util.show_fantasy_particles(rbm, valid, fit, show_plot)
        util.show_weights(rbm, show_plot)
    finally:
        # close the HDF5 store, also when something above failed
        data.close()

    print("Done")
def test_grbm_from_config():
    """Round-trip a model through its config and check the config survives."""
    original = model.Model([
        layers.BernoulliLayer(num_vis),
        layers.GaussianLayer(num_hid),
    ])

    original_config = original.get_config()
    rebuilt = model.Model.from_config(original_config)

    # a model built from a config must report that same config back
    assert original_config == rebuilt.get_config()
def example_mnist_grbm(paysage_path=None, num_epochs=10, show_plot=False):
    """Train a Gaussian-visible / Bernoulli-hidden RBM and display the results.

    Args:
        paysage_path: Passed to ``util.default_paths`` to locate the shuffled
            HDF5 data file.
        num_epochs: Number of epochs handed to ``fit.SGD``.
        show_plot: Passed through to the ``util.show_*`` helpers.

    The batch reader is closed even if training or plotting raises.
    """
    num_hidden_units = 500
    batch_size = 50
    learning_rate = 0.001  # gaussian rbm usually requires smaller learning rate
    mc_steps = 1

    (_, _, shuffled_filepath) = util.default_paths(paysage_path)

    # set up the reader to get minibatches
    # NOTE(review): `transform` is not defined in this function; presumably a
    # module-level callable -- confirm it is in scope.
    data = batch.Batch(shuffled_filepath,
                       'train/images',
                       batch_size,
                       transform=transform,
                       train_fraction=0.99)
    try:
        # set up the model and initialize the parameters
        vis_layer = layers.GaussianLayer(data.ncols)
        hid_layer = layers.BernoulliLayer(num_hidden_units)

        rbm = model.Model([vis_layer, hid_layer])
        rbm.initialize(data)

        metrics = [
            'ReconstructionError',
            'EnergyDistance',
            'EnergyGap',
            'EnergyZscore',
        ]
        perf = fit.ProgressMonitor(data, metrics=metrics)

        opt = optimizers.ADAM(stepsize=learning_rate,
                              scheduler=optimizers.PowerLawDecay(0.1))

        sampler = fit.DrivenSequentialMC.from_batch(rbm, data,
                                                    method='stochastic')

        cd = fit.SGD(rbm, data, opt, num_epochs,
                     method=fit.pcd,
                     sampler=sampler,
                     mcsteps=mc_steps,
                     monitor=perf)

        # fit the model
        print('training with contrastive divergence')
        cd.train()

        # evaluate the model
        util.show_metrics(rbm, perf)
        util.show_reconstructions(rbm, data.get('validate'), fit, show_plot)
        util.show_fantasy_particles(rbm, data.get('validate'), fit, show_plot)
        util.show_weights(rbm, show_plot)
    finally:
        # close the HDF5 store, also when something above failed
        data.close()

    print("Done")
def test_state_for_grad_DrivenSequentialMC():
    """Check ``state_for_grad`` of a DrivenSequentialMC sampler with layer 0 clamped.

    For every updater the visible units must be unchanged, the hidden units
    must differ from the input state, and the hidden units must match the
    conditional mean computed directly from the hidden layer.
    """
    n_vis = 100
    n_hid = 50
    n_samples = 25

    # seed the backend random number generator
    be.set_seed()

    # build a Bernoulli-Bernoulli model
    rbm = model.Model([
        layers.BernoulliLayer(n_vis),
        layers.BernoulliLayer(n_hid),
    ])

    # overwrite the intrinsic parameters with random draws
    rbm.layers[0].params.loc[:] = be.randn((n_vis, ))
    rbm.layers[1].params.loc[:] = be.randn((n_hid, ))
    rbm.weights[0].params.matrix[:] = be.randn((n_vis, n_hid))

    # draw a random visible batch and wrap it in a state
    visible_batch = rbm.layers[0].random((n_samples, n_vis))
    initial_state = State.from_visible(visible_batch, rbm)
    scale = State.dropout_rescale(rbm)

    # no dropout was configured, so there must be no rescaling
    assert scale is None

    updaters = ('markov_chain', 'mean_field_iteration',
                'deterministic_iteration')
    for updater_name in updaters:
        # sampler with the visible layer clamped
        sampler = fit.DrivenSequentialMC(rbm, updater=updater_name,
                                         clamped=[0])
        sampler.set_state(initial_state)

        # ask for the state used in the gradient after one step
        grad_state = sampler.state_for_grad(1, scale)

        assert be.allclose(initial_state.units[0], grad_state.units[0]), \
            "visible layer is clamped, and shouldn't get updated: {}".format(updater_name)

        assert not be.allclose(initial_state.units[1], grad_state.units[1]), \
            "hidden layer is not clamped, and should get updated: {}".format(updater_name)

        # reference value: conditional mean from the hidden layer itself
        expected_hidden = rbm.layers[1].conditional_mean(
            rbm._connected_rescaled_units(1, initial_state, scale),
            rbm._connected_weights(1))

        assert be.allclose(expected_hidden, grad_state.units[1]), \
            "hidden layer of grad_state should be conditional mean: {}".format(updater_name)
def test_exponential_derivatives():
    """Compare hand-computed derivatives with the layer and weight ``derivatives`` methods.

    Uses a model made of two ``ExponentialLayer`` objects.
    """
    n_vis = 100
    n_hid = 50
    n_samples = 25

    # seed the backend random number generator
    be.set_seed()

    rbm = model.Model([
        layers.ExponentialLayer(n_vis),
        layers.ExponentialLayer(n_hid),
    ])

    # for the exponential layers, we need a > 0, b > 0, and W < 0
    rbm.layers[0].params.loc[:] = be.rand((n_vis, ))
    rbm.layers[1].params.loc[:] = be.rand((n_hid, ))
    rbm.weights[0].params.matrix[:] = -be.rand((n_vis, n_hid))

    # random visible batch and its rescaled version
    visible = rbm.layers[0].random((n_samples, n_vis))
    visible_scaled = rbm.layers[0].rescale(visible)

    # hidden conditional mean and its rescaled version
    hidden_mean = rbm.layers[1].conditional_mean([visible],
                                                 [rbm.weights[0].W()])
    hidden_mean_scaled = rbm.layers[1].rescale(hidden_mean)

    # reference derivatives computed by hand
    expected_vis_loc = be.mean(visible, axis=0)
    expected_hid_loc = be.mean(hidden_mean_scaled, axis=0)
    expected_matrix = -be.batch_outer(visible, hidden_mean_scaled) / len(visible)

    # the same derivatives from the layer and weight objects
    vis_derivs = rbm.layers[0].derivatives(visible,
                                           [hidden_mean_scaled],
                                           [rbm.weights[0].W_T()])
    hid_derivs = rbm.layers[1].derivatives(hidden_mean,
                                           [visible_scaled],
                                           [rbm.weights[0].W()])
    weight_derivs = rbm.weights[0].derivatives(visible, hidden_mean_scaled)

    assert be.allclose(expected_vis_loc, vis_derivs.loc), \
        "derivative of visible loc wrong in exponential-exponential rbm"

    assert be.allclose(expected_hid_loc, hid_derivs.loc), \
        "derivative of hidden loc wrong in exponential-exponential rbm"

    assert be.allclose(expected_matrix, weight_derivs.matrix), \
        "derivative of weights wrong in exponential-exponential rbm"
def run(paysage_path=None, num_epochs=10, show_plot=False):
    """Train a Bernoulli-visible / Gaussian-hidden model and display the results.

    The hidden layer's ``loc`` and ``log_var`` parameters are marked as fixed
    via ``set_fixed_params`` before the model is built.

    Args:
        paysage_path: Passed to ``util.default_paths`` to locate the shuffled
            HDF5 data file.
        num_epochs: Number of epochs handed to ``fit.SGD``.
        show_plot: Passed through to the ``util.show_*`` helpers.

    The batch reader is closed even if training or plotting raises.
    """
    num_hidden_units = 500
    batch_size = 100
    learning_rate = schedules.PowerLawDecay(initial=0.001, coefficient=0.1)
    mc_steps = 1

    (_, _, shuffled_filepath) = util.default_paths(paysage_path)

    # set up the reader to get minibatches
    data = batch.HDFBatch(shuffled_filepath,
                          'train/images',
                          batch_size,
                          transform=pre.binarize_color,
                          train_fraction=0.99)
    try:
        # set up the model and initialize the parameters
        vis_layer = layers.BernoulliLayer(data.ncols)
        hid_layer = layers.GaussianLayer(num_hidden_units)
        hid_layer.set_fixed_params(["loc", "log_var"])

        rbm = model.Model([vis_layer, hid_layer])
        rbm.initialize(data, method="glorot_normal")

        metrics = ['ReconstructionError', 'EnergyDistance', 'EnergyGap',
                   'EnergyZscore', 'HeatCapacity', 'WeightSparsity',
                   'WeightSquare']
        perf = fit.ProgressMonitor(data, metrics=metrics)

        # set up the optimizer and the fit method
        opt = optimizers.ADAM(stepsize=learning_rate)

        sampler = fit.DrivenSequentialMC.from_batch(rbm, data)

        cd = fit.SGD(rbm, data, opt, num_epochs, sampler,
                     method=fit.pcd, mcsteps=mc_steps, monitor=perf)

        # fit the model
        print('training with contrastive divergence')
        cd.train()

        # evaluate the model
        util.show_metrics(rbm, perf)
        valid = data.get('validate')
        util.show_reconstructions(rbm, valid, fit, show_plot,
                                  n_recon=10, vertical=False, num_to_avg=10)
        util.show_fantasy_particles(rbm, valid, fit, show_plot, n_fantasy=25)
        util.show_weights(rbm, show_plot, n_weights=25)
    finally:
        # close the HDF5 store, also when something above failed
        data.close()

    print("Done")
def test_random_grad():
    """Smoke test: ``gu.random_grad`` runs on a Bernoulli-Bernoulli model."""
    n_vis, n_hid = 100, 50

    # seed the backend random number generator
    be.set_seed()

    # build the model the gradient object is created for
    rbm = model.Model([
        layers.BernoulliLayer(n_vis),
        layers.BernoulliLayer(n_hid),
    ])

    # only checks that the call succeeds; the result is discarded
    gu.random_grad(rbm)
def test_clamped_SequentialMC():
    """Check that a SequentialMC sampler leaves a clamped visible layer alone.

    For every updater, one ``update_state`` call must keep the visible units
    equal to the input state while changing the hidden units.
    """
    n_vis = 100
    n_hid = 50
    n_samples = 25
    n_steps = 1

    # seed the backend random number generator
    be.set_seed()

    # build a Bernoulli-Bernoulli model
    rbm = model.Model([
        layers.BernoulliLayer(n_vis),
        layers.BernoulliLayer(n_hid),
    ])

    # overwrite the intrinsic parameters with random draws
    rbm.layers[0].params.loc[:] = be.randn((n_vis, ))
    rbm.layers[1].params.loc[:] = be.randn((n_hid, ))
    rbm.weights[0].params.matrix[:] = be.randn((n_vis, n_hid))

    # draw a random visible batch and wrap it in a state
    visible_batch = rbm.layers[0].random((n_samples, n_vis))
    initial_state = State.from_visible(visible_batch, rbm)
    scale = State.dropout_rescale(rbm)

    # no dropout was configured, so there must be no rescaling
    assert scale is None

    updaters = ('markov_chain', 'mean_field_iteration',
                'deterministic_iteration')
    for updater_name in updaters:
        # sampler with the visible layer clamped
        sampler = fit.SequentialMC(rbm, updater=updater_name, clamped=[0])
        sampler.set_state(initial_state)

        # advance the sampler and inspect its state
        sampler.update_state(n_steps, scale)

        assert be.allclose(initial_state.units[0], sampler.state.units[0]), \
            "visible layer is clamped, and shouldn't get updated: {}".format(updater_name)

        assert not be.allclose(initial_state.units[1], sampler.state.units[1]), \
            "hidden layer is not clamped, and should get updated: {}".format(updater_name)
def test_exponential_conditional_params():
    """Compare hand-computed conditional rates with ``_conditional_params``.

    Uses a model made of two ``ExponentialLayer`` objects.
    """
    n_vis = 100
    n_hid = 50
    n_samples = 25

    # seed the backend random number generator
    be.set_seed()

    rbm = model.Model([
        layers.ExponentialLayer(n_vis),
        layers.ExponentialLayer(n_hid),
    ])

    # for the exponential layers, we need a > 0, b > 0, and W < 0
    vis_loc = be.rand((n_vis, ))
    hid_loc = be.rand((n_hid, ))
    coupling = -be.rand((n_vis, n_hid))

    rbm.layers[0].params.loc[:] = vis_loc
    rbm.layers[1].params.loc[:] = hid_loc
    rbm.weights[0].params.matrix[:] = coupling

    # random batches for both layers
    visible = rbm.layers[0].random((n_samples, n_vis))
    hidden = rbm.layers[1].random((n_samples, n_hid))

    # reference rates computed by hand
    expected_hidden_rate = -be.dot(visible, coupling)
    expected_hidden_rate += be.broadcast(hid_loc, expected_hidden_rate)

    expected_visible_rate = -be.dot(hidden, be.transpose(coupling))
    expected_visible_rate += be.broadcast(vis_loc, expected_visible_rate)

    # the same rates from the layer objects
    layer_hidden_rate = rbm.layers[1]._conditional_params(
        [visible], [rbm.weights[0].W()])
    layer_visible_rate = rbm.layers[0]._conditional_params(
        [hidden], [rbm.weights[0].W_T()])

    assert be.allclose(expected_hidden_rate, layer_hidden_rate), \
        "hidden rate wrong in exponential-exponential rbm"

    assert be.allclose(expected_visible_rate, layer_visible_rate), \
        "visible rate wrong in exponential-exponential rbm"
def test_bernoulli_conditional_params():
    """Compare hand-computed conditional fields with ``_conditional_params``.

    Uses a model made of two ``BernoulliLayer`` objects.
    """
    n_vis = 100
    n_hid = 50
    n_samples = 25

    # seed the backend random number generator
    be.set_seed()

    rbm = model.Model([
        layers.BernoulliLayer(n_vis),
        layers.BernoulliLayer(n_hid),
    ])

    # random intrinsic parameters
    vis_loc = be.randn((n_vis, ))
    hid_loc = be.randn((n_hid, ))
    coupling = be.randn((n_vis, n_hid))

    rbm.layers[0].params.loc[:] = vis_loc
    rbm.layers[1].params.loc[:] = hid_loc
    rbm.weights[0].params.matrix[:] = coupling

    # random batches for both layers
    visible = rbm.layers[0].random((n_samples, n_vis))
    hidden = rbm.layers[1].random((n_samples, n_hid))

    # reference fields computed by hand
    expected_hidden_field = be.dot(visible, coupling)
    expected_hidden_field += hid_loc

    expected_visible_field = be.dot(hidden, be.transpose(coupling))
    expected_visible_field += vis_loc

    # the same fields from the layer objects
    layer_hidden_field = rbm.layers[1]._conditional_params(
        [visible], [rbm.weights[0].W()])
    layer_visible_field = rbm.layers[0]._conditional_params(
        [hidden], [rbm.weights[0].W_T()])

    assert be.allclose(expected_hidden_field, layer_hidden_field), \
        "hidden field wrong in bernoulli-bernoulli rbm"

    assert be.allclose(expected_visible_field, layer_visible_field), \
        "visible field wrong in bernoulli-bernoulli rbm"
def test_bernoulli_GFE_derivatives():
    """Line-search check on ``grad_TAP_free_energy`` for a three-layer Bernoulli model.

    A copy of the model is updated with the gradient scaled by ``-step``. The
    step is halved while the Gibbs free energy of the copy is below the
    reference value. The test fails once the step drops under 1e-6 and the
    free energy is still below the reference.
    """
    num_units = 500

    rbm = model.Model([layers.BernoulliLayer(num_units) for _ in range(3)])

    # small random weights, random layer locations
    for weight in rbm.weights:
        weight.params.matrix[:] = 0.01 * be.randn(weight.shape)

    for layer in rbm.layers:
        layer.params.loc[:] = be.rand_like(layer.params.loc)

    # reference free energy of the unmodified model
    reference_state = rbm.compute_StateTAP(init_lr=0.1, tol=1e-7, max_iters=50)
    reference_gfe = rbm.gibbs_free_energy(reference_state)

    step = 0.1
    grad = rbm.grad_TAP_free_energy(0.1, 1e-7, 50)

    while True:
        trial = deepcopy(rbm)
        scale_by_step = partial(be.tmul, -step)
        trial.parameter_update(gu.grad_apply(scale_by_step, grad))

        trial_state = trial.compute_StateTAP(init_lr=0.1, tol=1e-7,
                                             max_iters=50)
        trial_gfe = trial.gibbs_free_energy(trial_state)

        regress = trial_gfe - reference_gfe < 0.0
        print(step, trial_gfe, reference_gfe, trial_gfe - reference_gfe, regress)

        if not regress:
            break

        if step < 1e-6:
            assert False, \
                "TAP FE gradient is not working properly for Bernoulli models"
            # only reached when assertions are disabled (python -O)
            break

        step *= 0.5
def test_grbm_reload():
    """Save a model, load it back, and check both give the same visible units.

    The model is written to and read from a ``pandas.HDFStore`` backed by a
    temporary file. Each store is closed in a ``finally`` clause so a failing
    save or load does not leave the handle open.
    """
    vis_layer = layers.BernoulliLayer(num_vis)
    hid_layer = layers.GaussianLayer(num_hid)

    grbm = model.Model([vis_layer, hid_layer])

    with tempfile.NamedTemporaryFile() as file:
        # save the model
        store = pandas.HDFStore(file.name, mode='w')
        try:
            grbm.save(store)
        finally:
            store.close()

        # reload
        store = pandas.HDFStore(file.name, mode='r')
        try:
            grbm_reload = model.Model.from_saved(store)
        finally:
            store.close()

    # check the two models are consistent: one deterministic iteration from
    # the same starting state must give matching visible units
    vis_data = vis_layer.random((num_samples, num_vis))
    data_state = model.State.from_visible(vis_data, grbm)
    vis_orig = grbm.deterministic_iteration(1, data_state).units[0]
    vis_reload = grbm_reload.deterministic_iteration(1, data_state).units[0]
    assert be.allclose(vis_orig, vis_reload)
def test_bernoulli_GFE_magnetization_gradient():
    """Line-search check on ``_TAP_magnetization_grad`` for a four-layer Bernoulli model.

    The cumulant means of a copied state are moved by ``step * grad``. The
    step is halved while the Gibbs free energy of the copy is below the
    reference value. The test fails once the step drops under 1e-6 and the
    free energy is still below the reference.
    """
    num_units = 500

    rbm = model.Model([layers.BernoulliLayer(num_units) for _ in range(4)])

    # small random weights, random layer locations
    for weight in rbm.weights:
        weight.params.matrix[:] = 0.01 * be.randn(weight.shape)

    for layer in rbm.layers:
        layer.params.loc[:] = be.rand_like(layer.params.loc)

    # reference state and its free energy
    state = mu.StateTAP.from_model_rand(rbm)
    reference_gfe = rbm.gibbs_free_energy(state)

    step = 0.001
    grad = rbm._TAP_magnetization_grad(state)

    while True:
        trial = deepcopy(state)
        for layer_idx in range(rbm.num_layers):
            trial.cumulants[layer_idx].mean[:] = (
                state.cumulants[layer_idx].mean + step * grad[layer_idx].mean)

        next_gfe = rbm.gibbs_free_energy(trial)
        regress = next_gfe - reference_gfe < 0.0

        if not regress:
            break

        if step < 1e-6:
            assert False,\
                "Bernoulli GFE magnetization gradient is wrong"
            # only reached when assertions are disabled (python -O)
            break

        step *= 0.5
def test_independent():
    """
    Sample from a two-layer model whose weight matrix is all zeros, so the
    two layers are independent of each other.

    Note:
        Quantities estimated by *sampling* are compared with quantities
        computed from the model. The test can therefore fail for a small
        batch_size or strict tolerances even when everything works properly.

    """
    n_vis = 20
    n_hid = 10
    n_samples = 1000
    n_steps = 100
    mean_tol = 0.1
    corr_tol = 0.2

    # seed the backend random number generator
    be.set_seed()

    for layer_type in (layers.BernoulliLayer, layers.GaussianLayer):
        # build a model with both layers of the same type
        rbm = model.Model([layer_type(n_vis), layer_type(n_hid)])

        # random locations, zero coupling between the layers
        rbm.layers[0].params.loc[:] = be.rand((n_vis, ))
        rbm.layers[1].params.loc[:] = be.rand((n_hid, ))
        rbm.weights[0].params.matrix[:] = be.zeros((n_vis, n_hid))

        if layer_type == layers.GaussianLayer:
            # Gaussian layers also get random log variances
            vis_log_var = be.randn((n_vis, ))
            hid_log_var = be.randn((n_hid, ))
            rbm.layers[0].params.log_var[:] = vis_log_var
            rbm.layers[1].params.log_var[:] = hid_log_var

        # initial state and dropout rescaling
        state = State.from_model(n_samples, rbm)
        dropout = State.dropout_rescale(rbm)

        # run the markov chain
        state = rbm.markov_chain(n_steps, state, dropout)

        # per-layer means: from the samples and from the model
        state_for_moments = State.from_model(1, rbm)
        sample_mean = [
            be.mean(state.units[i], axis=0) for i in range(len(state.units))
        ]
        model_mean = [
            rbm.layers[i].conditional_mean(
                rbm._connected_rescaled_units(i, state_for_moments, dropout),
                rbm._connected_weights(i))
            for i in range(rbm.num_layers)
        ]

        # the two sets of means must agree within tolerance
        for i in range(rbm.num_layers):
            assert be.allclose(sample_mean[i], model_mean[i][0],
                               rtol=mean_tol, atol=mean_tol), \
                "{0} {1}: sample mean does not match model mean".format(
                    layer_type, i)

        # cross correlation between the layers must stay small
        vis_units, hid_units = state.units[0], state.units[1]
        crosscov = be.cov(vis_units, hid_units)
        norm = be.outer(be.std(vis_units, axis=0), be.std(hid_units, axis=0))
        crosscorr = be.divide(norm, crosscov)
        assert be.tmax(be.tabs(crosscorr)) < corr_tol, \
            "{} cross correlation too large".format(layer_type)
def test_conditional_sampling():
    """
    Sample one layer conditioned on the state of the other layer and compare
    the sample moments with the layer's conditional parameters.

    Note:
        Quantities estimated by *sampling* are compared with quantities
        computed from the model. The test can therefore fail for a small
        number of steps or strict tolerances even when everything works
        properly.

    """
    n_vis = 20
    n_hid = 10
    n_steps = 1000
    mean_tol = 0.1

    # seed the backend random number generator
    be.set_seed()

    for layer_type in (layers.BernoulliLayer, layers.GaussianLayer):
        # build a model with both layers of the same type
        rbm = model.Model([layer_type(n_vis), layer_type(n_hid)])

        # random locations and a random coupling matrix
        rbm.layers[0].params.loc[:] = be.rand((n_vis, ))
        rbm.layers[1].params.loc[:] = be.rand((n_hid, ))
        rbm.weights[0].params.matrix[:] = 10 * be.rand((n_vis, n_hid))

        if layer_type == layers.GaussianLayer:
            # Gaussian layers also get random log variances
            vis_log_var = be.randn((n_vis, ))
            hid_log_var = be.randn((n_hid, ))
            rbm.layers[0].params.log_var[:] = vis_log_var
            rbm.layers[1].params.log_var[:] = hid_log_var

        # initial state and dropout rescaling
        state = State.from_model(1, rbm)
        dropout = State.dropout_rescale(rbm)

        # accumulate moments over repeated conditional samples of layer 0
        moments = mu.MeanVarianceArrayCalculator()
        for _ in range(n_steps):
            sample = rbm.layers[0].conditional_sample(
                rbm._connected_rescaled_units(0, state, dropout),
                rbm._connected_weights(0))
            moments.update(sample)

        expected_mean = rbm.layers[0].conditional_mean(
            rbm._connected_rescaled_units(0, state, dropout),
            rbm._connected_weights(0))

        assert be.allclose(moments.mean, expected_mean[0],
                           rtol=mean_tol, atol=mean_tol), \
            "{} conditional mean".format(layer_type)

        if layer_type == layers.GaussianLayer:
            # for Gaussian layers also compare the standard deviation
            _, expected_var = rbm.layers[0]._conditional_params(
                rbm._connected_rescaled_units(0, state, dropout),
                rbm._connected_weights(0))

            assert be.allclose(be.sqrt(moments.var), be.sqrt(expected_var[0]),
                               rtol=mean_tol, atol=mean_tol), \
                "{} conditional standard deviation".format(layer_type)
def test_grbm_config():
    """Smoke test: ``get_config`` runs on a Bernoulli-visible / Gaussian-hidden model."""
    grbm = model.Model([
        layers.BernoulliLayer(num_vis),
        layers.GaussianLayer(num_hid),
    ])

    # only checks that the call succeeds; the config is discarded
    grbm.get_config()
def test_hopfield_construction():
    """Smoke test: a Bernoulli-visible / Gaussian-hidden model can be constructed."""
    visible = layers.BernoulliLayer(num_vis)
    hidden = layers.GaussianLayer(num_hid)

    # only checks that construction succeeds; the model is not used further
    model.Model([visible, hidden])
def test_rmb_construction():
    """Smoke test: a Bernoulli-Bernoulli model can be constructed."""
    visible = layers.BernoulliLayer(num_vis)
    hidden = layers.BernoulliLayer(num_hid)

    # only checks that construction succeeds; the model is not used further
    model.Model([visible, hidden])
def test_rbm(paysage_path=None):
    """Train a small Bernoulli-Bernoulli RBM for one epoch and check it improved.

    The ``ReconstructionError`` reported by the progress monitor after
    training must be lower than the value obtained before training.

    Args:
        paysage_path: Directory containing ``mnist/mnist.h5``. When falsy, the
            grandparent directory of this file is used.

    Raises:
        IOError: If ``mnist/mnist.h5`` does not exist under ``paysage_path``.

    The batch reader is closed even if training or the final assertion fails.
    """
    num_hidden_units = 50
    batch_size = 50
    num_epochs = 1
    learning_rate = schedules.PowerLawDecay(initial=0.01, coefficient=0.1)
    mc_steps = 1

    if not paysage_path:
        paysage_path = os.path.dirname(
            os.path.dirname(os.path.abspath(__file__)))
    filepath = os.path.join(paysage_path, 'mnist', 'mnist.h5')

    if not os.path.exists(filepath):
        raise IOError(
            "{} does not exist. run mnist/download_mnist.py to fetch from the web"
            .format(filepath))

    shuffled_filepath = os.path.join(paysage_path, 'mnist',
                                     'shuffled_mnist.h5')

    # shuffle the data, unless a shuffled copy already exists
    if not os.path.exists(shuffled_filepath):
        shuffler = batch.DataShuffler(filepath, shuffled_filepath, complevel=0)
        shuffler.shuffle()

    # set a seed for the random number generator
    be.set_seed()

    # set up the reader to get minibatches
    data = batch.HDFBatch(shuffled_filepath,
                          'train/images',
                          batch_size,
                          transform=pre.binarize_color,
                          train_fraction=0.99)
    try:
        # set up the model and initialize the parameters
        vis_layer = layers.BernoulliLayer(data.ncols)
        hid_layer = layers.BernoulliLayer(num_hidden_units)

        rbm = model.Model([vis_layer, hid_layer])
        rbm.initialize(data)

        # obtain initial estimate of the reconstruction error
        perf = fit.ProgressMonitor(data, metrics=['ReconstructionError'])
        untrained_performance = perf.check_progress(rbm)

        # set up the optimizer and the fit method
        opt = optimizers.RMSProp(stepsize=learning_rate)

        sampler = fit.DrivenSequentialMC.from_batch(rbm, data)

        cd = fit.SGD(rbm, data, opt, num_epochs, sampler,
                     method=fit.pcd, mcsteps=mc_steps, monitor=perf)

        # fit the model
        print('training with contrastive divergence')
        cd.train()

        # obtain an estimate of the reconstruction error after 1 epoch
        trained_performance = perf.check_progress(rbm)

        assert (trained_performance['ReconstructionError'] <
                untrained_performance['ReconstructionError']), \
            "Reconstruction error did not decrease"
    finally:
        # close the HDF5 store, also when something above failed
        data.close()
def run(paysage_path=None, num_epochs=10, show_plot=False):
    """Train a four-layer Bernoulli model with ``fit.LayerwisePretrain`` and display the results.

    Weight-norm histograms are shown before and after training, and a
    ``logdet_penalty`` is attached to the second and third weight matrices.

    Args:
        paysage_path: Passed to ``util.default_paths`` to locate the shuffled
            HDF5 data file.
        num_epochs: Number of epochs handed to ``fit.LayerwisePretrain``.
        show_plot: Passed through to the ``util`` plotting helpers that
            accept it.

    The batch reader is closed even if training or plotting raises.
    """
    from math import sqrt

    num_hidden_units = 100
    batch_size = 100
    learning_rate = schedules.PowerLawDecay(initial=0.012, coefficient=0.1)
    mc_steps = 1

    (_, _, shuffled_filepath) = util.default_paths(paysage_path)

    # set up the reader to get minibatches
    data = batch.HDFBatch(shuffled_filepath,
                          'train/images',
                          batch_size,
                          transform=pre.binarize_color,
                          train_fraction=0.95)
    try:
        # set up the model and initialize the parameters
        vis_layer = layers.BernoulliLayer(data.ncols)
        hid_1_layer = layers.BernoulliLayer(num_hidden_units)
        hid_2_layer = layers.BernoulliLayer(num_hidden_units)
        hid_3_layer = layers.BernoulliLayer(num_hidden_units)

        rbm = model.Model([vis_layer, hid_1_layer, hid_2_layer, hid_3_layer])
        rbm.initialize(data, method='glorot_normal')

        print("Norms of the weights before training")
        util.weight_norm_histogram(rbm, show_plot=show_plot)

        # small penalties prevent the weights from consolidating
        rbm.weights[1].add_penalty({'matrix': pen.logdet_penalty(0.001)})
        rbm.weights[2].add_penalty({'matrix': pen.logdet_penalty(0.001)})

        metrics = [
            'ReconstructionError',
            'EnergyDistance',
            'EnergyGap',
            'EnergyZscore',
            'HeatCapacity',
            'WeightSparsity',
            'WeightSquare',
        ]
        perf = fit.ProgressMonitor(data, metrics=metrics)

        # set up the optimizer and the fit method
        opt = optimizers.ADAM(stepsize=learning_rate)
        cd = fit.LayerwisePretrain(rbm, data, opt, num_epochs,
                                   method=fit.pcd,
                                   mcsteps=mc_steps,
                                   metrics=metrics)

        # fit the model
        print('training with persistent contrastive divergence')
        cd.train()

        # evaluate the model
        util.show_metrics(rbm, perf)
        valid = data.get('validate')
        util.show_reconstructions(rbm, valid, fit, show_plot, num_to_avg=10)
        util.show_fantasy_particles(rbm, valid, fit, show_plot)

        # 28 for the visible layer, then sqrt(num_hidden_units) once per
        # weight matrix
        dim = tuple([28] + [int(sqrt(num_hidden_units))
                            for _ in range(rbm.num_weights)])
        util.show_weights(rbm, show_plot, dim=dim, n_weights=16)
        util.show_one_hot_reconstructions(rbm, fit, dim=28, n_recon=16,
                                          num_to_avg=1)

        print("Norms of the weights after training")
        util.weight_norm_histogram(rbm, show_plot=show_plot)
    finally:
        # close the HDF5 store, also when something above failed
        data.close()

    print("Done")
def run(paysage_path=None, num_epochs=10, show_plot=False):
    """Train a Bernoulli-Bernoulli RBM from an in-memory batch and display the results.

    The ``train/images`` table is read with ``pandas.read_hdf``, converted to
    a backend float tensor, passed through ``pre.binarize_color`` and handed
    to ``batch.InMemoryBatch``. An ``l2_penalty`` is attached to the weight
    matrix.

    Args:
        paysage_path: Passed to ``util.default_paths`` to locate the shuffled
            HDF5 data file.
        num_epochs: Number of epochs handed to ``fit.SGD``.
        show_plot: Passed through to the ``util.show_*`` helpers.

    The batch object is closed even if training or plotting raises.
    """
    import pandas

    num_hidden_units = 256
    batch_size = 100
    learning_rate = schedules.PowerLawDecay(initial=0.01, coefficient=0.1)
    mc_steps = 1

    (_, _, shuffled_filepath) = util.default_paths(paysage_path)

    # set up the reader to get minibatches
    # `.values` replaces `DataFrame.as_matrix()`, which was deprecated in
    # pandas 0.23 and removed in pandas 1.0
    images = pandas.read_hdf(shuffled_filepath, key='train/images').values
    data = batch.InMemoryBatch(pre.binarize_color(be.float_tensor(images)),
                               batch_size,
                               train_fraction=0.95)
    try:
        # set up the model and initialize the parameters
        vis_layer = layers.BernoulliLayer(data.ncols)
        hid_layer = layers.BernoulliLayer(num_hidden_units)

        rbm = model.Model([vis_layer, hid_layer])
        rbm.weights[0].add_penalty({'matrix': pen.l2_penalty(0.001)})
        rbm.initialize(data, method='glorot_normal')

        metrics = [
            'ReconstructionError',
            'EnergyDistance',
            'EnergyGap',
            'EnergyZscore',
            'HeatCapacity',
            'WeightSparsity',
            'WeightSquare',
        ]
        perf = fit.ProgressMonitor(data, metrics=metrics)

        # set up the optimizer and the fit method
        opt = optimizers.ADAM(stepsize=learning_rate)

        sampler = fit.DrivenSequentialMC.from_batch(rbm, data)

        cd = fit.SGD(rbm, data, opt, num_epochs, sampler,
                     method=fit.pcd, mcsteps=mc_steps, monitor=perf)

        # fit the model
        print('training with contrastive divergence')
        cd.train()

        # evaluate the model
        util.show_metrics(rbm, perf)
        valid = data.get('validate')
        util.show_reconstructions(rbm, valid, fit, show_plot,
                                  n_recon=10, vertical=False, num_to_avg=10)
        util.show_fantasy_particles(rbm, valid, fit, show_plot, n_fantasy=25)
        util.show_weights(rbm, show_plot, n_weights=25)
    finally:
        # close the batch object, also when something above failed
        data.close()

    print("Done")
def test_gaussian_derivatives():
    """Compare hand-computed derivatives with the layer and weight ``derivatives`` methods.

    Uses a model made of two ``GaussianLayer`` objects. The log-variance
    derivatives are compared with a looser absolute tolerance (1e-01) than
    the other quantities.
    """
    n_vis = 100
    n_hid = 50
    n_samples = 25

    # seed the backend random number generator
    be.set_seed()

    rbm = model.Model([
        layers.GaussianLayer(n_vis),
        layers.GaussianLayer(n_hid),
    ])

    # random intrinsic parameters
    vis_loc = be.randn((n_vis, ))
    hid_loc = be.randn((n_hid, ))
    vis_log_var = 0.1 * be.randn((n_vis, ))
    hid_log_var = 0.1 * be.randn((n_hid, ))
    coupling = be.randn((n_vis, n_hid))

    rbm.layers[0].params.loc[:] = vis_loc
    rbm.layers[1].params.loc[:] = hid_loc
    rbm.layers[0].params.log_var[:] = vis_log_var
    rbm.layers[1].params.log_var[:] = hid_log_var
    rbm.weights[0].params.matrix[:] = coupling

    # random visible batch, scaled by the visible variance
    visible = rbm.layers[0].random((n_samples, n_vis))
    vis_var = be.exp(vis_log_var)
    visible_scaled = visible / vis_var

    # hidden conditional mean and its rescaled version
    hidden_mean = rbm.layers[1].conditional_mean([visible_scaled],
                                                 [rbm.weights[0].W()])
    hid_var = be.exp(hid_log_var)
    hidden_mean_scaled = rbm.layers[1].rescale(hidden_mean)

    # reference derivatives for the visible layer
    expected_vis_loc = -be.mean(visible_scaled, axis=0)
    expected_vis_logvar = -0.5 * be.mean(
        be.square(be.subtract(vis_loc, visible)), axis=0)
    expected_vis_logvar += be.batch_dot(
        hidden_mean_scaled, be.transpose(coupling), visible,
        axis=0) / len(visible)
    expected_vis_logvar /= vis_var

    # reference derivatives for the hidden layer
    expected_hid_loc = -be.mean(hidden_mean_scaled, axis=0)
    expected_hid_logvar = -0.5 * be.mean(
        be.square(hidden_mean - hid_loc), axis=0)
    expected_hid_logvar += be.batch_dot(
        visible_scaled, coupling, hidden_mean, axis=0) / len(hidden_mean)
    expected_hid_logvar /= hid_var

    # reference derivative for the weights
    expected_matrix = -be.batch_outer(
        visible_scaled, hidden_mean_scaled) / len(visible_scaled)

    # the same derivatives from the layer and weight objects
    vis_derivs = rbm.layers[0].derivatives(visible,
                                           [hidden_mean_scaled],
                                           [rbm.weights[0].W_T()])
    hid_derivs = rbm.layers[1].derivatives(hidden_mean,
                                           [visible_scaled],
                                           [rbm.weights[0].W()])
    weight_derivs = rbm.weights[0].derivatives(visible_scaled,
                                               hidden_mean_scaled)

    assert be.allclose(expected_vis_loc, vis_derivs[0].loc), \
        "derivative of visible loc wrong in gaussian-gaussian rbm"

    assert be.allclose(expected_hid_loc, hid_derivs[0].loc), \
        "derivative of hidden loc wrong in gaussian-gaussian rbm"

    assert be.allclose(expected_vis_logvar, vis_derivs[0].log_var,
                       rtol=1e-05, atol=1e-01), \
        "derivative of visible log_var wrong in gaussian-gaussian rbm"

    assert be.allclose(expected_hid_logvar, hid_derivs[0].log_var,
                       rtol=1e-05, atol=1e-01), \
        "derivative of hidden log_var wrong in gaussian-gaussian rbm"

    assert be.allclose(expected_matrix, weight_derivs[0].matrix), \
        "derivative of weights wrong in gaussian-gaussian rbm"
def test_gaussian_conditional_params():
    """Compare hand-computed rescaling, means and variances with the layer methods.

    Uses a model made of two ``GaussianLayer`` objects and checks both
    ``rescale`` and ``_conditional_params``.
    """
    n_vis = 100
    n_hid = 50
    n_samples = 25

    # seed the backend random number generator
    be.set_seed()

    rbm = model.Model([
        layers.GaussianLayer(n_vis),
        layers.GaussianLayer(n_hid),
    ])

    # random intrinsic parameters
    vis_loc = be.randn((n_vis, ))
    hid_loc = be.randn((n_hid, ))
    vis_log_var = 0.1 * be.randn((n_vis, ))
    hid_log_var = 0.1 * be.randn((n_hid, ))
    coupling = be.randn((n_vis, n_hid))

    rbm.layers[0].params.loc[:] = vis_loc
    rbm.layers[1].params.loc[:] = hid_loc
    rbm.layers[0].params.log_var[:] = vis_log_var
    rbm.layers[1].params.log_var[:] = hid_log_var
    rbm.weights[0].params.matrix[:] = coupling

    # random batches for both layers
    visible = rbm.layers[0].random((n_samples, n_vis))
    hidden = rbm.layers[1].random((n_samples, n_hid))

    # variances implied by the log variances
    expected_vis_var = be.exp(vis_log_var)
    expected_hid_var = be.exp(hid_log_var)

    # data divided by the variance of its own layer
    visible_scaled = visible / expected_vis_var
    hidden_scaled = hidden / expected_hid_var

    # the layers' own rescale must agree with the manual division
    assert be.allclose(visible_scaled, rbm.layers[0].rescale(visible)),\
        "visible rescale wrong in gaussian-gaussian rbm"

    assert be.allclose(hidden_scaled, rbm.layers[1].rescale(hidden)),\
        "hidden rescale wrong in gaussian-gaussian rbm"

    # reference means computed by hand
    expected_hid_mean = be.dot(visible_scaled, coupling)  # (n_samples, n_hid)
    expected_hid_mean += hid_loc

    expected_vis_mean = be.dot(hidden_scaled, be.transpose(coupling))  # (n_samples, n_vis)
    expected_vis_mean += vis_loc

    # the same parameters from the layer objects
    layer_vis_mean, layer_vis_var = rbm.layers[0]._conditional_params(
        [hidden_scaled], [rbm.weights[0].W_T()])
    layer_hid_mean, layer_hid_var = rbm.layers[1]._conditional_params(
        [visible_scaled], [rbm.weights[0].W()])

    assert be.allclose(expected_vis_var, layer_vis_var),\
        "visible variance wrong in gaussian-gaussian rbm"

    assert be.allclose(expected_hid_var, layer_hid_var),\
        "hidden variance wrong in gaussian-gaussian rbm"

    assert be.allclose(expected_vis_mean, layer_vis_mean),\
        "visible mean wrong in gaussian-gaussian rbm"

    assert be.allclose(expected_hid_mean, layer_hid_mean),\
        "hidden mean wrong in gaussian-gaussian rbm"