def example_mnist_tap_machine(paysage_path=None, num_epochs=10, show_plot=False):
    num_hidden_units = 256
    batch_size = 100
    learning_rate = schedules.power_law_decay(initial=0.1, coefficient=0.1)

    (_, _, shuffled_filepath) = \
            util.default_paths(paysage_path)

    # set up the reader to get minibatches
    data = batch.HDFBatch(shuffled_filepath,
                          'train/images',
                          batch_size,
                          transform=batch.binarize_color,
                          train_fraction=0.95)

    # set up the model and initialize the parameters
    vis_layer = layers.BernoulliLayer(data.ncols)
    hid_layer = layers.BernoulliLayer(num_hidden_units)

    rbm = model.Model([vis_layer, hid_layer])
    rbm.initialize(data, 'glorot_normal')

    perf = fit.ProgressMonitor(data,
                               metrics=['ReconstructionError',
                                        'EnergyDistance',
                                        'HeatCapacity'])

    opt = optimizers.Gradient(stepsize=learning_rate,
                              tolerance=1e-4,
                              ascent=True)

    sampler = fit.DrivenSequentialMC.from_batch(rbm, data)

    sgd = fit.SGD(rbm, data, opt, num_epochs,
                  sampler=sampler,
                  method=fit.tap,
                  monitor=perf)

    # fit the model
    print('Training with stochastic gradient ascent using TAP expansion')
    sgd.train()

    util.show_metrics(rbm, perf)
    valid = data.get('validate')
    util.show_reconstructions(rbm, valid, fit, show_plot,
                              n_recon=10, vertical=False)
    util.show_fantasy_particles(rbm, valid, fit, show_plot, n_fantasy=25)
    util.show_weights(rbm, show_plot, n_weights=25)

    # close the HDF5 store
    data.close()
    print("Done")
def example_mnist_deep_rbm(paysage_path=None, num_epochs=10, show_plot=False):
    num_hidden_units = 500
    batch_size = 100
    learning_rate = schedules.power_law_decay(initial=0.01, coefficient=0.1)
    mc_steps = 1

    (_, _, shuffled_filepath) = \
            util.default_paths(paysage_path)

    # set up the reader to get minibatches
    data = batch.HDFBatch(shuffled_filepath,
                          'train/images',
                          batch_size,
                          transform=batch.binarize_color,
                          train_fraction=0.99)

    # set up the model and initialize the parameters
    vis_layer = layers.BernoulliLayer(data.ncols)
    hid_1_layer = layers.BernoulliLayer(num_hidden_units)
    hid_2_layer = layers.BernoulliLayer(num_hidden_units)

    rbm = model.Model([vis_layer, hid_1_layer, hid_2_layer])
    rbm.initialize(data)

    metrics = ['ReconstructionError', 'EnergyDistance', 'EnergyGap',
               'EnergyZscore', 'HeatCapacity']
    perf = fit.ProgressMonitor(data, metrics=metrics)

    # set up the optimizer and the fit method
    opt = optimizers.ADAM(stepsize=learning_rate)

    sampler = fit.SequentialMC.from_batch(rbm, data)

    cd = fit.SGD(rbm, data, opt, num_epochs,
                 method=fit.pcd,
                 sampler=sampler,
                 mcsteps=mc_steps,
                 monitor=perf)

    # fit the model
    print('training with contrastive divergence')
    cd.train_layerwise()

    # evaluate the model
    util.show_metrics(rbm, perf)
    valid = data.get('validate')
    util.show_reconstructions(rbm, valid, fit, show_plot)
    util.show_fantasy_particles(rbm, valid, fit, show_plot)
    util.show_weights(rbm, show_plot)

    # close the HDF5 store
    data.close()
    print("Done")
def example_mnist_rbm(paysage_path=None, show_plot=False):
    num_hidden_units = 500
    batch_size = 50
    num_epochs = 10
    learning_rate = 0.01
    mc_steps = 1

    (_, _, shuffled_filepath) = \
            util.default_paths(paysage_path)

    # set up the reader to get minibatches
    data = batch.Batch(shuffled_filepath,
                       'train/images',
                       batch_size,
                       transform=batch.binarize_color,
                       train_fraction=0.99)

    # set up the model and initialize the parameters
    vis_layer = layers.BernoulliLayer(data.ncols)
    hid_layer = layers.BernoulliLayer(num_hidden_units)

    rbm = hidden.Model([vis_layer, hid_layer])
    rbm.initialize(data)

    # set up the optimizer and the fit method
    opt = optimizers.ADAM(rbm,
                          stepsize=learning_rate,
                          scheduler=optimizers.PowerLawDecay(0.1))

    sampler = fit.DrivenSequentialMC.from_batch(rbm, data,
                                                method='stochastic')

    cd = fit.PCD(rbm, data, opt, sampler,
                 num_epochs, mcsteps=mc_steps, skip=200,
                 metrics=[M.ReconstructionError(),
                          M.EnergyDistance(),
                          M.EnergyGap(),
                          M.EnergyZscore()])

    # fit the model
    print('training with contrastive divergence')
    cd.train()

    # evaluate the model
    # this will be the same as the final epoch results
    # it is repeated here to be consistent with the sklearn rbm example
    metrics = [M.ReconstructionError(), M.EnergyDistance(),
               M.EnergyGap(), M.EnergyZscore()]
    performance = fit.ProgressMonitor(0, data, metrics=metrics)

    util.show_metrics(rbm, performance)
    util.show_reconstructions(rbm, data.get('validate'), fit, show_plot)
    util.show_fantasy_particles(rbm, data.get('validate'), fit, show_plot)
    util.show_weights(rbm, show_plot)

    # close the HDF5 store
    data.close()
    print("Done")
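A minimal driver sketch for the example above. It only calls the function as defined, with its own defaults; the `__main__` guard is an assumption about how these example modules are run, and it presumes the shuffled MNIST HDF5 file already exists where `util.default_paths(None)` expects it.

# hypothetical driver; assumes the MNIST data has already been
# downloaded and shuffled so util.default_paths(None) can find it
if __name__ == "__main__":
    example_mnist_rbm(paysage_path=None, show_plot=False)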
def run(pretrain_epochs=5, finetune_epochs=5, fit_method=fit.LayerwisePretrain,
        show_plot=False):
    num_hidden_units = [20**2, 15**2, 10**2]
    batch_size = 100
    mc_steps = 5
    beta_std = 0.6

    # set up the reader to get minibatches
    data = util.create_batch(batch_size,
                             train_fraction=0.95,
                             transform=transform)

    # set up the model and initialize the parameters
    vis_layer = layers.BernoulliLayer(data.ncols)
    hid_layer = [layers.BernoulliLayer(n) for n in num_hidden_units]
    rbm = BoltzmannMachine([vis_layer] + hid_layer)

    # add some penalties
    for c in rbm.connections:
        c.weights.add_penalty({"matrix": pen.l1_adaptive_decay_penalty_2(1e-4)})

    print("Norms of the weights before training")
    util.weight_norm_histogram(rbm, show_plot=show_plot)

    print('pre-training with persistent contrastive divergence')
    cd = fit_method(rbm, data)
    learning_rate = schedules.PowerLawDecay(initial=5e-3, coefficient=1)
    opt = optimizers.ADAM(stepsize=learning_rate)
    cd.train(opt, pretrain_epochs, method=fit.pcd, mcsteps=mc_steps,
             init_method="glorot_normal")

    util.show_weights(rbm, show_plot, n_weights=16)

    print('fine tuning')
    cd = fit.StochasticGradientDescent(rbm, data)
    cd.monitor.generator_metrics.append(M.JensenShannonDivergence())

    learning_rate = schedules.PowerLawDecay(initial=1e-3, coefficient=1)
    opt = optimizers.ADAM(stepsize=learning_rate)

    cd.train(opt, finetune_epochs, mcsteps=mc_steps, beta_std=beta_std)
    util.show_metrics(rbm, cd.monitor)

    # evaluate the model
    valid = data.get('validate')
    util.show_reconstructions(rbm, valid, show_plot, num_to_avg=10)
    util.show_fantasy_particles(rbm, valid, show_plot, n_fantasy=10,
                                beta_std=beta_std, fantasy_steps=100)
    util.show_weights(rbm, show_plot, n_weights=16)

    print("Norms of the weights after training")
    util.weight_norm_histogram(rbm, show_plot=show_plot)

    # close the HDF5 store
    data.close()
    print("Done")

    return rbm
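A hedged invocation sketch for the layerwise-pretraining run above. `fit.LayerwisePretrain` is already the default `fit_method` in the signature, so it is passed explicitly here only to show that the pretraining strategy is swappable; the `__main__` guard and the assumption that `transform` and the batch reader are importable from this module are illustrative.

# hypothetical driver; fit_method defaults to fit.LayerwisePretrain,
# shown explicitly to emphasize that it can be swapped out
if __name__ == "__main__":
    rbm = run(pretrain_epochs=5, finetune_epochs=5,
              fit_method=fit.LayerwisePretrain, show_plot=False)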
def test_state_for_grad_DrivenSequentialMC():
    num_visible_units = 100
    num_hidden_units = 50
    batch_size = 25

    # set a seed for the random number generator
    be.set_seed()

    # set up some layer and model objects
    vis_layer = layers.BernoulliLayer(num_visible_units)
    hid_layer = layers.BernoulliLayer(num_hidden_units)
    rbm = model.Model([vis_layer, hid_layer])

    # randomly set the intrinsic model parameters
    a = be.randn((num_visible_units,))
    b = be.randn((num_hidden_units,))
    W = be.randn((num_visible_units, num_hidden_units))

    rbm.layers[0].params.loc[:] = a
    rbm.layers[1].params.loc[:] = b
    rbm.weights[0].params.matrix[:] = W

    # generate a random batch of data
    vdata = rbm.layers[0].random((batch_size, num_visible_units))
    data_state = State.from_visible(vdata, rbm)
    dropout_scale = State.dropout_rescale(rbm)

    # since we set no dropout, dropout_scale should be None
    assert dropout_scale is None

    for u in ['markov_chain', 'mean_field_iteration', 'deterministic_iteration']:
        # set up the sampler
        sampler = fit.DrivenSequentialMC(rbm, updater=u, clamped=[0])
        sampler.set_state(data_state)

        # update the state of the hidden layer
        grad_state = sampler.state_for_grad(1, dropout_scale)

        assert be.allclose(data_state.units[0], grad_state.units[0]), \
            "visible layer is clamped, and shouldn't get updated: {}".format(u)
        assert not be.allclose(data_state.units[1], grad_state.units[1]), \
            "hidden layer is not clamped, and should get updated: {}".format(u)

        # compute the conditional mean with the layer function
        ave = rbm.layers[1].conditional_mean(
            rbm._connected_rescaled_units(1, data_state, dropout_scale),
            rbm._connected_weights(1))

        assert be.allclose(ave, grad_state.units[1]), \
            "hidden layer of grad_state should be conditional mean: {}".format(u)
def test_bernoulli_derivatives():
    num_visible_units = 100
    num_hidden_units = 50
    batch_size = 25

    # set a seed for the random number generator
    be.set_seed()

    # set up some layer and model objects
    vis_layer = layers.BernoulliLayer(num_visible_units)
    hid_layer = layers.BernoulliLayer(num_hidden_units)
    rbm = hidden.Model([vis_layer, hid_layer])

    # randomly set the intrinsic model parameters
    a = be.randn((num_visible_units,))
    b = be.randn((num_hidden_units,))
    W = be.randn((num_visible_units, num_hidden_units))

    rbm.layers[0].int_params.loc[:] = a
    rbm.layers[1].int_params.loc[:] = b
    rbm.weights[0].int_params.matrix[:] = W

    # generate a random batch of data
    vdata = rbm.layers[0].random((batch_size, num_visible_units))
    vdata_scaled = rbm.layers[0].rescale(vdata)

    # compute the mean of the hidden layer
    rbm.layers[1].update([vdata], [rbm.weights[0].W()])
    hid_mean = rbm.layers[1].mean()
    hid_mean_scaled = rbm.layers[1].rescale(hid_mean)

    # compute the derivatives
    d_visible_loc = -be.mean(vdata, axis=0)
    d_hidden_loc = -be.mean(hid_mean_scaled, axis=0)
    d_W = -be.batch_outer(vdata, hid_mean_scaled) / len(vdata)

    # compute the derivatives using the layer functions
    vis_derivs = rbm.layers[0].derivatives(vdata, [hid_mean_scaled],
                                           [rbm.weights[0].W()])
    hid_derivs = rbm.layers[1].derivatives(hid_mean, [vdata_scaled],
                                           [rbm.weights[0].W_T()])
    weight_derivs = rbm.weights[0].derivatives(vdata, hid_mean_scaled)

    assert be.allclose(d_visible_loc, vis_derivs.loc), \
        "derivative of visible loc wrong in bernoulli-bernoulli rbm"
    assert be.allclose(d_hidden_loc, hid_derivs.loc), \
        "derivative of hidden loc wrong in bernoulli-bernoulli rbm"
    assert be.allclose(d_W, weight_derivs.matrix), \
        "derivative of weights wrong in bernoulli-bernoulli rbm"
def test_random_grad():
    num_visible_units = 100
    num_hidden_units = 50

    # set a seed for the random number generator
    be.set_seed()

    # set up some layer and model objects
    vis_layer = layers.BernoulliLayer(num_visible_units)
    hid_layer = layers.BernoulliLayer(num_hidden_units)
    rbm = model.Model([vis_layer, hid_layer])

    # create a gradient object filled with random numbers
    gu.random_grad(rbm)
def test_zero_grad():
    num_visible_units = 100
    num_hidden_units = 50

    # set a seed for the random number generator
    be.set_seed()

    # set up some layer and model objects
    vis_layer = layers.BernoulliLayer(num_visible_units)
    hid_layer = layers.BernoulliLayer(num_hidden_units)
    rbm = BoltzmannMachine([vis_layer, hid_layer])

    # create a gradient object filled with zeros
    gu.zero_grad(rbm)
def test_grad_accumulate():
    num_visible_units = 100
    num_hidden_units = 50

    # set a seed for the random number generator
    be.set_seed()

    # set up some layer and model objects
    vis_layer = layers.BernoulliLayer(num_visible_units)
    hid_layer = layers.BernoulliLayer(num_hidden_units)
    rbm = BoltzmannMachine([vis_layer, hid_layer])

    # create a gradient object filled with random numbers
    grad = gu.random_grad(rbm)
    gu.grad_accumulate(be.norm, grad)
def run(num_epochs=10, show_plot=False):
    num_hidden_units = 100
    batch_size = 100
    mc_steps = 10
    beta_std = 0.6

    # set up the reader to get minibatches
    with util.create_batch(batch_size,
                           train_fraction=0.95,
                           transform=transform) as data:

        # set up the model and initialize the parameters
        vis_layer = layers.BernoulliLayer(data.ncols)
        hid_layer = layers.BernoulliLayer(num_hidden_units, center=False)

        rbm = BoltzmannMachine([vis_layer, hid_layer])
        rbm.connections[0].weights.add_penalty(
            {'matrix': pen.l2_penalty(0.001)})
        rbm.initialize(data, method='pca')

        print('training with persistent contrastive divergence')
        cd = fit.SGD(rbm, data)

        learning_rate = schedules.PowerLawDecay(initial=0.01, coefficient=0.1)
        opt = optimizers.ADAM(stepsize=learning_rate)
        cd.train(opt, num_epochs, mcsteps=mc_steps, method=fit.pcd)
        util.show_metrics(rbm, cd.monitor)

        # evaluate the model
        valid = data.get('validate')
        util.show_reconstructions(rbm, valid, show_plot, n_recon=10,
                                  vertical=False, num_to_avg=10)
        util.show_fantasy_particles(rbm, valid, show_plot, n_fantasy=5,
                                    beta_std=beta_std, fantasy_steps=100)
        util.show_weights(rbm, show_plot, n_weights=100)

        print("Done")

    return rbm
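Since this `run` returns the trained model, it can be persisted with the same `pandas.HDFStore` pattern the save/reload tests in this section use (`grbm.save(store)` / `BoltzmannMachine.from_saved(store)`). A sketch, with an illustrative filename:

# hypothetical persistence step, mirroring the HDFStore pattern from the
# save/reload tests; "trained_rbm.h5" is an illustrative filename
import pandas

if __name__ == "__main__":
    rbm = run(num_epochs=10, show_plot=False)
    store = pandas.HDFStore("trained_rbm.h5", mode='w')
    rbm.save(store)
    store.close()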
def test_bernoulli_GFE_derivatives():
    # Tests that the GFE derivative update increases GFE versus 100
    # random update vectors
    num_units = 5

    layer_1 = layers.BernoulliLayer(num_units)
    layer_2 = layers.BernoulliLayer(num_units)
    layer_3 = layers.BernoulliLayer(num_units)
    rbm = BoltzmannMachine([layer_1, layer_2, layer_3])

    for i in range(len(rbm.connections)):
        rbm.connections[i].weights.params.matrix[:] = \
            0.01 * be.randn(rbm.connections[i].shape)

    for lay in rbm.layers:
        lay.params.loc[:] = be.rand_like(lay.params.loc)

    state, cop1_GFE = rbm.compute_StateTAP(init_lr=0.1, tol=1e-7, max_iters=50)
    grad = rbm._grad_gibbs_free_energy(state)
    gu.grad_normalize_(grad)

    for i in range(100):
        lr = 1.0
        gogogo = True
        random_grad = gu.random_grad(rbm)
        gu.grad_normalize_(random_grad)
        while gogogo:
            cop1 = deepcopy(rbm)
            lr_mul = partial(be.tmul, lr)

            cop1.parameter_update(gu.grad_apply(lr_mul, grad))
            cop1_state, cop1_GFE = cop1.compute_StateTAP(init_lr=0.1,
                                                         tol=1e-7,
                                                         max_iters=50)

            cop2 = deepcopy(rbm)
            cop2.parameter_update(gu.grad_apply(lr_mul, random_grad))
            cop2_state, cop2_GFE = cop2.compute_StateTAP(init_lr=0.1,
                                                         tol=1e-7,
                                                         max_iters=50)

            regress = cop2_GFE - cop1_GFE < 0.0

            if regress:
                if lr < 1e-6:
                    assert False, \
                        "TAP FE gradient is not working properly for Bernoulli models"
                    break
                else:
                    lr *= 0.5
            else:
                break
def test_bernoulli_derivatives():
    ly = layers.BernoulliLayer(num_vis)
    w = layers.Weights((num_vis, num_hid))
    vis = ly.random((num_samples, num_vis))
    hid = [be.randn((num_samples, num_hid))]
    weights = [w.W_T()]
    ly.derivatives(vis, hid, weights)
def test_exponential_update():
    ly = layers.BernoulliLayer(num_vis)
    w = layers.Weights((num_vis, num_hid))
    scaled_units = [be.randn((num_samples, num_hid))]
    weights = [w.W_T()]
    beta = be.rand((num_samples, 1))
    ly.update(scaled_units, weights, beta)
def test_bernoulli_build_from_config():
    ly = layers.BernoulliLayer(num_vis)
    ly.add_constraint({'loc': constraints.non_negative})
    p = penalties.l2_penalty(0.37)
    ly.add_penalty({'log_var': p})

    ly_new = layers.Layer.from_config(ly.get_config())
    assert ly_new.get_config() == ly.get_config()
def test_get_base_config():
    ly = layers.BernoulliLayer(num_vis)
    ly.add_constraint({'loc': constraints.non_negative})
    p = penalties.l2_penalty(0.37)
    ly.add_penalty({'loc': p})
    ly.set_fixed_params(['loc'])
    ly.get_base_config()
def test_bernoulli_conditional_params():
    ly = layers.BernoulliLayer(num_vis)
    w = layers.Weights((num_vis, num_hid))
    scaled_units = [be.randn((num_samples, num_hid))]
    weights = [w.W_T()]
    beta = be.rand((num_samples, 1))
    ly._conditional_params(scaled_units, weights, beta)
def test_grbm_reload():
    vis_layer = layers.BernoulliLayer(num_vis, center=True)
    hid_layer = layers.GaussianLayer(num_hid, center=True)

    # create some extrinsics
    grbm = BoltzmannMachine([vis_layer, hid_layer])
    data = batch.Batch({'train': batch.InMemoryTable(
        be.randn((10 * num_samples, num_vis)), num_samples)})
    grbm.initialize(data)

    with tempfile.NamedTemporaryFile() as file:
        # save the model
        store = pandas.HDFStore(file.name, mode='w')
        grbm.save(store)
        store.close()

        # reload
        store = pandas.HDFStore(file.name, mode='r')
        grbm_reload = BoltzmannMachine.from_saved(store)
        store.close()

    # check the two models are consistent
    vis_data = vis_layer.random((num_samples, num_vis))
    data_state = State.from_visible(vis_data, grbm)
    vis_orig = grbm.deterministic_iteration(1, data_state)[0]
    vis_reload = grbm_reload.deterministic_iteration(1, data_state)[0]

    assert be.allclose(vis_orig, vis_reload)
    assert be.allclose(grbm.layers[0].moments.mean,
                       grbm_reload.layers[0].moments.mean)
    assert be.allclose(grbm.layers[0].moments.var,
                       grbm_reload.layers[0].moments.var)
    assert be.allclose(grbm.layers[1].moments.mean,
                       grbm_reload.layers[1].moments.mean)
    assert be.allclose(grbm.layers[1].moments.var,
                       grbm_reload.layers[1].moments.var)
def example_mnist_tap_machine(paysage_path=None, num_epochs=10, show_plot=True):
    num_hidden_units = 256
    batch_size = 100
    learning_rate = 0.1

    (_, _, shuffled_filepath) = \
            util.default_paths(paysage_path)

    # set up the reader to get minibatches
    data = batch.Batch(shuffled_filepath,
                       'train/images',
                       batch_size,
                       transform=batch.binarize_color,
                       train_fraction=0.95)

    # set up the model and initialize the parameters
    vis_layer = layers.BernoulliLayer(data.ncols)
    hid_layer = layers.BernoulliLayer(num_hidden_units)

    rbm = tap_machine.TAP_rbm([vis_layer, hid_layer],
                              num_persistent_samples=0,
                              tolerance_EMF=1e-4,
                              max_iters_EMF=25,
                              terms=2)
    rbm.initialize(data, 'glorot_normal')

    perf = fit.ProgressMonitor(data,
                               metrics=['ReconstructionError',
                                        'EnergyDistance'])

    opt = optimizers.Gradient(stepsize=learning_rate,
                              scheduler=optimizers.PowerLawDecay(0.1),
                              tolerance=1e-4,
                              ascent=True)

    sgd = fit.SGD(rbm, data, opt, num_epochs, method=fit.tap, monitor=perf)

    # fit the model
    print('training with stochastic gradient ascent')
    sgd.train()

    util.show_metrics(rbm, perf)
    util.show_reconstructions(rbm, data.get('validate'), fit, show_plot)
    util.show_fantasy_particles(rbm, data.get('validate'), fit, show_plot)
    util.show_weights(rbm, show_plot)

    # close the HDF5 store
    data.close()
    print("Done")
def test_clamped_SequentialMC():
    num_visible_units = 100
    num_hidden_units = 50
    batch_size = 25
    steps = 1

    # set a seed for the random number generator
    be.set_seed()

    # set up some layer and model objects
    vis_layer = layers.BernoulliLayer(num_visible_units)
    hid_layer = layers.BernoulliLayer(num_hidden_units)
    rbm = model.Model([vis_layer, hid_layer])

    # randomly set the intrinsic model parameters
    a = be.randn((num_visible_units,))
    b = be.randn((num_hidden_units,))
    W = be.randn((num_visible_units, num_hidden_units))

    rbm.layers[0].params.loc[:] = a
    rbm.layers[1].params.loc[:] = b
    rbm.weights[0].params.matrix[:] = W

    # generate a random batch of data
    vdata = rbm.layers[0].random((batch_size, num_visible_units))
    data_state = State.from_visible(vdata, rbm)
    dropout_scale = State.dropout_rescale(rbm)

    # since we set no dropout, dropout_scale should be None
    assert dropout_scale is None

    for u in ['markov_chain', 'mean_field_iteration', 'deterministic_iteration']:
        # set up the sampler with the visible layer clamped
        sampler = fit.SequentialMC(rbm, updater=u, clamped=[0])
        sampler.set_state(data_state)

        # update the sampler state and check the output
        sampler.update_state(steps, dropout_scale)

        assert be.allclose(data_state.units[0], sampler.state.units[0]), \
            "visible layer is clamped, and shouldn't get updated: {}".format(u)
        assert not be.allclose(data_state.units[1], sampler.state.units[1]), \
            "hidden layer is not clamped, and should get updated: {}".format(u)
def test_grbm_save():
    vis_layer = layers.BernoulliLayer(num_vis)
    hid_layer = layers.GaussianLayer(num_hid)
    grbm = model.Model([vis_layer, hid_layer])

    with tempfile.NamedTemporaryFile() as file:
        store = pandas.HDFStore(file.name, mode='w')
        grbm.save(store)
        store.close()
def test_exponential_derivatives():
    ly = layers.BernoulliLayer(num_vis)
    w = layers.Weights((num_vis, num_hid))
    vis = ly.random((num_samples, num_vis))
    hid = [be.randn((num_samples, num_hid))]
    weights = [w.W()]
    beta = be.rand((num_samples, 1))
    ly.derivatives(vis, hid, weights, beta)
def test_bernoulli_conditional_params():
    num_visible_units = 100
    num_hidden_units = 50
    batch_size = 25

    # set a seed for the random number generator
    be.set_seed()

    # set up some layer and model objects
    vis_layer = layers.BernoulliLayer(num_visible_units)
    hid_layer = layers.BernoulliLayer(num_hidden_units)
    rbm = model.Model([vis_layer, hid_layer])

    # randomly set the intrinsic model parameters
    a = be.randn((num_visible_units,))
    b = be.randn((num_hidden_units,))
    W = be.randn((num_visible_units, num_hidden_units))

    rbm.layers[0].params.loc[:] = a
    rbm.layers[1].params.loc[:] = b
    rbm.weights[0].params.matrix[:] = W

    # generate a random batch of data
    vdata = rbm.layers[0].random((batch_size, num_visible_units))
    hdata = rbm.layers[1].random((batch_size, num_hidden_units))

    # compute conditional parameters
    hidden_field = be.dot(vdata, W)  # (batch_size, num_hidden_units)
    hidden_field += b

    visible_field = be.dot(hdata, be.transpose(W))  # (batch_size, num_visible_units)
    visible_field += a

    # compute conditional parameters with layer functions
    hidden_field_layer = rbm.layers[1]._conditional_params(
        [vdata], [rbm.weights[0].W()])
    visible_field_layer = rbm.layers[0]._conditional_params(
        [hdata], [rbm.weights[0].W_T()])

    assert be.allclose(hidden_field, hidden_field_layer), \
        "hidden field wrong in bernoulli-bernoulli rbm"
    assert be.allclose(visible_field, visible_field_layer), \
        "visible field wrong in bernoulli-bernoulli rbm"
def run(num_epochs=5, show_plot=False):
    num_hidden_units = 256
    batch_size = 100
    learning_rate = schedules.PowerLawDecay(initial=0.1, coefficient=3.0)
    mc_steps = 1

    # set up the reader to get minibatches
    data = util.create_batch(batch_size,
                             train_fraction=0.95,
                             transform=transform)

    # set up the model and initialize the parameters
    vis_layer = layers.BernoulliLayer(data.ncols)
    hid_layer = layers.BernoulliLayer(num_hidden_units)

    rbm = BoltzmannMachine([vis_layer, hid_layer])
    rbm.connections[0].weights.add_penalty(
        {'matrix': pen.l1_adaptive_decay_penalty_2(0.00001)})
    rbm.initialize(data, 'glorot_normal')

    opt = optimizers.Gradient(stepsize=learning_rate, tolerance=1e-4)

    tap = fit.TAP(True, 0.1, 0.01, 25, True, 0.5, 0.001, 0.0)
    sgd = fit.SGD(rbm, data)
    sgd.monitor.generator_metrics.append(TAPLogLikelihood())
    sgd.monitor.generator_metrics.append(TAPFreeEnergy())

    # fit the model
    print('Training with stochastic gradient ascent using TAP expansion')
    sgd.train(opt, num_epochs, method=tap.tap_update, mcsteps=mc_steps)

    util.show_metrics(rbm, sgd.monitor)
    valid = data.get('validate')
    util.show_reconstructions(rbm, valid, show_plot, n_recon=10,
                              vertical=False, num_to_avg=10)
    util.show_fantasy_particles(rbm, valid, show_plot, n_fantasy=5)
    util.show_weights(rbm, show_plot, n_weights=25)

    # close the HDF5 store
    data.close()
    print("Done")
def test_grbm_from_config():
    vis_layer = layers.BernoulliLayer(num_vis)
    hid_layer = layers.GaussianLayer(num_hid)
    grbm = model.Model([vis_layer, hid_layer])

    config = grbm.get_config()
    rbm_from_config = model.Model.from_config(config)
    config_from_config = rbm_from_config.get_config()

    assert config == config_from_config
def test_grad_normalize_():
    num_visible_units = 10
    num_hidden_units = 10

    # set a seed for the random number generator
    be.set_seed()

    # set up some layer and model objects
    vis_layer = layers.BernoulliLayer(num_visible_units)
    hid_layer = layers.BernoulliLayer(num_hidden_units)
    rbm = BoltzmannMachine([vis_layer, hid_layer])

    # create a gradient object filled with random numbers
    grad = gu.random_grad(rbm)
    gu.grad_normalize_(grad)

    nrm = gu.grad_norm(grad)
    assert nrm > 1 - 1e-6
    assert nrm < 1 + 1e-6
def run(num_epochs=1, show_plot=False):
    num_hidden_units = 1
    batch_size = 100
    mc_steps = 10
    beta_std = 0.6

    # set up the reader to get minibatches
    with batch.in_memory_batch(samples, batch_size,
                               train_fraction=0.95) as data:

        # set up the model and initialize the parameters
        vis_layer = layers.BernoulliLayer(data.ncols)
        hid_layer = layers.BernoulliLayer(num_hidden_units, center=False)

        rbm = BoltzmannMachine([vis_layer, hid_layer])
        rbm.connections[0].weights.add_penalty(
            {'matrix': pen.l2_penalty(0.001)})  # Add regularization term
        rbm.initialize(data, method='hinton')  # Initialize weights

        cd = fit.SGD(rbm, data)
        learning_rate = schedules.PowerLawDecay(initial=0.01, coefficient=0.1)
        opt = optimizers.ADAM(stepsize=learning_rate)

        print("Train the model...")
        cd.train(opt, num_epochs, mcsteps=mc_steps, method=fit.pcd,
                 verbose=False)

        '''
        # write on file KL divergences
        reverse_KL_div = [cd.monitor.memory[i]['ReverseKLDivergence']
                          for i in range(0, len(cd.monitor.memory))]
        KL_div = [cd.monitor.memory[i]['KLDivergence']
                  for i in range(0, len(cd.monitor.memory))]
        for i in range(0, len(cd.monitor.memory)):
            out_file1.write(str(KL_div[i]) + " " + str(reverse_KL_div[i]) + "\n")
        out_file1.close()

        # save weights on file
        filename = "results/weights/weights-" + temperature[:-4] + ".jpg"
        Gprotein_util.show_weights(rbm, show_plot=False, n_weights=8,
                                   Filename=filename, random=False)
        '''

    return rbm
def example_mnist_grbm(paysage_path=None, num_epochs=10, show_plot=False):
    num_hidden_units = 500
    batch_size = 50
    learning_rate = 0.001  # gaussian rbm usually requires a smaller learning rate
    mc_steps = 1

    (_, _, shuffled_filepath) = \
            util.default_paths(paysage_path)

    # set up the reader to get minibatches
    data = batch.Batch(shuffled_filepath,
                       'train/images',
                       batch_size,
                       transform=transform,
                       train_fraction=0.99)

    # set up the model and initialize the parameters
    vis_layer = layers.GaussianLayer(data.ncols)
    hid_layer = layers.BernoulliLayer(num_hidden_units)

    rbm = model.Model([vis_layer, hid_layer])
    rbm.initialize(data)

    metrics = ['ReconstructionError', 'EnergyDistance',
               'EnergyGap', 'EnergyZscore']
    perf = fit.ProgressMonitor(data, metrics=metrics)

    opt = optimizers.ADAM(stepsize=learning_rate,
                          scheduler=optimizers.PowerLawDecay(0.1))

    sampler = fit.DrivenSequentialMC.from_batch(rbm, data,
                                                method='stochastic')

    cd = fit.SGD(rbm, data, opt, num_epochs,
                 method=fit.pcd,
                 sampler=sampler,
                 mcsteps=mc_steps,
                 monitor=perf)

    # fit the model
    print('training with contrastive divergence')
    cd.train()

    # evaluate the model
    util.show_metrics(rbm, perf)
    util.show_reconstructions(rbm, data.get('validate'), fit, show_plot)
    util.show_fantasy_particles(rbm, data.get('validate'), fit, show_plot)
    util.show_weights(rbm, show_plot)

    # close the HDF5 store
    data.close()
    print("Done")
def test_grad_norm():
    num_visible_units = 1000
    num_hidden_units = 1000

    # set a seed for the random number generator
    be.set_seed()

    # set up some layer and model objects
    vis_layer = layers.BernoulliLayer(num_visible_units)
    hid_layer = layers.BernoulliLayer(num_hidden_units)
    rbm = BoltzmannMachine([vis_layer, hid_layer])

    # create a gradient object filled with random numbers
    grad = gu.random_grad(rbm)

    # for N total parameters with entries whose second moment is 1/3
    # (as for uniform draws on [0, 1)), the norm concentrates near sqrt(N/3)
    nrm = gu.grad_norm(grad)
    assert nrm > math.sqrt(be.float_scalar(num_hidden_units +
        num_visible_units + num_visible_units*num_hidden_units)/3) - 1
    assert nrm < math.sqrt(be.float_scalar(num_hidden_units +
        num_visible_units + num_visible_units*num_hidden_units)/3) + 1
def test_bernoulli_update():
    num_visible_units = 100
    num_hidden_units = 50
    batch_size = 25

    # set a seed for the random number generator
    be.set_seed()

    # set up some layer and model objects
    vis_layer = layers.BernoulliLayer(num_visible_units)
    hid_layer = layers.BernoulliLayer(num_hidden_units)
    rbm = hidden.Model([vis_layer, hid_layer])

    # randomly set the intrinsic model parameters
    a = be.randn((num_visible_units,))
    b = be.randn((num_hidden_units,))
    W = be.randn((num_visible_units, num_hidden_units))

    rbm.layers[0].int_params.loc[:] = a
    rbm.layers[1].int_params.loc[:] = b
    rbm.weights[0].int_params.matrix[:] = W

    # generate a random batch of data
    vdata = rbm.layers[0].random((batch_size, num_visible_units))
    hdata = rbm.layers[1].random((batch_size, num_hidden_units))

    # compute extrinsic parameters
    hidden_field = be.dot(vdata, W)  # (batch_size, num_hidden_units)
    hidden_field += be.broadcast(b, hidden_field)

    visible_field = be.dot(hdata, be.transpose(W))  # (batch_size, num_visible_units)
    visible_field += be.broadcast(a, visible_field)

    # update the extrinsic parameters using the layer functions
    rbm.layers[0].update([hdata], [rbm.weights[0].W_T()])
    rbm.layers[1].update([vdata], [rbm.weights[0].W()])

    assert be.allclose(hidden_field, rbm.layers[1].ext_params.field), \
        "hidden field wrong in bernoulli-bernoulli rbm"
    assert be.allclose(visible_field, rbm.layers[0].ext_params.field), \
        "visible field wrong in bernoulli-bernoulli rbm"
def test_grbm_from_config():
    vis_layer = layers.BernoulliLayer(num_vis)
    hid_layer = layers.GaussianLayer(num_hid)
    grbm = BoltzmannMachine([vis_layer, hid_layer])

    config = grbm.get_config()
    rbm_from_config = BoltzmannMachine.from_config(config)
    config_from_config = rbm_from_config.get_config()

    assert config == config_from_config