# Imports assumed by the examples below, inferred from the paysage API names
# they use; `util` (the example-local helper module providing create_batch,
# show_metrics, etc.) and the module-level `transform` preprocessing object are
# defined alongside the original scripts and are not reproduced here.
from paysage import layers
from paysage import fit
from paysage import optimizers
from paysage import schedules
from paysage import backends as be
from paysage import penalties as pen
from paysage import metrics as M
from paysage.metrics import TAPFreeEnergy, TAPLogLikelihood
from paysage.models import BoltzmannMachine


# Deep Bernoulli RBM (400, 225, and 100 hidden units) with layerwise
# pretraining followed by fine tuning.
def run(pretrain_epochs=5, finetune_epochs=5, fit_method=fit.LayerwisePretrain,
        show_plot=False):
    num_hidden_units = [20**2, 15**2, 10**2]
    batch_size = 100
    mc_steps = 5
    beta_std = 0.6

    # set up the reader to get minibatches
    data = util.create_batch(batch_size, train_fraction=0.95, transform=transform)

    # set up the model and initialize the parameters
    vis_layer = layers.BernoulliLayer(data.ncols)
    hid_layer = [layers.BernoulliLayer(n) for n in num_hidden_units]
    rbm = BoltzmannMachine([vis_layer] + hid_layer)

    # add some penalties
    for c in rbm.connections:
        c.weights.add_penalty({"matrix": pen.l1_adaptive_decay_penalty_2(1e-4)})

    print("Norms of the weights before training")
    util.weight_norm_histogram(rbm, show_plot=show_plot)

    print('pre-training with persistent contrastive divergence')
    cd = fit_method(rbm, data)
    learning_rate = schedules.PowerLawDecay(initial=5e-3, coefficient=1)
    opt = optimizers.ADAM(stepsize=learning_rate)
    cd.train(opt, pretrain_epochs, method=fit.pcd, mcsteps=mc_steps,
             init_method="glorot_normal")
    util.show_weights(rbm, show_plot, n_weights=16)

    print('fine tuning')
    cd = fit.StochasticGradientDescent(rbm, data)
    cd.monitor.generator_metrics.append(M.JensenShannonDivergence())
    learning_rate = schedules.PowerLawDecay(initial=1e-3, coefficient=1)
    opt = optimizers.ADAM(stepsize=learning_rate)
    cd.train(opt, finetune_epochs, mcsteps=mc_steps, beta_std=beta_std)
    util.show_metrics(rbm, cd.monitor)

    # evaluate the model
    valid = data.get('validate')
    util.show_reconstructions(rbm, valid, show_plot, num_to_avg=10)
    util.show_fantasy_particles(rbm, valid, show_plot, n_fantasy=10,
                                beta_std=beta_std, fantasy_steps=100)
    util.show_weights(rbm, show_plot, n_weights=16)

    print("Norms of the weights after training")
    util.weight_norm_histogram(rbm, show_plot=show_plot)

    # close the HDF5 store
    data.close()
    print("Done")

    return rbm

# Gaussian-Bernoulli RBM trained by stochastic gradient ascent on the TAP
# expansion of the free energy.
def run(num_epochs=10, show_plot=False):
    num_hidden_units = 256
    batch_size = 100
    learning_rate = schedules.PowerLawDecay(initial=0.001, coefficient=0.1)
    mc_steps = 1

    # set up the reader to get minibatches
    data = util.create_batch(batch_size, train_fraction=0.95, transform=transform)

    # set up the model and initialize the parameters
    vis_layer = layers.GaussianLayer(data.ncols)
    hid_layer = layers.BernoulliLayer(num_hidden_units)
    rbm = BoltzmannMachine([vis_layer, hid_layer])
    rbm.initialize(data, 'stddev')
    rbm.layers[0].params.log_var[:] = \
        be.log(0.05*be.ones_like(rbm.layers[0].params.log_var))

    opt = optimizers.ADAM(stepsize=learning_rate)

    # This example parameter set for TAP uses gradient descent to optimize the
    # Gibbs free energy:
    tap = fit.TAP(True, 1.0, 0.01, 100, False, 0.9, 0.001, 0.5)

    # This example parameter set for TAP uses self-consistent iteration to
    # optimize the Gibbs free energy:
    # tap = fit.TAP(False, tolerance=0.001, max_iters=100)

    sgd = fit.SGD(rbm, data)
    sgd.monitor.generator_metrics.append(TAPFreeEnergy())
    sgd.monitor.generator_metrics.append(TAPLogLikelihood())

    # fit the model
    print('Training with stochastic gradient ascent using TAP expansion')
    sgd.train(opt, num_epochs, method=tap.tap_update, mcsteps=mc_steps)

    util.show_metrics(rbm, sgd.monitor)
    valid = data.get('validate')
    util.show_reconstructions(rbm, valid, show_plot, n_recon=10, vertical=False,
                              num_to_avg=10)
    util.show_fantasy_particles(rbm, valid, show_plot, n_fantasy=5)
    util.show_weights(rbm, show_plot, n_weights=25)

    # close the HDF5 store
    data.close()

    print("Done")

# Bernoulli-Bernoulli RBM with an L2 weight penalty, trained with persistent
# contrastive divergence.
def run(num_epochs=10, show_plot=False):
    num_hidden_units = 100
    batch_size = 100
    mc_steps = 10
    beta_std = 0.6

    # set up the reader to get minibatches
    with util.create_batch(batch_size, train_fraction=0.95,
                           transform=transform) as data:

        # set up the model and initialize the parameters
        vis_layer = layers.BernoulliLayer(data.ncols)
        hid_layer = layers.BernoulliLayer(num_hidden_units, center=False)
        rbm = BoltzmannMachine([vis_layer, hid_layer])
        rbm.connections[0].weights.add_penalty(
            {'matrix': pen.l2_penalty(0.001)})
        rbm.initialize(data, method='pca')

        print('training with persistent contrastive divergence')
        cd = fit.SGD(rbm, data)
        learning_rate = schedules.PowerLawDecay(initial=0.01, coefficient=0.1)
        opt = optimizers.ADAM(stepsize=learning_rate)
        cd.train(opt, num_epochs, mcsteps=mc_steps, method=fit.pcd)
        util.show_metrics(rbm, cd.monitor)

        # evaluate the model
        valid = data.get('validate')
        util.show_reconstructions(rbm, valid, show_plot, n_recon=10,
                                  vertical=False, num_to_avg=10)
        util.show_fantasy_particles(rbm, valid, show_plot, n_fantasy=5,
                                    beta_std=beta_std, fantasy_steps=100)
        util.show_weights(rbm, show_plot, n_weights=100)

        print("Done")

    return rbm
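
# A sketch (not part of the original example): if a sparser weight matrix is
# wanted, the l2_penalty attached inside run() above could be swapped for the
# adaptive L1 decay penalty used by the deep-RBM and TAP examples in this file.
# The 1e-5 coefficient is illustrative, not a tuned value.
#
#     rbm.connections[0].weights.add_penalty(
#         {'matrix': pen.l1_adaptive_decay_penalty_2(1e-5)})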

# Bernoulli-Bernoulli RBM trained by stochastic gradient ascent on the TAP
# expansion, using a plain Gradient optimizer.
def run(num_epochs=5, show_plot=False):
    num_hidden_units = 256
    batch_size = 100
    learning_rate = schedules.PowerLawDecay(initial=0.1, coefficient=3.0)
    mc_steps = 1

    # set up the reader to get minibatches
    data = util.create_batch(batch_size, train_fraction=0.95, transform=transform)

    # set up the model and initialize the parameters
    vis_layer = layers.BernoulliLayer(data.ncols)
    hid_layer = layers.BernoulliLayer(num_hidden_units)
    rbm = BoltzmannMachine([vis_layer, hid_layer])
    rbm.connections[0].weights.add_penalty(
        {'matrix': pen.l1_adaptive_decay_penalty_2(0.00001)})
    rbm.initialize(data, 'glorot_normal')

    opt = optimizers.Gradient(stepsize=learning_rate, tolerance=1e-4)

    tap = fit.TAP(True, 0.1, 0.01, 25, True, 0.5, 0.001, 0.0)
    sgd = fit.SGD(rbm, data)
    sgd.monitor.generator_metrics.append(TAPLogLikelihood())
    sgd.monitor.generator_metrics.append(TAPFreeEnergy())

    # fit the model
    print('Training with stochastic gradient ascent using TAP expansion')
    sgd.train(opt, num_epochs, method=tap.tap_update, mcsteps=mc_steps)

    util.show_metrics(rbm, sgd.monitor)
    valid = data.get('validate')
    util.show_reconstructions(rbm, valid, show_plot, n_recon=10, vertical=False,
                              num_to_avg=10)
    util.show_fantasy_particles(rbm, valid, show_plot, n_fantasy=5)
    util.show_weights(rbm, show_plot, n_weights=25)

    # close the HDF5 store
    data.close()

    print("Done")

# RBM with Bernoulli visible units and Gaussian hidden units, trained with
# persistent contrastive divergence.
def run(num_epochs=10, show_plot=False):
    num_hidden_units = 256
    batch_size = 100
    learning_rate = schedules.PowerLawDecay(initial=0.001, coefficient=0.1)
    mc_steps = 1

    # set up the reader to get minibatches
    data = util.create_batch(batch_size, train_fraction=0.95, transform=transform)

    # set up the model and initialize the parameters
    vis_layer = layers.BernoulliLayer(data.ncols)
    hid_layer = layers.GaussianLayer(num_hidden_units)
    rbm = BoltzmannMachine([vis_layer, hid_layer])
    rbm.initialize(data)

    # set up the optimizer and the fit method
    opt = optimizers.ADAM(stepsize=learning_rate)
    cd = fit.SGD(rbm, data)

    # fit the model
    print('training with persistent contrastive divergence')
    cd.train(opt, num_epochs, method=fit.pcd, mcsteps=mc_steps)

    # evaluate the model
    util.show_metrics(rbm, cd.monitor)
    valid = data.get('validate')
    util.show_reconstructions(rbm, valid, show_plot, n_recon=10, vertical=False,
                              num_to_avg=10)
    util.show_fantasy_particles(rbm, valid, show_plot, n_fantasy=5)
    util.show_weights(rbm, show_plot, n_weights=25)

    # close the HDF5 store
    data.close()

    print("Done")

# Gaussian visible / Bernoulli hidden RBM; the hidden layer's own parameters
# are held fixed during training.
def run(num_epochs=20, show_plot=False):
    num_hidden_units = 200
    batch_size = 100
    mc_steps = 10
    beta_std = 0.95

    # set up the reader to get minibatches
    data = util.create_batch(batch_size, train_fraction=0.95, transform=transform)

    # set up the model and initialize the parameters
    vis_layer = layers.GaussianLayer(data.ncols, center=False)
    hid_layer = layers.BernoulliLayer(num_hidden_units, center=True)
    hid_layer.set_fixed_params(hid_layer.get_param_names())
    rbm = BoltzmannMachine([vis_layer, hid_layer])
    rbm.initialize(data, 'pca', epochs=500, verbose=True)

    print('training with persistent contrastive divergence')
    cd = fit.SGD(rbm, data, fantasy_steps=10)
    cd.monitor.generator_metrics.append(M.JensenShannonDivergence())
    learning_rate = schedules.PowerLawDecay(initial=1e-3, coefficient=5)
    opt = optimizers.ADAM(stepsize=learning_rate)
    cd.train(opt, num_epochs, method=fit.pcd, mcsteps=mc_steps,
             beta_std=beta_std, burn_in=1)

    # evaluate the model
    util.show_metrics(rbm, cd.monitor)
    valid = data.get('validate')
    util.show_reconstructions(rbm, valid, show_plot, n_recon=10, vertical=False)
    util.show_fantasy_particles(rbm, valid, show_plot, n_fantasy=5)
    util.show_weights(rbm, show_plot, n_weights=100)

    # close the HDF5 store
    data.close()

    print("Done")

    return rbm
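
# Minimal driver sketch (an assumption, not part of the original scripts; the
# real examples may parse command-line arguments instead): train with the
# default settings and show the diagnostic plots.
if __name__ == "__main__":
    rbm = run(show_plot=True)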