def run(pretrain_epochs=5, finetune_epochs=5, fit_method=fit.LayerwisePretrain,
        show_plot=False):
    num_hidden_units = [20**2, 15**2, 10**2]
    batch_size = 100
    mc_steps = 5
    beta_std = 0.6

    # set up the reader to get minibatches
    data = util.create_batch(batch_size, train_fraction=0.95, transform=transform)

    # set up the model and initialize the parameters
    vis_layer = layers.BernoulliLayer(data.ncols)
    hid_layer = [layers.BernoulliLayer(n) for n in num_hidden_units]
    rbm = BoltzmannMachine([vis_layer] + hid_layer)

    # add some penalties
    for c in rbm.connections:
        c.weights.add_penalty({"matrix": pen.l1_adaptive_decay_penalty_2(1e-4)})

    print("Norms of the weights before training")
    util.weight_norm_histogram(rbm, show_plot=show_plot)

    print('pre-training with persistent contrastive divergence')
    cd = fit_method(rbm, data)
    learning_rate = schedules.PowerLawDecay(initial=5e-3, coefficient=1)
    opt = optimizers.ADAM(stepsize=learning_rate)
    cd.train(opt, pretrain_epochs, method=fit.pcd, mcsteps=mc_steps,
             init_method="glorot_normal")
    util.show_weights(rbm, show_plot, n_weights=16)

    print('fine tuning')
    cd = fit.StochasticGradientDescent(rbm, data)
    cd.monitor.generator_metrics.append(M.JensenShannonDivergence())
    learning_rate = schedules.PowerLawDecay(initial=1e-3, coefficient=1)
    opt = optimizers.ADAM(stepsize=learning_rate)
    cd.train(opt, finetune_epochs, mcsteps=mc_steps, beta_std=beta_std)
    util.show_metrics(rbm, cd.monitor)

    # evaluate the model
    valid = data.get('validate')
    util.show_reconstructions(rbm, valid, show_plot, num_to_avg=10)
    util.show_fantasy_particles(rbm, valid, show_plot, n_fantasy=10,
                                beta_std=beta_std, fantasy_steps=100)
    util.show_weights(rbm, show_plot, n_weights=16)

    print("Norms of the weights after training")
    util.weight_norm_histogram(rbm, show_plot=show_plot)

    # close the HDF5 store
    data.close()
    print("Done")

    return rbm
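# A minimal sketch of how this pretrain/fine-tune example might be invoked
# from the command line. The argparse flags below are hypothetical additions,
# not part of the original script.
if __name__ == "__main__":
    import argparse
    parser = argparse.ArgumentParser(
        description="Deep Bernoulli RBM: layerwise pretrain, then fine tune")
    parser.add_argument("--pretrain-epochs", type=int, default=5)
    parser.add_argument("--finetune-epochs", type=int, default=5)
    parser.add_argument("--show-plot", action="store_true")
    args = parser.parse_args()
    run(pretrain_epochs=args.pretrain_epochs,
        finetune_epochs=args.finetune_epochs,
        show_plot=args.show_plot)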
def test_gaussian_1D_1mode_train():
    # create some example data
    num = 10000
    mu = 3
    sigma = 1
    samples = be.randn((num, 1)) * sigma + mu

    # set up the reader to get minibatches
    batch_size = 100
    samples_train, samples_validate = batch.split_tensor(samples, 0.9)
    data = batch.Batch({
        'train': batch.InMemoryTable(samples_train, batch_size),
        'validate': batch.InMemoryTable(samples_validate, batch_size)
    })

    # parameters
    learning_rate = schedules.PowerLawDecay(initial=0.1, coefficient=0.1)
    mc_steps = 1
    num_epochs = 10
    num_sample_steps = 100

    # set up the model and initialize the parameters
    vis_layer = layers.GaussianLayer(1)
    hid_layer = layers.OneHotLayer(1)
    rbm = BoltzmannMachine([vis_layer, hid_layer])
    rbm.initialize(data, method='hinton')

    # modify the parameters to shift the initialized model from the data;
    # this forces it to train
    rbm.layers[0].params = layers.ParamsGaussian(
        rbm.layers[0].params.loc - 3,
        rbm.layers[0].params.log_var - 1)

    # set up the optimizer and the fit method
    opt = optimizers.ADAM(stepsize=learning_rate)
    cd = fit.SGD(rbm, data)

    # fit the model
    print('training with persistent contrastive divergence')
    cd.train(opt, num_epochs, method=fit.pcd, mcsteps=mc_steps)

    # sample data from the trained model
    model_state = \
        samplers.SequentialMC.generate_fantasy_state(rbm, num, num_sample_steps)
    pts_trained = model_state[0]

    percent_error = 10
    mu_trained = be.mean(pts_trained)
    assert numpy.abs(mu_trained / mu - 1) < (percent_error / 100)

    sigma_trained = numpy.sqrt(be.var(pts_trained))
    assert numpy.abs(sigma_trained / sigma - 1) < (percent_error / 100)
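# The two assertions above use the same relative-error pattern. A small helper
# makes the tolerance explicit; this is an illustrative sketch, not part of
# the original test.
def within_percent(actual, expected, percent):
    """Return True if `actual` is within `percent` percent of `expected`."""
    return abs(actual / expected - 1) < percent / 100

# e.g. within_percent(mu_trained, mu, 10) reproduces the first assertion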
def run(paysage_path=None, num_epochs=10, show_plot=False):
    num_hidden_units = 256
    batch_size = 100
    learning_rate = schedules.PowerLawDecay(initial=0.1, coefficient=0.1)
    mc_steps = 1

    (_, _, shuffled_filepath) = util.default_paths(paysage_path)

    # set up the reader to get minibatches
    data = batch.HDFBatch(shuffled_filepath, 'train/images', batch_size,
                          transform=pre.binarize_color, train_fraction=0.95)

    # set up the model and initialize the parameters
    vis_layer = layers.BernoulliLayer(data.ncols)
    hid_layer = layers.BernoulliLayer(num_hidden_units)
    rbm = model.Model([vis_layer, hid_layer])
    rbm.initialize(data, method='glorot_normal')

    perf = fit.ProgressMonitor(data,
                               metrics=['ReconstructionError',
                                        'EnergyDistance',
                                        'HeatCapacity',
                                        'WeightSparsity',
                                        'WeightSquare'])

    opt = optimizers.Gradient(stepsize=learning_rate, tolerance=1e-4, ascent=True)
    sampler = fit.DrivenSequentialMC.from_batch(rbm, data)
    sgd = fit.SGD(rbm, data, opt, num_epochs, sampler, method=fit.tap,
                  monitor=perf, mcsteps=mc_steps)

    # fit the model
    print('Training with stochastic gradient ascent using TAP expansion')
    sgd.train()

    util.show_metrics(rbm, perf)
    valid = data.get('validate')
    util.show_reconstructions(rbm, valid, fit, show_plot,
                              n_recon=10, vertical=False, num_to_avg=10)
    util.show_fantasy_particles(rbm, valid, fit, show_plot, n_fantasy=25)
    util.show_weights(rbm, show_plot, n_weights=25)

    # close the HDF5 store
    data.close()
    print("Done")
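# Every example here passes a PowerLawDecay schedule to the optimizer. A
# minimal re-implementation sketch of what such a schedule computes, assuming
# the rate decays as lr_t = initial / (1 + coefficient * t) with t the update
# counter (check paysage.schedules for the exact rule; the method names below
# are illustrative):
class PowerLawDecaySketch(object):
    def __init__(self, initial, coefficient):
        self.initial = initial
        self.coefficient = coefficient
        self.t = 0

    def get_value(self):
        # current learning rate under the assumed power-law decay
        return self.initial / (1 + self.coefficient * self.t)

    def increment(self):
        # advance the schedule by one update
        self.t += 1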
def run(paysage_path=None, num_epochs=10, show_plot=False):
    num_hidden_units = 500
    batch_size = 100
    learning_rate = schedules.PowerLawDecay(initial=0.001, coefficient=0.1)
    mc_steps = 1

    (_, _, shuffled_filepath) = util.default_paths(paysage_path)

    # set up the reader to get minibatches
    data = batch.HDFBatch(shuffled_filepath, 'train/images', batch_size,
                          transform=pre.binarize_color, train_fraction=0.99)

    # set up the model and initialize the parameters
    vis_layer = layers.BernoulliLayer(data.ncols)
    hid_layer = layers.GaussianLayer(num_hidden_units)
    hid_layer.set_fixed_params(["loc", "log_var"])
    rbm = model.Model([vis_layer, hid_layer])
    rbm.initialize(data, method="glorot_normal")

    metrics = ['ReconstructionError', 'EnergyDistance', 'EnergyGap',
               'EnergyZscore', 'HeatCapacity', 'WeightSparsity', 'WeightSquare']
    perf = fit.ProgressMonitor(data, metrics=metrics)

    # set up the optimizer and the fit method
    opt = optimizers.ADAM(stepsize=learning_rate)
    sampler = fit.DrivenSequentialMC.from_batch(rbm, data)
    cd = fit.SGD(rbm, data, opt, num_epochs, sampler, method=fit.pcd,
                 mcsteps=mc_steps, monitor=perf)

    # fit the model
    print('training with persistent contrastive divergence')
    cd.train()

    # evaluate the model
    util.show_metrics(rbm, perf)
    valid = data.get('validate')
    util.show_reconstructions(rbm, valid, fit, show_plot,
                              n_recon=10, vertical=False, num_to_avg=10)
    util.show_fantasy_particles(rbm, valid, fit, show_plot, n_fantasy=25)
    util.show_weights(rbm, show_plot, n_weights=25)

    # close the HDF5 store
    data.close()
    print("Done")
def run(num_epochs=10, show_plot=False):
    num_hidden_units = 256
    batch_size = 100
    learning_rate = schedules.PowerLawDecay(initial=0.001, coefficient=0.1)
    mc_steps = 1

    # set up the reader to get minibatches
    data = util.create_batch(batch_size, train_fraction=0.95, transform=transform)

    # set up the model and initialize the parameters
    vis_layer = layers.GaussianLayer(data.ncols)
    hid_layer = layers.BernoulliLayer(num_hidden_units)
    rbm = BoltzmannMachine([vis_layer, hid_layer])
    rbm.initialize(data, 'stddev')
    rbm.layers[0].params.log_var[:] = \
        be.log(0.05 * be.ones_like(rbm.layers[0].params.log_var))

    opt = optimizers.ADAM(stepsize=learning_rate)

    # This example parameter set for TAP uses gradient descent to optimize the
    # Gibbs free energy:
    tap = fit.TAP(True, 1.0, 0.01, 100, False, 0.9, 0.001, 0.5)

    # This example parameter set for TAP uses self-consistent iteration to
    # optimize the Gibbs free energy:
    #tap = fit.TAP(False, tolerance=0.001, max_iters=100)

    sgd = fit.SGD(rbm, data)
    sgd.monitor.generator_metrics.append(TAPFreeEnergy())
    sgd.monitor.generator_metrics.append(TAPLogLikelihood())

    # fit the model
    print('Training with stochastic gradient ascent using TAP expansion')
    sgd.train(opt, num_epochs, method=tap.tap_update, mcsteps=mc_steps)

    util.show_metrics(rbm, sgd.monitor)
    valid = data.get('validate')
    util.show_reconstructions(rbm, valid, show_plot,
                              n_recon=10, vertical=False, num_to_avg=10)
    util.show_fantasy_particles(rbm, valid, show_plot, n_fantasy=5)
    util.show_weights(rbm, show_plot, n_weights=25)

    # close the HDF5 store
    data.close()
    print("Done")
def run(num_epochs=10, show_plot=False):
    num_hidden_units = 100
    batch_size = 100
    mc_steps = 10
    beta_std = 0.6

    # set up the reader to get minibatches
    with util.create_batch(batch_size, train_fraction=0.95,
                           transform=transform) as data:

        # set up the model and initialize the parameters
        vis_layer = layers.BernoulliLayer(data.ncols)
        hid_layer = layers.BernoulliLayer(num_hidden_units, center=False)
        rbm = BoltzmannMachine([vis_layer, hid_layer])
        rbm.connections[0].weights.add_penalty(
            {'matrix': pen.l2_penalty(0.001)})
        rbm.initialize(data, method='pca')

        print('training with persistent contrastive divergence')
        cd = fit.SGD(rbm, data)
        learning_rate = schedules.PowerLawDecay(initial=0.01, coefficient=0.1)
        opt = optimizers.ADAM(stepsize=learning_rate)
        cd.train(opt, num_epochs, mcsteps=mc_steps, method=fit.pcd)
        util.show_metrics(rbm, cd.monitor)

        # evaluate the model
        valid = data.get('validate')
        util.show_reconstructions(rbm, valid, show_plot,
                                  n_recon=10, vertical=False, num_to_avg=10)
        util.show_fantasy_particles(rbm, valid, show_plot, n_fantasy=5,
                                    beta_std=beta_std, fantasy_steps=100)
        util.show_weights(rbm, show_plot, n_weights=100)

    print("Done")
    return rbm
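# For reference, a self-contained numpy sketch of the persistent contrastive
# divergence (PCD) update that fit.pcd performs for a Bernoulli-Bernoulli RBM.
# This is an illustrative re-implementation of the standard algorithm, not
# paysage's internal code; pcd_step and its arguments are hypothetical names.
import numpy as np

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

def pcd_step(W, a, b, v_data, v_persist, lr=0.01, mc_steps=1, rng=np.random):
    """One PCD update. W: (n_vis, n_hid) weights; a: visible bias; b: hidden
    bias; v_data: (n, n_vis) minibatch; v_persist: (n, n_vis) fantasy
    particles carried over from the previous update."""
    # positive phase: hidden unit probabilities with visibles clamped to data
    h_data = sigmoid(v_data @ W + b)
    # negative phase: advance the persistent chain by mc_steps Gibbs sweeps
    v_model = v_persist
    for _ in range(mc_steps):
        h_model = (rng.rand(*h_data.shape) < sigmoid(v_model @ W + b)).astype(float)
        v_model = (rng.rand(*v_data.shape) < sigmoid(h_model @ W.T + a)).astype(float)
    h_model = sigmoid(v_model @ W + b)
    # stochastic log-likelihood gradient: data correlations minus model correlations
    n = len(v_data)
    W += lr * (v_data.T @ h_data - v_model.T @ h_model) / n
    a += lr * (v_data.mean(axis=0) - v_model.mean(axis=0))
    b += lr * (h_data.mean(axis=0) - h_model.mean(axis=0))
    return v_model  # passed back in as v_persist for the next minibatch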
def run(num_epochs=5, show_plot=False):
    num_hidden_units = 256
    batch_size = 100
    learning_rate = schedules.PowerLawDecay(initial=0.1, coefficient=3.0)
    mc_steps = 1

    # set up the reader to get minibatches
    data = util.create_batch(batch_size, train_fraction=0.95, transform=transform)

    # set up the model and initialize the parameters
    vis_layer = layers.BernoulliLayer(data.ncols)
    hid_layer = layers.BernoulliLayer(num_hidden_units)
    rbm = BoltzmannMachine([vis_layer, hid_layer])
    rbm.connections[0].weights.add_penalty(
        {'matrix': pen.l1_adaptive_decay_penalty_2(0.00001)})
    rbm.initialize(data, 'glorot_normal')

    opt = optimizers.Gradient(stepsize=learning_rate, tolerance=1e-4)

    tap = fit.TAP(True, 0.1, 0.01, 25, True, 0.5, 0.001, 0.0)
    sgd = fit.SGD(rbm, data)
    sgd.monitor.generator_metrics.append(TAPLogLikelihood())
    sgd.monitor.generator_metrics.append(TAPFreeEnergy())

    # fit the model
    print('Training with stochastic gradient ascent using TAP expansion')
    sgd.train(opt, num_epochs, method=tap.tap_update, mcsteps=mc_steps)

    util.show_metrics(rbm, sgd.monitor)
    valid = data.get('validate')
    util.show_reconstructions(rbm, valid, show_plot,
                              n_recon=10, vertical=False, num_to_avg=10)
    util.show_fantasy_particles(rbm, valid, show_plot, n_fantasy=5)
    util.show_weights(rbm, show_plot, n_weights=25)

    # close the HDF5 store
    data.close()
    print("Done")
def run(num_epochs=1, show_plot=False):
    num_hidden_units = 1
    batch_size = 100
    mc_steps = 10
    beta_std = 0.6

    # set up the reader to get minibatches
    # (`samples` is expected to be defined at module level)
    with batch.in_memory_batch(samples, batch_size,
                               train_fraction=0.95) as data:

        # set up the model and initialize the parameters
        vis_layer = layers.BernoulliLayer(data.ncols)
        hid_layer = layers.BernoulliLayer(num_hidden_units, center=False)
        rbm = BoltzmannMachine([vis_layer, hid_layer])
        rbm.connections[0].weights.add_penalty(
            {'matrix': pen.l2_penalty(0.001)})  # add regularization term
        rbm.initialize(data, method='hinton')  # initialize weights

        cd = fit.SGD(rbm, data)
        learning_rate = schedules.PowerLawDecay(initial=0.01, coefficient=0.1)
        opt = optimizers.ADAM(stepsize=learning_rate)

        print("Train the model...")
        cd.train(opt, num_epochs, mcsteps=mc_steps, method=fit.pcd,
                 verbose=False)

        '''
        # write the KL divergences to a file
        reverse_KL_div = [cd.monitor.memory[i]['ReverseKLDivergence']
                          for i in range(len(cd.monitor.memory))]
        KL_div = [cd.monitor.memory[i]['KLDivergence']
                  for i in range(len(cd.monitor.memory))]
        for i in range(len(cd.monitor.memory)):
            out_file1.write(str(KL_div[i]) + " " + str(reverse_KL_div[i]) + "\n")
        out_file1.close()

        # save the weights to a file
        filename = "results/weights/weights-" + temperature[:-4] + ".jpg"
        Gprotein_util.show_weights(rbm, show_plot=False, n_weights=8,
                                   Filename=filename, random=False)
        '''

    return rbm
def run(num_epochs=10, show_plot=False):
    num_hidden_units = 256
    batch_size = 100
    learning_rate = schedules.PowerLawDecay(initial=0.001, coefficient=0.1)
    mc_steps = 1

    # set up the reader to get minibatches
    data = util.create_batch(batch_size, train_fraction=0.95, transform=transform)

    # set up the model and initialize the parameters
    vis_layer = layers.BernoulliLayer(data.ncols)
    hid_layer = layers.GaussianLayer(num_hidden_units)
    rbm = BoltzmannMachine([vis_layer, hid_layer])
    rbm.initialize(data)

    # set up the optimizer and the fit method
    opt = optimizers.ADAM(stepsize=learning_rate)
    cd = fit.SGD(rbm, data)

    # fit the model
    print('training with persistent contrastive divergence')
    cd.train(opt, num_epochs, method=fit.pcd, mcsteps=mc_steps)

    # evaluate the model
    util.show_metrics(rbm, cd.monitor)
    valid = data.get('validate')
    util.show_reconstructions(rbm, valid, show_plot,
                              n_recon=10, vertical=False, num_to_avg=10)
    util.show_fantasy_particles(rbm, valid, show_plot, n_fantasy=5)
    util.show_weights(rbm, show_plot, n_weights=25)

    # close the HDF5 store
    data.close()
    print("Done")
def compute_fantasy_particles(rbm, v_data, fit, n_fantasy=25):
    grid_size = int(sqrt(n_fantasy))
    assert grid_size == sqrt(n_fantasy), \
        "n_fantasy must be the square of an integer"

    random_samples = rbm.random(v_data)
    model_state = State.from_visible(random_samples, rbm)

    schedule = schedules.PowerLawDecay(initial=1.0, coefficient=0.5)
    fantasy = fit.DrivenSequentialMC(rbm, schedule=schedule)
    dropout_scale = State.dropout_rescale(rbm)

    fantasy.set_state(model_state)
    fantasy.update_state(1000, dropout_scale)
    v_model = rbm.deterministic_iteration(1, fantasy.state, dropout_scale).units[0]

    idx = numpy.random.choice(range(len(v_model)), n_fantasy, replace=False)
    grid = numpy.array([be.to_numpy_array(v_model[i]) for i in idx])
    return grid.reshape(grid_size, grid_size, -1)
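# A hypothetical usage sketch for compute_fantasy_particles: draw a square
# grid of fantasy particles from a trained `rbm` and a validation batch
# `v_data` (both assumed to exist) and render them with matplotlib. The
# 28x28 image shape assumes MNIST-like data; plot_fantasy_grid is not part
# of the original module.
import matplotlib.pyplot as plt

def plot_fantasy_grid(rbm, v_data, fit, n_fantasy=25, img_shape=(28, 28)):
    grid = compute_fantasy_particles(rbm, v_data, fit, n_fantasy=n_fantasy)
    n = grid.shape[0]
    fig, axes = plt.subplots(n, n, figsize=(n, n))
    for i in range(n):
        for j in range(n):
            axes[i][j].imshow(grid[i, j].reshape(img_shape), cmap="gray")
            axes[i][j].axis("off")
    plt.show()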
def run(num_epochs=20, show_plot=False):
    num_hidden_units = 200
    batch_size = 100
    mc_steps = 10
    beta_std = 0.95

    # set up the reader to get minibatches
    data = util.create_batch(batch_size, train_fraction=0.95, transform=transform)

    # set up the model and initialize the parameters
    vis_layer = layers.GaussianLayer(data.ncols, center=False)
    hid_layer = layers.BernoulliLayer(num_hidden_units, center=True)
    hid_layer.set_fixed_params(hid_layer.get_param_names())
    rbm = BoltzmannMachine([vis_layer, hid_layer])
    rbm.initialize(data, 'pca', epochs=500, verbose=True)

    print('training with persistent contrastive divergence')
    cd = fit.SGD(rbm, data, fantasy_steps=10)
    cd.monitor.generator_metrics.append(M.JensenShannonDivergence())
    learning_rate = schedules.PowerLawDecay(initial=1e-3, coefficient=5)
    opt = optimizers.ADAM(stepsize=learning_rate)
    cd.train(opt, num_epochs, method=fit.pcd, mcsteps=mc_steps,
             beta_std=beta_std, burn_in=1)

    # evaluate the model
    util.show_metrics(rbm, cd.monitor)
    valid = data.get('validate')
    util.show_reconstructions(rbm, valid, show_plot, n_recon=10, vertical=False)
    util.show_fantasy_particles(rbm, valid, show_plot, n_fantasy=5)
    util.show_weights(rbm, show_plot, n_weights=100)

    # close the HDF5 store
    data.close()
    print("Done")
    return rbm
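# The JensenShannonDivergence metric tracked above compares data samples with
# model samples. A minimal numpy sketch of the underlying quantity for two
# explicit discrete distributions p and q (illustrative only; paysage
# estimates the divergence from minibatches rather than probability vectors):
import numpy as np

def jensen_shannon_divergence(p, q, eps=1e-12):
    """JSD(p, q) = 0.5 * KL(p || m) + 0.5 * KL(q || m), with m = (p + q) / 2."""
    p = np.asarray(p, dtype=float) + eps
    q = np.asarray(q, dtype=float) + eps
    p, q = p / p.sum(), q / q.sum()
    m = 0.5 * (p + q)
    kl = lambda a, b: np.sum(a * np.log(a / b))
    return 0.5 * kl(p, m) + 0.5 * kl(q, m)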
def test_rbm(paysage_path=None):
    num_hidden_units = 50
    batch_size = 50
    num_epochs = 1
    learning_rate = schedules.PowerLawDecay(initial=0.01, coefficient=0.1)
    mc_steps = 1

    if not paysage_path:
        paysage_path = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    filepath = os.path.join(paysage_path, 'examples', 'mnist', 'mnist.h5')

    if not os.path.exists(filepath):
        raise IOError(
            "{} does not exist. run mnist/download_mnist.py to fetch from the web"
            .format(filepath))

    shuffled_filepath = os.path.join(paysage_path, 'examples', 'mnist',
                                     'shuffled_mnist.h5')

    # shuffle the data
    if not os.path.exists(shuffled_filepath):
        shuffler = batch.DataShuffler(filepath, shuffled_filepath, complevel=0)
        shuffler.shuffle()

    # set a seed for the random number generator
    be.set_seed()

    import pandas
    samples = pre.binarize_color(
        be.float_tensor(
            pandas.read_hdf(shuffled_filepath,
                            key='train/images').values[:10000]))
    samples_train, samples_validate = batch.split_tensor(samples, 0.95)
    data = batch.Batch({
        'train': batch.InMemoryTable(samples_train, batch_size),
        'validate': batch.InMemoryTable(samples_validate, batch_size)
    })

    # set up the model and initialize the parameters
    vis_layer = layers.BernoulliLayer(data.ncols)
    hid_layer = layers.BernoulliLayer(num_hidden_units)
    rbm = BoltzmannMachine([vis_layer, hid_layer])
    rbm.initialize(data)

    # obtain initial estimate of the reconstruction error
    perf = ProgressMonitor()
    untrained_performance = perf.epoch_update(data, rbm, store=True, show=False)

    # set up the optimizer and the fit method
    opt = optimizers.RMSProp(stepsize=learning_rate)
    cd = fit.SGD(rbm, data)

    # fit the model
    print('training with persistent contrastive divergence')
    cd.train(opt, num_epochs, method=fit.pcd, mcsteps=mc_steps)

    # obtain an estimate of the reconstruction error after 1 epoch
    trained_performance = cd.monitor.memory[-1]

    assert (trained_performance['ReconstructionError'] <
            untrained_performance['ReconstructionError']), \
        "Reconstruction error did not decrease"

    # close the HDF5 store
    data.close()
def test_rbm(paysage_path=None):
    num_hidden_units = 50
    batch_size = 50
    num_epochs = 1
    learning_rate = schedules.PowerLawDecay(initial=0.01, coefficient=0.1)
    mc_steps = 1

    if not paysage_path:
        paysage_path = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    filepath = os.path.join(paysage_path, 'mnist', 'mnist.h5')

    if not os.path.exists(filepath):
        raise IOError(
            "{} does not exist. run mnist/download_mnist.py to fetch from the web"
            .format(filepath))

    shuffled_filepath = os.path.join(paysage_path, 'mnist', 'shuffled_mnist.h5')

    # shuffle the data
    if not os.path.exists(shuffled_filepath):
        shuffler = batch.DataShuffler(filepath, shuffled_filepath, complevel=0)
        shuffler.shuffle()

    # set a seed for the random number generator
    be.set_seed()

    # set up the reader to get minibatches
    data = batch.HDFBatch(shuffled_filepath, 'train/images', batch_size,
                          transform=pre.binarize_color, train_fraction=0.99)

    # set up the model and initialize the parameters
    vis_layer = layers.BernoulliLayer(data.ncols)
    hid_layer = layers.BernoulliLayer(num_hidden_units)
    rbm = model.Model([vis_layer, hid_layer])
    rbm.initialize(data)

    # obtain initial estimate of the reconstruction error
    perf = fit.ProgressMonitor(data, metrics=['ReconstructionError'])
    untrained_performance = perf.check_progress(rbm)

    # set up the optimizer and the fit method
    opt = optimizers.RMSProp(stepsize=learning_rate)
    sampler = fit.DrivenSequentialMC.from_batch(rbm, data)
    cd = fit.SGD(rbm, data, opt, num_epochs, sampler, method=fit.pcd,
                 mcsteps=mc_steps, monitor=perf)

    # fit the model
    print('training with persistent contrastive divergence')
    cd.train()

    # obtain an estimate of the reconstruction error after 1 epoch
    trained_performance = perf.check_progress(rbm)

    assert (trained_performance['ReconstructionError'] <
            untrained_performance['ReconstructionError']), \
        "Reconstruction error did not decrease"

    # close the HDF5 store
    data.close()
def run(paysage_path=None, num_epochs=10, show_plot=False):
    num_hidden_units = 256
    batch_size = 100
    learning_rate = schedules.PowerLawDecay(initial=0.01, coefficient=0.1)
    mc_steps = 1

    (_, _, shuffled_filepath) = util.default_paths(paysage_path)

    # set up the reader to get minibatches
    # (.values replaces the deprecated DataFrame.as_matrix())
    import pandas
    data = batch.InMemoryBatch(
        pre.binarize_color(
            be.float_tensor(
                pandas.read_hdf(shuffled_filepath, key='train/images').values)),
        batch_size,
        train_fraction=0.95)

    # set up the model and initialize the parameters
    vis_layer = layers.BernoulliLayer(data.ncols)
    hid_layer = layers.BernoulliLayer(num_hidden_units)
    rbm = model.Model([vis_layer, hid_layer])
    rbm.weights[0].add_penalty({'matrix': pen.l2_penalty(0.001)})
    rbm.initialize(data, method='glorot_normal')

    metrics = ['ReconstructionError', 'EnergyDistance', 'EnergyGap',
               'EnergyZscore', 'HeatCapacity', 'WeightSparsity', 'WeightSquare']
    perf = fit.ProgressMonitor(data, metrics=metrics)

    # set up the optimizer and the fit method
    opt = optimizers.ADAM(stepsize=learning_rate)
    sampler = fit.DrivenSequentialMC.from_batch(rbm, data)
    cd = fit.SGD(rbm, data, opt, num_epochs, sampler, method=fit.pcd,
                 mcsteps=mc_steps, monitor=perf)

    # fit the model
    print('training with persistent contrastive divergence')
    cd.train()

    # evaluate the model
    util.show_metrics(rbm, perf)
    valid = data.get('validate')
    util.show_reconstructions(rbm, valid, fit, show_plot,
                              n_recon=10, vertical=False, num_to_avg=10)
    util.show_fantasy_particles(rbm, valid, fit, show_plot, n_fantasy=25)
    util.show_weights(rbm, show_plot, n_weights=25)

    # close the HDF5 store
    data.close()
    print("Done")
def test_tap_machine(paysage_path=None):
    num_hidden_units = 10
    batch_size = 100
    num_epochs = 5
    learning_rate = schedules.PowerLawDecay(initial=0.1, coefficient=1.0)

    if not paysage_path:
        paysage_path = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    filepath = os.path.join(paysage_path, 'examples', 'mnist', 'mnist.h5')

    if not os.path.exists(filepath):
        raise IOError(
            "{} does not exist. run mnist/download_mnist.py to fetch from the web"
            .format(filepath))

    shuffled_filepath = os.path.join(paysage_path, 'examples', 'mnist',
                                     'shuffled_mnist.h5')

    # shuffle the data
    if not os.path.exists(shuffled_filepath):
        shuffler = batch.DataShuffler(filepath, shuffled_filepath, complevel=0)
        shuffler.shuffle()

    # set a seed for the random number generator
    be.set_seed()

    # set up the reader to get minibatches
    # (.values replaces the deprecated DataFrame.as_matrix())
    samples = pre.binarize_color(
        be.float_tensor(
            pandas.read_hdf(shuffled_filepath,
                            key='train/images').values[:10000]))
    samples_train, samples_validate = batch.split_tensor(samples, 0.95)
    data = batch.Batch({
        'train': batch.InMemoryTable(samples_train, batch_size),
        'validate': batch.InMemoryTable(samples_validate, batch_size)
    })

    # set up the model and initialize the parameters
    vis_layer = layers.BernoulliLayer(data.ncols)
    hid_layer = layers.BernoulliLayer(num_hidden_units)
    rbm = BoltzmannMachine([vis_layer, hid_layer])
    rbm.initialize(data)

    # obtain initial estimates of the metrics
    perf = ProgressMonitor(generator_metrics=[ReconstructionError(),
                                              TAPLogLikelihood(10),
                                              TAPFreeEnergy(10)])
    untrained_performance = perf.epoch_update(data, rbm, store=True, show=False)

    # set up the optimizer and the fit method
    opt = optimizers.Gradient(stepsize=learning_rate, tolerance=1e-5)
    tap = fit.TAP(True, 0.1, 0.01, 25, True, 0.5, 0.001, 0.0)
    solver = fit.SGD(rbm, data)
    solver.monitor.generator_metrics.append(TAPLogLikelihood(10))
    solver.monitor.generator_metrics.append(TAPFreeEnergy(10))

    # fit the model
    print('training with stochastic gradient ascent')
    solver.train(opt, num_epochs, method=tap.tap_update)

    # obtain estimates of the metrics after training
    trained_performance = solver.monitor.memory[-1]

    assert (trained_performance['TAPLogLikelihood'] >
            untrained_performance['TAPLogLikelihood']), \
        "TAP log-likelihood did not increase"
    assert (trained_performance['ReconstructionError'] <
            untrained_performance['ReconstructionError']), \
        "Reconstruction error did not decrease"

    # close the HDF5 store
    data.close()
def run(paysage_path=None, num_epochs=10, show_plot=False):
    num_hidden_units = 100
    batch_size = 100
    learning_rate = schedules.PowerLawDecay(initial=0.012, coefficient=0.1)
    mc_steps = 1

    (_, _, shuffled_filepath) = util.default_paths(paysage_path)

    # set up the reader to get minibatches
    data = batch.HDFBatch(shuffled_filepath, 'train/images', batch_size,
                          transform=pre.binarize_color, train_fraction=0.95)

    # set up the model and initialize the parameters
    vis_layer = layers.BernoulliLayer(data.ncols)
    hid_1_layer = layers.BernoulliLayer(num_hidden_units)
    hid_2_layer = layers.BernoulliLayer(num_hidden_units)
    hid_3_layer = layers.BernoulliLayer(num_hidden_units)
    rbm = model.Model([vis_layer, hid_1_layer, hid_2_layer, hid_3_layer])
    rbm.initialize(data, method='glorot_normal')

    print("Norms of the weights before training")
    util.weight_norm_histogram(rbm, show_plot=show_plot)

    # small penalties prevent the weights from consolidating
    rbm.weights[1].add_penalty({'matrix': pen.logdet_penalty(0.001)})
    rbm.weights[2].add_penalty({'matrix': pen.logdet_penalty(0.001)})

    metrics = ['ReconstructionError', 'EnergyDistance', 'EnergyGap',
               'EnergyZscore', 'HeatCapacity', 'WeightSparsity', 'WeightSquare']
    perf = fit.ProgressMonitor(data, metrics=metrics)

    # set up the optimizer and the fit method
    opt = optimizers.ADAM(stepsize=learning_rate)
    cd = fit.LayerwisePretrain(rbm, data, opt, num_epochs, method=fit.pcd,
                               mcsteps=mc_steps, metrics=metrics)

    # fit the model
    print('training with persistent contrastive divergence')
    cd.train()

    # evaluate the model
    util.show_metrics(rbm, perf)
    valid = data.get('validate')
    util.show_reconstructions(rbm, valid, fit, show_plot, num_to_avg=10)
    util.show_fantasy_particles(rbm, valid, fit, show_plot)

    from math import sqrt
    dim = tuple([28] +
                [int(sqrt(num_hidden_units)) for _ in range(rbm.num_weights)])
    util.show_weights(rbm, show_plot, dim=dim, n_weights=16)
    util.show_one_hot_reconstructions(rbm, fit, dim=28, n_recon=16, num_to_avg=1)

    print("Norms of the weights after training")
    util.weight_norm_histogram(rbm, show_plot=show_plot)

    # close the HDF5 store
    data.close()
    print("Done")
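# The LayerwisePretrain routine used above trains a deep Boltzmann machine
# greedily, one layer pair at a time. A conceptual sketch of the greedy loop,
# using hypothetical train_rbm(visible_data, n_hidden) and
# hidden_activations(rbm, data) helpers; neither name is paysage's actual API.
def layerwise_pretrain_sketch(data, layer_sizes, train_rbm, hidden_activations):
    """Greedy pretraining: each layer's RBM is trained on the activations
    produced by the layer below, and the learned weights seed the deep model."""
    rbms = []
    current = data
    for n_hidden in layer_sizes:
        rbm = train_rbm(current, n_hidden)           # train one RBM on current inputs
        current = hidden_activations(rbm, current)   # feed activations upward
        rbms.append(rbm)
    return rbms  # weights are then copied into the deep Boltzmann machine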