def test_onehot_conditional_params():
    # smoke test: should run without raising
    ly = layers.OneHotLayer(num_vis)
    w = layers.Weights((num_vis, num_hid))
    scaled_units = [be.randn((num_samples, num_hid))]
    weights = [w.W_T()]
    beta = be.rand((num_samples, 1))
    ly._conditional_params(scaled_units, weights, beta)

def test_onehot_derivatives():
    # smoke test: should run without raising
    ly = layers.OneHotLayer(num_vis)
    w = layers.Weights((num_vis, num_hid))
    vis = ly.random((num_samples, num_vis))
    hid = [be.randn((num_samples, num_hid))]
    weights = [w.W_T()]
    ly.derivatives(vis, hid, weights)

def test_onehot_build_from_config():
    ly = layers.OneHotLayer(num_vis)
    ly.add_constraint({'loc': constraints.non_negative})
    p = penalties.l2_penalty(0.37)
    ly.add_penalty({'loc': p})
    ly_new = layers.layer_from_config(ly.get_config())
    assert ly_new.get_config() == ly.get_config()

def test_onehot_conditional_params():
    num_visible_units = 100
    num_hidden_units = 50
    batch_size = 25

    # set a seed for the random number generator
    be.set_seed()

    # set up some layer and model objects
    vis_layer = layers.OneHotLayer(num_visible_units)
    hid_layer = layers.OneHotLayer(num_hidden_units)
    rbm = BoltzmannMachine([vis_layer, hid_layer])

    # randomly set the intrinsic model parameters
    a = be.randn((num_visible_units,))
    b = be.randn((num_hidden_units,))
    W = be.randn((num_visible_units, num_hidden_units))

    rbm.layers[0].params.loc[:] = a
    rbm.layers[1].params.loc[:] = b
    rbm.connections[0].weights.params.matrix[:] = W

    # generate a random batch of data
    vdata = rbm.layers[0].random((batch_size, num_visible_units))
    hdata = rbm.layers[1].random((batch_size, num_hidden_units))

    # compute conditional parameters
    hidden_field = be.dot(vdata, W)  # (batch_size, num_hidden_units)
    hidden_field += b

    visible_field = be.dot(hdata, be.transpose(W))  # (batch_size, num_visible_units)
    visible_field += a

    # compute conditional parameters with layer functions
    hidden_field_layer = rbm.layers[1].conditional_params(
        [vdata], [rbm.connections[0].W()])
    visible_field_layer = rbm.layers[0].conditional_params(
        [hdata], [rbm.connections[0].W(trans=True)])

    assert be.allclose(hidden_field, hidden_field_layer), \
        "hidden field wrong in onehot-onehot rbm"

    assert be.allclose(visible_field, visible_field_layer), \
        "visible field wrong in onehot-onehot rbm"

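# For reference: the fields verified above are the standard RBM conditional
# parameters, hidden_field = b + v W and visible_field = a + h W^T. The
# sketch below recomputes them in plain NumPy, independent of the paysage
# backend; the helper name and the numpy dependency are assumptions for
# illustration, not part of the library.

def _numpy_conditional_fields(v, h, a, b, W):
    """Hypothetical reference implementation of the RBM conditional fields."""
    import numpy as np
    hidden_field = np.dot(v, W) + b        # (batch_size, num_hidden_units)
    visible_field = np.dot(h, W.T) + a     # (batch_size, num_visible_units)
    return hidden_field, visible_field
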
def test_gaussian_1D_1mode_train():
    # create some example data
    num = 10000
    mu = 3
    sigma = 1
    samples = be.randn((num, 1)) * sigma + mu

    # set up the reader to get minibatches
    batch_size = 100
    samples_train, samples_validate = batch.split_tensor(samples, 0.9)
    data = batch.Batch({
        'train': batch.InMemoryTable(samples_train, batch_size),
        'validate': batch.InMemoryTable(samples_validate, batch_size)
    })

    # parameters
    learning_rate = schedules.PowerLawDecay(initial=0.1, coefficient=0.1)
    mc_steps = 1
    num_epochs = 10
    num_sample_steps = 100

    # set up the model and initialize the parameters
    vis_layer = layers.GaussianLayer(1)
    hid_layer = layers.OneHotLayer(1)

    rbm = BoltzmannMachine([vis_layer, hid_layer])
    rbm.initialize(data, method='hinton')

    # modify the parameters to shift the initialized model from the data;
    # this forces it to train
    rbm.layers[0].params = layers.ParamsGaussian(
        rbm.layers[0].params.loc - 3,
        rbm.layers[0].params.log_var - 1)

    # set up the optimizer and the fit method
    opt = optimizers.ADAM(stepsize=learning_rate)
    cd = fit.SGD(rbm, data)

    # fit the model
    print('training with persistent contrastive divergence')
    cd.train(opt, num_epochs, method=fit.pcd, mcsteps=mc_steps)

    # sample data from the trained model
    model_state = \
        samplers.SequentialMC.generate_fantasy_state(rbm, num, num_sample_steps)
    pts_trained = model_state[0]

    percent_error = 10
    mu_trained = be.mean(pts_trained)
    assert numpy.abs(mu_trained / mu - 1) < (percent_error / 100)

    sigma_trained = numpy.sqrt(be.var(pts_trained))
    assert numpy.abs(sigma_trained / sigma - 1) < (percent_error / 100)

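# The PowerLawDecay schedule used above presumably shrinks the step size as
# lr(t) = initial / (1 + coefficient * t); that exact form is an assumption
# here, not taken from the paysage source. A minimal self-contained sketch
# of such a schedule:

def _power_law_decay(initial, coefficient):
    """Hypothetical stand-in for schedules.PowerLawDecay: yields step sizes
    that decay as a power law in the epoch index t."""
    t = 0
    while True:
        yield initial / (1.0 + coefficient * t)
        t += 1
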
def run(num_epochs=10, show_plot=False):
    num_hidden_units = 256
    batch_size = 100
    learning_rate = schedules.PowerLawDecay(initial=0.01, coefficient=0.1)
    mc_steps = 1

    # set up the reader to get minibatches
    data = util.create_batch(batch_size, train_fraction=0.95, transform=transform)

    # set up the model and initialize the parameters
    vis_layer = layers.BernoulliLayer(data.ncols)
    hid_layer = layers.OneHotLayer(num_hidden_units)

    rbm = BoltzmannMachine([vis_layer, hid_layer])
    rbm.connections[0].weights.add_penalty({'matrix': pen.l2_penalty(0.001)})
    rbm.initialize(data, method='glorot_normal')

    # set up the optimizer and the fit method
    opt = optimizers.ADAM(stepsize=learning_rate)
    cd = fit.SGD(rbm, data)

    # fit the model
    print('training with persistent contrastive divergence')
    cd.train(opt, num_epochs, method=fit.pcd, mcsteps=mc_steps)

    # evaluate the model
    util.show_metrics(rbm, cd.monitor)
    valid = data.get('validate')
    util.show_reconstructions(rbm, valid, show_plot, n_recon=10,
                              vertical=False, num_to_avg=10)
    util.show_fantasy_particles(rbm, valid, show_plot, n_fantasy=5)
    util.show_weights(rbm, show_plot, n_weights=25)

    # close the HDF5 store
    data.close()
    print("Done")

def run(paysage_path=None, num_epochs=10, show_plot=False):
    num_hidden_units = 256
    batch_size = 100
    learning_rate = schedules.PowerLawDecay(initial=0.01, coefficient=0.1)
    mc_steps = 1

    (_, _, shuffled_filepath) = \
        util.default_paths(paysage_path)

    # set up the reader to get minibatches
    data = batch.HDFBatch(shuffled_filepath,
                          'train/images',
                          batch_size,
                          transform=pre.binarize_color,
                          train_fraction=0.95)

    # set up the model and initialize the parameters
    vis_layer = layers.BernoulliLayer(data.ncols)
    hid_layer = layers.OneHotLayer(num_hidden_units)

    rbm = model.Model([vis_layer, hid_layer])
    rbm.weights[0].add_penalty({'matrix': pen.l2_penalty(0.001)})
    rbm.initialize(data, method='glorot_normal')

    metrics = ['ReconstructionError', 'EnergyDistance', 'EnergyGap',
               'EnergyZscore', 'HeatCapacity', 'WeightSparsity', 'WeightSquare']
    perf = fit.ProgressMonitor(data, metrics=metrics)

    # set up the optimizer and the fit method
    opt = optimizers.ADAM(stepsize=learning_rate)
    sampler = fit.DrivenSequentialMC.from_batch(rbm, data)

    cd = fit.SGD(rbm, data, opt, num_epochs, method=fit.pcd, sampler=sampler,
                 mcsteps=mc_steps, monitor=perf)

    # fit the model
    print('training with persistent contrastive divergence')
    cd.train()

    # evaluate the model
    util.show_metrics(rbm, perf)
    valid = data.get('validate')
    util.show_reconstructions(rbm, valid, fit, show_plot, n_recon=10,
                              vertical=False, num_to_avg=10)
    util.show_fantasy_particles(rbm, valid, fit, show_plot, n_fantasy=25)
    util.show_weights(rbm, show_plot, n_weights=9)

    # close the HDF5 store
    data.close()
    print("Done")

def test_OneHot_creation():
    # smoke test: construct the layer
    layers.OneHotLayer(num_vis, 0)

def test_onehot_online_param_update():
    # smoke test: should run without raising
    ly = layers.OneHotLayer(num_vis)
    vis = ly.random((num_samples, num_vis))
    ly.online_param_update(vis)

def test_onehot_energy():
    # smoke test: should run without raising
    ly = layers.OneHotLayer(num_vis)
    vis = ly.random((num_samples, num_vis))
    ly.energy(vis)

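# For a one-hot layer each row of vis has exactly one active unit, so the
# layer energy presumably reduces to selecting the matching bias,
# E(v) = -loc[argmax(v)]. A minimal NumPy sketch of that convention (an
# assumption for illustration; this helper is not part of paysage):

def _numpy_onehot_energy(vis, loc):
    """Hypothetical reference energy for one-hot rows: -vis . loc."""
    import numpy as np
    return -np.dot(vis, loc)   # equals -loc[argmax(row)] for one-hot rows
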
def test_onehot_derivatives():
    num_visible_units = 100
    num_hidden_units = 50
    batch_size = 25

    # set a seed for the random number generator
    be.set_seed()

    # set up some layer and model objects
    vis_layer = layers.OneHotLayer(num_visible_units)
    hid_layer = layers.OneHotLayer(num_hidden_units)
    rbm = BoltzmannMachine([vis_layer, hid_layer])

    # randomly set the intrinsic model parameters
    a = be.randn((num_visible_units,))
    b = be.randn((num_hidden_units,))
    W = be.randn((num_visible_units, num_hidden_units))

    rbm.layers[0].params.loc[:] = a
    rbm.layers[1].params.loc[:] = b
    rbm.connections[0].weights.params.matrix[:] = W

    # generate a random batch of data
    vdata = rbm.layers[0].random((batch_size, num_visible_units))
    vdata_scaled = rbm.layers[0].rescale(vdata)

    # compute the conditional mean of the hidden layer
    hid_mean = rbm.layers[1].conditional_mean([vdata], [rbm.connections[0].W()])
    hid_mean_scaled = rbm.layers[1].rescale(hid_mean)

    # compute the derivatives
    d_visible_loc = -be.mean(vdata, axis=0)
    d_hidden_loc = -be.mean(hid_mean_scaled, axis=0)
    d_W = -be.batch_outer(vdata, hid_mean_scaled) / len(vdata)

    # compute the derivatives using the layer functions
    vis_derivs = rbm.layers[0].derivatives(vdata, [hid_mean_scaled],
                                           [rbm.connections[0].W(trans=True)])

    hid_derivs = rbm.layers[1].derivatives(hid_mean, [vdata_scaled],
                                           [rbm.connections[0].W()])

    weight_derivs = rbm.connections[0].weights.derivatives(vdata, hid_mean_scaled)

    # compute simple weighted derivatives using the layer functions
    scale = 2
    scale_func = partial(be.multiply, be.float_scalar(scale))
    vis_derivs_scaled = rbm.layers[0].derivatives(vdata, [hid_mean_scaled],
                                                  [rbm.connections[0].W(trans=True)],
                                                  weighting_function=scale_func)

    hid_derivs_scaled = rbm.layers[1].derivatives(hid_mean, [vdata_scaled],
                                                  [rbm.connections[0].W()],
                                                  weighting_function=scale_func)

    weight_derivs_scaled = rbm.connections[0].weights.derivatives(
        vdata, hid_mean_scaled, weighting_function=scale_func)

    assert be.allclose(d_visible_loc, vis_derivs[0].loc), \
        "derivative of visible loc wrong in onehot-onehot rbm"

    assert be.allclose(d_hidden_loc, hid_derivs[0].loc), \
        "derivative of hidden loc wrong in onehot-onehot rbm"

    assert be.allclose(d_W, weight_derivs[0].matrix), \
        "derivative of weights wrong in onehot-onehot rbm"

    assert be.allclose(scale * d_visible_loc, vis_derivs_scaled[0].loc), \
        "weighted derivative of visible loc wrong in onehot-onehot rbm"

    assert be.allclose(scale * d_hidden_loc, hid_derivs_scaled[0].loc), \
        "weighted derivative of hidden loc wrong in onehot-onehot rbm"

    assert be.allclose(scale * d_W, weight_derivs_scaled[0].matrix), \
        "weighted derivative of weights wrong in onehot-onehot rbm"

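# The moments asserted above follow from the RBM energy
#   E(v, h) = -a.v - b.h - v.W.h,
# whose batch-averaged parameter gradients are the negative sample moments
#   dE/da = -<v>,  dE/db = -<h>,  dE/dW = -<v h^T>.
# A minimal NumPy sketch of the same moment computations (illustrative only;
# plain arrays stand in for the backend tensors used by the test):

def _numpy_energy_derivs(v, h):
    """Hypothetical reference derivatives for an RBM on a batch (v, h)."""
    import numpy as np
    d_a = -np.mean(v, axis=0)        # matches d_visible_loc above
    d_b = -np.mean(h, axis=0)        # matches d_hidden_loc above
    d_W = -np.dot(v.T, h) / len(v)   # batch outer product, matches d_W above
    return d_a, d_b, d_W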