def test_bernoulli_derivatives():
    num_visible_units = 100
    num_hidden_units = 50
    batch_size = 25

    # set a seed for the random number generator
    be.set_seed()

    # set up some layer and model objects
    vis_layer = layers.BernoulliLayer(num_visible_units)
    hid_layer = layers.BernoulliLayer(num_hidden_units)
    rbm = hidden.Model([vis_layer, hid_layer])

    # randomly set the intrinsic model parameters
    a = be.randn((num_visible_units,))
    b = be.randn((num_hidden_units,))
    W = be.randn((num_visible_units, num_hidden_units))

    rbm.layers[0].int_params.loc[:] = a
    rbm.layers[1].int_params.loc[:] = b
    rbm.weights[0].int_params.matrix[:] = W

    # generate a random batch of data
    vdata = rbm.layers[0].random((batch_size, num_visible_units))
    vdata_scaled = rbm.layers[0].rescale(vdata)

    # compute the mean of the hidden layer
    rbm.layers[1].update([vdata], [rbm.weights[0].W()])
    hid_mean = rbm.layers[1].mean()
    hid_mean_scaled = rbm.layers[1].rescale(hid_mean)

    # compute the derivatives
    d_visible_loc = -be.mean(vdata, axis=0)
    d_hidden_loc = -be.mean(hid_mean_scaled, axis=0)
    d_W = -be.batch_outer(vdata, hid_mean_scaled) / len(vdata)

    # compute the derivatives using the layer functions
    vis_derivs = rbm.layers[0].derivatives(vdata, [hid_mean_scaled],
                                           [rbm.weights[0].W()])
    hid_derivs = rbm.layers[1].derivatives(hid_mean, [vdata_scaled],
                                           [rbm.weights[0].W_T()])
    weight_derivs = rbm.weights[0].derivatives(vdata, hid_mean_scaled)

    assert be.allclose(d_visible_loc, vis_derivs.loc), \
        "derivative of visible loc wrong in bernoulli-bernoulli rbm"
    assert be.allclose(d_hidden_loc, hid_derivs.loc), \
        "derivative of hidden loc wrong in bernoulli-bernoulli rbm"
    assert be.allclose(d_W, weight_derivs.matrix), \
        "derivative of weights wrong in bernoulli-bernoulli rbm"
def test_state_for_grad_DrivenSequentialMC():
    num_visible_units = 100
    num_hidden_units = 50
    batch_size = 25

    # set a seed for the random number generator
    be.set_seed()

    # set up some layer and model objects
    vis_layer = layers.BernoulliLayer(num_visible_units)
    hid_layer = layers.BernoulliLayer(num_hidden_units)
    rbm = model.Model([vis_layer, hid_layer])

    # randomly set the intrinsic model parameters
    a = be.randn((num_visible_units,))
    b = be.randn((num_hidden_units,))
    W = be.randn((num_visible_units, num_hidden_units))

    rbm.layers[0].params.loc[:] = a
    rbm.layers[1].params.loc[:] = b
    rbm.weights[0].params.matrix[:] = W

    # generate a random batch of data
    vdata = rbm.layers[0].random((batch_size, num_visible_units))
    data_state = State.from_visible(vdata, rbm)
    dropout_scale = State.dropout_rescale(rbm)

    # since we set no dropout, dropout_scale should be None
    assert dropout_scale is None

    for u in ['markov_chain', 'mean_field_iteration', 'deterministic_iteration']:
        # set up the sampler
        sampler = fit.DrivenSequentialMC(rbm, updater=u, clamped=[0])
        sampler.set_state(data_state)

        # update the state of the hidden layer
        grad_state = sampler.state_for_grad(1, dropout_scale)

        assert be.allclose(data_state.units[0], grad_state.units[0]), \
            "visible layer is clamped, and shouldn't get updated: {}".format(u)
        assert not be.allclose(data_state.units[1], grad_state.units[1]), \
            "hidden layer is not clamped, and should get updated: {}".format(u)

        # compute the conditional mean with the layer function
        ave = rbm.layers[1].conditional_mean(
            rbm._connected_rescaled_units(1, data_state, dropout_scale),
            rbm._connected_weights(1))

        assert be.allclose(ave, grad_state.units[1]), \
            "hidden layer of grad_state should be conditional mean: {}".format(u)
def test_random_grad():
    num_visible_units = 100
    num_hidden_units = 50

    # set a seed for the random number generator
    be.set_seed()

    # set up some layer and model objects
    vis_layer = layers.BernoulliLayer(num_visible_units)
    hid_layer = layers.BernoulliLayer(num_hidden_units)
    rbm = model.Model([vis_layer, hid_layer])

    # create a gradient object filled with random numbers
    gu.random_grad(rbm)
def test_zero_grad():
    num_visible_units = 100
    num_hidden_units = 50

    # set a seed for the random number generator
    be.set_seed()

    # set up some layer and model objects
    vis_layer = layers.BernoulliLayer(num_visible_units)
    hid_layer = layers.BernoulliLayer(num_hidden_units)
    rbm = BoltzmannMachine([vis_layer, hid_layer])

    # create a gradient object filled with zeros
    gu.zero_grad(rbm)
def test_find_k_nearest_neighbors():
    n = 20
    shp = (20, n)
    perm = be.rand_int(0, 20, (20,))
    k = 1

    be.set_seed()
    y = be.randn(shp)
    x = y[perm]

    indices, _distances = math_utils.find_k_nearest_neighbors(x, y, k)

    assert be.allclose(indices, perm)
    assert be.allclose(_distances, be.zeros((20,)), 1e-2, 1e-2)
def test_grad_accumulate():
    num_visible_units = 100
    num_hidden_units = 50

    # set a seed for the random number generator
    be.set_seed()

    # set up some layer and model objects
    vis_layer = layers.BernoulliLayer(num_visible_units)
    hid_layer = layers.BernoulliLayer(num_hidden_units)
    rbm = BoltzmannMachine([vis_layer, hid_layer])

    # create a gradient object filled with random numbers
    grad = gu.random_grad(rbm)
    gu.grad_accumulate(be.norm, grad)
def test_clamped_SequentialMC():
    num_visible_units = 100
    num_hidden_units = 50
    batch_size = 25
    steps = 1

    # set a seed for the random number generator
    be.set_seed()

    # set up some layer and model objects
    vis_layer = layers.BernoulliLayer(num_visible_units)
    hid_layer = layers.BernoulliLayer(num_hidden_units)
    rbm = model.Model([vis_layer, hid_layer])

    # randomly set the intrinsic model parameters
    a = be.randn((num_visible_units,))
    b = be.randn((num_hidden_units,))
    W = be.randn((num_visible_units, num_hidden_units))

    rbm.layers[0].params.loc[:] = a
    rbm.layers[1].params.loc[:] = b
    rbm.weights[0].params.matrix[:] = W

    # generate a random batch of data
    vdata = rbm.layers[0].random((batch_size, num_visible_units))
    data_state = State.from_visible(vdata, rbm)
    dropout_scale = State.dropout_rescale(rbm)

    # since we set no dropout, dropout_scale should be None
    assert dropout_scale is None

    for u in ['markov_chain', 'mean_field_iteration', 'deterministic_iteration']:
        # set up the sampler with the visible layer clamped
        sampler = fit.SequentialMC(rbm, updater=u, clamped=[0])
        sampler.set_state(data_state)

        # update the sampler state and check the output
        sampler.update_state(steps, dropout_scale)

        assert be.allclose(data_state.units[0], sampler.state.units[0]), \
            "visible layer is clamped, and shouldn't get updated: {}".format(u)
        assert not be.allclose(data_state.units[1], sampler.state.units[1]), \
            "hidden layer is not clamped, and should get updated: {}".format(u)
def test_exponential_conditional_params():
    num_visible_units = 100
    num_hidden_units = 50
    batch_size = 25

    # set a seed for the random number generator
    be.set_seed()

    # set up some layer and model objects
    vis_layer = layers.ExponentialLayer(num_visible_units)
    hid_layer = layers.ExponentialLayer(num_hidden_units)
    rbm = model.Model([vis_layer, hid_layer])

    # randomly set the intrinsic model parameters
    # for the exponential layers, we need a > 0, b > 0, and W < 0
    a = be.rand((num_visible_units,))
    b = be.rand((num_hidden_units,))
    W = -be.rand((num_visible_units, num_hidden_units))

    rbm.layers[0].params.loc[:] = a
    rbm.layers[1].params.loc[:] = b
    rbm.weights[0].params.matrix[:] = W

    # generate a random batch of data
    vdata = rbm.layers[0].random((batch_size, num_visible_units))
    hdata = rbm.layers[1].random((batch_size, num_hidden_units))

    # compute conditional parameters
    hidden_rate = -be.dot(vdata, W)  # (batch_size, num_hidden_units)
    hidden_rate += be.broadcast(b, hidden_rate)

    visible_rate = -be.dot(hdata, be.transpose(W))  # (batch_size, num_visible_units)
    visible_rate += be.broadcast(a, visible_rate)

    # compute the conditional parameters using the layer functions
    hidden_rate_func = rbm.layers[1]._conditional_params(
        [vdata], [rbm.weights[0].W()])
    visible_rate_func = rbm.layers[0]._conditional_params(
        [hdata], [rbm.weights[0].W_T()])

    assert be.allclose(hidden_rate, hidden_rate_func), \
        "hidden rate wrong in exponential-exponential rbm"
    assert be.allclose(visible_rate, visible_rate_func), \
        "visible rate wrong in exponential-exponential rbm"
def test_bernoulli_conditional_params():
    num_visible_units = 100
    num_hidden_units = 50
    batch_size = 25

    # set a seed for the random number generator
    be.set_seed()

    # set up some layer and model objects
    vis_layer = layers.BernoulliLayer(num_visible_units)
    hid_layer = layers.BernoulliLayer(num_hidden_units)
    rbm = model.Model([vis_layer, hid_layer])

    # randomly set the intrinsic model parameters
    a = be.randn((num_visible_units,))
    b = be.randn((num_hidden_units,))
    W = be.randn((num_visible_units, num_hidden_units))

    rbm.layers[0].params.loc[:] = a
    rbm.layers[1].params.loc[:] = b
    rbm.weights[0].params.matrix[:] = W

    # generate a random batch of data
    vdata = rbm.layers[0].random((batch_size, num_visible_units))
    hdata = rbm.layers[1].random((batch_size, num_hidden_units))

    # compute conditional parameters
    hidden_field = be.dot(vdata, W)  # (batch_size, num_hidden_units)
    hidden_field += b

    visible_field = be.dot(hdata, be.transpose(W))  # (batch_size, num_visible_units)
    visible_field += a

    # compute the conditional parameters with the layer functions
    hidden_field_layer = rbm.layers[1]._conditional_params(
        [vdata], [rbm.weights[0].W()])
    visible_field_layer = rbm.layers[0]._conditional_params(
        [hdata], [rbm.weights[0].W_T()])

    assert be.allclose(hidden_field, hidden_field_layer), \
        "hidden field wrong in bernoulli-bernoulli rbm"
    assert be.allclose(visible_field, visible_field_layer), \
        "visible field wrong in bernoulli-bernoulli rbm"
def test_grad_normalize_():
    num_visible_units = 10
    num_hidden_units = 10

    # set a seed for the random number generator
    be.set_seed()

    # set up some layer and model objects
    vis_layer = layers.BernoulliLayer(num_visible_units)
    hid_layer = layers.BernoulliLayer(num_hidden_units)
    rbm = BoltzmannMachine([vis_layer, hid_layer])

    # create a gradient object filled with random numbers
    grad = gu.random_grad(rbm)
    gu.grad_normalize_(grad)
    nrm = gu.grad_norm(grad)

    assert nrm > 1 - 1e-6
    assert nrm < 1 + 1e-6
def test_exponential_update():
    num_visible_units = 100
    num_hidden_units = 50
    batch_size = 25

    # set a seed for the random number generator
    be.set_seed()

    # set up some layer and model objects
    vis_layer = layers.ExponentialLayer(num_visible_units)
    hid_layer = layers.ExponentialLayer(num_hidden_units)
    rbm = hidden.Model([vis_layer, hid_layer])

    # randomly set the intrinsic model parameters
    # for the exponential layers, we need a > 0, b > 0, and W < 0
    a = be.rand((num_visible_units,))
    b = be.rand((num_hidden_units,))
    W = -be.rand((num_visible_units, num_hidden_units))

    rbm.layers[0].int_params['loc'] = a
    rbm.layers[1].int_params['loc'] = b
    rbm.weights[0].int_params['matrix'] = W

    # generate a random batch of data
    vdata = rbm.layers[0].random((batch_size, num_visible_units))
    hdata = rbm.layers[1].random((batch_size, num_hidden_units))

    # compute extrinsic parameters
    hidden_rate = -be.dot(vdata, W)  # (batch_size, num_hidden_units)
    hidden_rate += be.broadcast(b, hidden_rate)

    visible_rate = -be.dot(hdata, be.transpose(W))  # (batch_size, num_visible_units)
    visible_rate += be.broadcast(a, visible_rate)

    # update the extrinsic parameters using the layer functions
    rbm.layers[1].update(vdata, rbm.weights[0].W())
    rbm.layers[0].update(hdata, be.transpose(rbm.weights[0].W()))

    assert be.allclose(hidden_rate, rbm.layers[1].ext_params['rate']), \
        "hidden rate wrong in exponential-exponential rbm"
    assert be.allclose(visible_rate, rbm.layers[0].ext_params['rate']), \
        "visible rate wrong in exponential-exponential rbm"
def test_bernoulli_update():
    num_visible_units = 100
    num_hidden_units = 50
    batch_size = 25

    # set a seed for the random number generator
    be.set_seed()

    # set up some layer and model objects
    vis_layer = layers.BernoulliLayer(num_visible_units)
    hid_layer = layers.BernoulliLayer(num_hidden_units)
    rbm = hidden.Model([vis_layer, hid_layer])

    # randomly set the intrinsic model parameters
    a = be.randn((num_visible_units,))
    b = be.randn((num_hidden_units,))
    W = be.randn((num_visible_units, num_hidden_units))

    rbm.layers[0].int_params.loc[:] = a
    rbm.layers[1].int_params.loc[:] = b
    rbm.weights[0].int_params.matrix[:] = W

    # generate a random batch of data
    vdata = rbm.layers[0].random((batch_size, num_visible_units))
    hdata = rbm.layers[1].random((batch_size, num_hidden_units))

    # compute extrinsic parameters
    hidden_field = be.dot(vdata, W)  # (batch_size, num_hidden_units)
    hidden_field += be.broadcast(b, hidden_field)

    visible_field = be.dot(hdata, be.transpose(W))  # (batch_size, num_visible_units)
    visible_field += be.broadcast(a, visible_field)

    # update the extrinsic parameters using the layer functions
    rbm.layers[0].update([hdata], [rbm.weights[0].W_T()])
    rbm.layers[1].update([vdata], [rbm.weights[0].W()])

    assert be.allclose(hidden_field, rbm.layers[1].ext_params.field), \
        "hidden field wrong in bernoulli-bernoulli rbm"
    assert be.allclose(visible_field, rbm.layers[0].ext_params.field), \
        "visible field wrong in bernoulli-bernoulli rbm"
def test_pdist():
    n = 500
    a_shape = (1000, n)
    b_shape = (1000, n)

    # distance distributions
    a_mean, a_scale = 1, 1
    b_mean, b_scale = -1, 1

    be.set_seed()
    a = a_mean + a_scale * be.randn(a_shape)
    b = b_mean + b_scale * be.randn(b_shape)

    dists = math_utils.pdist(a, b)
    dists_t = math_utils.pdist(b, a)

    assert be.shape(dists) == (1000, 1000)
    assert be.allclose(be.transpose(dists_t), dists)
    assert 2 * math.sqrt(n) < be.mean(dists) < 3 * math.sqrt(n)
def test_grad_norm():
    num_visible_units = 1000
    num_hidden_units = 1000

    # set a seed for the random number generator
    be.set_seed()

    # set up some layer and model objects
    vis_layer = layers.BernoulliLayer(num_visible_units)
    hid_layer = layers.BernoulliLayer(num_hidden_units)
    rbm = BoltzmannMachine([vis_layer, hid_layer])

    # create a gradient object filled with random numbers
    grad = gu.random_grad(rbm)
    nrm = gu.grad_norm(grad)

    assert nrm > math.sqrt(be.float_scalar(num_hidden_units + num_visible_units +
                                           num_visible_units * num_hidden_units) / 3) - 1
    assert nrm < math.sqrt(be.float_scalar(num_hidden_units + num_visible_units +
                                           num_visible_units * num_hidden_units) / 3) + 1
def test_unclamped_DrivenSequentialMC():
    num_visible_units = 100
    num_hidden_units = 50
    batch_size = 25
    steps = 1

    # set a seed for the random number generator
    be.set_seed()

    # set up some layer and model objects
    vis_layer = layers.BernoulliLayer(num_visible_units)
    hid_layer = layers.BernoulliLayer(num_hidden_units)
    rbm = BoltzmannMachine([vis_layer, hid_layer])

    # randomly set the intrinsic model parameters
    a = be.randn((num_visible_units,))
    b = be.randn((num_hidden_units,))
    W = be.randn((num_visible_units, num_hidden_units))

    rbm.layers[0].params.loc[:] = a
    rbm.layers[1].params.loc[:] = b
    rbm.connections[0].weights.params.matrix[:] = W

    # generate a random batch of data
    vdata = rbm.layers[0].random((batch_size, num_visible_units))
    data_state = State.from_visible(vdata, rbm)

    for u in ['markov_chain', 'mean_field_iteration', 'deterministic_iteration']:
        # set up the sampler with no layers clamped
        sampler = samplers.SequentialMC(rbm, updater=u)
        sampler.set_state(data_state)

        # update the sampler state and check the output
        sampler.update_state(steps)

        assert not be.allclose(data_state[0], sampler.state[0]), \
            "visible layer is not clamped, and should get updated: {}".format(u)
        assert not be.allclose(data_state[1], sampler.state[1]), \
            "hidden layer is not clamped, and should get updated: {}".format(u)
def test_gaussian_derivatives():
    num_visible_units = 100
    num_hidden_units = 50
    batch_size = 25

    # set a seed for the random number generator
    be.set_seed()

    # set up some layer and model objects
    vis_layer = layers.GaussianLayer(num_visible_units)
    hid_layer = layers.GaussianLayer(num_hidden_units)
    rbm = hidden.Model([vis_layer, hid_layer])

    # randomly set the intrinsic model parameters
    a = be.randn((num_visible_units,))
    b = be.randn((num_hidden_units,))
    log_var_a = 0.1 * be.randn((num_visible_units,))
    log_var_b = 0.1 * be.randn((num_hidden_units,))
    W = be.randn((num_visible_units, num_hidden_units))

    rbm.layers[0].int_params.loc[:] = a
    rbm.layers[1].int_params.loc[:] = b
    rbm.layers[0].int_params.log_var[:] = log_var_a
    rbm.layers[1].int_params.log_var[:] = log_var_b
    rbm.weights[0].int_params.matrix[:] = W

    # generate a random batch of data
    vdata = rbm.layers[0].random((batch_size, num_visible_units))
    visible_var = be.exp(log_var_a)
    vdata_scaled = vdata / be.broadcast(visible_var, vdata)

    # compute the mean of the hidden layer
    rbm.layers[1].update([vdata_scaled], [rbm.weights[0].W()])
    hidden_var = be.exp(log_var_b)
    hid_mean = rbm.layers[1].mean()
    hid_mean_scaled = rbm.layers[1].rescale(hid_mean)

    # compute the derivatives
    d_vis_loc = -be.mean(vdata_scaled, axis=0)

    d_vis_logvar = -0.5 * be.mean(be.square(be.subtract(a, vdata)), axis=0)
    d_vis_logvar += be.batch_dot(hid_mean_scaled, be.transpose(W), vdata,
                                 axis=0) / len(vdata)
    d_vis_logvar /= visible_var

    d_hid_loc = -be.mean(hid_mean_scaled, axis=0)

    d_hid_logvar = -0.5 * be.mean(be.square(hid_mean - be.broadcast(b, hid_mean)), axis=0)
    d_hid_logvar += be.batch_dot(vdata_scaled, W, hid_mean,
                                 axis=0) / len(hid_mean)
    d_hid_logvar /= hidden_var

    d_W = -be.batch_outer(vdata_scaled, hid_mean_scaled) / len(vdata_scaled)

    # compute the derivatives using the layer functions
    rbm.layers[1].update([vdata_scaled], [rbm.weights[0].W()])
    rbm.layers[0].update([hid_mean_scaled], [rbm.weights[0].W_T()])

    vis_derivs = rbm.layers[0].derivatives(vdata, [hid_mean_scaled],
                                           [rbm.weights[0].W()])
    hid_derivs = rbm.layers[1].derivatives(hid_mean, [vdata_scaled],
                                           [rbm.weights[0].W_T()])
    weight_derivs = rbm.weights[0].derivatives(vdata_scaled, hid_mean_scaled)

    assert be.allclose(d_vis_loc, vis_derivs.loc), \
        "derivative of visible loc wrong in gaussian-gaussian rbm"
    assert be.allclose(d_hid_loc, hid_derivs.loc), \
        "derivative of hidden loc wrong in gaussian-gaussian rbm"
    assert be.allclose(d_vis_logvar, vis_derivs.log_var, rtol=1e-05, atol=1e-01), \
        "derivative of visible log_var wrong in gaussian-gaussian rbm"
    assert be.allclose(d_hid_logvar, hid_derivs.log_var, rtol=1e-05, atol=1e-01), \
        "derivative of hidden log_var wrong in gaussian-gaussian rbm"
    assert be.allclose(d_W, weight_derivs.matrix), \
        "derivative of weights wrong in gaussian-gaussian rbm"
def test_gaussian_update():
    num_visible_units = 100
    num_hidden_units = 50
    batch_size = 25

    # set a seed for the random number generator
    be.set_seed()

    # set up some layer and model objects
    vis_layer = layers.GaussianLayer(num_visible_units)
    hid_layer = layers.GaussianLayer(num_hidden_units)
    rbm = hidden.Model([vis_layer, hid_layer])

    # randomly set the intrinsic model parameters
    a = be.randn((num_visible_units,))
    b = be.randn((num_hidden_units,))
    log_var_a = 0.1 * be.randn((num_visible_units,))
    log_var_b = 0.1 * be.randn((num_hidden_units,))
    W = be.randn((num_visible_units, num_hidden_units))

    rbm.layers[0].int_params.loc[:] = a
    rbm.layers[1].int_params.loc[:] = b
    rbm.layers[0].int_params.log_var[:] = log_var_a
    rbm.layers[1].int_params.log_var[:] = log_var_b
    rbm.weights[0].int_params.matrix[:] = W

    # generate a random batch of data
    vdata = rbm.layers[0].random((batch_size, num_visible_units))
    hdata = rbm.layers[1].random((batch_size, num_hidden_units))

    # compute the variance
    visible_var = be.exp(log_var_a)
    hidden_var = be.exp(log_var_b)

    # rescale the data
    vdata_scaled = vdata / be.broadcast(visible_var, vdata)
    hdata_scaled = hdata / be.broadcast(hidden_var, hdata)

    # test rescale
    assert be.allclose(vdata_scaled, rbm.layers[0].rescale(vdata)), \
        "visible rescale wrong in gaussian-gaussian rbm"
    assert be.allclose(hdata_scaled, rbm.layers[1].rescale(hdata)), \
        "hidden rescale wrong in gaussian-gaussian rbm"

    # compute the mean
    hidden_mean = be.dot(vdata_scaled, W)  # (batch_size, num_hidden_units)
    hidden_mean += be.broadcast(b, hidden_mean)

    visible_mean = be.dot(hdata_scaled, be.transpose(W))  # (batch_size, num_visible_units)
    visible_mean += be.broadcast(a, visible_mean)

    # update the extrinsic parameters using the layer functions
    rbm.layers[0].update([hdata_scaled], [rbm.weights[0].W_T()])
    rbm.layers[1].update([vdata_scaled], [rbm.weights[0].W()])

    assert be.allclose(visible_var, rbm.layers[0].ext_params.variance), \
        "visible variance wrong in gaussian-gaussian rbm"
    assert be.allclose(hidden_var, rbm.layers[1].ext_params.variance), \
        "hidden variance wrong in gaussian-gaussian rbm"
    assert be.allclose(visible_mean, rbm.layers[0].ext_params.mean), \
        "visible mean wrong in gaussian-gaussian rbm"
    assert be.allclose(hidden_mean, rbm.layers[1].ext_params.mean), \
        "hidden mean wrong in gaussian-gaussian rbm"
def test_conditional_sampling():
    """
    Test sampling from one layer conditioned on the state of another layer.

    Note:
        This test compares values estimated by *sampling* to values computed
        analytically. It can fail for small batch_size, or strict tolerances,
        even if everything is working properly.

    """
    num_visible_units = 20
    num_hidden_units = 10
    steps = 1000
    mean_tol = 0.1

    # set a seed for the random number generator
    be.set_seed()

    layer_types = [layers.BernoulliLayer, layers.GaussianLayer]

    for layer_type in layer_types:
        # set up some layer and model objects
        vis_layer = layer_type(num_visible_units)
        hid_layer = layer_type(num_hidden_units)
        rbm = BoltzmannMachine([vis_layer, hid_layer])

        # randomly set the intrinsic model parameters
        a = be.rand((num_visible_units,))
        b = be.rand((num_hidden_units,))
        W = 10 * be.rand((num_visible_units, num_hidden_units))

        rbm.layers[0].params.loc[:] = a
        rbm.layers[1].params.loc[:] = b
        rbm.connections[0].weights.params.matrix[:] = W

        if layer_type == layers.GaussianLayer:
            log_var_a = be.randn((num_visible_units,))
            log_var_b = be.randn((num_hidden_units,))
            rbm.layers[0].params.log_var[:] = log_var_a
            rbm.layers[1].params.log_var[:] = log_var_b

        # initialize a state
        state = State.from_model(1, rbm)

        # set up a calculator for the moments
        moments = mu.MeanVarianceArrayCalculator()

        for _ in range(steps):
            moments.update(rbm.layers[0].conditional_sample(
                rbm._connected_rescaled_units(0, state),
                rbm._connected_weights(0)))

        model_mean = rbm.layers[0].conditional_mean(
            rbm._connected_rescaled_units(0, state),
            rbm._connected_weights(0))

        ave = moments.mean
        close = be.allclose(ave, model_mean[0], rtol=mean_tol, atol=mean_tol)
        assert close, "{} conditional mean".format(layer_type)

        if layer_type == layers.GaussianLayer:
            model_mean, model_var = rbm.layers[0].conditional_params(
                rbm._connected_rescaled_units(0, state),
                rbm._connected_weights(0))
            close = be.allclose(be.sqrt(moments.var), be.sqrt(model_var[0]),
                                rtol=mean_tol, atol=mean_tol)
            assert close, "{} conditional standard deviation".format(layer_type)
def test_tap_machine(paysage_path=None):
    num_hidden_units = 10
    batch_size = 100
    num_epochs = 5
    learning_rate = schedules.PowerLawDecay(initial=0.1, coefficient=1.0)

    if not paysage_path:
        paysage_path = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    filepath = os.path.join(paysage_path, 'examples', 'mnist', 'mnist.h5')

    if not os.path.exists(filepath):
        raise IOError(
            "{} does not exist. run mnist/download_mnist.py to fetch from the web"
            .format(filepath))

    shuffled_filepath = os.path.join(paysage_path, 'examples', 'mnist',
                                     'shuffled_mnist.h5')

    # shuffle the data
    if not os.path.exists(shuffled_filepath):
        shuffler = batch.DataShuffler(filepath, shuffled_filepath, complevel=0)
        shuffler.shuffle()

    # set a seed for the random number generator
    be.set_seed()

    # set up the reader to get minibatches
    samples = pre.binarize_color(
        be.float_tensor(
            pandas.read_hdf(shuffled_filepath,
                            key='train/images').as_matrix()[:10000]))
    samples_train, samples_validate = batch.split_tensor(samples, 0.95)
    data = batch.Batch({
        'train': batch.InMemoryTable(samples_train, batch_size),
        'validate': batch.InMemoryTable(samples_validate, batch_size)})

    # set up the model and initialize the parameters
    vis_layer = layers.BernoulliLayer(data.ncols)
    hid_layer = layers.BernoulliLayer(num_hidden_units)

    rbm = BoltzmannMachine([vis_layer, hid_layer])
    rbm.initialize(data)

    # obtain initial estimates of the reconstruction error and TAP metrics
    perf = ProgressMonitor(generator_metrics=[ReconstructionError(),
                                              TAPLogLikelihood(10),
                                              TAPFreeEnergy(10)])
    untrained_performance = perf.epoch_update(data, rbm, store=True, show=False)

    # set up the optimizer and the fit method
    opt = optimizers.Gradient(stepsize=learning_rate, tolerance=1e-5)
    tap = fit.TAP(True, 0.1, 0.01, 25, True, 0.5, 0.001, 0.0)
    solver = fit.SGD(rbm, data)
    solver.monitor.generator_metrics.append(TAPLogLikelihood(10))
    solver.monitor.generator_metrics.append(TAPFreeEnergy(10))

    # fit the model
    print('training with stochastic gradient ascent')
    solver.train(opt, num_epochs, method=tap.tap_update)

    # obtain an estimate of the reconstruction error after training
    trained_performance = solver.monitor.memory[-1]

    assert (trained_performance['TAPLogLikelihood'] >
            untrained_performance['TAPLogLikelihood']), \
        "TAP log-likelihood did not increase"
    assert (trained_performance['ReconstructionError'] <
            untrained_performance['ReconstructionError']), \
        "Reconstruction error did not decrease"

    # close the HDF5 store
    data.close()
def test_onehot_derivatives():
    num_visible_units = 100
    num_hidden_units = 50
    batch_size = 25

    # set a seed for the random number generator
    be.set_seed()

    # set up some layer and model objects
    vis_layer = layers.OneHotLayer(num_visible_units)
    hid_layer = layers.OneHotLayer(num_hidden_units)
    rbm = BoltzmannMachine([vis_layer, hid_layer])

    # randomly set the intrinsic model parameters
    a = be.randn((num_visible_units,))
    b = be.randn((num_hidden_units,))
    W = be.randn((num_visible_units, num_hidden_units))

    rbm.layers[0].params.loc[:] = a
    rbm.layers[1].params.loc[:] = b
    rbm.connections[0].weights.params.matrix[:] = W

    # generate a random batch of data
    vdata = rbm.layers[0].random((batch_size, num_visible_units))
    vdata_scaled = rbm.layers[0].rescale(vdata)

    # compute the conditional mean of the hidden layer
    hid_mean = rbm.layers[1].conditional_mean([vdata], [rbm.connections[0].W()])
    hid_mean_scaled = rbm.layers[1].rescale(hid_mean)

    # compute the derivatives
    d_visible_loc = -be.mean(vdata, axis=0)
    d_hidden_loc = -be.mean(hid_mean_scaled, axis=0)
    d_W = -be.batch_outer(vdata, hid_mean_scaled) / len(vdata)

    # compute the derivatives using the layer functions
    vis_derivs = rbm.layers[0].derivatives(vdata, [hid_mean_scaled],
                                           [rbm.connections[0].W(trans=True)])
    hid_derivs = rbm.layers[1].derivatives(hid_mean, [vdata_scaled],
                                           [rbm.connections[0].W()])
    weight_derivs = rbm.connections[0].weights.derivatives(vdata, hid_mean_scaled)

    # compute simple weighted derivatives using the layer functions
    scale = 2
    scale_func = partial(be.multiply, be.float_scalar(scale))
    vis_derivs_scaled = rbm.layers[0].derivatives(vdata, [hid_mean_scaled],
                                                  [rbm.connections[0].W(trans=True)],
                                                  weighting_function=scale_func)
    hid_derivs_scaled = rbm.layers[1].derivatives(hid_mean, [vdata_scaled],
                                                  [rbm.connections[0].W()],
                                                  weighting_function=scale_func)
    weight_derivs_scaled = rbm.connections[0].weights.derivatives(
        vdata, hid_mean_scaled, weighting_function=scale_func)

    assert be.allclose(d_visible_loc, vis_derivs[0].loc), \
        "derivative of visible loc wrong in onehot-onehot rbm"
    assert be.allclose(d_hidden_loc, hid_derivs[0].loc), \
        "derivative of hidden loc wrong in onehot-onehot rbm"
    assert be.allclose(d_W, weight_derivs[0].matrix), \
        "derivative of weights wrong in onehot-onehot rbm"

    assert be.allclose(scale * d_visible_loc, vis_derivs_scaled[0].loc), \
        "weighted derivative of visible loc wrong in onehot-onehot rbm"
    assert be.allclose(scale * d_hidden_loc, hid_derivs_scaled[0].loc), \
        "weighted derivative of hidden loc wrong in onehot-onehot rbm"
    assert be.allclose(scale * d_W, weight_derivs_scaled[0].matrix), \
        "weighted derivative of weights wrong in onehot-onehot rbm"
def test_gaussian_derivatives():
    num_visible_units = 100
    num_hidden_units = 50
    batch_size = 25

    # set a seed for the random number generator
    be.set_seed()

    # set up some layer and model objects
    vis_layer = layers.GaussianLayer(num_visible_units)
    hid_layer = layers.GaussianLayer(num_hidden_units)
    rbm = BoltzmannMachine([vis_layer, hid_layer])

    # randomly set the intrinsic model parameters
    a = be.randn((num_visible_units,))
    b = be.randn((num_hidden_units,))
    log_var_a = 0.1 * be.randn((num_visible_units,))
    log_var_b = 0.1 * be.randn((num_hidden_units,))
    W = be.randn((num_visible_units, num_hidden_units))

    rbm.layers[0].params.loc[:] = a
    rbm.layers[1].params.loc[:] = b
    rbm.layers[0].params.log_var[:] = log_var_a
    rbm.layers[1].params.log_var[:] = log_var_b
    rbm.connections[0].weights.params.matrix[:] = W

    # generate a random batch of data
    vdata = rbm.layers[0].random((batch_size, num_visible_units))
    visible_var = be.exp(log_var_a)
    vdata_scaled = vdata / visible_var

    # compute the mean of the hidden layer
    hid_mean = rbm.layers[1].conditional_mean(
        [vdata_scaled], [rbm.connections[0].W()])
    hidden_var = be.exp(log_var_b)
    hid_mean_scaled = rbm.layers[1].rescale(hid_mean)

    # compute the derivatives
    d_vis_loc = be.mean((a - vdata) / visible_var, axis=0)

    d_vis_logvar = -0.5 * be.mean(be.square(be.subtract(a, vdata)), axis=0)
    d_vis_logvar += be.batch_quadratic(hid_mean_scaled, be.transpose(W), vdata,
                                       axis=0) / len(vdata)
    d_vis_logvar /= visible_var

    d_hid_loc = be.mean((b - hid_mean) / hidden_var, axis=0)

    d_hid_logvar = -0.5 * be.mean(be.square(hid_mean - b), axis=0)
    d_hid_logvar += be.batch_quadratic(vdata_scaled, W, hid_mean,
                                       axis=0) / len(hid_mean)
    d_hid_logvar /= hidden_var

    d_W = -be.batch_outer(vdata_scaled, hid_mean_scaled) / len(vdata_scaled)

    # compute the derivatives using the layer functions
    vis_derivs = rbm.layers[0].derivatives(vdata, [hid_mean_scaled],
                                           [rbm.connections[0].W(trans=True)])
    hid_derivs = rbm.layers[1].derivatives(hid_mean, [vdata_scaled],
                                           [rbm.connections[0].W()])
    weight_derivs = rbm.connections[0].weights.derivatives(vdata_scaled,
                                                           hid_mean_scaled)

    # compute simple weighted derivatives using the layer functions
    scale = 2
    scale_func = partial(be.multiply, be.float_scalar(scale))
    vis_derivs_scaled = rbm.layers[0].derivatives(vdata, [hid_mean_scaled],
                                                  [rbm.connections[0].W(trans=True)],
                                                  weighting_function=scale_func)
    hid_derivs_scaled = rbm.layers[1].derivatives(hid_mean, [vdata_scaled],
                                                  [rbm.connections[0].W()],
                                                  weighting_function=scale_func)
    weight_derivs_scaled = rbm.connections[0].weights.derivatives(
        vdata_scaled, hid_mean_scaled, weighting_function=scale_func)

    assert be.allclose(d_vis_loc, vis_derivs[0].loc), \
        "derivative of visible loc wrong in gaussian-gaussian rbm"
    assert be.allclose(d_hid_loc, hid_derivs[0].loc), \
        "derivative of hidden loc wrong in gaussian-gaussian rbm"
    assert be.allclose(d_vis_logvar, vis_derivs[0].log_var, rtol=1e-05, atol=1e-01), \
        "derivative of visible log_var wrong in gaussian-gaussian rbm"
    assert be.allclose(d_hid_logvar, hid_derivs[0].log_var, rtol=1e-05, atol=1e-01), \
        "derivative of hidden log_var wrong in gaussian-gaussian rbm"
    assert be.allclose(d_W, weight_derivs[0].matrix), \
        "derivative of weights wrong in gaussian-gaussian rbm"

    assert be.allclose(scale * d_vis_loc, vis_derivs_scaled[0].loc), \
        "weighted derivative of visible loc wrong in gaussian-gaussian rbm"
    assert be.allclose(scale * d_hid_loc, hid_derivs_scaled[0].loc), \
        "weighted derivative of hidden loc wrong in gaussian-gaussian rbm"
    assert be.allclose(scale * d_vis_logvar, vis_derivs_scaled[0].log_var,
                       rtol=1e-05, atol=1e-01), \
        "weighted derivative of visible log_var wrong in gaussian-gaussian rbm"
    assert be.allclose(scale * d_hid_logvar, hid_derivs_scaled[0].log_var,
                       rtol=1e-05, atol=1e-01), \
        "weighted derivative of hidden log_var wrong in gaussian-gaussian rbm"
    assert be.allclose(scale * d_W, weight_derivs_scaled[0].matrix), \
        "weighted derivative of weights wrong in gaussian-gaussian rbm"
def test_rbm(paysage_path=None):
    num_hidden_units = 50
    batch_size = 50
    num_epochs = 1
    learning_rate = schedules.PowerLawDecay(initial=0.01, coefficient=0.1)
    mc_steps = 1

    if not paysage_path:
        paysage_path = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    filepath = os.path.join(paysage_path, 'examples', 'mnist', 'mnist.h5')

    if not os.path.exists(filepath):
        raise IOError(
            "{} does not exist. run mnist/download_mnist.py to fetch from the web"
            .format(filepath))

    shuffled_filepath = os.path.join(paysage_path, 'examples', 'mnist',
                                     'shuffled_mnist.h5')

    # shuffle the data
    if not os.path.exists(shuffled_filepath):
        shuffler = batch.DataShuffler(filepath, shuffled_filepath, complevel=0)
        shuffler.shuffle()

    # set a seed for the random number generator
    be.set_seed()

    import pandas
    samples = pre.binarize_color(
        be.float_tensor(
            pandas.read_hdf(shuffled_filepath,
                            key='train/images').values[:10000]))
    samples_train, samples_validate = batch.split_tensor(samples, 0.95)
    data = batch.Batch({
        'train': batch.InMemoryTable(samples_train, batch_size),
        'validate': batch.InMemoryTable(samples_validate, batch_size)})

    # set up the model and initialize the parameters
    vis_layer = layers.BernoulliLayer(data.ncols)
    hid_layer = layers.BernoulliLayer(num_hidden_units)

    rbm = BoltzmannMachine([vis_layer, hid_layer])
    rbm.initialize(data)

    # obtain initial estimate of the reconstruction error
    perf = ProgressMonitor()
    untrained_performance = perf.epoch_update(data, rbm, store=True, show=False)

    # set up the optimizer and the fit method
    opt = optimizers.RMSProp(stepsize=learning_rate)
    cd = fit.SGD(rbm, data)

    # fit the model
    print('training with contrastive divergence')
    cd.train(opt, num_epochs, method=fit.pcd, mcsteps=mc_steps)

    # obtain an estimate of the reconstruction error after 1 epoch
    trained_performance = cd.monitor.memory[-1]

    assert (trained_performance['ReconstructionError'] <
            untrained_performance['ReconstructionError']), \
        "Reconstruction error did not decrease"

    # close the HDF5 store
    data.close()
def test_independent():
    """
    Test sampling from an rbm with two layers connected by a weight matrix
    that contains all zeros, so that the layers are independent.

    Note:
        This test compares values estimated by *sampling* to values computed
        analytically. It can fail for small batch_size, or strict tolerances,
        even if everything is working properly.

    """
    num_visible_units = 20
    num_hidden_units = 10
    batch_size = 1000
    steps = 100
    mean_tol = 0.2
    corr_tol = 0.2

    # set a seed for the random number generator
    be.set_seed()

    layer_types = [layers.BernoulliLayer, layers.GaussianLayer]

    for layer_type in layer_types:
        # set up some layer and model objects
        vis_layer = layer_type(num_visible_units)
        hid_layer = layer_type(num_hidden_units)
        rbm = BoltzmannMachine([vis_layer, hid_layer])

        # randomly set the intrinsic model parameters
        a = be.rand((num_visible_units,))
        b = be.rand((num_hidden_units,))
        W = be.zeros((num_visible_units, num_hidden_units))

        rbm.layers[0].params.loc[:] = a
        rbm.layers[1].params.loc[:] = b
        rbm.connections[0].weights.params.matrix[:] = W

        if layer_type == layers.GaussianLayer:
            log_var_a = be.randn((num_visible_units,))
            log_var_b = be.randn((num_hidden_units,))
            rbm.layers[0].params.log_var[:] = log_var_a
            rbm.layers[1].params.log_var[:] = log_var_b

        # initialize a state
        state = State.from_model(batch_size, rbm)

        # run a markov chain to update the state
        state = rbm.markov_chain(steps, state)

        # compute the mean
        state_for_moments = State.from_model(1, rbm)
        sample_mean = [be.mean(state[i], axis=0) for i in range(state.len)]
        model_mean = [rbm.layers[i].conditional_mean(
            rbm._connected_rescaled_units(i, state_for_moments),
            rbm._connected_weights(i)) for i in range(rbm.num_layers)]

        # check that the means are roughly equal
        for i in range(rbm.num_layers):
            ave = sample_mean[i]
            close = be.allclose(ave, model_mean[i][0], rtol=mean_tol, atol=mean_tol)
            assert close, \
                "{0} {1}: sample mean does not match model mean".format(layer_type, i)

        # check the cross correlation between the layers
        crosscov = be.cov(state[0], state[1])
        norm = be.outer(be.std(state[0], axis=0), be.std(state[1], axis=0))
        crosscorr = be.divide(norm, crosscov)
        assert be.tmax(be.tabs(crosscorr)) < corr_tol, \
            "{} cross correlation too large".format(layer_type)
def test_tap_machine(paysage_path=None):
    num_hidden_units = 10
    batch_size = 50
    num_epochs = 1
    learning_rate = 0.01

    if not paysage_path:
        paysage_path = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    filepath = os.path.join(paysage_path, 'mnist', 'mnist.h5')

    if not os.path.exists(filepath):
        raise IOError(
            "{} does not exist. run mnist/download_mnist.py to fetch from the web"
            .format(filepath))

    shuffled_filepath = os.path.join(paysage_path, 'mnist', 'shuffled_mnist.h5')

    # shuffle the data
    if not os.path.exists(shuffled_filepath):
        shuffler = batch.DataShuffler(filepath, shuffled_filepath, complevel=0)
        shuffler.shuffle()

    # set a seed for the random number generator
    be.set_seed()

    # set up the reader to get minibatches
    data = batch.Batch(shuffled_filepath, 'train/images', batch_size,
                       transform=batch.binarize_color, train_fraction=0.1)

    # set up the model and initialize the parameters
    vis_layer = layers.BernoulliLayer(data.ncols)
    hid_layer = layers.BernoulliLayer(num_hidden_units)

    rbm = tap_machine.TAP_rbm([vis_layer, hid_layer], tolerance_EMF=1e-2,
                              max_iters_EMF=50)
    rbm.initialize(data)

    # obtain initial estimate of the reconstruction error
    perf = fit.ProgressMonitor(data, metrics=['ReconstructionError'])
    untrained_performance = perf.check_progress(rbm)

    # set up the optimizer and the fit method
    opt = optimizers.Gradient(stepsize=learning_rate,
                              scheduler=optimizers.PowerLawDecay(0.1),
                              tolerance=1e-3,
                              ascent=True)
    solver = fit.SGD(rbm, data, opt, num_epochs, method=fit.tap, monitor=perf)

    # fit the model
    print('training with stochastic gradient ascent')
    solver.train()

    # obtain an estimate of the reconstruction error after 1 epoch
    trained_performance = perf.check_progress(rbm)

    assert (trained_performance['ReconstructionError'] <
            untrained_performance['ReconstructionError']), \
        "Reconstruction error did not decrease"

    # close the HDF5 store
    data.close()
def test_rbm(paysage_path=None):
    num_hidden_units = 50
    batch_size = 50
    num_epochs = 1
    learning_rate = schedules.PowerLawDecay(initial=0.01, coefficient=0.1)
    mc_steps = 1

    if not paysage_path:
        paysage_path = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    filepath = os.path.join(paysage_path, 'mnist', 'mnist.h5')

    if not os.path.exists(filepath):
        raise IOError(
            "{} does not exist. run mnist/download_mnist.py to fetch from the web"
            .format(filepath))

    shuffled_filepath = os.path.join(paysage_path, 'mnist', 'shuffled_mnist.h5')

    # shuffle the data
    if not os.path.exists(shuffled_filepath):
        shuffler = batch.DataShuffler(filepath, shuffled_filepath, complevel=0)
        shuffler.shuffle()

    # set a seed for the random number generator
    be.set_seed()

    # set up the reader to get minibatches
    data = batch.HDFBatch(shuffled_filepath, 'train/images', batch_size,
                          transform=pre.binarize_color, train_fraction=0.99)

    # set up the model and initialize the parameters
    vis_layer = layers.BernoulliLayer(data.ncols)
    hid_layer = layers.BernoulliLayer(num_hidden_units)

    rbm = model.Model([vis_layer, hid_layer])
    rbm.initialize(data)

    # obtain initial estimate of the reconstruction error
    perf = fit.ProgressMonitor(data, metrics=['ReconstructionError'])
    untrained_performance = perf.check_progress(rbm)

    # set up the optimizer and the fit method
    opt = optimizers.RMSProp(stepsize=learning_rate)
    sampler = fit.DrivenSequentialMC.from_batch(rbm, data)
    cd = fit.SGD(rbm, data, opt, num_epochs, sampler, method=fit.pcd,
                 mcsteps=mc_steps, monitor=perf)

    # fit the model
    print('training with contrastive divergence')
    cd.train()

    # obtain an estimate of the reconstruction error after 1 epoch
    trained_performance = perf.check_progress(rbm)

    assert (trained_performance['ReconstructionError'] <
            untrained_performance['ReconstructionError']), \
        "Reconstruction error did not decrease"

    # close the HDF5 store
    data.close()
def test_rbm(paysage_path=None):
    """TODO: this is just a placeholder; the setup needs to be cleaned up and
    simplified. We also need to figure out how to deal with consistent random
    seeding throughout the codebase to obtain deterministic, checkable results.
    """
    num_hidden_units = 50
    batch_size = 50
    num_epochs = 1
    learning_rate = 0.01
    mc_steps = 1

    if not paysage_path:
        paysage_path = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    filepath = os.path.join(paysage_path, 'mnist', 'mnist.h5')

    if not os.path.exists(filepath):
        raise IOError(
            "{} does not exist. run mnist/download_mnist.py to fetch from the web"
            .format(filepath))

    shuffled_filepath = os.path.join(paysage_path, 'mnist', 'shuffled_mnist.h5')

    # shuffle the data
    if not os.path.exists(shuffled_filepath):
        shuffler = batch.DataShuffler(filepath, shuffled_filepath, complevel=0)
        shuffler.shuffle()

    # set a seed for the random number generator
    be.set_seed()

    # set up the reader to get minibatches
    data = batch.Batch(shuffled_filepath, 'train/images', batch_size,
                       transform=batch.binarize_color, train_fraction=0.99)

    # set up the model and initialize the parameters
    vis_layer = layers.BernoulliLayer(data.ncols)
    hid_layer = layers.BernoulliLayer(num_hidden_units)

    rbm = hidden.Model([vis_layer, hid_layer])
    rbm.initialize(data)

    # obtain initial estimate of the reconstruction error
    perf = fit.ProgressMonitor(0, data, metrics=[M.ReconstructionError()])
    untrained_performance = perf.check_progress(rbm, 0)

    # set up the optimizer and the fit method
    opt = optimizers.RMSProp(rbm, stepsize=learning_rate,
                             scheduler=optimizers.PowerLawDecay(0.1))
    sampler = fit.DrivenSequentialMC.from_batch(rbm, data, method='stochastic')
    cd = fit.PCD(rbm, data, opt, sampler, num_epochs, mcsteps=mc_steps,
                 skip=200, metrics=[M.ReconstructionError()])

    # fit the model
    print('training with contrastive divergence')
    cd.train()

    # obtain an estimate of the reconstruction error after 1 epoch
    trained_performance = perf.check_progress(rbm, 0)

    assert (trained_performance['ReconstructionError'] <
            untrained_performance['ReconstructionError']), \
        "Reconstruction error did not decrease"

    # close the HDF5 store
    data.close()
from paysage import batch
from paysage import preprocess as pre
from paysage import layers
from paysage.models import model
from paysage import fit
from paysage import optimizers
from paysage import backends as be
from paysage import schedules
from paysage import penalties as pen

be.set_seed(137)  # for determinism

import example_util as util


def run(paysage_path=None, num_epochs=10, show_plot=False):
    num_hidden_units = 256
    batch_size = 100
    learning_rate = schedules.PowerLawDecay(initial=0.01, coefficient=0.1)
    mc_steps = 1

    (_, _, shuffled_filepath) = \
        util.default_paths(paysage_path)

    # set up the reader to get minibatches
    data = batch.HDFBatch(shuffled_filepath, 'train/images', batch_size,
                          transform=pre.binarize_color, train_fraction=0.95)
def test_gaussian_conditional_params():
    num_visible_units = 100
    num_hidden_units = 50
    batch_size = 25

    # set a seed for the random number generator
    be.set_seed()

    # set up some layer and model objects
    vis_layer = layers.GaussianLayer(num_visible_units)
    hid_layer = layers.GaussianLayer(num_hidden_units)
    rbm = BoltzmannMachine([vis_layer, hid_layer])

    # randomly set the intrinsic model parameters
    a = be.randn((num_visible_units,))
    b = be.randn((num_hidden_units,))
    log_var_a = 0.1 * be.randn((num_visible_units,))
    log_var_b = 0.1 * be.randn((num_hidden_units,))
    W = be.randn((num_visible_units, num_hidden_units))

    rbm.layers[0].params.loc[:] = a
    rbm.layers[1].params.loc[:] = b
    rbm.layers[0].params.log_var[:] = log_var_a
    rbm.layers[1].params.log_var[:] = log_var_b
    rbm.connections[0].weights.params.matrix[:] = W

    # generate a random batch of data
    vdata = rbm.layers[0].random((batch_size, num_visible_units))
    hdata = rbm.layers[1].random((batch_size, num_hidden_units))

    # compute the variance
    visible_var = be.exp(log_var_a)
    hidden_var = be.exp(log_var_b)

    # rescale the data
    vdata_scaled = vdata / visible_var
    hdata_scaled = hdata / hidden_var

    # test rescale
    assert be.allclose(vdata_scaled, rbm.layers[0].rescale(vdata)), \
        "visible rescale wrong in gaussian-gaussian rbm"
    assert be.allclose(hdata_scaled, rbm.layers[1].rescale(hdata)), \
        "hidden rescale wrong in gaussian-gaussian rbm"

    # compute the mean
    hidden_mean = be.dot(vdata_scaled, W)  # (batch_size, num_hidden_units)
    hidden_mean += b

    visible_mean = be.dot(hdata_scaled, be.transpose(W))  # (batch_size, num_visible_units)
    visible_mean += a

    # compute the conditional parameters using the layer functions
    vis_mean_func, vis_var_func = rbm.layers[0].conditional_params(
        [hdata_scaled], [rbm.connections[0].W(trans=True)])
    hid_mean_func, hid_var_func = rbm.layers[1].conditional_params(
        [vdata_scaled], [rbm.connections[0].W()])

    assert be.allclose(visible_var, vis_var_func), \
        "visible variance wrong in gaussian-gaussian rbm"
    assert be.allclose(hidden_var, hid_var_func), \
        "hidden variance wrong in gaussian-gaussian rbm"
    assert be.allclose(visible_mean, vis_mean_func), \
        "visible mean wrong in gaussian-gaussian rbm"
    assert be.allclose(hidden_mean, hid_mean_func), \
        "hidden mean wrong in gaussian-gaussian rbm"