def test_exponential_derivatives():
    num_visible_units = 100
    num_hidden_units = 50
    batch_size = 25

    # set a seed for the random number generator
    be.set_seed()

    # set up some layer and model objects
    vis_layer = layers.ExponentialLayer(num_visible_units)
    hid_layer = layers.ExponentialLayer(num_hidden_units)
    rbm = hidden.Model([vis_layer, hid_layer])

    # randomly set the intrinsic model parameters
    # for the exponential layers, we need a > 0, b > 0, and W < 0
    a = be.rand((num_visible_units,))
    b = be.rand((num_hidden_units,))
    W = -be.rand((num_visible_units, num_hidden_units))

    rbm.layers[0].int_params.loc[:] = a
    rbm.layers[1].int_params.loc[:] = b
    rbm.weights[0].int_params.matrix[:] = W

    # generate a random batch of data
    vdata = rbm.layers[0].random((batch_size, num_visible_units))
    vdata_scaled = rbm.layers[0].rescale(vdata)

    # compute the mean of the hidden layer
    rbm.layers[1].update([vdata], [rbm.weights[0].W()])
    hid_mean = rbm.layers[1].mean()
    hid_mean_scaled = rbm.layers[1].rescale(hid_mean)

    # compute the derivatives
    d_visible_loc = be.mean(vdata, axis=0)
    d_hidden_loc = be.mean(hid_mean_scaled, axis=0)
    d_W = -be.batch_outer(vdata, hid_mean_scaled) / len(vdata)

    # compute the derivatives using the layer functions
    vis_derivs = rbm.layers[0].derivatives(vdata, [hid_mean_scaled],
                                           [rbm.weights[0].W()])
    hid_derivs = rbm.layers[1].derivatives(hid_mean, [vdata_scaled],
                                           [rbm.weights[0].W_T()])
    weight_derivs = rbm.weights[0].derivatives(vdata, hid_mean_scaled)

    assert be.allclose(d_visible_loc, vis_derivs.loc), \
        "derivative of visible loc wrong in exponential-exponential rbm"

    assert be.allclose(d_hidden_loc, hid_derivs.loc), \
        "derivative of hidden loc wrong in exponential-exponential rbm"

    assert be.allclose(d_W, weight_derivs.matrix), \
        "derivative of weights wrong in exponential-exponential rbm"
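# Aside: the reference values above are the batch moments <v>, <h_scaled>, and
# -<v h^T>, consistent with a bilinear weight energy of the form -v.W.h and a
# linear loc term in the exponential layer energy. A minimal numpy sketch of
# the batch-averaged outer product used for d_W (batch_outer_mean is an
# illustrative helper, not part of paysage):
import numpy as np

def batch_outer_mean(v, h):
    """Batch-averaged outer product: (n, p) x (n, q) -> (p, q)."""
    return np.dot(v.T, h) / len(v)

# sanity check against an explicit loop over the batch
v = np.random.rand(25, 100)
h = np.random.rand(25, 50)
explicit = sum(np.outer(v[k], h[k]) for k in range(len(v))) / len(v)
assert np.allclose(batch_outer_mean(v, h), explicit)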
def test_onehot_conditional_params():
    ly = layers.OneHotLayer(num_vis)
    w = layers.Weights((num_vis, num_hid))
    scaled_units = [be.randn((num_samples, num_hid))]
    weights = [w.W(trans=True)]
    beta = be.rand((num_samples, 1))
    ly.conditional_params(scaled_units, weights, beta)
def test_exponential_update():
    ly = layers.ExponentialLayer(num_vis)
    w = layers.Weights((num_vis, num_hid))
    scaled_units = [be.randn((num_samples, num_hid))]
    weights = [w.W_T()]
    beta = be.rand((num_samples, 1))
    ly.update(scaled_units, weights, beta)
def test_exponential_conditional_params():
    ly = layers.ExponentialLayer(num_vis)
    w = layers.Weights((num_vis, num_hid))
    scaled_units = [be.randn((num_samples, num_hid))]
    weights = [w.W_T()]
    beta = be.rand((num_samples, 1))
    ly._conditional_params(scaled_units, weights, beta)
def test_ising_update():
    ly = layers.IsingLayer(num_vis)
    w = layers.Weights((num_vis, num_hid))
    scaled_units = [be.randn((num_samples, num_hid))]
    weights = [w.W_T()]
    beta = be.rand((num_samples, 1))
    ly.update(scaled_units, weights, beta)
def test_in_memory_table_batch():
    # create data
    num_rows = 10000
    num_cols = 10
    tensor = be.rand((num_rows, num_cols))

    # batch it with InMemoryTable
    batch_size = 1000
    num_train_batches = num_rows // batch_size
    data = batch.InMemoryTable(tensor, batch_size)

    # loop through, checking the data
    i_batch = 0
    while True:
        # get the data
        try:
            batch_data = data.get()
        except StopIteration:
            assert i_batch == num_train_batches
            i_batch = 0
            break
        # check it
        assert be.allclose(
            batch_data,
            tensor[i_batch * batch_size:(i_batch + 1) * batch_size])
        i_batch += 1
def test_bernoulli_derivatives():
    ly = layers.BernoulliLayer(num_vis)
    w = layers.Weights((num_vis, num_hid))
    vis = ly.random((num_samples, num_vis))
    hid = [be.randn((num_samples, num_hid))]
    weights = [w.W_T()]
    beta = be.rand((num_samples, 1))
    ly.derivatives(vis, hid, weights, beta)
def test_exponential_derivatives():
    ly = layers.ExponentialLayer(num_vis)
    w = layers.Weights((num_vis, num_hid))
    vis = ly.random((num_samples, num_vis))
    hid = [be.randn((num_samples, num_hid))]
    weights = [w.W_T()]
    beta = be.rand((num_samples, 1))
    ly.derivatives(vis, hid, weights, beta)
def test_ising_derivatives():
    ly = layers.IsingLayer(num_vis)
    w = layers.Weights((num_vis, num_hid))
    vis = ly.random((num_samples, num_vis))
    hid = [be.randn((num_samples, num_hid))]
    weights = [w.W_T()]
    beta = be.rand((num_samples, 1))
    ly.derivatives(vis, hid, weights, beta)
def test_exponential_conditional_params():
    num_visible_units = 100
    num_hidden_units = 50
    batch_size = 25

    # set a seed for the random number generator
    be.set_seed()

    # set up some layer and model objects
    vis_layer = layers.ExponentialLayer(num_visible_units)
    hid_layer = layers.ExponentialLayer(num_hidden_units)
    rbm = model.Model([vis_layer, hid_layer])

    # randomly set the intrinsic model parameters
    # for the exponential layers, we need a > 0, b > 0, and W < 0
    a = be.rand((num_visible_units,))
    b = be.rand((num_hidden_units,))
    W = -be.rand((num_visible_units, num_hidden_units))

    rbm.layers[0].params.loc[:] = a
    rbm.layers[1].params.loc[:] = b
    rbm.weights[0].params.matrix[:] = W

    # generate a random batch of data
    vdata = rbm.layers[0].random((batch_size, num_visible_units))
    hdata = rbm.layers[1].random((batch_size, num_hidden_units))

    # compute conditional parameters
    hidden_rate = -be.dot(vdata, W)  # (batch_size, num_hidden_units)
    hidden_rate += be.broadcast(b, hidden_rate)

    visible_rate = -be.dot(hdata, be.transpose(W))  # (batch_size, num_visible_units)
    visible_rate += be.broadcast(a, visible_rate)

    # compute the conditional parameters using the layer functions
    hidden_rate_func = rbm.layers[1]._conditional_params(
        [vdata], [rbm.weights[0].W()])
    visible_rate_func = rbm.layers[0]._conditional_params(
        [hdata], [rbm.weights[0].W_T()])

    assert be.allclose(hidden_rate, hidden_rate_func), \
        "hidden rate wrong in exponential-exponential rbm"

    assert be.allclose(visible_rate, visible_rate_func), \
        "visible rate wrong in exponential-exponential rbm"
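# Aside: in the exponential-exponential RBM above, the conditional distribution
# of each hidden unit given the visibles is still exponential, with rate
# rate_j = b_j - sum_i v_i W_ij. The constraints b > 0, W < 0, and v >= 0
# guarantee rate > 0, so the conditionals stay normalizable. A standalone
# numpy illustration of that positivity (names here are illustrative):
import numpy as np

b = np.random.rand(50)           # b > 0
W = -np.random.rand(100, 50)     # W < 0
v = np.random.rand(25, 100)      # exponential samples are nonnegative
rate = b - np.dot(v, W)          # (batch_size, num_hidden_units)
assert np.all(rate > 0)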
def test_exponential_update():
    num_visible_units = 100
    num_hidden_units = 50
    batch_size = 25

    # set a seed for the random number generator
    be.set_seed()

    # set up some layer and model objects
    vis_layer = layers.ExponentialLayer(num_visible_units)
    hid_layer = layers.ExponentialLayer(num_hidden_units)
    rbm = hidden.Model([vis_layer, hid_layer])

    # randomly set the intrinsic model parameters
    # for the exponential layers, we need a > 0, b > 0, and W < 0
    a = be.rand((num_visible_units,))
    b = be.rand((num_hidden_units,))
    W = -be.rand((num_visible_units, num_hidden_units))

    rbm.layers[0].int_params['loc'] = a
    rbm.layers[1].int_params['loc'] = b
    rbm.weights[0].int_params['matrix'] = W

    # generate a random batch of data
    vdata = rbm.layers[0].random((batch_size, num_visible_units))
    hdata = rbm.layers[1].random((batch_size, num_hidden_units))

    # compute extrinsic parameters
    hidden_rate = -be.dot(vdata, W)  # (batch_size, num_hidden_units)
    hidden_rate += be.broadcast(b, hidden_rate)

    visible_rate = -be.dot(hdata, be.transpose(W))  # (batch_size, num_visible_units)
    visible_rate += be.broadcast(a, visible_rate)

    # update the extrinsic parameters using the layer functions
    rbm.layers[1].update(vdata, rbm.weights[0].W())
    rbm.layers[0].update(hdata, be.transpose(rbm.weights[0].W()))

    assert be.allclose(hidden_rate, rbm.layers[1].ext_params['rate']), \
        "hidden rate wrong in exponential-exponential rbm"

    assert be.allclose(visible_rate, rbm.layers[0].ext_params['rate']), \
        "visible rate wrong in exponential-exponential rbm"
def test_penalties():
    for p in penalty_types:
        penalty = p(1.0, (slice(1, 100, 1), slice(0, 200, 2)))
        t = be.rand((100, 200)) * 2.0 - be.ones((100, 200))
        v1 = penalty.value(t)
        g = penalty.grad(t)
        t -= be.EPSILON * g
        v2 = penalty.value(t)
        assert v1 >= v2, \
            "penalty {} gradient is not working properly".format(p)

    penalty = pen.logdet_penalty(1.0, (slice(0, 100, 1),))
    t = be.identity(100) + be.rand((100, 100)) * 0.2 - be.ones((100, 100)) * 0.1
    v1 = penalty.value(t)
    g = penalty.grad(t)
    t -= be.EPSILON * g
    v2 = penalty.value(t)
    assert v1 >= v2, "logdet_penalty gradient is not working properly"
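# Aside: the assertions above are a descent-direction check. For a small step
# size eps, a first-order Taylor expansion gives
# v(t - eps * g) ~= v(t) - eps * <g, g> <= v(t), so a small step against the
# gradient should never increase the penalty value. The same pattern applied
# to a plain L2 penalty (descent_check is a hypothetical helper, not part of
# paysage):
import numpy as np

def descent_check(value, grad, t, eps=1e-6):
    """True if a small step against the gradient does not increase value."""
    return value(t - eps * grad(t)) <= value(t)

# e.g. v(t) = 0.5 * sum(t**2) has gradient g(t) = t
t = np.random.rand(5)
assert descent_check(lambda x: 0.5 * np.sum(x ** 2), lambda x: x, t)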
def test_mean():
    # create some random data
    s = be.rand((100000,))

    # reference result
    ref_mean = be.mean(s)

    # do the online calculation
    mv = math_utils.MeanVarianceCalculator()
    for i in range(10):
        mv.update(s[i * 10000:(i + 1) * 10000])

    assert be.allclose(be.float_tensor(np.array([ref_mean])),
                       be.float_tensor(np.array([mv.mean])))
def test_mean_2d():
    # create some random data
    num = 5000
    num_steps = 10
    stepsize = num // num_steps
    s = be.rand((num, 10))

    # reference result
    ref_mean = be.mean(s, axis=0)

    # do the online calculation
    mv = math_utils.MeanArrayCalculator()
    for i in range(num_steps):
        mv.update(s[i * stepsize:(i + 1) * stepsize], axis=0)

    assert be.allclose(ref_mean, mv.mean)
def test_mean():
    # create some random data
    num = 100000
    num_steps = 10
    stepsize = num // num_steps
    s = be.rand((num,))

    # reference result
    ref_mean = be.mean(s)

    # do the online calculation
    mv = math_utils.MeanCalculator()
    for i in range(num_steps):
        mv.update(s[i * stepsize:(i + 1) * stepsize])

    assert be.allclose(be.float_tensor(np.array([ref_mean])),
                       be.float_tensor(np.array([mv.mean])))
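# Aside: the online calculators above presumably accumulate a running mean with
# the standard incremental update mean <- mean + k * (batch_mean - mean) / n,
# where k is the batch size and n the total count so far; this reproduces the
# exact sample mean regardless of how the data is chunked. A minimal sketch
# (RunningMean is an illustrative stand-in, not paysage's API):
import numpy as np

class RunningMean:
    def __init__(self):
        self.num = 0
        self.mean = 0.0

    def update(self, batch):
        k = len(batch)
        self.num += k
        # pull the current mean toward the batch mean, weighted by batch size
        self.mean += k * (np.mean(batch) - self.mean) / self.num

# usage: feeding the batches in sequence yields the exact sample mean
data = np.random.rand(1000)
rm = RunningMean()
for chunk in np.split(data, 10):
    rm.update(chunk)
assert np.allclose(rm.mean, data.mean())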
def test_mean_variance_2d():
    # create some random data
    num = 10000
    dim2 = 10
    num_steps = 10
    stepsize = num // num_steps
    s = be.rand((num, dim2))

    # reference result
    ref_mean = be.mean(s, axis=0)
    ref_var = be.var(s, axis=0)

    # do the online calculation
    mv = math_utils.MeanVarianceArrayCalculator()
    for i in range(num_steps):
        mv.update(s[i * stepsize:(i + 1) * stepsize])

    assert be.allclose(ref_mean, mv.mean)
    assert be.allclose(ref_var, mv.var, rtol=1e-3, atol=1e-5)
def test_mean_variance_serialization():
    # create some random data
    num = 100
    dim2 = 10
    num_steps = 10
    stepsize = num // num_steps
    s = be.rand((num, dim2))

    # do the online calculation
    mv = math_utils.MeanVarianceArrayCalculator()
    for i in range(num_steps):
        mv.update(s[i * stepsize:(i + 1) * stepsize])

    # serialize to a dataframe and read it back
    df = mv.to_dataframe()
    mv_serial = math_utils.MeanVarianceArrayCalculator.from_dataframe(df)

    assert be.allclose(mv_serial.mean, mv.mean)
    assert be.allclose(mv_serial.var, mv.var)
    assert be.allclose(mv_serial.square, mv.square)
    assert mv_serial.num == mv.num
def test_bernoulli_log_partition_gradient():
    lay = layers.BernoulliLayer(500)
    lay.params.loc[:] = be.rand_like(lay.params.loc) * 2.0 - 1.0

    A = be.rand((1, 500))
    B = be.rand_like(A)

    grad = lay.grad_log_partition_function(A, B)
    logZ = be.mean(lay.log_partition_function(A, B), axis=0)

    lr = 0.01
    gogogo = True
    while gogogo:
        cop = deepcopy(lay)
        cop.params.loc[:] = lay.params.loc + lr * grad.loc
        logZ_next = be.mean(cop.log_partition_function(A, B), axis=0)
        regress = logZ_next - logZ < 0.0
        if True in regress:
            if lr < 1e-6:
                assert False, \
                    "gradient of Bernoulli log partition function is wrong"
                break
            else:
                lr *= 0.5
        else:
            break
def test_in_memory_batch():
    # create data
    num_rows = 10000
    num_cols = 10
    tensor = be.rand((num_rows, num_cols))

    # read it back with Batch
    batch_size = 1000
    num_train_batches = num_rows // batch_size
    with batch.Batch({
            'train': batch.InMemoryTable(tensor, batch_size),
            'validate': batch.InMemoryTable(tensor, batch_size)
    }) as data:
        # loop through one epoch, checking the data
        i_batch = 0
        while True:
            # get the data
            try:
                batch_data_train = data.get("train")
                batch_data_validate = data.get("validate")
            except StopIteration:
                assert i_batch == num_train_batches
                i_batch = 0
                data.reset_generator("all")
                break
            # check it
            assert be.allclose(
                batch_data_train,
                tensor[i_batch * batch_size:(i_batch + 1) * batch_size])
            assert be.allclose(
                batch_data_validate,
                tensor[i_batch * batch_size:(i_batch + 1) * batch_size])
            i_batch += 1
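# Aside: the loops above rely on a generator-like protocol: get() returns
# consecutive batches and raises StopIteration at the end of an epoch, after
# which reset_generator() rewinds the table. A minimal sketch of that protocol
# (MiniTable is an illustrative stand-in, much simpler than paysage's classes):
import numpy as np

class MiniTable:
    def __init__(self, tensor, batch_size):
        self.tensor = tensor
        self.batch_size = batch_size
        self.pos = 0

    def get(self):
        # signal the end of an epoch the same way a generator would
        if self.pos >= len(self.tensor):
            raise StopIteration
        chunk = self.tensor[self.pos:self.pos + self.batch_size]
        self.pos += self.batch_size
        return chunk

    def reset(self):
        self.pos = 0

# usage mirrors the test loop: iterate until StopIteration, then reset
table = MiniTable(np.random.rand(100, 10), 25)
count = 0
while True:
    try:
        table.get()
    except StopIteration:
        table.reset()
        break
    count += 1
assert count == 4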
def test_conditional_sampling():
    """
    Test sampling from one layer conditioned on the state of another layer.

    Note:
        This test compares values estimated by *sampling* to values computed
        analytically. It can fail for small batch_size, or strict tolerances,
        even if everything is working properly.

    """
    num_visible_units = 20
    num_hidden_units = 10
    steps = 1000
    mean_tol = 0.1

    # set a seed for the random number generator
    be.set_seed()

    layer_types = [
        layers.BernoulliLayer,
        layers.GaussianLayer]

    for layer_type in layer_types:
        # set up some layer and model objects
        vis_layer = layer_type(num_visible_units)
        hid_layer = layer_type(num_hidden_units)
        rbm = BoltzmannMachine([vis_layer, hid_layer])

        # randomly set the intrinsic model parameters
        a = be.rand((num_visible_units,))
        b = be.rand((num_hidden_units,))
        W = 10 * be.rand((num_visible_units, num_hidden_units))

        rbm.layers[0].params.loc[:] = a
        rbm.layers[1].params.loc[:] = b
        rbm.connections[0].weights.params.matrix[:] = W

        if layer_type == layers.GaussianLayer:
            log_var_a = be.randn((num_visible_units,))
            log_var_b = be.randn((num_hidden_units,))
            rbm.layers[0].params.log_var[:] = log_var_a
            rbm.layers[1].params.log_var[:] = log_var_b

        # initialize a state
        state = State.from_model(1, rbm)

        # set up a calculator for the moments
        moments = mu.MeanVarianceArrayCalculator()

        for _ in range(steps):
            moments.update(rbm.layers[0].conditional_sample(
                rbm._connected_rescaled_units(0, state),
                rbm._connected_weights(0)))

        model_mean = rbm.layers[0].conditional_mean(
            rbm._connected_rescaled_units(0, state),
            rbm._connected_weights(0))

        ave = moments.mean

        close = be.allclose(ave, model_mean[0], rtol=mean_tol, atol=mean_tol)
        assert close, "{} conditional mean".format(layer_type)

        if layer_type == layers.GaussianLayer:
            model_mean, model_var = rbm.layers[0].conditional_params(
                rbm._connected_rescaled_units(0, state),
                rbm._connected_weights(0))
            close = be.allclose(be.sqrt(moments.var), be.sqrt(model_var[0]),
                                rtol=mean_tol, atol=mean_tol)
            assert close, \
                "{} conditional standard deviation".format(layer_type)
import numpy as np

from paysage import backends as be
from paysage import preprocess as pre

import pytest

tensors = [be.rand((100, 10)) for _ in range(8)]


def compare_lists(a, b):
    return all([be.allclose(ai, bi) for ai, bi in zip(a, b)])


def test_scale():
    # test the function
    result_pre = [pre.scale(tensor, 2) for tensor in tensors]
    result_ref = [0.5 * tensor for tensor in tensors]
    assert compare_lists(result_pre, result_ref)

    # test the transform
    transformer = pre.Transformation(pre.scale, kwargs={'denominator': 2})
    result_pre_2 = [transformer.compute(tensor) for tensor in tensors]
    assert compare_lists(result_pre, result_pre_2)


def test_l2_normalize():
    result_pre = [be.norm(pre.l2_normalize(tensor), axis=1)
                  for tensor in tensors]
    result_ref = [be.ones((len(tensor),)) for tensor in tensors]
    assert compare_lists(result_pre, result_ref)
def test_independent():
    """
    Test sampling from an rbm with two layers connected by a weight matrix
    that contains all zeros, so that the layers are independent.

    Note:
        This test compares values estimated by *sampling* to values computed
        analytically. It can fail for small batch_size, or strict tolerances,
        even if everything is working properly.

    """
    num_visible_units = 20
    num_hidden_units = 10
    batch_size = 1000
    steps = 100
    mean_tol = 0.2
    corr_tol = 0.2

    # set a seed for the random number generator
    be.set_seed()

    layer_types = [
        layers.BernoulliLayer,
        layers.GaussianLayer]

    for layer_type in layer_types:
        # set up some layer and model objects
        vis_layer = layer_type(num_visible_units)
        hid_layer = layer_type(num_hidden_units)
        rbm = BoltzmannMachine([vis_layer, hid_layer])

        # randomly set the intrinsic model parameters
        a = be.rand((num_visible_units,))
        b = be.rand((num_hidden_units,))
        W = be.zeros((num_visible_units, num_hidden_units))

        rbm.layers[0].params.loc[:] = a
        rbm.layers[1].params.loc[:] = b
        rbm.connections[0].weights.params.matrix[:] = W

        if layer_type == layers.GaussianLayer:
            log_var_a = be.randn((num_visible_units,))
            log_var_b = be.randn((num_hidden_units,))
            rbm.layers[0].params.log_var[:] = log_var_a
            rbm.layers[1].params.log_var[:] = log_var_b

        # initialize a state
        state = State.from_model(batch_size, rbm)

        # run a markov chain to update the state
        state = rbm.markov_chain(steps, state)

        # compute the mean
        state_for_moments = State.from_model(1, rbm)
        sample_mean = [be.mean(state[i], axis=0) for i in range(state.len)]
        model_mean = [rbm.layers[i].conditional_mean(
            rbm._connected_rescaled_units(i, state_for_moments),
            rbm._connected_weights(i)) for i in range(rbm.num_layers)]

        # check that the means are roughly equal
        for i in range(rbm.num_layers):
            ave = sample_mean[i]
            close = be.allclose(ave, model_mean[i][0],
                                rtol=mean_tol, atol=mean_tol)
            assert close, \
                "{0} {1}: sample mean does not match model mean".format(
                    layer_type, i)

        # check the cross correlation between the layers
        crosscov = be.cov(state[0], state[1])
        norm = be.outer(be.std(state[0], axis=0), be.std(state[1], axis=0))
        # normalize the covariance by the product of standard deviations
        # (be.divide(x, y) computes y / x, so this is crosscov / norm)
        crosscorr = be.divide(norm, crosscov)
        assert be.tmax(be.tabs(crosscorr)) < corr_tol, \
            "{} cross correlation too large".format(layer_type)