def test_random_grad():
    """Smoke test: a random gradient object can be built for a two-layer RBM.

    Checks only that ``gu.random_grad`` runs without error on a freshly
    constructed model.
    """
    num_visible_units = 100
    num_hidden_units = 50

    # set a seed for the random number generator
    be.set_seed()

    # set up some layer and model objects
    vis_layer = layers.BernoulliLayer(num_visible_units)
    hid_layer = layers.BernoulliLayer(num_hidden_units)
    # use BoltzmannMachine like every other test in this file
    # (the original used the older `model.Model` alias)
    rbm = BoltzmannMachine([vis_layer, hid_layer])

    # create a gradient object filled with random numbers
    gu.random_grad(rbm)
def test_grad_mapzip():
    """Smoke test: ``gu.grad_mapzip`` applies a binary op across two gradients.

    Builds two random gradient objects for the same model and combines them
    elementwise with ``be.add``; only checks that this runs without error.
    """
    n_visible = 100
    n_hidden = 50

    # seed the random number generator
    be.set_seed()

    # two-layer Bernoulli machine
    rbm = BoltzmannMachine([layers.BernoulliLayer(n_visible),
                            layers.BernoulliLayer(n_hidden)])

    # two gradient objects filled with random numbers, zipped with addition
    grad_a = gu.random_grad(rbm)
    grad_b = gu.random_grad(rbm)
    gu.grad_mapzip(be.add, grad_a, grad_b)
def test_bernoulli_GFE_derivatives():
    """The TAP GFE gradient should beat 100 random unit directions.

    For each random direction, halve the step size until a step along the
    computed gradient lowers the Gibbs free energy at least as much as an
    equal-length step along the random direction; fail if no step size down
    to 1e-6 achieves this.
    """
    num_units = 5

    # three-layer Bernoulli machine with small random weights and biases
    rbm = BoltzmannMachine([layers.BernoulliLayer(num_units),
                            layers.BernoulliLayer(num_units),
                            layers.BernoulliLayer(num_units)])
    for conn in rbm.connections:
        conn.weights.params.matrix[:] = 0.01 * be.randn(conn.shape)
    for lay in rbm.layers:
        lay.params.loc[:] = be.rand_like(lay.params.loc)

    # unit-normalized GFE gradient at the TAP fixed point
    state, _ = rbm.compute_StateTAP(init_lr=0.1, tol=1e-7, max_iters=50)
    grad = rbm._grad_gibbs_free_energy(state)
    gu.grad_normalize_(grad)

    for _ in range(100):
        lr = 1.0
        rand_dir = gu.random_grad(rbm)
        gu.grad_normalize_(rand_dir)

        while True:
            scale = partial(be.tmul, lr)

            # step along the computed gradient
            cop1 = deepcopy(rbm)
            cop1.parameter_update(gu.grad_apply(scale, grad))
            _, cop1_GFE = cop1.compute_StateTAP(init_lr=0.1, tol=1e-7,
                                                max_iters=50)

            # step of the same size along the random direction
            cop2 = deepcopy(rbm)
            cop2.parameter_update(gu.grad_apply(scale, rand_dir))
            _, cop2_GFE = cop2.compute_StateTAP(init_lr=0.1, tol=1e-7,
                                                max_iters=50)

            # success: the gradient step did at least as well as the random one
            # (negated original comparison, so NaN behaves identically)
            if not cop2_GFE - cop1_GFE < 0.0:
                break

            # otherwise shrink the step; give up below the threshold
            assert lr >= 1e-6, \
                "TAP FE gradient is not working properly for Bernoulli models"
            lr *= 0.5
def test_gaussian_GFE_derivatives_gradient_descent():
    """The TAP GFE gradient (non-GD solver) should beat 100 random directions.

    Gaussian-Bernoulli variant of the Bernoulli GFE derivative test: for each
    random unit direction, halve the step size until the gradient step lowers
    the Gibbs free energy at least as much as an equal-length random step;
    fail if no step size down to 1e-6 achieves this.
    """
    num_units = 5

    # Gaussian-Bernoulli machine with small random weights and biases
    rbm = BoltzmannMachine([layers.GaussianLayer(num_units),
                            layers.BernoulliLayer(num_units)])
    for conn in rbm.connections:
        conn.weights.params.matrix[:] = 0.01 * be.randn(conn.shape)
    for lay in rbm.layers:
        lay.params.loc[:] = be.rand_like(lay.params.loc)

    # unit-normalized GFE gradient at the TAP fixed point
    state, _ = rbm.compute_StateTAP(use_GD=False, tol=1e-7, max_iters=50)
    grad = rbm._grad_gibbs_free_energy(state)
    gu.grad_normalize_(grad)

    for _ in range(100):
        lr = 0.001
        rand_dir = gu.random_grad(rbm)
        gu.grad_normalize_(rand_dir)

        while True:
            scale = partial(be.tmul, lr)

            # step along the computed gradient
            cop1 = deepcopy(rbm)
            cop1.parameter_update(gu.grad_apply(scale, grad))
            _, cop1_GFE = cop1.compute_StateTAP(use_GD=False, tol=1e-7,
                                                max_iters=50)

            # step of the same size along the random direction
            cop2 = deepcopy(rbm)
            cop2.parameter_update(gu.grad_apply(scale, rand_dir))
            _, cop2_GFE = cop2.compute_StateTAP(use_GD=False, tol=1e-7,
                                                max_iters=50)

            # success: the gradient step did at least as well as the random one
            # (negated original comparison, so NaN behaves identically)
            if not cop2_GFE - cop1_GFE < 0:
                break

            # otherwise shrink the step; give up below the threshold
            assert lr >= 1e-6, \
                "TAP FE gradient is not working properly for Gaussian models"
            lr *= 0.5
def test_grad_normalize_():
    """``gu.grad_normalize_`` should rescale a gradient to unit norm in place."""
    n_visible = 10
    n_hidden = 10

    # seed the random number generator
    be.set_seed()

    # small two-layer Bernoulli machine
    rbm = BoltzmannMachine([layers.BernoulliLayer(n_visible),
                            layers.BernoulliLayer(n_hidden)])

    # normalize a random gradient, then check its norm is 1 within tolerance
    grad = gu.random_grad(rbm)
    gu.grad_normalize_(grad)
    nrm = gu.grad_norm(grad)
    assert 1 - 1e-6 < nrm < 1 + 1e-6
def test_grad_norm():
    """The norm of a random gradient should match its expected value.

    Entries are uniform on [0, 1), so the squared norm has expectation
    n_params / 3 and the norm should land within 1 of sqrt(n_params / 3).
    """
    n_visible = 1000
    n_hidden = 1000

    # seed the random number generator
    be.set_seed()

    # two-layer Bernoulli machine
    rbm = BoltzmannMachine([layers.BernoulliLayer(n_visible),
                            layers.BernoulliLayer(n_hidden)])

    # norm of a gradient object filled with random numbers
    nrm = gu.grad_norm(gu.random_grad(rbm))

    # total parameter count: both bias vectors plus the weight matrix
    n_params = n_hidden + n_visible + n_visible * n_hidden
    expected = math.sqrt(be.float_scalar(n_params) / 3)
    assert expected - 1 < nrm < expected + 1