def test_gaussian_GFE_entropy_gradient():
    num_units = 5
    lay = layers.GaussianLayer(num_units)

    lay.params.loc[:] = be.rand_like(lay.params.loc)
    lay.params.log_var[:] = be.randn(be.shape(lay.params.loc))

    from cytoolz import compose
    sum_square = compose(be.tsum, be.square)

    for itr in range(10):
        mag = lay.get_random_magnetization()
        lms = lay.lagrange_multipliers_analytic(mag)
        entropy = lay.TAP_entropy(mag)
        lr = 0.001
        gogogo = True

        # normalize the analytic magnetization gradient
        grad = lay.TAP_magnetization_grad(mag, [], [], [])
        grad_mag = math.sqrt(be.float_scalar(be.accumulate(sum_square, grad)))
        normit = partial(be.tmul_, be.float_scalar(1.0 / grad_mag))
        be.apply_(normit, grad)

        # normalize a random direction of the same form
        rand_grad = lay.get_random_magnetization()
        grad_mag = math.sqrt(be.float_scalar(be.accumulate(sum_square, rand_grad)))
        normit = partial(be.tmul_, be.float_scalar(1.0 / grad_mag))
        be.apply_(normit, rand_grad)

        while gogogo:
            cop1_mag = deepcopy(mag)
            cop1_lms = deepcopy(lms)
            cop2_mag = deepcopy(mag)
            cop2_lms = deepcopy(lms)

            cop1_mag.mean[:] = mag.mean + lr * grad.mean
            cop2_mag.mean[:] = mag.mean + lr * rand_grad.mean
            cop1_mag.variance[:] = mag.variance + lr * grad.variance
            cop2_mag.variance[:] = mag.variance + lr * rand_grad.variance
            lay.clip_magnetization_(cop1_mag)
            lay.clip_magnetization_(cop2_mag)

            cop1_lms = lay.lagrange_multipliers_analytic(cop1_mag)
            cop2_lms = lay.lagrange_multipliers_analytic(cop2_mag)

            entropy_1 = lay.TAP_entropy(cop1_mag)
            entropy_2 = lay.TAP_entropy(cop2_mag)

            # flag a regression if the random step yields a higher entropy
            # than the step along the analytic gradient
            regress = entropy_1 - entropy_2 < 0.0
            #print(itr, "[", lr, "] ", entropy, entropy_1, entropy_2, regress)
            if regress:
                #print(grad, rand_grad)
                if lr < 1e-6:
                    assert False, \
                        "Gaussian GFE magnetization gradient is wrong"
                    break
                else:
                    lr *= 0.5
            else:
                break

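# The check above goes through the paysage layer API. For intuition, here is a
# minimal, self-contained version of the same idea for a single Gaussian unit,
# using only numpy (not the paysage backend): the differential entropy is
# 0.5 * log(2*pi*e*var), so its analytic derivative with respect to the
# variance is 1 / (2 * var), which a central finite difference should match.
# The helper name is illustrative and not part of the library.
def _illustrative_gaussian_entropy_derivative_check():
    import numpy as np

    def gaussian_entropy(var):
        # differential entropy of a univariate Gaussian with variance `var`
        return 0.5 * np.log(2.0 * np.pi * np.e * var)

    var = 0.7
    eps = 1e-6
    analytic = 1.0 / (2.0 * var)
    numeric = (gaussian_entropy(var + eps) - gaussian_entropy(var - eps)) / (2.0 * eps)
    assert abs(analytic - numeric) < 1e-6
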
def test_bernoulli_log_partition_gradient():
    lay = layers.BernoulliLayer(500)
    lay.params.loc[:] = be.rand_like(lay.params.loc) * 2.0 - 1.0

    A = be.rand((1, 500))
    B = be.rand_like(A)

    grad = lay.grad_log_partition_function(A, B)
    logZ = be.mean(lay.log_partition_function(A, B), axis=0)

    lr = 0.01
    gogogo = True
    while gogogo:
        cop = deepcopy(lay)
        cop.params.loc[:] = lay.params.loc + lr * grad.loc
        logZ_next = be.mean(cop.log_partition_function(A, B), axis=0)

        # a step along the gradient should not decrease log Z for any unit
        regress = logZ_next - logZ < 0.0
        if True in regress:
            if lr < 1e-6:
                assert False, \
                    "gradient of Bernoulli log partition function is wrong"
                break
            else:
                lr *= 0.5
        else:
            break

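# For intuition behind the check above: a single Bernoulli unit with total
# field a has log partition function softplus(a) = log(1 + exp(a)), whose
# derivative is sigmoid(a), so stepping the field along that derivative cannot
# decrease log Z for small steps. The paysage layer folds its loc parameter
# and the A, B arguments into the field in its own parametrization; the
# snippet below only checks the scalar identity with numpy and is not part of
# the library.
def _illustrative_bernoulli_log_partition_derivative_check():
    import numpy as np

    def softplus(a):
        return np.log1p(np.exp(a))

    def sigmoid(a):
        return 1.0 / (1.0 + np.exp(-a))

    a = 0.3
    eps = 1e-6
    numeric = (softplus(a + eps) - softplus(a - eps)) / (2.0 * eps)
    assert abs(numeric - sigmoid(a)) < 1e-6
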
def test_bernoulli_GFE_derivatives():
    # Tests that the parameter update along the GFE derivative improves the
    # GFE relative to each of 100 random update vectors
    num_units = 5

    layer_1 = layers.BernoulliLayer(num_units)
    layer_2 = layers.BernoulliLayer(num_units)
    layer_3 = layers.BernoulliLayer(num_units)
    rbm = BoltzmannMachine([layer_1, layer_2, layer_3])

    for i in range(len(rbm.connections)):
        rbm.connections[i].weights.params.matrix[:] = \
            0.01 * be.randn(rbm.connections[i].shape)

    for lay in rbm.layers:
        lay.params.loc[:] = be.rand_like(lay.params.loc)

    state, cop1_GFE = rbm.compute_StateTAP(init_lr=0.1, tol=1e-7, max_iters=50)
    grad = rbm._grad_gibbs_free_energy(state)
    gu.grad_normalize_(grad)

    for i in range(100):
        lr = 1.0
        gogogo = True
        random_grad = gu.random_grad(rbm)
        gu.grad_normalize_(random_grad)
        while gogogo:
            cop1 = deepcopy(rbm)
            lr_mul = partial(be.tmul, lr)

            cop1.parameter_update(gu.grad_apply(lr_mul, grad))
            cop1_state, cop1_GFE = cop1.compute_StateTAP(init_lr=0.1, tol=1e-7,
                                                         max_iters=50)

            cop2 = deepcopy(rbm)
            cop2.parameter_update(gu.grad_apply(lr_mul, random_grad))
            cop2_state, cop2_GFE = cop2.compute_StateTAP(init_lr=0.1, tol=1e-7,
                                                         max_iters=50)

            # flag a regression if the random update reaches a lower GFE than
            # the update along the analytic gradient
            regress = cop2_GFE - cop1_GFE < 0.0
            if regress:
                if lr < 1e-6:
                    assert False, \
                        "TAP FE gradient is not working properly for Bernoulli models"
                    break
                else:
                    lr *= 0.5
            else:
                break

def test_gaussian_GFE_derivatives_gradient_descent():
    num_units = 5

    layer_1 = layers.GaussianLayer(num_units)
    layer_2 = layers.BernoulliLayer(num_units)
    rbm = BoltzmannMachine([layer_1, layer_2])

    for i in range(len(rbm.connections)):
        rbm.connections[i].weights.params.matrix[:] = \
            0.01 * be.randn(rbm.connections[i].shape)

    for lay in rbm.layers:
        lay.params.loc[:] = be.rand_like(lay.params.loc)

    state, GFE = rbm.compute_StateTAP(use_GD=False, tol=1e-7, max_iters=50)
    grad = rbm._grad_gibbs_free_energy(state)
    gu.grad_normalize_(grad)

    for i in range(100):
        lr = 0.001
        gogogo = True
        random_grad = gu.random_grad(rbm)
        gu.grad_normalize_(random_grad)
        while gogogo:
            cop1 = deepcopy(rbm)
            lr_mul = partial(be.tmul, lr)

            cop1.parameter_update(gu.grad_apply(lr_mul, grad))
            cop1_state, cop1_GFE = cop1.compute_StateTAP(use_GD=False, tol=1e-7,
                                                         max_iters=50)

            cop2 = deepcopy(rbm)
            cop2.parameter_update(gu.grad_apply(lr_mul, random_grad))
            cop2_state, cop2_GFE = cop2.compute_StateTAP(use_GD=False, tol=1e-7,
                                                         max_iters=50)

            regress = cop2_GFE - cop1_GFE < 0

            if regress:
                if lr < 1e-6:
                    assert False, \
                        "TAP FE gradient is not working properly for Gaussian models"
                    break
                else:
                    lr *= 0.5
            else:
                break

def test_bernoulli_GFE_derivatives_gradient_descent():
    num_units = 500

    layer_1 = layers.BernoulliLayer(num_units)
    layer_2 = layers.BernoulliLayer(num_units)
    layer_3 = layers.BernoulliLayer(num_units)
    rbm = model.Model([layer_1, layer_2, layer_3])

    for i in range(len(rbm.weights)):
        rbm.weights[i].params.matrix[:] = \
            0.01 * be.randn(rbm.weights[i].shape)

    for lay in rbm.layers:
        lay.params.loc[:] = be.rand_like(lay.params.loc)

    state = rbm.compute_StateTAP(init_lr=0.1, tol=1e-7, max_iters=50)
    GFE = rbm.gibbs_free_energy(state)

    lr = 0.1
    gogogo = True
    grad = rbm.grad_TAP_free_energy(0.1, 1e-7, 50)
    while gogogo:
        cop = deepcopy(rbm)
        lr_mul = partial(be.tmul, -lr)

        delta = gu.grad_apply(lr_mul, grad)
        cop.parameter_update(delta)

        cop_state = cop.compute_StateTAP(init_lr=0.1, tol=1e-7, max_iters=50)
        cop_GFE = cop.gibbs_free_energy(cop_state)

        regress = cop_GFE - GFE < 0.0
        #print(lr, cop_GFE, GFE, cop_GFE - GFE, regress)
        if regress:
            if lr < 1e-6:
                assert False, \
                    "TAP FE gradient is not working properly for Bernoulli models"
                break
            else:
                lr *= 0.5
        else:
            break

def test_gaussian_Compute_StateTAP_GD():
    num_units = 10

    layer_1 = layers.GaussianLayer(num_units)
    layer_2 = layers.BernoulliLayer(num_units)
    rbm = BoltzmannMachine([layer_1, layer_2])

    for i in range(len(rbm.connections)):
        rbm.connections[i].weights.params.matrix[:] = \
            0.01 * be.randn(rbm.connections[i].shape)

    for lay in rbm.layers:
        lay.params.loc[:] = be.rand_like(lay.params.loc)

    for i in range(100):
        random_state = StateTAP.from_model_rand(rbm)
        GFE = rbm.gibbs_free_energy(random_state.cumulants)

        _, min_GFE = rbm._compute_StateTAP_GD(seed=random_state)

        # the gradient-descent solver should not end at a higher GFE than
        # the random seed state it started from
        if GFE - min_GFE < 0.0:
            assert False, \
                "_compute_StateTAP_GD is not reducing the GFE"

def test_bernoulli_GFE_magnetization_gradient():
    num_units = 500

    layer_1 = layers.BernoulliLayer(num_units)
    layer_2 = layers.BernoulliLayer(num_units)
    layer_3 = layers.BernoulliLayer(num_units)
    layer_4 = layers.BernoulliLayer(num_units)
    rbm = model.Model([layer_1, layer_2, layer_3, layer_4])

    for i in range(len(rbm.weights)):
        rbm.weights[i].params.matrix[:] = \
            0.01 * be.randn(rbm.weights[i].shape)

    for lay in rbm.layers:
        lay.params.loc[:] = be.rand_like(lay.params.loc)

    state = mu.StateTAP.from_model_rand(rbm)
    GFE = rbm.gibbs_free_energy(state)

    lr = 0.001
    gogogo = True
    grad = rbm._TAP_magnetization_grad(state)
    while gogogo:
        cop = deepcopy(state)
        for i in range(rbm.num_layers):
            cop.cumulants[i].mean[:] = state.cumulants[i].mean + lr * grad[i].mean

        GFE_next = rbm.gibbs_free_energy(cop)
        regress = GFE_next - GFE < 0.0
        if regress:
            if lr < 1e-6:
                assert False, \
                    "Bernoulli GFE magnetization gradient is wrong"
                break
            else:
                lr *= 0.5
        else:
            break

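# All of the tests above share the same pattern: take a step of size lr along
# the analytic gradient and (where applicable) along a random direction, and
# keep halving lr until the gradient direction is at least as good as the
# alternative, failing once lr drops below a floor. A standalone sketch of
# that pattern with plain Python callables (names are illustrative, not the
# paysage API):
def directional_gradient_check(objective, step, grad_dir, rand_dir,
                               lr=0.1, min_lr=1e-6, lower_is_better=True):
    # objective: maps a candidate state to a scalar score
    # step: maps (lr, direction) to a candidate state
    while True:
        val_grad = objective(step(lr, grad_dir))
        val_rand = objective(step(lr, rand_dir))
        better = (val_grad <= val_rand) if lower_is_better else (val_grad >= val_rand)
        if better:
            return True   # the gradient direction beat the random direction
        if lr < min_lr:
            return False  # the gradient direction never won, even for tiny steps
        lr *= 0.5         # shrink the step and try again

# Example on a toy quadratic f(x) = x**2 starting from x0 = 1.0, where the
# descent direction is -df/dx = -2.0:
#     assert directional_gradient_check(lambda x: x * x,
#                                       lambda lr, d: 1.0 + lr * d,
#                                       -2.0, 0.7)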