Example #1
def test_weights_derivative():
    ly = layers.Weights((num_vis, num_hid))
    p = penalties.l2_penalty(0.37)
    ly.add_penalty({'matrix': p})
    vis = be.randn((num_samples, num_vis))
    hid = be.randn((num_samples, num_hid))
    derivs = ly.derivatives(vis, hid)
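Note: these snippets are excerpted from a larger test module, so the imports and the module-level constants num_vis, num_hid, and num_samples they reference are not shown. A minimal sketch of that shared setup, assuming paysage-style import paths and arbitrary small sizes, is:

from paysage import backends as be
from paysage import layers, penalties

# arbitrary small sizes standing in for the test module's own constants
num_vis = 8
num_hid = 5
num_samples = 10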
Example #2
def test_state_for_grad_DrivenSequentialMC():
    num_visible_units = 100
    num_hidden_units = 50
    batch_size = 25

    # set a seed for the random number generator
    be.set_seed()

    # set up some layer and model objects
    vis_layer = layers.BernoulliLayer(num_visible_units)
    hid_layer = layers.BernoulliLayer(num_hidden_units)
    rbm = model.Model([vis_layer, hid_layer])

    # randomly set the intrinsic model parameters
    a = be.randn((num_visible_units, ))
    b = be.randn((num_hidden_units, ))
    W = be.randn((num_visible_units, num_hidden_units))

    rbm.layers[0].params.loc[:] = a
    rbm.layers[1].params.loc[:] = b
    rbm.weights[0].params.matrix[:] = W

    # generate a random batch of data
    vdata = rbm.layers[0].random((batch_size, num_visible_units))
    data_state = State.from_visible(vdata, rbm)
    dropout_scale = State.dropout_rescale(rbm)

    # since we set no dropout, dropout_scale should be None
    assert dropout_scale is None

    for u in [
            'markov_chain', 'mean_field_iteration', 'deterministic_iteration'
    ]:
        # set up the sampler
        sampler = fit.DrivenSequentialMC(rbm, updater=u, clamped=[0])
        sampler.set_state(data_state)

        # update the state of the hidden layer
        grad_state = sampler.state_for_grad(1, dropout_scale)

        assert be.allclose(data_state.units[0], grad_state.units[0]), \
        "visible layer is clamped, and shouldn't get updated: {}".format(u)

        assert not be.allclose(data_state.units[1], grad_state.units[1]), \
        "hidden layer is not clamped, and should get updated: {}".format(u)

        # compute the conditional mean with the layer function
        ave = rbm.layers[1].conditional_mean(
            rbm._connected_rescaled_units(1, data_state, dropout_scale),
            rbm._connected_weights(1))

        assert be.allclose(ave, grad_state.units[1]), \
        "hidden layer of grad_state should be conditional mean: {}".format(u)
Example #3
def test_bernoulli_derivatives():
    num_visible_units = 100
    num_hidden_units = 50
    batch_size = 25

    # set a seed for the random number generator
    be.set_seed()

    # set up some layer and model objects
    vis_layer = layers.BernoulliLayer(num_visible_units)
    hid_layer = layers.BernoulliLayer(num_hidden_units)
    rbm = hidden.Model([vis_layer, hid_layer])

    # randomly set the intrinsic model parameters
    a = be.randn((num_visible_units, ))
    b = be.randn((num_hidden_units, ))
    W = be.randn((num_visible_units, num_hidden_units))

    rbm.layers[0].int_params.loc[:] = a
    rbm.layers[1].int_params.loc[:] = b
    rbm.weights[0].int_params.matrix[:] = W

    # generate a random batch of data
    vdata = rbm.layers[0].random((batch_size, num_visible_units))
    vdata_scaled = rbm.layers[0].rescale(vdata)

    # compute the mean of the hidden layer
    rbm.layers[1].update([vdata], [rbm.weights[0].W()])
    hid_mean = rbm.layers[1].mean()
    hid_mean_scaled = rbm.layers[1].rescale(hid_mean)

    # compute the derivatives
    d_visible_loc = -be.mean(vdata, axis=0)
    d_hidden_loc = -be.mean(hid_mean_scaled, axis=0)
    d_W = -be.batch_outer(vdata, hid_mean_scaled) / len(vdata)

    # compute the derivatives using the layer functions
    vis_derivs = rbm.layers[0].derivatives(vdata, [hid_mean_scaled],
                                           [rbm.weights[0].W()])

    hid_derivs = rbm.layers[1].derivatives(hid_mean, [vdata_scaled],
                                           [rbm.weights[0].W_T()])

    weight_derivs = rbm.weights[0].derivatives(vdata, hid_mean_scaled)

    assert be.allclose(d_visible_loc, vis_derivs.loc), \
    "derivative of visible loc wrong in bernoulli-bernoulli rbm"

    assert be.allclose(d_hidden_loc, hid_derivs.loc), \
    "derivative of hidden loc wrong in bernoulli-bernoulli rbm"

    assert be.allclose(d_W, weight_derivs.matrix), \
    "derivative of weights wrong in bernoulli-bernoulli rbm"
Example #4
def test_exponential_conditional_params():
    ly = layers.ExponentialLayer(num_vis)
    w = layers.Weights((num_vis, num_hid))
    scaled_units = [be.randn((num_samples, num_hid))]
    weights = [w.W_T()]
    beta = be.rand((num_samples, 1))
    ly._conditional_params(scaled_units, weights, beta)
Example #5
def test_onehot_conditional_params():
    ly = layers.OneHotLayer(num_vis)
    w = layers.Weights((num_vis, num_hid))
    scaled_units = [be.randn((num_samples, num_hid))]
    weights = [w.W(trans=True)]
    beta = be.rand((num_samples, 1))
    ly.conditional_params(scaled_units, weights, beta)
Example #6
def test_grbm_reload():
    vis_layer = layers.BernoulliLayer(num_vis, center=True)
    hid_layer = layers.GaussianLayer(num_hid, center=True)

    # create a model and initialize it from a batch of data
    grbm = BoltzmannMachine([vis_layer, hid_layer])
    data = batch.Batch({
        'train':
        batch.InMemoryTable(be.randn((10 * num_samples, num_vis)), num_samples)
    })
    grbm.initialize(data)
    with tempfile.NamedTemporaryFile() as file:
        # save the model
        store = pandas.HDFStore(file.name, mode='w')
        grbm.save(store)
        store.close()
        # reload
        store = pandas.HDFStore(file.name, mode='r')
        grbm_reload = BoltzmannMachine.from_saved(store)
        store.close()
    # check the two models are consistent
    vis_data = vis_layer.random((num_samples, num_vis))
    data_state = State.from_visible(vis_data, grbm)
    vis_orig = grbm.deterministic_iteration(1, data_state)[0]
    vis_reload = grbm_reload.deterministic_iteration(1, data_state)[0]
    assert be.allclose(vis_orig, vis_reload)
    assert be.allclose(grbm.layers[0].moments.mean,
                       grbm_reload.layers[0].moments.mean)
    assert be.allclose(grbm.layers[0].moments.var,
                       grbm_reload.layers[0].moments.var)
    assert be.allclose(grbm.layers[1].moments.mean,
                       grbm_reload.layers[1].moments.mean)
    assert be.allclose(grbm.layers[1].moments.var,
                       grbm_reload.layers[1].moments.var)
Example #7
def test_gaussian_derivatives():
    ly = layers.GaussianLayer(num_vis)
    w = layers.Weights((num_vis, num_hid))
    vis = ly.random((num_samples, num_vis))
    hid = [be.randn((num_samples, num_hid))]
    weights = [w.W_T()]
    ly.derivatives(vis, hid, weights)
Example #8
def test_bernoulli_derivatives():
    ly = layers.BernoulliLayer(num_vis)
    w = layers.Weights((num_vis, num_hid))
    vis = ly.random((num_samples, num_vis))
    hid = [be.randn((num_samples, num_hid))]
    weights = [w.W_T()]
    ly.derivatives(vis, hid, weights)
Example #9
def test_exponential_update():
    ly = layers.ExponentialLayer(num_vis)
    w = layers.Weights((num_vis, num_hid))
    scaled_units = [be.randn((num_samples, num_hid))]
    weights = [w.W_T()]
    beta = be.rand((num_samples, 1))
    ly.update(scaled_units, weights, beta)
Example #10
def test_ising_update():
    ly = layers.IsingLayer(num_vis)
    w = layers.Weights((num_vis, num_hid))
    scaled_units = [be.randn((num_samples, num_hid))]
    weights = [w.W_T()]
    beta = be.rand((num_samples, 1))
    ly.update(scaled_units, weights, beta)
Example #11
def test_onehot_derivatives():
    ly = layers.OneHotLayer(num_vis)
    w = layers.Weights((num_vis, num_hid))
    vis = ly.random((num_samples, num_vis))
    hid = [be.randn((num_samples, num_hid))]
    weights = [w.W_T()]
    ly.derivatives(vis, hid, weights)
Example #12
def test_clamped_SequentialMC():
    num_visible_units = 100
    num_hidden_units = 50
    batch_size = 25
    steps = 1

    # set a seed for the random number generator
    be.set_seed()

    # set up some layer and model objects
    vis_layer = layers.BernoulliLayer(num_visible_units)
    hid_layer = layers.BernoulliLayer(num_hidden_units)
    rbm = model.Model([vis_layer, hid_layer])

    # randomly set the intrinsic model parameters
    a = be.randn((num_visible_units, ))
    b = be.randn((num_hidden_units, ))
    W = be.randn((num_visible_units, num_hidden_units))

    rbm.layers[0].params.loc[:] = a
    rbm.layers[1].params.loc[:] = b
    rbm.weights[0].params.matrix[:] = W

    # generate a random batch of data
    vdata = rbm.layers[0].random((batch_size, num_visible_units))
    data_state = State.from_visible(vdata, rbm)
    dropout_scale = State.dropout_rescale(rbm)

    # since we set no dropout, dropout_scale should be None
    assert dropout_scale is None

    for u in [
            'markov_chain', 'mean_field_iteration', 'deterministic_iteration'
    ]:

        # set up the sampler with the visible layer clamped
        sampler = fit.SequentialMC(rbm, updater=u, clamped=[0])
        sampler.set_state(data_state)

        # update the sampler state and check the output
        sampler.update_state(steps, dropout_scale)

        assert be.allclose(data_state.units[0], sampler.state.units[0]), \
        "visible layer is clamped, and shouldn't get updated: {}".format(u)

        assert not be.allclose(data_state.units[1], sampler.state.units[1]), \
        "hidden layer is not clamped, and should get updated: {}".format(u)
Example #13
def test_exponential_derivatives():
    ly = layers.ExponentialLayer(num_vis)
    w = layers.Weights((num_vis, num_hid))
    vis = ly.random((num_samples, num_vis))
    hid = [be.randn((num_samples, num_hid))]
    weights = [w.W_T()]
    beta = be.rand((num_samples, 1))
    ly.derivatives(vis, hid, weights, beta)
Example #14
def test_ising_derivatives():
    ly = layers.IsingLayer(num_vis)
    w = layers.Weights((num_vis, num_hid))
    vis = ly.random((num_samples, num_vis))
    hid = [be.randn((num_samples, num_hid))]
    weights = [w.W()]
    beta = be.rand((num_samples, 1))
    ly.derivatives(vis, hid, weights, beta)
Example #15
def test_bernoulli_conditional_params():
    num_visible_units = 100
    num_hidden_units = 50
    batch_size = 25

    # set a seed for the random number generator
    be.set_seed()

    # set up some layer and model objects
    vis_layer = layers.BernoulliLayer(num_visible_units)
    hid_layer = layers.BernoulliLayer(num_hidden_units)
    rbm = model.Model([vis_layer, hid_layer])

    # randomly set the intrinsic model parameters
    a = be.randn((num_visible_units, ))
    b = be.randn((num_hidden_units, ))
    W = be.randn((num_visible_units, num_hidden_units))

    rbm.layers[0].params.loc[:] = a
    rbm.layers[1].params.loc[:] = b
    rbm.weights[0].params.matrix[:] = W

    # generate a random batch of data
    vdata = rbm.layers[0].random((batch_size, num_visible_units))
    hdata = rbm.layers[1].random((batch_size, num_hidden_units))

    # compute conditional parameters
    hidden_field = be.dot(vdata, W)  # (batch_size, num_hidden_units)
    hidden_field += b

    visible_field = be.dot(hdata,
                           be.transpose(W))  # (batch_size, num_visible_units)
    visible_field += a

    # compute conditional parameters with layer functions
    hidden_field_layer = rbm.layers[1]._conditional_params(
        [vdata], [rbm.weights[0].W()])
    visible_field_layer = rbm.layers[0]._conditional_params(
        [hdata], [rbm.weights[0].W_T()])

    assert be.allclose(hidden_field, hidden_field_layer), \
    "hidden field wrong in bernoulli-bernoulli rbm"

    assert be.allclose(visible_field, visible_field_layer), \
    "visible field wrong in bernoulli-bernoulli rbm"
Example #16
def test_pdist():
    n = 500
    a_shape = (1000, n)
    b_shape = (1000, n)

    # distance distributions
    a_mean, a_scale = 1, 1
    b_mean, b_scale = -1, 1

    be.set_seed()
    a = a_mean + a_scale * be.randn(a_shape)
    b = b_mean + b_scale * be.randn(b_shape)

    dists = math_utils.pdist(a, b)
    dists_t = math_utils.pdist(b, a)
    assert be.shape(dists) == (1000, 1000)
    assert be.allclose(be.transpose(dists_t), dists)
    assert 2 * math.sqrt(n) < be.mean(dists) < 3 * math.sqrt(n)
Example #17
def test_bernoulli_update():
    num_visible_units = 100
    num_hidden_units = 50
    batch_size = 25

    # set a seed for the random number generator
    be.set_seed()

    # set up some layer and model objects
    vis_layer = layers.BernoulliLayer(num_visible_units)
    hid_layer = layers.BernoulliLayer(num_hidden_units)
    rbm = hidden.Model([vis_layer, hid_layer])

    # randomly set the intrinsic model parameters
    a = be.randn((num_visible_units, ))
    b = be.randn((num_hidden_units, ))
    W = be.randn((num_visible_units, num_hidden_units))

    rbm.layers[0].int_params.loc[:] = a
    rbm.layers[1].int_params.loc[:] = b
    rbm.weights[0].int_params.matrix[:] = W

    # generate a random batch of data
    vdata = rbm.layers[0].random((batch_size, num_visible_units))
    hdata = rbm.layers[1].random((batch_size, num_hidden_units))

    # compute extrinsic parameters
    hidden_field = be.dot(vdata, W)  # (batch_size, num_hidden_units)
    hidden_field += be.broadcast(b, hidden_field)

    visible_field = be.dot(hdata,
                           be.transpose(W))  # (batch_size, num_visible_units)
    visible_field += be.broadcast(a, visible_field)

    # update the extrinsic parameter using the layer functions
    rbm.layers[0].update([hdata], [rbm.weights[0].W_T()])
    rbm.layers[1].update([vdata], [rbm.weights[0].W()])

    assert be.allclose(hidden_field, rbm.layers[1].ext_params.field), \
    "hidden field wrong in bernoulli-bernoulli rbm"

    assert be.allclose(visible_field, rbm.layers[0].ext_params.field), \
    "visible field wrong in bernoulli-bernoulli rbm"
Example #18
def test_gaussian_GFE_entropy_gradient():
    num_units = 5
    lay = layers.GaussianLayer(num_units)

    lay.params.loc[:] = be.rand_like(lay.params.loc)
    lay.params.log_var[:] = be.randn(be.shape(lay.params.loc))

    from cytoolz import compose
    sum_square = compose(be.tsum, be.square)

    for itr in range(10):
        mag = lay.get_random_magnetization()
        lms = lay.lagrange_multipliers_analytic(mag)
        entropy = lay.TAP_entropy(mag)
        lr = 0.001
        gogogo = True
        grad = lay.TAP_magnetization_grad(mag, [], [], [])
        grad_mag = math.sqrt(be.float_scalar(be.accumulate(sum_square, grad)))
        normit = partial(be.tmul_, be.float_scalar(1.0/grad_mag))
        be.apply_(normit, grad)
        rand_grad = lay.get_random_magnetization()
        grad_mag = math.sqrt(be.float_scalar(be.accumulate(sum_square, rand_grad)))
        normit = partial(be.tmul_, be.float_scalar(1.0/grad_mag))
        be.apply_(normit, rand_grad)
        while gogogo:
            cop1_mag = deepcopy(mag)
            cop1_lms = deepcopy(lms)
            cop2_mag = deepcopy(mag)
            cop2_lms = deepcopy(lms)

            cop1_mag.mean[:] = mag.mean + lr * grad.mean
            cop2_mag.mean[:] = mag.mean + lr * rand_grad.mean
            cop1_mag.variance[:] = mag.variance + lr * grad.variance
            cop2_mag.variance[:] = mag.variance + lr * rand_grad.variance
            lay.clip_magnetization_(cop1_mag)
            lay.clip_magnetization_(cop2_mag)
            cop1_lms = lay.lagrange_multipliers_analytic(cop1_mag)
            cop2_lms = lay.lagrange_multipliers_analytic(cop2_mag)

            entropy_1 = lay.TAP_entropy(cop1_mag)
            entropy_2 = lay.TAP_entropy(cop2_mag)

            regress = entropy_1 - entropy_2 < 0.0
            #print(itr, "[",lr, "] ", entropy, entropy_1, entropy_2, regress)
            if regress:
                #print(grad, rand_grad)
                if lr < 1e-6:
                    assert False,\
                    "Gaussian GFE magnetization gradient is wrong"
                    break
                else:
                    lr *= 0.5
            else:
                break
Example #19
def test_gaussian_1D_1mode_train():
    # create some example data
    num = 10000
    mu = 3
    sigma = 1
    samples = be.randn((num, 1)) * sigma + mu

    # set up the reader to get minibatches
    batch_size = 100
    samples_train, samples_validate = batch.split_tensor(samples, 0.9)
    data = batch.Batch({
        'train':
        batch.InMemoryTable(samples_train, batch_size),
        'validate':
        batch.InMemoryTable(samples_validate, batch_size)
    })

    # parameters
    learning_rate = schedules.PowerLawDecay(initial=0.1, coefficient=0.1)
    mc_steps = 1
    num_epochs = 10
    num_sample_steps = 100

    # set up the model and initialize the parameters
    vis_layer = layers.GaussianLayer(1)
    hid_layer = layers.OneHotLayer(1)

    rbm = BoltzmannMachine([vis_layer, hid_layer])
    rbm.initialize(data, method='hinton')

    # modify the parameters to shift the initialized model from the data
    # this forces it to train
    rbm.layers[0].params = layers.ParamsGaussian(
        rbm.layers[0].params.loc - 3, rbm.layers[0].params.log_var - 1)

    # set up the optimizer and the fit method
    opt = optimizers.ADAM(stepsize=learning_rate)
    cd = fit.SGD(rbm, data)

    # fit the model
    print('training with persistent contrastive divergence')
    cd.train(opt, num_epochs, method=fit.pcd, mcsteps=mc_steps)

    # sample data from the trained model
    model_state = \
        samplers.SequentialMC.generate_fantasy_state(rbm, num, num_sample_steps)
    pts_trained = model_state[0]

    percent_error = 10
    mu_trained = be.mean(pts_trained)
    assert numpy.abs(mu_trained / mu - 1) < (percent_error / 100)

    sigma_trained = numpy.sqrt(be.var(pts_trained))
    assert numpy.abs(sigma_trained / sigma - 1) < (percent_error / 100)
Example #20
def test_unclamped_DrivenSequentialMC():
    num_visible_units = 100
    num_hidden_units = 50
    batch_size = 25
    steps = 1

    # set a seed for the random number generator
    be.set_seed()

    # set up some layer and model objects
    vis_layer = layers.BernoulliLayer(num_visible_units)
    hid_layer = layers.BernoulliLayer(num_hidden_units)
    rbm = BoltzmannMachine([vis_layer, hid_layer])

    # randomly set the intrinsic model parameters
    a = be.randn((num_visible_units,))
    b = be.randn((num_hidden_units,))
    W = be.randn((num_visible_units, num_hidden_units))

    rbm.layers[0].params.loc[:] = a
    rbm.layers[1].params.loc[:] = b
    rbm.connections[0].weights.params.matrix[:] = W

    # generate a random batch of data
    vdata = rbm.layers[0].random((batch_size, num_visible_units))
    data_state = State.from_visible(vdata, rbm)

    for u in ['markov_chain', 'mean_field_iteration', 'deterministic_iteration']:
        # set up the sampler with the visible layer clamped
        sampler = samplers.SequentialMC(rbm, updater=u)
        sampler.set_state(data_state)

        # update the sampler state and check the output
        sampler.update_state(steps)

        assert not be.allclose(data_state[0], sampler.state[0]), \
        "visible layer is not clamped, and should get updated: {}".format(u)

        assert not be.allclose(data_state[1], sampler.state[1]), \
        "hidden layer is not clamped, and should get updated: {}".format(u)
Example #21
def test_grbm_save():
    vis_layer = layers.BernoulliLayer(num_vis, center=True)
    hid_layer = layers.GaussianLayer(num_hid, center=True)
    grbm = BoltzmannMachine([vis_layer, hid_layer])
    data = batch.Batch({
        'train':
        batch.InMemoryTable(be.randn((10 * num_samples, num_vis)), num_samples)
    })
    grbm.initialize(data)
    with tempfile.NamedTemporaryFile() as file:
        store = pandas.HDFStore(file.name, mode='w')
        grbm.save(store)
        store.close()
Example #22
def test_find_k_nearest_neighbors():
    n = 20
    shp = (20, n)

    perm = be.rand_int(0, 20, (20, ))
    k = 1
    be.set_seed()

    y = be.randn(shp)
    x = y[perm]

    indices, _distances = math_utils.find_k_nearest_neighbors(x, y, k)

    assert be.allclose(indices, perm)
    assert be.allclose(_distances, be.zeros((20, )), 1e-2, 1e-2)
Example #23
def test_bernoulli_GFE_derivatives():
    # Tests that an update along the GFE derivative increases the GFE
    # more than each of 100 random update vectors does
    num_units = 5

    layer_1 = layers.BernoulliLayer(num_units)
    layer_2 = layers.BernoulliLayer(num_units)
    layer_3 = layers.BernoulliLayer(num_units)
    rbm = BoltzmannMachine([layer_1, layer_2, layer_3])

    for i in range(len(rbm.connections)):
        rbm.connections[i].weights.params.matrix[:] = \
        0.01 * be.randn(rbm.connections[i].shape)

    for lay in rbm.layers:
        lay.params.loc[:] = be.rand_like(lay.params.loc)

    state, cop1_GFE = rbm.compute_StateTAP(init_lr=0.1, tol=1e-7, max_iters=50)
    grad = rbm._grad_gibbs_free_energy(state)
    gu.grad_normalize_(grad)

    for i in range(100):
        lr = 1.0
        gogogo = True
        random_grad = gu.random_grad(rbm)
        gu.grad_normalize_(random_grad)
        while gogogo:
            cop1 = deepcopy(rbm)
            lr_mul = partial(be.tmul, lr)

            cop1.parameter_update(gu.grad_apply(lr_mul, grad))
            cop1_state, cop1_GFE = cop1.compute_StateTAP(init_lr=0.1, tol=1e-7, max_iters=50)

            cop2 = deepcopy(rbm)
            cop2.parameter_update(gu.grad_apply(lr_mul, random_grad))
            cop2_state, cop2_GFE = cop2.compute_StateTAP(init_lr=0.1, tol=1e-7, max_iters=50)

            regress = cop2_GFE - cop1_GFE < 0.0

            if regress:
                if lr < 1e-6:
                    assert False, \
                    "TAP FE gradient is not working properly for Bernoulli models"
                    break
                else:
                    lr *= 0.5
            else:
                break
Example #24
def test_gaussian_GFE_derivatives_gradient_descent():
    num_units = 5

    layer_1 = layers.GaussianLayer(num_units)
    layer_2 = layers.BernoulliLayer(num_units)

    rbm = BoltzmannMachine([layer_1, layer_2])

    for i in range(len(rbm.connections)):
        rbm.connections[i].weights.params.matrix[:] = \
        0.01 * be.randn(rbm.connections[i].shape)

    for lay in rbm.layers:
        lay.params.loc[:] = be.rand_like(lay.params.loc)

    state, GFE = rbm.compute_StateTAP(use_GD=False, tol=1e-7, max_iters=50)
    grad = rbm._grad_gibbs_free_energy(state)
    gu.grad_normalize_(grad)

    for i in range(100):
        lr = 0.001
        gogogo = True
        random_grad = gu.random_grad(rbm)
        gu.grad_normalize_(random_grad)
        while gogogo:
            cop1 = deepcopy(rbm)
            lr_mul = partial(be.tmul, lr)

            cop1.parameter_update(gu.grad_apply(lr_mul, grad))
            cop1_state, cop1_GFE = cop1.compute_StateTAP(use_GD=False, tol=1e-7, max_iters=50)

            cop2 = deepcopy(rbm)
            cop2.parameter_update(gu.grad_apply(lr_mul, random_grad))
            cop2_state, cop2_GFE = cop2.compute_StateTAP(use_GD=False, tol=1e-7, max_iters=50)

            regress = cop2_GFE - cop1_GFE < 0

            if regress:
                if lr < 1e-6:
                    assert False, \
                    "TAP FE gradient is not working properly for Gaussian models"
                    break
                else:
                    lr *= 0.5
            else:
                break
Example #25
def test_bernoulli_GFE_derivatives():
    num_units = 500

    layer_1 = layers.BernoulliLayer(num_units)
    layer_2 = layers.BernoulliLayer(num_units)
    layer_3 = layers.BernoulliLayer(num_units)

    rbm = model.Model([layer_1, layer_2, layer_3])
    for i in range(len(rbm.weights)):
        rbm.weights[i].params.matrix[:] = \
        0.01 * be.randn(rbm.weights[i].shape)

    for lay in rbm.layers:
        lay.params.loc[:] = be.rand_like(lay.params.loc)

    state = rbm.compute_StateTAP(init_lr=0.1, tol=1e-7, max_iters=50)
    GFE = rbm.gibbs_free_energy(state)

    lr = 0.1
    gogogo = True
    grad = rbm.grad_TAP_free_energy(0.1, 1e-7, 50)
    while gogogo:
        cop = deepcopy(rbm)
        lr_mul = partial(be.tmul, -lr)

        delta = gu.grad_apply(lr_mul, grad)
        cop.parameter_update(delta)

        cop_state = cop.compute_StateTAP(init_lr=0.1, tol=1e-7, max_iters=50)
        cop_GFE = cop.gibbs_free_energy(cop_state)

        regress = cop_GFE - GFE < 0.0
        print(lr, cop_GFE, GFE, cop_GFE - GFE, regress)
        if regress:
            if lr < 1e-6:
                assert False, \
                "TAP FE gradient is not working properly for Bernoulli models"
                break
            else:
                lr *= 0.5
        else:
            break
Example #26
def test_gaussian_Compute_StateTAP_GD():
    num_units = 10

    layer_1 = layers.GaussianLayer(num_units)
    layer_2 = layers.BernoulliLayer(num_units)

    rbm = BoltzmannMachine([layer_1, layer_2])
    for i in range(len(rbm.connections)):
        rbm.connections[i].weights.params.matrix[:] = \
        0.01 * be.randn(rbm.connections[i].shape)

    for lay in rbm.layers:
        lay.params.loc[:] = be.rand_like(lay.params.loc)

    for i in range(100):
        random_state = StateTAP.from_model_rand(rbm)
        GFE = rbm.gibbs_free_energy(random_state.cumulants)
        _, min_GFE = rbm._compute_StateTAP_GD(seed=random_state)

        if GFE - min_GFE < 0.0:
            assert False, \
                "compute_StateTAP_self_consistent is not reducing the GFE"
Example #27
def test_bernoulli_GFE_magnetization_gradient():
    num_units = 500

    layer_1 = layers.BernoulliLayer(num_units)
    layer_2 = layers.BernoulliLayer(num_units)
    layer_3 = layers.BernoulliLayer(num_units)
    layer_4 = layers.BernoulliLayer(num_units)
    rbm = model.Model([layer_1, layer_2, layer_3, layer_4])
    for i in range(len(rbm.weights)):
        rbm.weights[i].params.matrix[:] = \
        0.01 * be.randn(rbm.weights[i].shape)

    for lay in rbm.layers:
        lay.params.loc[:] = be.rand_like(lay.params.loc)

    state = mu.StateTAP.from_model_rand(rbm)
    GFE = rbm.gibbs_free_energy(state)

    lr = 0.001
    gogogo = True
    grad = rbm._TAP_magnetization_grad(state)
    while gogogo:
        cop = deepcopy(state)
        for i in range(rbm.num_layers):
            cop.cumulants[i].mean[:] = state.cumulants[i].mean + lr * grad[i].mean

        GFE_next = rbm.gibbs_free_energy(cop)
        regress = GFE_next - GFE < 0.0
        if regress:
            if lr < 1e-6:
                assert False,\
                "Bernoulli GFE magnetization gradient is wrong"
                break
            else:
                lr *= 0.5
        else:
            break
Example #28
def test_weights_energy():
    ly = layers.Weights((num_vis, num_hid))
    vis = be.randn((num_samples, num_vis))
    hid = be.randn((num_samples, num_hid))
    ly.energy(vis, hid)
Example #29
def test_parameter_step():
    ly = layers.Weights((num_vis, num_hid))
    deltas = layers.ParamsWeights(be.randn(ly.shape))
    ly.parameter_step(deltas)
Example #30
def test_gaussian_derivatives():
    num_visible_units = 100
    num_hidden_units = 50
    batch_size = 25

    # set a seed for the random number generator
    be.set_seed()

    # set up some layer and model objects
    vis_layer = layers.GaussianLayer(num_visible_units)
    hid_layer = layers.GaussianLayer(num_hidden_units)
    rbm = hidden.Model([vis_layer, hid_layer])

    # randomly set the intrinsic model parameters
    a = be.randn((num_visible_units, ))
    b = be.randn((num_hidden_units, ))
    log_var_a = 0.1 * be.randn((num_visible_units, ))
    log_var_b = 0.1 * be.randn((num_hidden_units, ))
    W = be.randn((num_visible_units, num_hidden_units))

    rbm.layers[0].int_params.loc[:] = a
    rbm.layers[1].int_params.loc[:] = b
    rbm.layers[0].int_params.log_var[:] = log_var_a
    rbm.layers[1].int_params.log_var[:] = log_var_b
    rbm.weights[0].int_params.matrix[:] = W

    # generate a random batch of data
    vdata = rbm.layers[0].random((batch_size, num_visible_units))
    visible_var = be.exp(log_var_a)
    vdata_scaled = vdata / be.broadcast(visible_var, vdata)

    # compute the mean of the hidden layer
    rbm.layers[1].update([vdata_scaled], [rbm.weights[0].W()])
    hidden_var = be.exp(log_var_b)
    hid_mean = rbm.layers[1].mean()
    hid_mean_scaled = rbm.layers[1].rescale(hid_mean)

    # compute the derivatives
    d_vis_loc = -be.mean(vdata_scaled, axis=0)
    d_vis_logvar = -0.5 * be.mean(be.square(be.subtract(a, vdata)), axis=0)
    d_vis_logvar += be.batch_dot(
        hid_mean_scaled, be.transpose(W), vdata, axis=0) / len(vdata)
    d_vis_logvar /= visible_var

    d_hid_loc = -be.mean(hid_mean_scaled, axis=0)

    d_hid_logvar = -0.5 * be.mean(
        be.square(hid_mean - be.broadcast(b, hid_mean)), axis=0)
    d_hid_logvar += be.batch_dot(vdata_scaled, W, hid_mean,
                                 axis=0) / len(hid_mean)
    d_hid_logvar /= hidden_var

    d_W = -be.batch_outer(vdata_scaled, hid_mean_scaled) / len(vdata_scaled)

    # compute the derivatives using the layer functions
    rbm.layers[1].update([vdata_scaled], [rbm.weights[0].W()])
    rbm.layers[0].update([hid_mean_scaled], [rbm.weights[0].W_T()])

    vis_derivs = rbm.layers[0].derivatives(vdata, [hid_mean_scaled],
                                           [rbm.weights[0].W()])

    hid_derivs = rbm.layers[1].derivatives(hid_mean, [vdata_scaled],
                                           [rbm.weights[0].W_T()])

    weight_derivs = rbm.weights[0].derivatives(vdata_scaled, hid_mean_scaled)

    assert be.allclose(d_vis_loc, vis_derivs.loc), \
    "derivative of visible loc wrong in gaussian-gaussian rbm"

    assert be.allclose(d_hid_loc, hid_derivs.loc), \
    "derivative of hidden loc wrong in gaussian-gaussian rbm"

    assert be.allclose(d_vis_logvar, vis_derivs.log_var, rtol=1e-05, atol=1e-01), \
    "derivative of visible log_var wrong in gaussian-gaussian rbm"

    assert be.allclose(d_hid_logvar, hid_derivs.log_var, rtol=1e-05, atol=1e-01), \
    "derivative of hidden log_var wrong in gaussian-gaussian rbm"

    assert be.allclose(d_W, weight_derivs.matrix), \
    "derivative of weights wrong in gaussian-gaussian rbm"