def example_mnist_tap_machine(paysage_path=None,
                              num_epochs=10,
                              show_plot=False):

    num_hidden_units = 256
    batch_size = 100
    learning_rate = schedules.PowerLawDecay(initial=0.1, coefficient=0.1)

    (_, _, shuffled_filepath) = \
            util.default_paths(paysage_path)

    # set up the reader to get minibatches
    data = batch.HDFBatch(shuffled_filepath,
                          'train/images',
                          batch_size,
                          transform=batch.binarize_color,
                          train_fraction=0.95)

    # set up the model and initialize the parameters
    vis_layer = layers.BernoulliLayer(data.ncols)
    hid_layer = layers.BernoulliLayer(num_hidden_units)

    rbm = model.Model([vis_layer, hid_layer])
    rbm.initialize(data, 'glorot_normal')

    perf = fit.ProgressMonitor(
        data,
        metrics=['ReconstructionError', 'EnergyDistance', 'HeatCapacity'])

    opt = optimizers.Gradient(stepsize=learning_rate,
                              tolerance=1e-4,
                              ascent=True)

    sampler = fit.DrivenSequentialMC.from_batch(rbm, data)

    sgd = fit.SGD(rbm,
                  data,
                  opt,
                  num_epochs,
                  sampler=sampler,
                  method=fit.tap,
                  monitor=perf)

    # fit the model
    print('Training with stochastic gradient ascent using TAP expansion')
    sgd.train()

    util.show_metrics(rbm, perf)
    valid = data.get('validate')
    util.show_reconstructions(rbm,
                              valid,
                              fit,
                              show_plot,
                              n_recon=10,
                              vertical=False)
    util.show_fantasy_particles(rbm, valid, fit, show_plot, n_fantasy=25)
    util.show_weights(rbm, show_plot, n_weights=25)
    # close the HDF5 store
    data.close()
    print("Done")
Example #2
def test_grbm_save():
    vis_layer = layers.BernoulliLayer(num_vis)
    hid_layer = layers.GaussianLayer(num_hid)
    grbm = model.Model([vis_layer, hid_layer])
    with tempfile.NamedTemporaryFile() as file:
        store = pandas.HDFStore(file.name, mode='w')
        grbm.save(store)
        store.close()
Example #3
def example_mnist_deep_rbm(paysage_path=None, num_epochs=10, show_plot=False):
    num_hidden_units = 500
    batch_size = 100
    learning_rate = schedules.PowerLawDecay(initial=0.01, coefficient=0.1)
    mc_steps = 1

    (_, _, shuffled_filepath) = \
            util.default_paths(paysage_path)

    # set up the reader to get minibatches
    data = batch.HDFBatch(shuffled_filepath,
                          'train/images',
                          batch_size,
                          transform=batch.binarize_color,
                          train_fraction=0.99)

    # set up the model and initialize the parameters
    vis_layer = layers.BernoulliLayer(data.ncols)
    hid_1_layer = layers.BernoulliLayer(num_hidden_units)
    hid_2_layer = layers.BernoulliLayer(num_hidden_units)

    rbm = model.Model([vis_layer, hid_1_layer, hid_2_layer])
    rbm.initialize(data)

    metrics = [
        'ReconstructionError', 'EnergyDistance', 'EnergyGap', 'EnergyZscore',
        'HeatCapacity'
    ]
    perf = fit.ProgressMonitor(data, metrics=metrics)

    # set up the optimizer and the fit method
    opt = optimizers.ADAM(stepsize=learning_rate)

    sampler = fit.SequentialMC.from_batch(rbm, data)

    cd = fit.SGD(rbm,
                 data,
                 opt,
                 num_epochs,
                 method=fit.pcd,
                 sampler=sampler,
                 mcsteps=mc_steps,
                 monitor=perf)

    # fit the model
    print('training with persistent contrastive divergence')
    cd.train_layerwise()

    # evaluate the model
    util.show_metrics(rbm, perf)
    valid = data.get('validate')
    util.show_reconstructions(rbm, valid, fit, show_plot)
    util.show_fantasy_particles(rbm, valid, fit, show_plot)
    util.show_weights(rbm, show_plot)

    # close the HDF5 store
    data.close()
    print("Done")
Example #4
def test_grbm_from_config():
    vis_layer = layers.BernoulliLayer(num_vis)
    hid_layer = layers.GaussianLayer(num_hid)
    grbm = model.Model([vis_layer, hid_layer])
    config = grbm.get_config()

    rbm_from_config = model.Model.from_config(config)
    config_from_config = rbm_from_config.get_config()
    assert config == config_from_config
Example #5
def example_mnist_grbm(paysage_path=None, num_epochs=10, show_plot=False):

    num_hidden_units = 500
    batch_size = 50
    learning_rate = 0.001  # a Gaussian RBM usually requires a smaller learning rate
    mc_steps = 1

    (_, _, shuffled_filepath) = \
            util.default_paths(paysage_path)

    # set up the reader to get minibatches
    # NOTE: `transform` was left undefined in the original snippet; scaling
    # pixels to [0, 1] is a reasonable assumption for a Gaussian visible layer
    data = batch.Batch(shuffled_filepath,
                       'train/images',
                       batch_size,
                       transform=lambda x: be.float_tensor(x) / 255.0,
                       train_fraction=0.99)

    # set up the model and initialize the parameters
    vis_layer = layers.GaussianLayer(data.ncols)
    hid_layer = layers.BernoulliLayer(num_hidden_units)

    rbm = model.Model([vis_layer, hid_layer])
    rbm.initialize(data)

    metrics = [
        'ReconstructionError', 'EnergyDistance', 'EnergyGap', 'EnergyZscore'
    ]
    perf = fit.ProgressMonitor(data, metrics=metrics)

    opt = optimizers.ADAM(stepsize=learning_rate,
                          scheduler=optimizers.PowerLawDecay(0.1))

    sampler = fit.DrivenSequentialMC.from_batch(rbm, data, method='stochastic')

    cd = fit.SGD(rbm,
                 data,
                 opt,
                 num_epochs,
                 method=fit.pcd,
                 sampler=sampler,
                 mcsteps=mc_steps,
                 monitor=perf)

    # fit the model
    print('training with persistent contrastive divergence')
    cd.train()

    # evaluate the model
    util.show_metrics(rbm, perf)
    util.show_reconstructions(rbm, data.get('validate'), fit, show_plot)
    util.show_fantasy_particles(rbm, data.get('validate'), fit, show_plot)
    util.show_weights(rbm, show_plot)

    # close the HDF5 store
    data.close()
    print("Done")
Example #6
def test_state_for_grad_DrivenSequentialMC():
    num_visible_units = 100
    num_hidden_units = 50
    batch_size = 25

    # set a seed for the random number generator
    be.set_seed()

    # set up some layer and model objects
    vis_layer = layers.BernoulliLayer(num_visible_units)
    hid_layer = layers.BernoulliLayer(num_hidden_units)
    rbm = model.Model([vis_layer, hid_layer])

    # randomly set the intrinsic model parameters
    a = be.randn((num_visible_units, ))
    b = be.randn((num_hidden_units, ))
    W = be.randn((num_visible_units, num_hidden_units))

    rbm.layers[0].params.loc[:] = a
    rbm.layers[1].params.loc[:] = b
    rbm.weights[0].params.matrix[:] = W

    # generate a random batch of data
    vdata = rbm.layers[0].random((batch_size, num_visible_units))
    data_state = State.from_visible(vdata, rbm)
    dropout_scale = State.dropout_rescale(rbm)

    # since we set no dropout, dropout_scale should be None
    assert dropout_scale is None

    for u in [
            'markov_chain', 'mean_field_iteration', 'deterministic_iteration'
    ]:
        # set up the sampler
        sampler = fit.DrivenSequentialMC(rbm, updater=u, clamped=[0])
        sampler.set_state(data_state)

        # update the state of the hidden layer
        grad_state = sampler.state_for_grad(1, dropout_scale)

        assert be.allclose(data_state.units[0], grad_state.units[0]), \
        "visible layer is clamped, and shouldn't get updated: {}".format(u)

        assert not be.allclose(data_state.units[1], grad_state.units[1]), \
        "hidden layer is not clamped, and should get updated: {}".format(u)

        # compute the conditional mean with the layer function
        ave = rbm.layers[1].conditional_mean(
            rbm._connected_rescaled_units(1, data_state, dropout_scale),
            rbm._connected_weights(1))

        assert be.allclose(ave, grad_state.units[1]), \
        "hidden layer of grad_state should be conditional mean: {}".format(u)
Example #7
def test_exponential_derivatives():
    num_visible_units = 100
    num_hidden_units = 50
    batch_size = 25

    # set a seed for the random number generator
    be.set_seed()

    # set up some layer and model objects
    vis_layer = layers.ExponentialLayer(num_visible_units)
    hid_layer = layers.ExponentialLayer(num_hidden_units)
    rbm = model.Model([vis_layer, hid_layer])

    # randomly set the intrinsic model parameters
    # for the exponential layers, we need a > 0, b > 0, and W < 0
    a = be.rand((num_visible_units, ))
    b = be.rand((num_hidden_units, ))
    W = -be.rand((num_visible_units, num_hidden_units))

    rbm.layers[0].params.loc[:] = a
    rbm.layers[1].params.loc[:] = b
    rbm.weights[0].params.matrix[:] = W

    # generate a random batch of data
    vdata = rbm.layers[0].random((batch_size, num_visible_units))
    vdata_scaled = rbm.layers[0].rescale(vdata)

    # compute the mean of the hidden layer
    hid_mean = rbm.layers[1].conditional_mean([vdata], [rbm.weights[0].W()])
    hid_mean_scaled = rbm.layers[1].rescale(hid_mean)

    # compute the derivatives
    d_visible_loc = be.mean(vdata, axis=0)
    d_hidden_loc = be.mean(hid_mean_scaled, axis=0)
    d_W = -be.batch_outer(vdata, hid_mean_scaled) / len(vdata)

    # compute the derivatives using the layer functions
    vis_derivs = rbm.layers[0].derivatives(vdata, [hid_mean_scaled],
                                           [rbm.weights[0].W_T()])

    hid_derivs = rbm.layers[1].derivatives(hid_mean, [vdata_scaled],
                                           [rbm.weights[0].W()])

    weight_derivs = rbm.weights[0].derivatives(vdata, hid_mean_scaled)

    assert be.allclose(d_visible_loc, vis_derivs.loc), \
    "derivative of visible loc wrong in exponential-exponential rbm"

    assert be.allclose(d_hidden_loc, hid_derivs.loc), \
    "derivative of hidden loc wrong in exponential-exponential rbm"

    assert be.allclose(d_W, weight_derivs.matrix), \
    "derivative of weights wrong in exponential-exponential rbm"
Example #8
def run(paysage_path=None, num_epochs=10, show_plot=False):

    num_hidden_units = 500
    batch_size = 100
    learning_rate = schedules.PowerLawDecay(initial=0.001, coefficient=0.1)
    mc_steps = 1

    (_, _, shuffled_filepath) = \
        util.default_paths(paysage_path)

    # set up the reader to get minibatches
    data = batch.HDFBatch(shuffled_filepath,
                          'train/images',
                          batch_size,
                          transform=pre.binarize_color,
                          train_fraction=0.99)

    # set up the model and initialize the parameters
    vis_layer = layers.BernoulliLayer(data.ncols)
    hid_layer = layers.GaussianLayer(num_hidden_units)
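    # fixing loc and log_var keeps the Gaussian hidden units at their initial
    # parameters during training, giving a Hopfield-style model (compare
    # test_hopfield_construction below)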
    hid_layer.set_fixed_params(["loc", "log_var"])

    rbm = model.Model([vis_layer, hid_layer])
    rbm.initialize(data, method="glorot_normal")

    metrics = ['ReconstructionError', 'EnergyDistance', 'EnergyGap',
               'EnergyZscore', 'HeatCapacity', 'WeightSparsity', 'WeightSquare']
    perf = fit.ProgressMonitor(data, metrics=metrics)

    # set up the optimizer and the fit method
    opt = optimizers.ADAM(stepsize=learning_rate)

    sampler = fit.DrivenSequentialMC.from_batch(rbm, data)

    cd = fit.SGD(rbm, data, opt, num_epochs, sampler, method=fit.pcd,
                 mcsteps=mc_steps, monitor=perf)

    # fit the model
    print('training with persistent contrastive divergence')
    cd.train()

    # evaluate the model
    util.show_metrics(rbm, perf)
    valid = data.get('validate')
    util.show_reconstructions(rbm, valid, fit, show_plot,
                              n_recon=10, vertical=False, num_to_avg=10)
    util.show_fantasy_particles(rbm, valid, fit, show_plot, n_fantasy=25)
    util.show_weights(rbm, show_plot, n_weights=25)

    # close the HDF5 store
    data.close()
    print("Done")
Example #9
def test_random_grad():
    num_visible_units = 100
    num_hidden_units = 50

    # set a seed for the random number generator
    be.set_seed()

    # set up some layer and model objects
    vis_layer = layers.BernoulliLayer(num_visible_units)
    hid_layer = layers.BernoulliLayer(num_hidden_units)
    rbm = model.Model([vis_layer, hid_layer])

    # create a gradient object filled with random numbers
    gu.random_grad(rbm)
Example #10
def test_clamped_SequentialMC():
    num_visible_units = 100
    num_hidden_units = 50
    batch_size = 25
    steps = 1

    # set a seed for the random number generator
    be.set_seed()

    # set up some layer and model objects
    vis_layer = layers.BernoulliLayer(num_visible_units)
    hid_layer = layers.BernoulliLayer(num_hidden_units)
    rbm = model.Model([vis_layer, hid_layer])

    # randomly set the intrinsic model parameters
    a = be.randn((num_visible_units, ))
    b = be.randn((num_hidden_units, ))
    W = be.randn((num_visible_units, num_hidden_units))

    rbm.layers[0].params.loc[:] = a
    rbm.layers[1].params.loc[:] = b
    rbm.weights[0].params.matrix[:] = W

    # generate a random batch of data
    vdata = rbm.layers[0].random((batch_size, num_visible_units))
    data_state = State.from_visible(vdata, rbm)
    dropout_scale = State.dropout_rescale(rbm)

    # since we set no dropout, dropout_scale should be None
    assert dropout_scale is None

    for u in [
            'markov_chain', 'mean_field_iteration', 'deterministic_iteration'
    ]:

        # set up the sampler with the visible layer clamped
        sampler = fit.SequentialMC(rbm, updater=u, clamped=[0])
        sampler.set_state(data_state)

        # update the sampler state and check the output
        sampler.update_state(steps, dropout_scale)

        assert be.allclose(data_state.units[0], sampler.state.units[0]), \
        "visible layer is clamped, and shouldn't get updated: {}".format(u)

        assert not be.allclose(data_state.units[1], sampler.state.units[1]), \
        "hidden layer is not clamped, and should get updated: {}".format(u)
Example #11
def test_exponential_conditional_params():
    num_visible_units = 100
    num_hidden_units = 50
    batch_size = 25

    # set a seed for the random number generator
    be.set_seed()

    # set up some layer and model objects
    vis_layer = layers.ExponentialLayer(num_visible_units)
    hid_layer = layers.ExponentialLayer(num_hidden_units)
    rbm = model.Model([vis_layer, hid_layer])

    # randomly set the intrinsic model parameters
    # for the exponential layers, we need a > 0, b > 0, and W < 0
    a = be.rand((num_visible_units, ))
    b = be.rand((num_hidden_units, ))
    W = -be.rand((num_visible_units, num_hidden_units))

    rbm.layers[0].params.loc[:] = a
    rbm.layers[1].params.loc[:] = b
    rbm.weights[0].params.matrix[:] = W

    # generate a random batch of data
    vdata = rbm.layers[0].random((batch_size, num_visible_units))
    hdata = rbm.layers[1].random((batch_size, num_hidden_units))

    # compute conditional parameters
    hidden_rate = -be.dot(vdata, W)  # (batch_size, num_hidden_units)
    hidden_rate += be.broadcast(b, hidden_rate)

    visible_rate = -be.dot(hdata,
                           be.transpose(W))  # (batch_size, num_visible_units)
    visible_rate += be.broadcast(a, visible_rate)

    # compute the conditional parameters using the layer functions
    hidden_rate_func = rbm.layers[1]._conditional_params([vdata],
                                                         [rbm.weights[0].W()])
    visible_rate_func = rbm.layers[0]._conditional_params(
        [hdata], [rbm.weights[0].W_T()])

    assert be.allclose(hidden_rate, hidden_rate_func), \
    "hidden rate wrong in exponential-exponential rbm"

    assert be.allclose(visible_rate, visible_rate_func), \
    "visible rate wrong in exponential-exponential rbm"
Example #12
def test_bernoulli_conditional_params():
    num_visible_units = 100
    num_hidden_units = 50
    batch_size = 25

    # set a seed for the random number generator
    be.set_seed()

    # set up some layer and model objects
    vis_layer = layers.BernoulliLayer(num_visible_units)
    hid_layer = layers.BernoulliLayer(num_hidden_units)
    rbm = model.Model([vis_layer, hid_layer])

    # randomly set the intrinsic model parameters
    a = be.randn((num_visible_units, ))
    b = be.randn((num_hidden_units, ))
    W = be.randn((num_visible_units, num_hidden_units))

    rbm.layers[0].params.loc[:] = a
    rbm.layers[1].params.loc[:] = b
    rbm.weights[0].params.matrix[:] = W

    # generate a random batch of data
    vdata = rbm.layers[0].random((batch_size, num_visible_units))
    hdata = rbm.layers[1].random((batch_size, num_hidden_units))

    # compute conditional parameters
    hidden_field = be.dot(vdata, W)  # (batch_size, num_hidden_units)
    hidden_field += b

    visible_field = be.dot(hdata,
                           be.transpose(W))  # (batch_size, num_visible_units)
    visible_field += a

    # compute conditional parameters with layer functions
    hidden_field_layer = rbm.layers[1]._conditional_params(
        [vdata], [rbm.weights[0].W()])
    visible_field_layer = rbm.layers[0]._conditional_params(
        [hdata], [rbm.weights[0].W_T()])

    assert be.allclose(hidden_field, hidden_field_layer), \
    "hidden field wrong in bernoulli-bernoulli rbm"

    assert be.allclose(visible_field, visible_field_layer), \
    "visible field wrong in bernoulli-bernoulli rbm"
Example #13
def test_bernoulli_GFE_derivatives():
    num_units = 500

    layer_1 = layers.BernoulliLayer(num_units)
    layer_2 = layers.BernoulliLayer(num_units)
    layer_3 = layers.BernoulliLayer(num_units)

    rbm = model.Model([layer_1, layer_2, layer_3])
    for i in range(len(rbm.weights)):
        rbm.weights[i].params.matrix[:] = \
            0.01 * be.randn(rbm.weights[i].shape)

    for lay in rbm.layers:
        lay.params.loc[:] = be.rand_like(lay.params.loc)

    state = rbm.compute_StateTAP(init_lr=0.1, tol=1e-7, max_iters=50)
    GFE = rbm.gibbs_free_energy(state)

    lr = 0.1
    gogogo = True
    grad = rbm.grad_TAP_free_energy(0.1, 1e-7, 50)
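    # take a small parameter step using the TAP gradient; if the GFE moves the
    # wrong way, halve the step size and retry, failing once lr becomes tiny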
    while gogogo:
        cop = deepcopy(rbm)
        lr_mul = partial(be.tmul, -lr)

        delta = gu.grad_apply(lr_mul, grad)
        cop.parameter_update(delta)

        cop_state = cop.compute_StateTAP(init_lr=0.1, tol=1e-7, max_iters=50)
        cop_GFE = cop.gibbs_free_energy(cop_state)

        regress = cop_GFE - GFE < 0.0
        print(lr, cop_GFE, GFE, cop_GFE - GFE, regress)
        if regress:
            if lr < 1e-6:
                assert False, \
                "TAP FE gradient is not working properly for Bernoulli models"
            else:
                lr *= 0.5
        else:
            break
Example #14
def test_grbm_reload():
    vis_layer = layers.BernoulliLayer(num_vis)
    hid_layer = layers.GaussianLayer(num_hid)
    # create some extrinsics
    grbm = model.Model([vis_layer, hid_layer])
    with tempfile.NamedTemporaryFile() as file:
        # save the model
        store = pandas.HDFStore(file.name, mode='w')
        grbm.save(store)
        store.close()
        # reload
        store = pandas.HDFStore(file.name, mode='r')
        grbm_reload = model.Model.from_saved(store)
        store.close()
    # check the two models are consistent
    vis_data = vis_layer.random((num_samples, num_vis))
    data_state = model.State.from_visible(vis_data, grbm)
    vis_orig = grbm.deterministic_iteration(1, data_state).units[0]
    vis_reload = grbm_reload.deterministic_iteration(1, data_state).units[0]
    assert be.allclose(vis_orig, vis_reload)
Example #15
def test_bernoulli_GFE_magnetization_gradient():
    num_units = 500

    layer_1 = layers.BernoulliLayer(num_units)
    layer_2 = layers.BernoulliLayer(num_units)
    layer_3 = layers.BernoulliLayer(num_units)
    layer_4 = layers.BernoulliLayer(num_units)
    rbm = model.Model([layer_1, layer_2, layer_3, layer_4])
    for i in range(len(rbm.weights)):
        rbm.weights[i].params.matrix[:] = \
            0.01 * be.randn(rbm.weights[i].shape)

    for lay in rbm.layers:
        lay.params.loc[:] = be.rand_like(lay.params.loc)

    state = mu.StateTAP.from_model_rand(rbm)
    GFE = rbm.gibbs_free_energy(state)

    lr = 0.001
    gogogo = True
    grad = rbm._TAP_magnetization_grad(state)
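    # nudge the magnetizations along the gradient; for a small enough step, a
    # correct gradient should not decrease the GFE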
    while gogogo:
        cop = deepcopy(state)
        for i in range(rbm.num_layers):
            cop.cumulants[i].mean[:] = \
                state.cumulants[i].mean + lr * grad[i].mean

        GFE_next = rbm.gibbs_free_energy(cop)
        regress = GFE_next - GFE < 0.0
        if regress:
            if lr < 1e-6:
                assert False, \
                "Bernoulli GFE magnetization gradient is wrong"
            else:
                lr *= 0.5
        else:
            break
Example #16
def test_independent():
    """
    Test sampling from an rbm with two layers connected by a weight matrix that
    contains all zeros, so that the layers are independent.

    Note:
        This test compares values estimated by *sampling* to values computed
        analytically. It can fail for a small batch_size or strict tolerances,
        even if everything is working properly.

    """
    num_visible_units = 20
    num_hidden_units = 10
    batch_size = 1000
    steps = 100
    mean_tol = 0.1
    corr_tol = 0.2

    # set a seed for the random number generator
    be.set_seed()

    layer_types = [layers.BernoulliLayer, layers.GaussianLayer]

    for layer_type in layer_types:
        # set up some layer and model objects
        vis_layer = layer_type(num_visible_units)
        hid_layer = layer_type(num_hidden_units)
        rbm = model.Model([vis_layer, hid_layer])

        # randomly set the intrinsic model parameters
        a = be.rand((num_visible_units, ))
        b = be.rand((num_hidden_units, ))
        W = be.zeros((num_visible_units, num_hidden_units))

        rbm.layers[0].params.loc[:] = a
        rbm.layers[1].params.loc[:] = b
        rbm.weights[0].params.matrix[:] = W

        if layer_type == layers.GaussianLayer:
            log_var_a = be.randn((num_visible_units, ))
            log_var_b = be.randn((num_hidden_units, ))
            rbm.layers[0].params.log_var[:] = log_var_a
            rbm.layers[1].params.log_var[:] = log_var_b

        # initialize a state
        state = State.from_model(batch_size, rbm)
        dropout = State.dropout_rescale(rbm)

        # run a markov chain to update the state
        state = rbm.markov_chain(steps, state, dropout)

        # compute the mean
        state_for_moments = State.from_model(1, rbm)
        sample_mean = [
            be.mean(state.units[i], axis=0) for i in range(len(state.units))
        ]
        model_mean = [
            rbm.layers[i].conditional_mean(
                rbm._connected_rescaled_units(i, state_for_moments, dropout),
                rbm._connected_weights(i)) for i in range(rbm.num_layers)
        ]

        # check that the means are roughly equal
        for i in range(rbm.num_layers):
            ave = sample_mean[i]
            close = be.allclose(ave,
                                model_mean[i][0],
                                rtol=mean_tol,
                                atol=mean_tol)
            assert close, "{0} {1}: sample mean does not match model mean".format(
                layer_type, i)

        # check the cross correlation between the layers
        crosscov = be.cov(state.units[0], state.units[1])
        norm = be.outer(be.std(state.units[0], axis=0),
                        be.std(state.units[1], axis=0))
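        # paysage's be.divide(x, y) divides y by x, so this is crosscov / norm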
        crosscorr = be.divide(norm, crosscov)
        assert be.tmax(
            be.tabs(crosscorr)
        ) < corr_tol, "{} cross correlation too large".format(layer_type)
Example #17
def test_conditional_sampling():
    """
    Test sampling from one layer conditioned on the state of another layer.

    Note:
        This test compares values estimated by *sampling* to values computed
        analytically. It can fail for a small batch_size or strict tolerances,
        even if everything is working properly.

    """
    num_visible_units = 20
    num_hidden_units = 10
    steps = 1000
    mean_tol = 0.1

    # set a seed for the random number generator
    be.set_seed()

    layer_types = [layers.BernoulliLayer, layers.GaussianLayer]

    for layer_type in layer_types:
        # set up some layer and model objects
        vis_layer = layer_type(num_visible_units)
        hid_layer = layer_type(num_hidden_units)
        rbm = model.Model([vis_layer, hid_layer])

        # randomly set the intrinsic model parameters
        a = be.rand((num_visible_units, ))
        b = be.rand((num_hidden_units, ))
        W = 10 * be.rand((num_visible_units, num_hidden_units))

        rbm.layers[0].params.loc[:] = a
        rbm.layers[1].params.loc[:] = b
        rbm.weights[0].params.matrix[:] = W

        if layer_type == layers.GaussianLayer:
            log_var_a = be.randn((num_visible_units, ))
            log_var_b = be.randn((num_hidden_units, ))
            rbm.layers[0].params.log_var[:] = log_var_a
            rbm.layers[1].params.log_var[:] = log_var_b

        # initialize a state
        state = State.from_model(1, rbm)
        dropout = State.dropout_rescale(rbm)

        # set up a calculator for the moments
        moments = mu.MeanVarianceArrayCalculator()

        for _ in range(steps):
            moments.update(rbm.layers[0].conditional_sample(
                rbm._connected_rescaled_units(0, state, dropout),
                rbm._connected_weights(0)))

        model_mean = rbm.layers[0].conditional_mean(
            rbm._connected_rescaled_units(0, state, dropout),
            rbm._connected_weights(0))

        ave = moments.mean

        close = be.allclose(ave, model_mean[0], rtol=mean_tol, atol=mean_tol)
        assert close, "{} conditional mean".format(layer_type)

        if layer_type == layers.GaussianLayer:
            model_mean, model_var = rbm.layers[0]._conditional_params(
                rbm._connected_rescaled_units(0, state, dropout),
                rbm._connected_weights(0))

            close = be.allclose(be.sqrt(moments.var),
                                be.sqrt(model_var[0]),
                                rtol=mean_tol,
                                atol=mean_tol)
            assert close, "{} conditional standard deviation".format(
                layer_type)
Example #18
def test_grbm_config():
    vis_layer = layers.BernoulliLayer(num_vis)
    hid_layer = layers.GaussianLayer(num_hid)
    grbm = model.Model([vis_layer, hid_layer])
    grbm.get_config()
Example #19
def test_hopfield_construction():
    vis_layer = layers.BernoulliLayer(num_vis)
    hid_layer = layers.GaussianLayer(num_hid)
    rbm = model.Model([vis_layer, hid_layer])
Example #20
def test_rbm_construction():
    vis_layer = layers.BernoulliLayer(num_vis)
    hid_layer = layers.BernoulliLayer(num_hid)
    rbm = model.Model([vis_layer, hid_layer])
Example #21
def test_rbm(paysage_path=None):

    num_hidden_units = 50
    batch_size = 50
    num_epochs = 1
    learning_rate = schedules.PowerLawDecay(initial=0.01, coefficient=0.1)
    mc_steps = 1

    if not paysage_path:
        paysage_path = os.path.dirname(
            os.path.dirname(os.path.abspath(__file__)))
    filepath = os.path.join(paysage_path, 'mnist', 'mnist.h5')

    if not os.path.exists(filepath):
        raise IOError(
            "{} does not exist. run mnist/download_mnist.py to fetch from the web"
            .format(filepath))

    shuffled_filepath = os.path.join(paysage_path, 'mnist',
                                     'shuffled_mnist.h5')

    # shuffle the data
    if not os.path.exists(shuffled_filepath):
        shuffler = batch.DataShuffler(filepath, shuffled_filepath, complevel=0)
        shuffler.shuffle()

    # set a seed for the random number generator
    be.set_seed()

    # set up the reader to get minibatches
    data = batch.HDFBatch(shuffled_filepath,
                          'train/images',
                          batch_size,
                          transform=pre.binarize_color,
                          train_fraction=0.99)

    # set up the model and initialize the parameters
    vis_layer = layers.BernoulliLayer(data.ncols)
    hid_layer = layers.BernoulliLayer(num_hidden_units)

    rbm = model.Model([vis_layer, hid_layer])
    rbm.initialize(data)

    # obtain initial estimate of the reconstruction error
    perf = fit.ProgressMonitor(data, metrics=['ReconstructionError'])
    untrained_performance = perf.check_progress(rbm)

    # set up the optimizer and the fit method
    opt = optimizers.RMSProp(stepsize=learning_rate)

    sampler = fit.DrivenSequentialMC.from_batch(rbm, data)

    cd = fit.SGD(rbm,
                 data,
                 opt,
                 num_epochs,
                 sampler,
                 method=fit.pcd,
                 mcsteps=mc_steps,
                 monitor=perf)

    # fit the model
    print('training with persistent contrastive divergence')
    cd.train()

    # obtain an estimate of the reconstruction error after 1 epoch
    trained_performance = perf.check_progress(rbm)

    assert (trained_performance['ReconstructionError'] <
            untrained_performance['ReconstructionError']), \
    "Reconstruction error did not decrease"

    # close the HDF5 store
    data.close()
Example #22
def run(paysage_path=None, num_epochs=10, show_plot=False):
    num_hidden_units = 100
    batch_size = 100
    learning_rate = schedules.PowerLawDecay(initial=0.012, coefficient=0.1)
    mc_steps = 1

    (_, _, shuffled_filepath) = \
            util.default_paths(paysage_path)

    # set up the reader to get minibatches
    data = batch.HDFBatch(shuffled_filepath,
                          'train/images',
                          batch_size,
                          transform=pre.binarize_color,
                          train_fraction=0.95)

    # set up the model and initialize the parameters
    vis_layer = layers.BernoulliLayer(data.ncols)
    hid_1_layer = layers.BernoulliLayer(num_hidden_units)
    hid_2_layer = layers.BernoulliLayer(num_hidden_units)
    hid_3_layer = layers.BernoulliLayer(num_hidden_units)

    rbm = model.Model([vis_layer, hid_1_layer, hid_2_layer, hid_3_layer])
    rbm.initialize(data, method='glorot_normal')

    print("Norms of the weights before training")
    util.weight_norm_histogram(rbm, show_plot=show_plot)

    # small penalties prevent the weights from consolidating
    rbm.weights[1].add_penalty({'matrix': pen.logdet_penalty(0.001)})
    rbm.weights[2].add_penalty({'matrix': pen.logdet_penalty(0.001)})

    metrics = [
        'ReconstructionError', 'EnergyDistance', 'EnergyGap', 'EnergyZscore',
        'HeatCapacity', 'WeightSparsity', 'WeightSquare'
    ]
    perf = fit.ProgressMonitor(data, metrics=metrics)

    # set up the optimizer and the fit method
    opt = optimizers.ADAM(stepsize=learning_rate)
    cd = fit.LayerwisePretrain(rbm,
                               data,
                               opt,
                               num_epochs,
                               method=fit.pcd,
                               mcsteps=mc_steps,
                               metrics=metrics)

    # fit the model
    print('training with persistent contrastive divergence')
    cd.train()

    # evaluate the model
    util.show_metrics(rbm, perf)
    valid = data.get('validate')
    util.show_reconstructions(rbm, valid, fit, show_plot, num_to_avg=10)
    util.show_fantasy_particles(rbm, valid, fit, show_plot)
    from math import sqrt
    dim = tuple([28] +
                [int(sqrt(num_hidden_units)) for _ in range(rbm.num_weights)])
    util.show_weights(rbm, show_plot, dim=dim, n_weights=16)
    util.show_one_hot_reconstructions(rbm,
                                      fit,
                                      dim=28,
                                      n_recon=16,
                                      num_to_avg=1)

    print("Norms of the weights after training")
    util.weight_norm_histogram(rbm, show_plot=show_plot)

    # close the HDF5 store
    data.close()
    print("Done")
Example #23
def run(paysage_path=None, num_epochs=10, show_plot=False):
    num_hidden_units = 256
    batch_size = 100
    learning_rate = schedules.PowerLawDecay(initial=0.01, coefficient=0.1)
    mc_steps = 1

    (_, _, shuffled_filepath) = \
            util.default_paths(paysage_path)

    # set up the reader to get minibatches
    import pandas
    # .values replaces the deprecated DataFrame.as_matrix()
    data = batch.InMemoryBatch(pre.binarize_color(
        be.float_tensor(
            pandas.read_hdf(shuffled_filepath,
                            key='train/images').values)),
                               batch_size,
                               train_fraction=0.95)

    # set up the model and initialize the parameters
    vis_layer = layers.BernoulliLayer(data.ncols)
    hid_layer = layers.BernoulliLayer(num_hidden_units)

    rbm = model.Model([vis_layer, hid_layer])
    rbm.weights[0].add_penalty({'matrix': pen.l2_penalty(0.001)})
    rbm.initialize(data, method='glorot_normal')

    metrics = [
        'ReconstructionError', 'EnergyDistance', 'EnergyGap', 'EnergyZscore',
        'HeatCapacity', 'WeightSparsity', 'WeightSquare'
    ]
    perf = fit.ProgressMonitor(data, metrics=metrics)

    # set up the optimizer and the fit method
    opt = optimizers.ADAM(stepsize=learning_rate)

    sampler = fit.DrivenSequentialMC.from_batch(rbm, data)

    cd = fit.SGD(rbm,
                 data,
                 opt,
                 num_epochs,
                 sampler,
                 method=fit.pcd,
                 mcsteps=mc_steps,
                 monitor=perf)

    # fit the model
    print('training with persistent contrastive divergence')
    cd.train()

    # evaluate the model
    util.show_metrics(rbm, perf)
    valid = data.get('validate')
    util.show_reconstructions(rbm,
                              valid,
                              fit,
                              show_plot,
                              n_recon=10,
                              vertical=False,
                              num_to_avg=10)
    util.show_fantasy_particles(rbm, valid, fit, show_plot, n_fantasy=25)
    util.show_weights(rbm, show_plot, n_weights=25)

    # close the HDF5 store
    data.close()
    print("Done")
Example #24
def test_gaussian_derivatives():
    num_visible_units = 100
    num_hidden_units = 50
    batch_size = 25

    # set a seed for the random number generator
    be.set_seed()

    # set up some layer and model objects
    vis_layer = layers.GaussianLayer(num_visible_units)
    hid_layer = layers.GaussianLayer(num_hidden_units)
    rbm = model.Model([vis_layer, hid_layer])

    # randomly set the intrinsic model parameters
    a = be.randn((num_visible_units, ))
    b = be.randn((num_hidden_units, ))
    log_var_a = 0.1 * be.randn((num_visible_units, ))
    log_var_b = 0.1 * be.randn((num_hidden_units, ))
    W = be.randn((num_visible_units, num_hidden_units))

    rbm.layers[0].params.loc[:] = a
    rbm.layers[1].params.loc[:] = b
    rbm.layers[0].params.log_var[:] = log_var_a
    rbm.layers[1].params.log_var[:] = log_var_b
    rbm.weights[0].params.matrix[:] = W

    # generate a random batch of data
    vdata = rbm.layers[0].random((batch_size, num_visible_units))
    visible_var = be.exp(log_var_a)
    vdata_scaled = vdata / visible_var

    # compute the mean of the hidden layer
    hid_mean = rbm.layers[1].conditional_mean([vdata_scaled],
                                              [rbm.weights[0].W()])
    hidden_var = be.exp(log_var_b)
    hid_mean_scaled = rbm.layers[1].rescale(hid_mean)

    # compute the derivatives
    d_vis_loc = -be.mean(vdata_scaled, axis=0)
    d_vis_logvar = -0.5 * be.mean(be.square(be.subtract(a, vdata)), axis=0)
    d_vis_logvar += be.batch_dot(
        hid_mean_scaled, be.transpose(W), vdata, axis=0) / len(vdata)
    d_vis_logvar /= visible_var

    d_hid_loc = -be.mean(hid_mean_scaled, axis=0)

    d_hid_logvar = -0.5 * be.mean(be.square(hid_mean - b), axis=0)
    d_hid_logvar += be.batch_dot(vdata_scaled, W, hid_mean,
                                 axis=0) / len(hid_mean)
    d_hid_logvar /= hidden_var

    d_W = -be.batch_outer(vdata_scaled, hid_mean_scaled) / len(vdata_scaled)

    # compute the derivatives using the layer functions
    vis_derivs = rbm.layers[0].derivatives(vdata, [hid_mean_scaled],
                                           [rbm.weights[0].W_T()])

    hid_derivs = rbm.layers[1].derivatives(hid_mean, [vdata_scaled],
                                           [rbm.weights[0].W()])

    weight_derivs = rbm.weights[0].derivatives(vdata_scaled, hid_mean_scaled)

    assert be.allclose(d_vis_loc, vis_derivs[0].loc), \
    "derivative of visible loc wrong in gaussian-gaussian rbm"

    assert be.allclose(d_hid_loc, hid_derivs[0].loc), \
    "derivative of hidden loc wrong in gaussian-gaussian rbm"

    assert be.allclose(d_vis_logvar, vis_derivs[0].log_var, rtol=1e-05, atol=1e-01), \
    "derivative of visible log_var wrong in gaussian-gaussian rbm"

    assert be.allclose(d_hid_logvar, hid_derivs[0].log_var, rtol=1e-05, atol=1e-01), \
    "derivative of hidden log_var wrong in gaussian-gaussian rbm"

    assert be.allclose(d_W, weight_derivs[0].matrix), \
    "derivative of weights wrong in gaussian-gaussian rbm"
Example #25
def test_gaussian_conditional_params():
    num_visible_units = 100
    num_hidden_units = 50
    batch_size = 25

    # set a seed for the random number generator
    be.set_seed()

    # set up some layer and model objects
    vis_layer = layers.GaussianLayer(num_visible_units)
    hid_layer = layers.GaussianLayer(num_hidden_units)
    rbm = model.Model([vis_layer, hid_layer])

    # randomly set the intrinsic model parameters
    a = be.randn((num_visible_units, ))
    b = be.randn((num_hidden_units, ))
    log_var_a = 0.1 * be.randn((num_visible_units, ))
    log_var_b = 0.1 * be.randn((num_hidden_units, ))
    W = be.randn((num_visible_units, num_hidden_units))

    rbm.layers[0].params.loc[:] = a
    rbm.layers[1].params.loc[:] = b
    rbm.layers[0].params.log_var[:] = log_var_a
    rbm.layers[1].params.log_var[:] = log_var_b
    rbm.weights[0].params.matrix[:] = W

    # generate a random batch of data
    vdata = rbm.layers[0].random((batch_size, num_visible_units))
    hdata = rbm.layers[1].random((batch_size, num_hidden_units))

    # compute the variance
    visible_var = be.exp(log_var_a)
    hidden_var = be.exp(log_var_b)

    # rescale the data
    vdata_scaled = vdata / visible_var
    hdata_scaled = hdata / hidden_var

    # test rescale
    assert be.allclose(vdata_scaled, rbm.layers[0].rescale(vdata)),\
    "visible rescale wrong in gaussian-gaussian rbm"

    assert be.allclose(hdata_scaled, rbm.layers[1].rescale(hdata)),\
    "hidden rescale wrong in gaussian-gaussian rbm"

    # compute the mean
    hidden_mean = be.dot(vdata_scaled, W)  # (batch_size, num_hidden_units)
    hidden_mean += b

    visible_mean = be.dot(hdata_scaled,
                          be.transpose(W))  # (batch_size, num_visible_units)
    visible_mean += a

    # update the conditional parameters using the layer functions
    vis_mean_func, vis_var_func = rbm.layers[0]._conditional_params(
        [hdata_scaled], [rbm.weights[0].W_T()])
    hid_mean_func, hid_var_func = rbm.layers[1]._conditional_params(
        [vdata_scaled], [rbm.weights[0].W()])

    assert be.allclose(visible_var, vis_var_func),\
    "visible variance wrong in gaussian-gaussian rbm"

    assert be.allclose(hidden_var, hid_var_func),\
    "hidden variance wrong in gaussian-gaussian rbm"

    assert be.allclose(visible_mean, vis_mean_func),\
    "visible mean wrong in gaussian-gaussian rbm"

    assert be.allclose(hidden_mean, hid_mean_func),\
    "hidden mean wrong in gaussian-gaussian rbm"