Example 1
def test_grbm_from_config():
    vis_layer = layers.BernoulliLayer(num_vis)
    hid_layer = layers.GaussianLayer(num_hid)

    grbm = BoltzmannMachine([vis_layer, hid_layer])
    config = grbm.get_config()

    rbm_from_config = BoltzmannMachine.from_config(config)
    config_from_config = rbm_from_config.get_config()
    assert config == config_from_config
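The snippets in this listing are function bodies extracted from paysage's test suite and example scripts, so module-level imports, helper utilities, and constants are not shown. A minimal sketch of the imports the examples appear to assume is below; the module paths follow the paysage package layout and may differ between releases, and names such as util, transform, Gprotein_util, num_vis, num_hid, num_samples, and samples are defined in the original example modules rather than in paysage itself.

# Assumed imports for the examples in this listing (a sketch; paths may vary by paysage version).
import os
import math
import tempfile
from copy import deepcopy
from functools import partial

import numpy
import pandas

from paysage import backends as be
from paysage import layers, batch, fit, optimizers, samplers, schedules
from paysage import preprocess as pre
from paysage import penalties as pen
from paysage import metrics as M
from paysage import math_utils as mu
from paysage.models import BoltzmannMachine
from paysage.models import gradient_util as gu
from paysage.models.state import State, StateTAP
# The monitoring classes have moved between paysage releases; recent versions
# expose them through the metrics module.
from paysage.metrics import (ProgressMonitor, ReconstructionError,
                             TAPFreeEnergy, TAPLogLikelihood)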
Example 2
def test_gaussian_1D_1mode_train():
    # create some example data
    num = 10000
    mu = 3
    sigma = 1
    samples = be.randn((num, 1)) * sigma + mu

    # set up the reader to get minibatches
    batch_size = 100
    samples_train, samples_validate = batch.split_tensor(samples, 0.9)
    data = batch.Batch({
        'train':
        batch.InMemoryTable(samples_train, batch_size),
        'validate':
        batch.InMemoryTable(samples_validate, batch_size)
    })

    # parameters
    learning_rate = schedules.PowerLawDecay(initial=0.1, coefficient=0.1)
    mc_steps = 1
    num_epochs = 10
    num_sample_steps = 100

    # set up the model and initialize the parameters
    vis_layer = layers.GaussianLayer(1)
    hid_layer = layers.OneHotLayer(1)

    rbm = BoltzmannMachine([vis_layer, hid_layer])
    rbm.initialize(data, method='hinton')

    # modify the parameters to shift the initialized model from the data
    # this forces it to train
    rbm.layers[0].params = layers.ParamsGaussian(
        rbm.layers[0].params.loc - 3, rbm.layers[0].params.log_var - 1)

    # set up the optimizer and the fit method
    opt = optimizers.ADAM(stepsize=learning_rate)
    cd = fit.SGD(rbm, data)

    # fit the model
    print('training with persistent contrastive divergence')
    cd.train(opt, num_epochs, method=fit.pcd, mcsteps=mc_steps)

    # sample data from the trained model
    model_state = \
        samplers.SequentialMC.generate_fantasy_state(rbm, num, num_sample_steps)
    pts_trained = model_state[0]

    percent_error = 10
    mu_trained = be.mean(pts_trained)
    assert numpy.abs(mu_trained / mu - 1) < (percent_error / 100)

    sigma_trained = numpy.sqrt(be.var(pts_trained))
    assert numpy.abs(sigma_trained / sigma - 1) < (percent_error / 100)
Example 3
def run(num_epochs=10, show_plot=False):

    num_hidden_units = 256
    batch_size = 100
    learning_rate = schedules.PowerLawDecay(initial=0.001, coefficient=0.1)
    mc_steps = 1

    # set up the reader to get minibatches
    data = util.create_batch(batch_size,
                             train_fraction=0.95,
                             transform=transform)

    # set up the model and initialize the parameters
    vis_layer = layers.GaussianLayer(data.ncols)
    hid_layer = layers.BernoulliLayer(num_hidden_units)

    rbm = BoltzmannMachine([vis_layer, hid_layer])
    rbm.initialize(data, 'stddev')
    rbm.layers[0].params.log_var[:] = \
      be.log(0.05*be.ones_like(rbm.layers[0].params.log_var))

    opt = optimizers.ADAM(stepsize=learning_rate)

    # This example parameter set for TAP uses gradient descent to optimize the
    # Gibbs free energy:
    tap = fit.TAP(True, 1.0, 0.01, 100, False, 0.9, 0.001, 0.5)

    # This example parameter set for TAP uses self-consistent iteration to
    # optimize the Gibbs free energy:
    #tap = fit.TAP(False, tolerance=0.001, max_iters=100)
    sgd = fit.SGD(rbm, data)
    sgd.monitor.generator_metrics.append(TAPFreeEnergy())
    sgd.monitor.generator_metrics.append(TAPLogLikelihood())

    # fit the model
    print('Training with stochastic gradient ascent using TAP expansion')
    sgd.train(opt, num_epochs, method=tap.tap_update, mcsteps=mc_steps)

    util.show_metrics(rbm, sgd.monitor)
    valid = data.get('validate')
    util.show_reconstructions(rbm,
                              valid,
                              show_plot,
                              n_recon=10,
                              vertical=False,
                              num_to_avg=10)
    util.show_fantasy_particles(rbm, valid, show_plot, n_fantasy=5)
    util.show_weights(rbm, show_plot, n_weights=25)
    # close the HDF5 store
    data.close()
    print("Done")
Example 4
def test_grbm_reload():
    vis_layer = layers.BernoulliLayer(num_vis, center=True)
    hid_layer = layers.GaussianLayer(num_hid, center=True)

    # create some extrinsics
    grbm = BoltzmannMachine([vis_layer, hid_layer])
    data = batch.Batch({
        'train':
        batch.InMemoryTable(be.randn((10 * num_samples, num_vis)), num_samples)
    })
    grbm.initialize(data)
    with tempfile.NamedTemporaryFile() as file:
        # save the model
        store = pandas.HDFStore(file.name, mode='w')
        grbm.save(store)
        store.close()
        # reload
        store = pandas.HDFStore(file.name, mode='r')
        grbm_reload = BoltzmannMachine.from_saved(store)
        store.close()
    # check the two models are consistent
    vis_data = vis_layer.random((num_samples, num_vis))
    data_state = State.from_visible(vis_data, grbm)
    vis_orig = grbm.deterministic_iteration(1, data_state)[0]
    vis_reload = grbm_reload.deterministic_iteration(1, data_state)[0]
    assert be.allclose(vis_orig, vis_reload)
    assert be.allclose(grbm.layers[0].moments.mean,
                       grbm_reload.layers[0].moments.mean)
    assert be.allclose(grbm.layers[0].moments.var,
                       grbm_reload.layers[0].moments.var)
    assert be.allclose(grbm.layers[1].moments.mean,
                       grbm_reload.layers[1].moments.mean)
    assert be.allclose(grbm.layers[1].moments.var,
                       grbm_reload.layers[1].moments.var)
Example 5
def run(num_epochs=10, show_plot=False):
    num_hidden_units = 100
    batch_size = 100
    mc_steps = 10
    beta_std = 0.6

    # set up the reader to get minibatches
    with util.create_batch(batch_size,
                           train_fraction=0.95,
                           transform=transform) as data:

        # set up the model and initialize the parameters
        vis_layer = layers.BernoulliLayer(data.ncols)
        hid_layer = layers.BernoulliLayer(num_hidden_units, center=False)

        rbm = BoltzmannMachine([vis_layer, hid_layer])
        rbm.connections[0].weights.add_penalty(
            {'matrix': pen.l2_penalty(0.001)})
        rbm.initialize(data, method='pca')

        print('training with persistent contrastive divergence')
        cd = fit.SGD(rbm, data)

        learning_rate = schedules.PowerLawDecay(initial=0.01, coefficient=0.1)
        opt = optimizers.ADAM(stepsize=learning_rate)

        cd.train(opt, num_epochs, mcsteps=mc_steps, method=fit.pcd)
        util.show_metrics(rbm, cd.monitor)

        # evaluate the model
        valid = data.get('validate')
        util.show_reconstructions(rbm,
                                  valid,
                                  show_plot,
                                  n_recon=10,
                                  vertical=False,
                                  num_to_avg=10)
        util.show_fantasy_particles(rbm,
                                    valid,
                                    show_plot,
                                    n_fantasy=5,
                                    beta_std=beta_std,
                                    fantasy_steps=100)

        util.show_weights(rbm, show_plot, n_weights=100)
        print("Done")

    return rbm
Example 6
def test_bernoulli_GFE_derivatives():
    # Tests that a parameter update along the GFE gradient beats each of 100
    # random update vectors of the same norm, i.e. it never yields a higher
    # GFE than the random update does
    num_units = 5

    layer_1 = layers.BernoulliLayer(num_units)
    layer_2 = layers.BernoulliLayer(num_units)
    layer_3 = layers.BernoulliLayer(num_units)
    rbm = BoltzmannMachine([layer_1, layer_2, layer_3])

    for i in range(len(rbm.connections)):
        rbm.connections[i].weights.params.matrix[:] = \
        0.01 * be.randn(rbm.connections[i].shape)

    for lay in rbm.layers:
        lay.params.loc[:] = be.rand_like(lay.params.loc)

    state, cop1_GFE = rbm.compute_StateTAP(init_lr=0.1, tol=1e-7, max_iters=50)
    grad = rbm._grad_gibbs_free_energy(state)
    gu.grad_normalize_(grad)

    for i in range(100):
        lr = 1.0
        gogogo = True
        random_grad = gu.random_grad(rbm)
        gu.grad_normalize_(random_grad)
        while gogogo:
            cop1 = deepcopy(rbm)
            lr_mul = partial(be.tmul, lr)

            cop1.parameter_update(gu.grad_apply(lr_mul, grad))
            cop1_state, cop1_GFE = cop1.compute_StateTAP(init_lr=0.1, tol=1e-7, max_iters=50)

            cop2 = deepcopy(rbm)
            cop2.parameter_update(gu.grad_apply(lr_mul, random_grad))
            cop2_state, cop2_GFE = cop2.compute_StateTAP(init_lr=0.1, tol=1e-7, max_iters=50)

            regress = cop2_GFE - cop1_GFE < 0.0

            if regress:
                if lr < 1e-6:
                    assert False, \
                    "TAP FE gradient is not working properly for Bernoulli models"
                    break
                else:
                    lr *= 0.5
            else:
                break
Example 7
def test_state_for_grad_DrivenSequentialMC():
    num_visible_units = 100
    num_hidden_units = 50
    batch_size = 25

    # set a seed for the random number generator
    be.set_seed()

    # set up some layer and model objects
    vis_layer = layers.BernoulliLayer(num_visible_units)
    hid_layer = layers.BernoulliLayer(num_hidden_units)
    rbm = BoltzmannMachine([vis_layer, hid_layer])

    # randomly set the intrinsic model parameters
    a = be.randn((num_visible_units,))
    b = be.randn((num_hidden_units,))
    W = be.randn((num_visible_units, num_hidden_units))

    rbm.layers[0].params.loc[:] = a
    rbm.layers[1].params.loc[:] = b
    rbm.connections[0].weights.params.matrix[:] = W

    # generate a random batch of data
    vdata = rbm.layers[0].random((batch_size, num_visible_units))
    data_state = State.from_visible(vdata, rbm)

    for u in ['markov_chain', 'mean_field_iteration', 'deterministic_iteration']:
        # set up the sampler
        sampler = samplers.SequentialMC(rbm, updater=u, clamped=[0])
        sampler.set_state(data_state)

        # update the state of the hidden layer
        grad_state = sampler.state_for_grad(1)

        assert be.allclose(data_state[0], grad_state[0]), \
        "visible layer is clamped, and shouldn't get updated: {}".format(u)

        assert not be.allclose(data_state[1], grad_state[1]), \
        "hidden layer is not clamped, and should get updated: {}".format(u)

        # compute the conditional mean with the layer function
        ave = rbm.layers[1].conditional_mean(
                rbm._connected_rescaled_units(1, data_state),
                rbm._connected_weights(1))

        assert be.allclose(ave, grad_state[1]), \
        "hidden layer of grad_state should be conditional mean: {}".format(u)
Example 8
def test_gaussian_GFE_derivatives_gradient_descent():
    num_units = 5

    layer_1 = layers.GaussianLayer(num_units)
    layer_2 = layers.BernoulliLayer(num_units)

    rbm = BoltzmannMachine([layer_1, layer_2])

    for i in range(len(rbm.connections)):
        rbm.connections[i].weights.params.matrix[:] = \
        0.01 * be.randn(rbm.connections[i].shape)

    for lay in rbm.layers:
        lay.params.loc[:] = be.rand_like(lay.params.loc)

    state, GFE = rbm.compute_StateTAP(use_GD=False, tol=1e-7, max_iters=50)
    grad = rbm._grad_gibbs_free_energy(state)
    gu.grad_normalize_(grad)

    for i in range(100):
        lr = 0.001
        gogogo = True
        random_grad = gu.random_grad(rbm)
        gu.grad_normalize_(random_grad)
        while gogogo:
            cop1 = deepcopy(rbm)
            lr_mul = partial(be.tmul, lr)

            cop1.parameter_update(gu.grad_apply(lr_mul, grad))
            cop1_state, cop1_GFE = cop1.compute_StateTAP(use_GD=False, tol=1e-7, max_iters=50)

            cop2 = deepcopy(rbm)
            cop2.parameter_update(gu.grad_apply(lr_mul, random_grad))
            cop2_state, cop2_GFE = cop2.compute_StateTAP(use_GD=False, tol=1e-7, max_iters=50)

            regress = cop2_GFE - cop1_GFE < 0

            if regress:
                if lr < 1e-6:
                    assert False, \
                    "TAP FE gradient is not working properly for Gaussian models"
                    break
                else:
                    lr *= 0.5
            else:
                break
Example 9
def run(num_epochs=5, show_plot=False):

    num_hidden_units = 256
    batch_size = 100
    learning_rate = schedules.PowerLawDecay(initial=0.1, coefficient=3.0)
    mc_steps = 1

    # set up the reader to get minibatches
    data = util.create_batch(batch_size,
                             train_fraction=0.95,
                             transform=transform)

    # set up the model and initialize the parameters
    vis_layer = layers.BernoulliLayer(data.ncols)
    hid_layer = layers.BernoulliLayer(num_hidden_units)

    rbm = BoltzmannMachine([vis_layer, hid_layer])
    rbm.connections[0].weights.add_penalty(
        {'matrix': pen.l1_adaptive_decay_penalty_2(0.00001)})
    rbm.initialize(data, 'glorot_normal')

    opt = optimizers.Gradient(stepsize=learning_rate, tolerance=1e-4)

    tap = fit.TAP(True, 0.1, 0.01, 25, True, 0.5, 0.001, 0.0)
    sgd = fit.SGD(rbm, data)
    sgd.monitor.generator_metrics.append(TAPLogLikelihood())
    sgd.monitor.generator_metrics.append(TAPFreeEnergy())

    # fit the model
    print('Training with stochastic gradient ascent using TAP expansion')
    sgd.train(opt, num_epochs, method=tap.tap_update, mcsteps=mc_steps)

    util.show_metrics(rbm, sgd.monitor)
    valid = data.get('validate')
    util.show_reconstructions(rbm,
                              valid,
                              show_plot,
                              n_recon=10,
                              vertical=False,
                              num_to_avg=10)
    util.show_fantasy_particles(rbm, valid, show_plot, n_fantasy=5)
    util.show_weights(rbm, show_plot, n_weights=25)
    # close the HDF5 store
    data.close()
    print("Done")
Example 10
def run(pretrain_epochs=5, finetune_epochs=5, fit_method=fit.LayerwisePretrain,
        show_plot=False):
    num_hidden_units = [20**2, 15**2, 10**2]
    batch_size = 100
    mc_steps = 5
    beta_std = 0.6

    # set up the reader to get minibatches
    data = util.create_batch(batch_size, train_fraction=0.95, transform=transform)

    # set up the model and initialize the parameters
    vis_layer = layers.BernoulliLayer(data.ncols)
    hid_layer = [layers.BernoulliLayer(n) for n in num_hidden_units]
    rbm = BoltzmannMachine([vis_layer] + hid_layer)

    # add some penalties
    for c in rbm.connections:
        c.weights.add_penalty({"matrix": pen.l1_adaptive_decay_penalty_2(1e-4)})

    print("Norms of the weights before training")
    util.weight_norm_histogram(rbm, show_plot=show_plot)

    print('pre-training with persistent contrastive divergence')
    cd = fit_method(rbm, data)
    learning_rate = schedules.PowerLawDecay(initial=5e-3, coefficient=1)
    opt = optimizers.ADAM(stepsize=learning_rate)
    cd.train(opt, pretrain_epochs, method=fit.pcd, mcsteps=mc_steps,
             init_method="glorot_normal")

    util.show_weights(rbm, show_plot, n_weights=16)

    print('fine tuning')
    cd = fit.StochasticGradientDescent(rbm, data)
    cd.monitor.generator_metrics.append(M.JensenShannonDivergence())

    learning_rate = schedules.PowerLawDecay(initial=1e-3, coefficient=1)
    opt = optimizers.ADAM(stepsize=learning_rate)
    cd.train(opt, finetune_epochs, mcsteps=mc_steps, beta_std=beta_std)
    util.show_metrics(rbm, cd.monitor)

    # evaluate the model
    valid = data.get('validate')
    util.show_reconstructions(rbm, valid, show_plot, num_to_avg=10)
    util.show_fantasy_particles(rbm, valid, show_plot, n_fantasy=10,
                                beta_std=beta_std, fantasy_steps=100)

    util.show_weights(rbm, show_plot, n_weights=16)

    print("Norms of the weights after training")

    util.weight_norm_histogram(rbm, show_plot=show_plot)

    # close the HDF5 store
    data.close()
    print("Done")

    return rbm
Example 11
    def run(num_epochs=1, show_plot=False):
        num_hidden_units = 1
        batch_size = 100
        mc_steps = 10
        beta_std = 0.6

        # set up the reader to get minibatches
        with batch.in_memory_batch(samples, batch_size,
                                   train_fraction=0.95) as data:

            # set up the model and initialize the parameters
            vis_layer = layers.BernoulliLayer(data.ncols)
            hid_layer = layers.BernoulliLayer(num_hidden_units, center=False)
            rbm = BoltzmannMachine([vis_layer, hid_layer])

            rbm.connections[0].weights.add_penalty(
                {'matrix': pen.l2_penalty(0.001)})  # Add regularization term

            rbm.initialize(data, method='hinton')  # Initialize weights

            cd = fit.SGD(rbm, data)
            learning_rate = schedules.PowerLawDecay(initial=0.01,
                                                    coefficient=0.1)
            opt = optimizers.ADAM(stepsize=learning_rate)

            print("Train the model...")
            cd.train(opt,
                     num_epochs,
                     mcsteps=mc_steps,
                     method=fit.pcd,
                     verbose=False)
            '''
            # write on file KL divergences
            reverse_KL_div = [ cd.monitor.memory[i]['ReverseKLDivergence'] for i in range(0,len(cd.monitor.memory)) ]
            KL_div = [ cd.monitor.memory[i]['KLDivergence'] for i in range(0,len(cd.monitor.memory)) ]
            for i in range(0,len(cd.monitor.memory)):
            	out_file1.write(str(KL_div[i])+" "+str(reverse_KL_div[i])+"\n")
            out_file1.close()

            # save weights on file
            filename = "results/weights/weights-"+temperature[:-4]+".jpg"
            Gprotein_util.show_weights(rbm, show_plot=False, n_weights=8, Filename=filename, random=False)
            '''
        return rbm
Example 12
def run(num_epochs=10, show_plot=False):

    num_hidden_units = 256
    batch_size = 100
    learning_rate = schedules.PowerLawDecay(initial=0.001, coefficient=0.1)
    mc_steps = 1

    # set up the reader to get minibatches
    data = util.create_batch(batch_size,
                             train_fraction=0.95,
                             transform=transform)

    # set up the model and initialize the parameters
    vis_layer = layers.BernoulliLayer(data.ncols)
    hid_layer = layers.GaussianLayer(num_hidden_units)

    rbm = BoltzmannMachine([vis_layer, hid_layer])
    rbm.initialize(data)

    # set up the optimizer and the fit method
    opt = optimizers.ADAM(stepsize=learning_rate)
    cd = fit.SGD(rbm, data)

    # fit the model
    print('training with persistent contrastive divergence')
    cd.train(opt, num_epochs, method=fit.pcd, mcsteps=mc_steps)

    # evaluate the model
    util.show_metrics(rbm, cd.monitor)
    valid = data.get('validate')
    util.show_reconstructions(rbm,
                              valid,
                              show_plot,
                              n_recon=10,
                              vertical=False,
                              num_to_avg=10)
    util.show_fantasy_particles(rbm, valid, show_plot, n_fantasy=5)
    util.show_weights(rbm, show_plot, n_weights=25)

    # close the HDF5 store
    data.close()
    print("Done")
Example 13
def test_gaussian_Compute_StateTAP_GD():
    num_units = 10

    layer_1 = layers.GaussianLayer(num_units)
    layer_2 = layers.BernoulliLayer(num_units)

    rbm = BoltzmannMachine([layer_1, layer_2])
    for i in range(len(rbm.connections)):
        rbm.connections[i].weights.params.matrix[:] = \
        0.01 * be.randn(rbm.connections[i].shape)

    for lay in rbm.layers:
        lay.params.loc[:] = be.rand_like(lay.params.loc)

    for i in range(100):
        random_state = StateTAP.from_model_rand(rbm)
        GFE = rbm.gibbs_free_energy(random_state.cumulants)
        _,min_GFE = rbm._compute_StateTAP_GD(seed=random_state)

        if GFE - min_GFE < 0.0:
            assert False, \
                "compute_StateTAP_self_consistent is not reducing the GFE"
Example 14
def run(num_epochs=20, show_plot=False):
    num_hidden_units = 200
    batch_size = 100
    mc_steps = 10
    beta_std = 0.95

    # set up the reader to get minibatches
    data = util.create_batch(batch_size, train_fraction=0.95, transform=transform)

    # set up the model and initialize the parameters
    vis_layer = layers.GaussianLayer(data.ncols, center=False)
    hid_layer = layers.BernoulliLayer(num_hidden_units, center=True)
    hid_layer.set_fixed_params(hid_layer.get_param_names())

    rbm = BoltzmannMachine([vis_layer, hid_layer])
    rbm.initialize(data, 'pca', epochs = 500, verbose=True)

    print('training with persistent contrastive divergence')
    cd = fit.SGD(rbm, data, fantasy_steps=10)
    cd.monitor.generator_metrics.append(M.JensenShannonDivergence())

    learning_rate = schedules.PowerLawDecay(initial=1e-3, coefficient=5)
    opt = optimizers.ADAM(stepsize=learning_rate)
    cd.train(opt, num_epochs, method=fit.pcd, mcsteps=mc_steps,
             beta_std=beta_std, burn_in=1)

    # evaluate the model
    util.show_metrics(rbm, cd.monitor)
    valid = data.get('validate')
    util.show_reconstructions(rbm, valid, show_plot, n_recon=10, vertical=False)
    util.show_fantasy_particles(rbm, valid, show_plot, n_fantasy=5)
    util.show_weights(rbm, show_plot, n_weights=100)

    # close the HDF5 store
    data.close()
    print("Done")

    return rbm
Example 15
def test_random_grad():
    num_visible_units = 100
    num_hidden_units = 50

    # set a seed for the random number generator
    be.set_seed()

    # set up some layer and model objects
    vis_layer = layers.BernoulliLayer(num_visible_units)
    hid_layer = layers.BernoulliLayer(num_hidden_units)
    rbm = BoltzmannMachine([vis_layer, hid_layer])

    # create a gradient object filled with random numbers
    gu.random_grad(rbm)
Example 16
def test_grad_normalize_():
    num_visible_units = 10
    num_hidden_units = 10

    # set a seed for the random number generator
    be.set_seed()

    # set up some layer and model objects
    vis_layer = layers.BernoulliLayer(num_visible_units)
    hid_layer = layers.BernoulliLayer(num_hidden_units)
    rbm = BoltzmannMachine([vis_layer, hid_layer])

    # create a gradient object filled with random numbers
    grad = gu.random_grad(rbm)
    gu.grad_normalize_(grad)
    nrm = gu.grad_norm(grad)
    assert nrm > 1-1e-6
    assert nrm < 1+1e-6
Example 17
def test_grbm_save():
    vis_layer = layers.BernoulliLayer(num_vis, center=True)
    hid_layer = layers.GaussianLayer(num_hid, center=True)
    grbm = BoltzmannMachine([vis_layer, hid_layer])
    data = batch.Batch({
        'train':
        batch.InMemoryTable(be.randn((10 * num_samples, num_vis)), num_samples)
    })
    grbm.initialize(data)
    with tempfile.NamedTemporaryFile() as file:
        store = pandas.HDFStore(file.name, mode='w')
        grbm.save(store)
        store.close()
Example 18
def test_onehot_conditional_params():
    num_visible_units = 100
    num_hidden_units = 50
    batch_size = 25

    # set a seed for the random number generator
    be.set_seed()

    # set up some layer and model objects
    vis_layer = layers.OneHotLayer(num_visible_units)
    hid_layer = layers.OneHotLayer(num_hidden_units)
    rbm = BoltzmannMachine([vis_layer, hid_layer])

    # randomly set the intrinsic model parameters
    a = be.randn((num_visible_units,))
    b = be.randn((num_hidden_units,))
    W = be.randn((num_visible_units, num_hidden_units))

    rbm.layers[0].params.loc[:] = a
    rbm.layers[1].params.loc[:] = b
    rbm.connections[0].weights.params.matrix[:] = W

    # generate a random batch of data
    vdata = rbm.layers[0].random((batch_size, num_visible_units))
    hdata = rbm.layers[1].random((batch_size, num_hidden_units))

    # compute conditional parameters
    hidden_field = be.dot(vdata, W) # (batch_size, num_hidden_units)
    hidden_field += b

    visible_field = be.dot(hdata, be.transpose(W)) # (batch_size, num_visible_units)
    visible_field += a

    # compute conditional parameters with layer functions
    hidden_field_layer = rbm.layers[1].conditional_params(
        [vdata], [rbm.connections[0].W()])
    visible_field_layer = rbm.layers[0].conditional_params(
        [hdata], [rbm.connections[0].W(trans=True)])

    assert be.allclose(hidden_field, hidden_field_layer), \
    "hidden field wrong in onehot-onehot rbm"

    assert be.allclose(visible_field, visible_field_layer), \
    "visible field wrong in onehot-onehot rbm"
Example 19
def test_grad_norm():
    num_visible_units = 1000
    num_hidden_units = 1000

    # set a seed for the random number generator
    be.set_seed()

    # set up some layer and model objects
    vis_layer = layers.BernoulliLayer(num_visible_units)
    hid_layer = layers.BernoulliLayer(num_hidden_units)
    rbm = BoltzmannMachine([vis_layer, hid_layer])

    # create a gradient object filled with random numbers
    grad = gu.random_grad(rbm)
    nrm = gu.grad_norm(grad)
    assert nrm > math.sqrt(be.float_scalar(num_hidden_units + \
        num_visible_units + num_visible_units*num_hidden_units)/3) - 1
    assert nrm < math.sqrt(be.float_scalar(num_hidden_units + \
        num_visible_units + num_visible_units*num_hidden_units)/3) + 1
Example 20
def test_clamped_DrivenSequentialMC():
    num_visible_units = 100
    num_hidden_units = 50
    batch_size = 25
    steps = 1

    # set a seed for the random number generator
    be.set_seed()

    # set up some layer and model objects
    vis_layer = layers.BernoulliLayer(num_visible_units)
    hid_layer = layers.BernoulliLayer(num_hidden_units)
    rbm = BoltzmannMachine([vis_layer, hid_layer])

    # randomly set the intrinsic model parameters
    a = be.randn((num_visible_units,))
    b = be.randn((num_hidden_units,))
    W = be.randn((num_visible_units, num_hidden_units))

    rbm.layers[0].params.loc[:] = a
    rbm.layers[1].params.loc[:] = b
    rbm.connections[0].weights.params.matrix[:] = W

    # generate a random batch of data
    vdata = rbm.layers[0].random((batch_size, num_visible_units))
    data_state = State.from_visible(vdata, rbm)

    for u in ['markov_chain', 'mean_field_iteration', 'deterministic_iteration']:

        # set up the sampler with the visible layer clamped
        sampler = samplers.SequentialMC(rbm, updater=u, clamped=[0])
        sampler.set_state(data_state)

        # update the sampler state and check the output
        sampler.update_state(steps)

        assert be.allclose(data_state[0], sampler.state[0]), \
        "visible layer is clamped, and shouldn't get updated: {}".format(u)

        assert not be.allclose(data_state[1], sampler.state[1]), \
        "hidden layer is not clamped, and should get updated: {}".format(u)
Example 21
def test_rbm(paysage_path=None):

    num_hidden_units = 50
    batch_size = 50
    num_epochs = 1
    learning_rate = schedules.PowerLawDecay(initial=0.01, coefficient=0.1)
    mc_steps = 1

    if not paysage_path:
        paysage_path = os.path.dirname(
            os.path.dirname(os.path.abspath(__file__)))
    filepath = os.path.join(paysage_path, 'examples', 'mnist', 'mnist.h5')

    if not os.path.exists(filepath):
        raise IOError(
            "{} does not exist. run mnist/download_mnist.py to fetch from the web"
            .format(filepath))

    shuffled_filepath = os.path.join(paysage_path, 'examples', 'mnist',
                                     'shuffled_mnist.h5')

    # shuffle the data
    if not os.path.exists(shuffled_filepath):
        shuffler = batch.DataShuffler(filepath, shuffled_filepath, complevel=0)
        shuffler.shuffle()

    # set a seed for the random number generator
    be.set_seed()

    import pandas
    samples = pre.binarize_color(
        be.float_tensor(
            pandas.read_hdf(shuffled_filepath,
                            key='train/images').values[:10000]))
    samples_train, samples_validate = batch.split_tensor(samples, 0.95)
    data = batch.Batch({
        'train':
        batch.InMemoryTable(samples_train, batch_size),
        'validate':
        batch.InMemoryTable(samples_validate, batch_size)
    })

    # set up the model and initialize the parameters
    vis_layer = layers.BernoulliLayer(data.ncols)
    hid_layer = layers.BernoulliLayer(num_hidden_units)

    rbm = BoltzmannMachine([vis_layer, hid_layer])
    rbm.initialize(data)

    # obtain initial estimate of the reconstruction error
    perf = ProgressMonitor()
    untrained_performance = perf.epoch_update(data,
                                              rbm,
                                              store=True,
                                              show=False)

    # set up the optimizer and the fit method
    opt = optimizers.RMSProp(stepsize=learning_rate)
    cd = fit.SGD(rbm, data)

    # fit the model
    print('training with contrastive divergence')
    cd.train(opt, num_epochs, method=fit.pcd, mcsteps=mc_steps)

    # obtain an estimate of the reconstruction error after 1 epoch
    trained_performance = cd.monitor.memory[-1]

    assert (trained_performance['ReconstructionError'] <
            untrained_performance['ReconstructionError']), \
    "Reconstruction error did not decrease"

    # close the HDF5 store
    data.close()
Example 22
def test_rmb_construction():
    vis_layer = layers.BernoulliLayer(num_vis)
    hid_layer = layers.BernoulliLayer(num_hid)
    rbm = BoltzmannMachine([vis_layer, hid_layer])
Example 23
def test_hopfield_construction():
    vis_layer = layers.BernoulliLayer(num_vis)
    hid_layer = layers.GaussianLayer(num_hid)
    rbm = BoltzmannMachine([vis_layer, hid_layer])
Example 24
def test_grbm_config():
    vis_layer = layers.BernoulliLayer(num_vis)
    hid_layer = layers.GaussianLayer(num_hid)
    grbm = BoltzmannMachine([vis_layer, hid_layer])
    grbm.get_config()
Example 25
def test_onehot_derivatives():
    num_visible_units = 100
    num_hidden_units = 50
    batch_size = 25

    # set a seed for the random number generator
    be.set_seed()

    # set up some layer and model objects
    vis_layer = layers.OneHotLayer(num_visible_units)
    hid_layer = layers.OneHotLayer(num_hidden_units)
    rbm = BoltzmannMachine([vis_layer, hid_layer])

    # randomly set the intrinsic model parameters
    a = be.randn((num_visible_units,))
    b = be.randn((num_hidden_units,))
    W = be.randn((num_visible_units, num_hidden_units))

    rbm.layers[0].params.loc[:] = a
    rbm.layers[1].params.loc[:] = b
    rbm.connections[0].weights.params.matrix[:] = W

    # generate a random batch of data
    vdata = rbm.layers[0].random((batch_size, num_visible_units))
    vdata_scaled = rbm.layers[0].rescale(vdata)

    # compute the conditional mean of the hidden layer
    hid_mean = rbm.layers[1].conditional_mean([vdata], [rbm.connections[0].W()])
    hid_mean_scaled = rbm.layers[1].rescale(hid_mean)

    # compute the derivatives
    d_visible_loc = -be.mean(vdata, axis=0)
    d_hidden_loc = -be.mean(hid_mean_scaled, axis=0)
    d_W = -be.batch_outer(vdata, hid_mean_scaled) / len(vdata)

    # compute the derivatives using the layer functions
    vis_derivs = rbm.layers[0].derivatives(vdata, [hid_mean_scaled],
                                            [rbm.connections[0].W(trans=True)])

    hid_derivs = rbm.layers[1].derivatives(hid_mean, [vdata_scaled],
                                          [rbm.connections[0].W()])

    weight_derivs = rbm.connections[0].weights.derivatives(vdata, hid_mean_scaled)

    # compute simple weighted derivatives using the layer functions
    scale = 2
    scale_func = partial(be.multiply, be.float_scalar(scale))
    vis_derivs_scaled = rbm.layers[0].derivatives(vdata, [hid_mean_scaled],
                        [rbm.connections[0].W(trans=True)], weighting_function=scale_func)

    hid_derivs_scaled = rbm.layers[1].derivatives(hid_mean, [vdata_scaled],
                          [rbm.connections[0].W()], weighting_function=scale_func)

    weight_derivs_scaled = rbm.connections[0].weights.derivatives(vdata, hid_mean_scaled,
                                                weighting_function=scale_func)

    assert be.allclose(d_visible_loc, vis_derivs[0].loc), \
    "derivative of visible loc wrong in onehot-onehot rbm"

    assert be.allclose(d_hidden_loc, hid_derivs[0].loc), \
    "derivative of hidden loc wrong in onehot-onehot rbm"

    assert be.allclose(d_W, weight_derivs[0].matrix), \
    "derivative of weights wrong in onehot-onehot rbm"

    assert be.allclose(scale * d_visible_loc, vis_derivs_scaled[0].loc), \
    "weighted derivative of visible loc wrong in onehot-onehot rbm"

    assert be.allclose(scale * d_hidden_loc, hid_derivs_scaled[0].loc), \
    "weighted derivative of hidden loc wrong in onehot-onehot rbm"

    assert be.allclose(scale * d_W, weight_derivs_scaled[0].matrix), \
    "weighted derivative of weights wrong in onehot-onehot rbm"
Example 26
def test_gaussian_derivatives():
    num_visible_units = 100
    num_hidden_units = 50
    batch_size = 25

    # set a seed for the random number generator
    be.set_seed()

    # set up some layer and model objects
    vis_layer = layers.GaussianLayer(num_visible_units)
    hid_layer = layers.GaussianLayer(num_hidden_units)
    rbm = BoltzmannMachine([vis_layer, hid_layer])

    # randomly set the intrinsic model parameters
    a = be.randn((num_visible_units,))
    b = be.randn((num_hidden_units,))
    log_var_a = 0.1 * be.randn((num_visible_units,))
    log_var_b = 0.1 * be.randn((num_hidden_units,))
    W = be.randn((num_visible_units, num_hidden_units))

    rbm.layers[0].params.loc[:] = a
    rbm.layers[1].params.loc[:] = b
    rbm.layers[0].params.log_var[:] = log_var_a
    rbm.layers[1].params.log_var[:] = log_var_b
    rbm.connections[0].weights.params.matrix[:] = W

    # generate a random batch of data
    vdata = rbm.layers[0].random((batch_size, num_visible_units))
    visible_var = be.exp(log_var_a)
    vdata_scaled = vdata / visible_var

    # compute the mean of the hidden layer
    hid_mean = rbm.layers[1].conditional_mean(
        [vdata_scaled], [rbm.connections[0].W()])
    hidden_var = be.exp(log_var_b)
    hid_mean_scaled = rbm.layers[1].rescale(hid_mean)

    # compute the derivatives
    d_vis_loc = be.mean((a-vdata)/visible_var, axis=0)
    d_vis_logvar = -0.5 * be.mean(be.square(be.subtract(a, vdata)), axis=0)
    d_vis_logvar += be.batch_quadratic(hid_mean_scaled, be.transpose(W), vdata,
                                 axis=0) / len(vdata)
    d_vis_logvar /= visible_var

    d_hid_loc = be.mean((b-hid_mean)/hidden_var, axis=0)

    d_hid_logvar = -0.5 * be.mean(be.square(hid_mean - b), axis=0)
    d_hid_logvar += be.batch_quadratic(vdata_scaled, W, hid_mean,
                                 axis=0) / len(hid_mean)
    d_hid_logvar /= hidden_var

    d_W = -be.batch_outer(vdata_scaled, hid_mean_scaled) / len(vdata_scaled)

    # compute the derivatives using the layer functions
    vis_derivs = rbm.layers[0].derivatives(vdata, [hid_mean_scaled],
                                            [rbm.connections[0].W(trans=True)])

    hid_derivs = rbm.layers[1].derivatives(hid_mean, [vdata_scaled],
                                           [rbm.connections[0].W()])

    weight_derivs = rbm.connections[0].weights.derivatives(vdata_scaled, hid_mean_scaled)

    # compute simple weighted derivatives using the layer functions
    scale = 2
    scale_func = partial(be.multiply, be.float_scalar(scale))
    vis_derivs_scaled = rbm.layers[0].derivatives(vdata, [hid_mean_scaled],
                        [rbm.connections[0].W(trans=True)], weighting_function=scale_func)

    hid_derivs_scaled = rbm.layers[1].derivatives(hid_mean, [vdata_scaled],
                        [rbm.connections[0].W()], weighting_function=scale_func)

    weight_derivs_scaled = rbm.connections[0].weights.derivatives(vdata_scaled, hid_mean_scaled,
                                                weighting_function=scale_func)

    assert be.allclose(d_vis_loc, vis_derivs[0].loc), \
    "derivative of visible loc wrong in gaussian-gaussian rbm"

    assert be.allclose(d_hid_loc, hid_derivs[0].loc), \
    "derivative of hidden loc wrong in gaussian-gaussian rbm"

    assert be.allclose(d_vis_logvar, vis_derivs[0].log_var, rtol=1e-05, atol=1e-01), \
    "derivative of visible log_var wrong in gaussian-gaussian rbm"

    assert be.allclose(d_hid_logvar, hid_derivs[0].log_var, rtol=1e-05, atol=1e-01), \
    "derivative of hidden log_var wrong in gaussian-gaussian rbm"

    assert be.allclose(d_W, weight_derivs[0].matrix), \
    "derivative of weights wrong in gaussian-gaussian rbm"

    assert be.allclose(scale * d_vis_loc, vis_derivs_scaled[0].loc), \
    "weighted derivative of visible loc wrong in gaussian-gaussian rbm"

    assert be.allclose(scale * d_hid_loc, hid_derivs_scaled[0].loc), \
    "weighted derivative of hidden loc wrong in gaussian-gaussian rbm"

    assert be.allclose(scale * d_vis_logvar, vis_derivs_scaled[0].log_var, rtol=1e-05, atol=1e-01), \
    "weighted derivative of visible log_var wrong in gaussian-gaussian rbm"

    assert be.allclose(scale * d_hid_logvar, hid_derivs_scaled[0].log_var, rtol=1e-05, atol=1e-01), \
    "weighted derivative of hidden log_var wrong in gaussian-gaussian rbm"

    assert be.allclose(scale * d_W, weight_derivs_scaled[0].matrix), \
    "weighted derivative of weights wrong in gaussian-gaussian rbm"
Example 27
def test_gaussian_conditional_params():
    num_visible_units = 100
    num_hidden_units = 50
    batch_size = 25

    # set a seed for the random number generator
    be.set_seed()

    # set up some layer and model objects
    vis_layer = layers.GaussianLayer(num_visible_units)
    hid_layer = layers.GaussianLayer(num_hidden_units)
    rbm = BoltzmannMachine([vis_layer, hid_layer])

    # randomly set the intrinsic model parameters
    a = be.randn((num_visible_units,))
    b = be.randn((num_hidden_units,))
    log_var_a = 0.1 * be.randn((num_visible_units,))
    log_var_b = 0.1 * be.randn((num_hidden_units,))
    W = be.randn((num_visible_units, num_hidden_units))

    rbm.layers[0].params.loc[:] = a
    rbm.layers[1].params.loc[:] = b
    rbm.layers[0].params.log_var[:] = log_var_a
    rbm.layers[1].params.log_var[:] = log_var_b
    rbm.connections[0].weights.params.matrix[:] = W

    # generate a random batch of data
    vdata = rbm.layers[0].random((batch_size, num_visible_units))
    hdata = rbm.layers[1].random((batch_size, num_hidden_units))

    # compute the variance
    visible_var = be.exp(log_var_a)
    hidden_var = be.exp(log_var_b)

    # rescale the data
    vdata_scaled = vdata / visible_var
    hdata_scaled = hdata / hidden_var

    # test rescale
    assert be.allclose(vdata_scaled, rbm.layers[0].rescale(vdata)),\
    "visible rescale wrong in gaussian-gaussian rbm"

    assert be.allclose(hdata_scaled, rbm.layers[1].rescale(hdata)),\
    "hidden rescale wrong in gaussian-gaussian rbm"

    # compute the mean
    hidden_mean = be.dot(vdata_scaled, W) # (batch_size, num_hidden_units)
    hidden_mean += b

    visible_mean = be.dot(hdata_scaled, be.transpose(W)) # (batch_size, num_visible_units)
    visible_mean += a

    # update the conditional parameters using the layer functions
    vis_mean_func, vis_var_func = rbm.layers[0].conditional_params(
        [hdata_scaled], [rbm.connections[0].W(trans=True)])
    hid_mean_func, hid_var_func = rbm.layers[1].conditional_params(
        [vdata_scaled], [rbm.connections[0].W()])

    assert be.allclose(visible_var, vis_var_func),\
    "visible variance wrong in gaussian-gaussian rbm"

    assert be.allclose(hidden_var, hid_var_func),\
    "hidden variance wrong in gaussian-gaussian rbm"

    assert be.allclose(visible_mean, vis_mean_func),\
    "visible mean wrong in gaussian-gaussian rbm"

    assert be.allclose(hidden_mean, hid_mean_func),\
    "hidden mean wrong in gaussian-gaussian rbm"
Example 28
def test_independent():
    """
    Test sampling from an rbm with two layers connected by a weight matrix that
    contains all zeros, so that the layers are independent.

    Note:
        This test compares values estimated by *sampling* to values computed
        analytically. It can fail for small batch_size, or strict tolerances,
        even if everything is working properly.

    """
    num_visible_units = 20
    num_hidden_units = 10
    batch_size = 1000
    steps = 100
    mean_tol = 0.2
    corr_tol = 0.2

    # set a seed for the random number generator
    be.set_seed()

    layer_types = [
            layers.BernoulliLayer,
            layers.GaussianLayer]

    for layer_type in layer_types:
        # set up some layer and model objects
        vis_layer = layer_type(num_visible_units)
        hid_layer = layer_type(num_hidden_units)
        rbm = BoltzmannMachine([vis_layer, hid_layer])

        # randomly set the intrinsic model parameters
        a = be.rand((num_visible_units,))
        b = be.rand((num_hidden_units,))
        W = be.zeros((num_visible_units, num_hidden_units))

        rbm.layers[0].params.loc[:] = a
        rbm.layers[1].params.loc[:] = b
        rbm.connections[0].weights.params.matrix[:] = W

        if layer_type == layers.GaussianLayer:
            log_var_a = be.randn((num_visible_units,))
            log_var_b = be.randn((num_hidden_units,))
            rbm.layers[0].params.log_var[:] = log_var_a
            rbm.layers[1].params.log_var[:] = log_var_b

        # initialize a state
        state = State.from_model(batch_size, rbm)

        # run a markov chain to update the state
        state = rbm.markov_chain(steps, state)

        # compute the mean
        state_for_moments = State.from_model(1, rbm)
        sample_mean = [be.mean(state[i], axis=0) for i in range(state.len)]
        model_mean = [rbm.layers[i].conditional_mean(
                rbm._connected_rescaled_units(i, state_for_moments),
                rbm._connected_weights(i)) for i in range(rbm.num_layers)]

        # check that the means are roughly equal
        for i in range(rbm.num_layers):
            ave = sample_mean[i]
            close = be.allclose(ave, model_mean[i][0], rtol=mean_tol, atol=mean_tol)
            assert close, "{0} {1}: sample mean does not match model mean".format(layer_type, i)

        # check the cross correlation between the layers
        crosscov = be.cov(state[0], state[1])
        norm = be.outer(be.std(state[0], axis=0), be.std(state[1], axis=0))
        crosscorr = be.divide(norm, crosscov)
        assert be.tmax(be.tabs(crosscorr)) < corr_tol, "{} cross correlation too large".format(layer_type)
Example 29
def test_conditional_sampling():
    """
    Test sampling from one layer conditioned on the state of another layer.

    Note:
        This test compares values estimated by *sampling* to values computed
        analytically. It can fail for small batch_size, or strict tolerances,
        even if everything is working properly.

    """
    num_visible_units = 20
    num_hidden_units = 10
    steps = 1000
    mean_tol = 0.1

    # set a seed for the random number generator
    be.set_seed()

    layer_types = [
            layers.BernoulliLayer,
            layers.GaussianLayer]

    for layer_type in layer_types:
        # set up some layer and model objects
        vis_layer = layer_type(num_visible_units)
        hid_layer = layer_type(num_hidden_units)
        rbm = BoltzmannMachine([vis_layer, hid_layer])

        # randomly set the intrinsic model parameters
        a = be.rand((num_visible_units,))
        b = be.rand((num_hidden_units,))
        W = 10 * be.rand((num_visible_units, num_hidden_units))

        rbm.layers[0].params.loc[:] = a
        rbm.layers[1].params.loc[:] = b
        rbm.connections[0].weights.params.matrix[:] = W

        if layer_type == layers.GaussianLayer:
            log_var_a = be.randn((num_visible_units,))
            log_var_b = be.randn((num_hidden_units,))
            rbm.layers[0].params.log_var[:] = log_var_a
            rbm.layers[1].params.log_var[:] = log_var_b

        # initialize a state
        state = State.from_model(1, rbm)

        # set up a calculator for the moments
        moments = mu.MeanVarianceArrayCalculator()

        for _ in range(steps):
            moments.update(rbm.layers[0].conditional_sample(
                    rbm._connected_rescaled_units(0, state),
                    rbm._connected_weights(0)))

        model_mean = rbm.layers[0].conditional_mean(
                rbm._connected_rescaled_units(0, state),
                rbm._connected_weights(0))


        ave = moments.mean

        close = be.allclose(ave, model_mean[0], rtol=mean_tol, atol=mean_tol)
        assert close, "{} conditional mean".format(layer_type)

        if layer_type == layers.GaussianLayer:
            model_mean, model_var = rbm.layers[0].conditional_params(
                rbm._connected_rescaled_units(0, state),
                rbm._connected_weights(0))

            close = be.allclose(be.sqrt(moments.var), be.sqrt(model_var[0]), rtol=mean_tol, atol=mean_tol)
            assert close, "{} conditional standard deviation".format(layer_type)
Example 30
def test_tap_machine(paysage_path=None):
    num_hidden_units = 10
    batch_size = 100
    num_epochs = 5
    learning_rate = schedules.PowerLawDecay(initial=0.1, coefficient=1.0)

    if not paysage_path:
        paysage_path = os.path.dirname(
            os.path.dirname(os.path.abspath(__file__)))
    filepath = os.path.join(paysage_path, 'examples', 'mnist', 'mnist.h5')

    if not os.path.exists(filepath):
        raise IOError(
            "{} does not exist. run mnist/download_mnist.py to fetch from the web"
            .format(filepath))

    shuffled_filepath = os.path.join(paysage_path, 'examples', 'mnist',
                                     'shuffled_mnist.h5')

    # shuffle the data
    if not os.path.exists(shuffled_filepath):
        shuffler = batch.DataShuffler(filepath, shuffled_filepath, complevel=0)
        shuffler.shuffle()

    # set a seed for the random number generator
    be.set_seed()

    # set up the reader to get minibatches
    samples = pre.binarize_color(
        be.float_tensor(
            pandas.read_hdf(shuffled_filepath,
                            key='train/images').values[:10000]))
    samples_train, samples_validate = batch.split_tensor(samples, 0.95)
    data = batch.Batch({
        'train':
        batch.InMemoryTable(samples_train, batch_size),
        'validate':
        batch.InMemoryTable(samples_validate, batch_size)
    })

    # set up the model and initialize the parameters
    vis_layer = layers.BernoulliLayer(data.ncols)
    hid_layer = layers.BernoulliLayer(num_hidden_units)

    rbm = BoltzmannMachine([vis_layer, hid_layer])
    rbm.initialize(data)

    # obtain initial estimate of the reconstruction error
    perf = ProgressMonitor(generator_metrics = \
            [ReconstructionError(), TAPLogLikelihood(10), TAPFreeEnergy(10)])
    untrained_performance = perf.epoch_update(data,
                                              rbm,
                                              store=True,
                                              show=False)

    # set up the optimizer and the fit method
    opt = optimizers.Gradient(stepsize=learning_rate, tolerance=1e-5)
    tap = fit.TAP(True, 0.1, 0.01, 25, True, 0.5, 0.001, 0.0)
    solver = fit.SGD(rbm, data)
    solver.monitor.generator_metrics.append(TAPLogLikelihood(10))
    solver.monitor.generator_metrics.append(TAPFreeEnergy(10))

    # fit the model
    print('training with stochastic gradient ascent')
    solver.train(opt, num_epochs, method=tap.tap_update)

    # obtain an estimate of the reconstruction error after training
    trained_performance = solver.monitor.memory[-1]

    assert (trained_performance['TAPLogLikelihood'] >
            untrained_performance['TAPLogLikelihood']), \
    "TAP log-likelihood did not increase"
    assert (trained_performance['ReconstructionError'] <
            untrained_performance['ReconstructionError']), \
    "Reconstruction error did not decrease"

    # close the HDF5 store
    data.close()