Example #1
def test_onehot_conditional_params():
    # smoke test: compute the conditional parameters of a one-hot layer
    # from scaled hidden units, transposed weights, and inverse temperatures
    ly = layers.OneHotLayer(num_vis)
    w = layers.Weights((num_vis, num_hid))
    scaled_units = [be.randn((num_samples, num_hid))]
    weights = [w.W_T()]
    beta = be.rand((num_samples, 1))
    ly._conditional_params(scaled_units, weights, beta)
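
Note: these short smoke tests rely on module-level fixtures (be, layers, constraints, penalties, num_vis, num_hid, num_samples) that are defined elsewhere in the test module. A minimal sketch of the assumed setup; the sizes are illustrative and the import paths assume the paysage package layout:

from paysage import backends as be
from paysage import layers, constraints, penalties

num_vis = 8       # number of visible units (illustrative)
num_hid = 5       # number of hidden units (illustrative)
num_samples = 10  # rows in the random test tensors (illustrative)
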
Example #2
def test_onehot_derivatives():
    # smoke test: compute the derivatives of the layer parameters
    # from a random visible batch and scaled hidden units
    ly = layers.OneHotLayer(num_vis)
    w = layers.Weights((num_vis, num_hid))
    vis = ly.random((num_samples, num_vis))
    hid = [be.randn((num_samples, num_hid))]
    weights = [w.W_T()]
    ly.derivatives(vis, hid, weights)
Example #3
def test_onehot_build_from_config():
    # round trip: a layer rebuilt from its config should
    # reproduce that config exactly
    ly = layers.OneHotLayer(num_vis)
    ly.add_constraint({'loc': constraints.non_negative})
    p = penalties.l2_penalty(0.37)
    ly.add_penalty({'loc': p})
    ly_new = layers.layer_from_config(ly.get_config())
    assert ly_new.get_config() == ly.get_config()
Example #4
def test_onehot_conditional_params():
    num_visible_units = 100
    num_hidden_units = 50
    batch_size = 25

    # set a seed for the random number generator
    be.set_seed()

    # set up some layer and model objects
    vis_layer = layers.OneHotLayer(num_visible_units)
    hid_layer = layers.OneHotLayer(num_hidden_units)
    rbm = BoltzmannMachine([vis_layer, hid_layer])

    # randomly set the intrinsic model parameters
    a = be.randn((num_visible_units,))
    b = be.randn((num_hidden_units,))
    W = be.randn((num_visible_units, num_hidden_units))

    rbm.layers[0].params.loc[:] = a
    rbm.layers[1].params.loc[:] = b
    rbm.connections[0].weights.params.matrix[:] = W

    # generate a random batch of data
    vdata = rbm.layers[0].random((batch_size, num_visible_units))
    hdata = rbm.layers[1].random((batch_size, num_hidden_units))

    # compute the conditional parameters by hand
    hidden_field = be.dot(vdata, W) # (batch_size, num_hidden_units)
    hidden_field += b

    visible_field = be.dot(hdata, be.transpose(W)) # (batch_size, num_visible_units)
    visible_field += a

    # compute the conditional parameters with the layer functions
    hidden_field_layer = rbm.layers[1].conditional_params(
        [vdata], [rbm.connections[0].W()])
    visible_field_layer = rbm.layers[0].conditional_params(
        [hdata], [rbm.connections[0].W(trans=True)])

    assert be.allclose(hidden_field, hidden_field_layer), \
        "hidden field wrong in onehot-onehot rbm"

    assert be.allclose(visible_field, visible_field_layer), \
        "visible field wrong in onehot-onehot rbm"
Example #5
def test_gaussian_1D_1mode_train():
    # create some example data
    num = 10000
    mu = 3
    sigma = 1
    samples = be.randn((num, 1)) * sigma + mu

    # set up the reader to get minibatches
    batch_size = 100
    samples_train, samples_validate = batch.split_tensor(samples, 0.9)
    data = batch.Batch({
        'train':
        batch.InMemoryTable(samples_train, batch_size),
        'validate':
        batch.InMemoryTable(samples_validate, batch_size)
    })

    # parameters
    learning_rate = schedules.PowerLawDecay(initial=0.1, coefficient=0.1)
    mc_steps = 1
    num_epochs = 10
    num_sample_steps = 100

    # set up the model and initialize the parameters
    vis_layer = layers.GaussianLayer(1)
    hid_layer = layers.OneHotLayer(1)

    rbm = BoltzmannMachine([vis_layer, hid_layer])
    rbm.initialize(data, method='hinton')

    # shift the initialized parameters away from the data statistics
    # so that the model actually has something to learn during training
    rbm.layers[0].params = layers.ParamsGaussian(
        rbm.layers[0].params.loc - 3, rbm.layers[0].params.log_var - 1)

    # set up the optimizer and the fit method
    opt = optimizers.ADAM(stepsize=learning_rate)
    cd = fit.SGD(rbm, data)

    # fit the model
    print('training with persistent contrastive divergence')
    cd.train(opt, num_epochs, method=fit.pcd, mcsteps=mc_steps)

    # sample data from the trained model
    model_state = \
        samplers.SequentialMC.generate_fantasy_state(rbm, num, num_sample_steps)
    pts_trained = model_state[0]

    percent_error = 10
    mu_trained = be.mean(pts_trained)
    assert numpy.abs(mu_trained / mu - 1) < (percent_error / 100)

    sigma_trained = numpy.sqrt(be.var(pts_trained))
    assert numpy.abs(sigma_trained / sigma - 1) < (percent_error / 100)
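
The closing assertions accept up to 10% relative error in the recovered mean and standard deviation; the same check written as a small helper (the name is hypothetical):

def within_percent(estimate, target, percent=10):
    # true when the relative error |estimate / target - 1| is below percent / 100
    return abs(estimate / target - 1) < percent / 100

# e.g. within_percent(mu_trained, mu) and within_percent(sigma_trained, sigma)
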
Example #6
def run(num_epochs=10, show_plot=False):
    num_hidden_units = 256
    batch_size = 100
    learning_rate = schedules.PowerLawDecay(initial=0.01, coefficient=0.1)
    mc_steps = 1

    # set up the reader to get minibatches
    data = util.create_batch(batch_size,
                             train_fraction=0.95,
                             transform=transform)

    # set up the model and initialize the parameters
    vis_layer = layers.BernoulliLayer(data.ncols)
    hid_layer = layers.OneHotLayer(num_hidden_units)

    rbm = BoltzmannMachine([vis_layer, hid_layer])
    rbm.connections[0].weights.add_penalty({'matrix': pen.l2_penalty(0.001)})
    rbm.initialize(data, method='glorot_normal')

    # set up the optimizer and the fit method
    opt = optimizers.ADAM(stepsize=learning_rate)
    cd = fit.SGD(rbm, data)

    # fit the model
    print('training with persistent contrastive divergence')
    cd.train(opt, num_epochs, method=fit.pcd, mcsteps=mc_steps)

    # evaluate the model
    util.show_metrics(rbm, cd.monitor)
    valid = data.get('validate')
    util.show_reconstructions(rbm,
                              valid,
                              show_plot,
                              n_recon=10,
                              vertical=False,
                              num_to_avg=10)
    util.show_fantasy_particles(rbm, valid, show_plot, n_fantasy=5)
    util.show_weights(rbm, show_plot, n_weights=25)

    # close the HDF5 store
    data.close()
    print("Done")
Example #7
def run(paysage_path=None, num_epochs=10, show_plot=False):
    num_hidden_units = 256
    batch_size = 100
    learning_rate = schedules.PowerLawDecay(initial=0.01, coefficient=0.1)
    mc_steps = 1

    (_, _, shuffled_filepath) = util.default_paths(paysage_path)

    # set up the reader to get minibatches
    data = batch.HDFBatch(shuffled_filepath,
                          'train/images',
                          batch_size,
                          transform=pre.binarize_color,
                          train_fraction=0.95)

    # set up the model and initialize the parameters
    vis_layer = layers.BernoulliLayer(data.ncols)
    hid_layer = layers.OneHotLayer(num_hidden_units)

    rbm = model.Model([vis_layer, hid_layer])
    rbm.weights[0].add_penalty({'matrix': pen.l2_penalty(0.001)})
    rbm.initialize(data, method='glorot_normal')

    metrics = [
        'ReconstructionError', 'EnergyDistance', 'EnergyGap', 'EnergyZscore',
        'HeatCapacity', 'WeightSparsity', 'WeightSquare'
    ]
    perf = fit.ProgressMonitor(data, metrics=metrics)

    # set up the optimizer and the fit method
    opt = optimizers.ADAM(stepsize=learning_rate)

    sampler = fit.DrivenSequentialMC.from_batch(rbm, data)

    cd = fit.SGD(rbm,
                 data,
                 opt,
                 num_epochs,
                 method=fit.pcd,
                 sampler=sampler,
                 mcsteps=mc_steps,
                 monitor=perf)

    # fit the model
    print('training with persistent contrastive divergence')
    cd.train()

    # evaluate the model
    util.show_metrics(rbm, perf)
    valid = data.get('validate')
    util.show_reconstructions(rbm,
                              valid,
                              fit,
                              show_plot,
                              n_recon=10,
                              vertical=False,
                              num_to_avg=10)
    util.show_fantasy_particles(rbm, valid, fit, show_plot, n_fantasy=25)
    util.show_weights(rbm, show_plot, n_weights=9)

    # close the HDF5 store
    data.close()
    print("Done")
Example #8
def test_OneHot_creation():
    # smoke test: the constructor should not raise
    layers.OneHotLayer(num_vis, 0)
Example #9
def test_onehot_online_param_update():
    # smoke test: update the layer parameters online from a random batch
    ly = layers.OneHotLayer(num_vis)
    vis = ly.random((num_samples, num_vis))
    ly.online_param_update(vis)
Example #10
def test_onehot_energy():
    # smoke test: evaluate the layer energy of a random batch
    ly = layers.OneHotLayer(num_vis)
    vis = ly.random((num_samples, num_vis))
    ly.energy(vis)
Example #11
def test_onehot_derivatives():
    num_visible_units = 100
    num_hidden_units = 50
    batch_size = 25

    # set a seed for the random number generator
    be.set_seed()

    # set up some layer and model objects
    vis_layer = layers.OneHotLayer(num_visible_units)
    hid_layer = layers.OneHotLayer(num_hidden_units)
    rbm = BoltzmannMachine([vis_layer, hid_layer])

    # randomly set the intrinsic model parameters
    a = be.randn((num_visible_units,))
    b = be.randn((num_hidden_units,))
    W = be.randn((num_visible_units, num_hidden_units))

    rbm.layers[0].params.loc[:] = a
    rbm.layers[1].params.loc[:] = b
    rbm.connections[0].weights.params.matrix[:] = W

    # generate a random batch of data
    vdata = rbm.layers[0].random((batch_size, num_visible_units))
    vdata_scaled = rbm.layers[0].rescale(vdata)

    # compute the conditional mean of the hidden layer
    hid_mean = rbm.layers[1].conditional_mean([vdata], [rbm.connections[0].W()])
    hid_mean_scaled = rbm.layers[1].rescale(hid_mean)

    # compute the derivatives
    d_visible_loc = -be.mean(vdata, axis=0)
    d_hidden_loc = -be.mean(hid_mean_scaled, axis=0)
    d_W = -be.batch_outer(vdata, hid_mean_scaled) / len(vdata)

    # compute the derivatives using the layer functions
    vis_derivs = rbm.layers[0].derivatives(vdata, [hid_mean_scaled],
                                           [rbm.connections[0].W(trans=True)])

    hid_derivs = rbm.layers[1].derivatives(hid_mean, [vdata_scaled],
                                           [rbm.connections[0].W()])

    weight_derivs = rbm.connections[0].weights.derivatives(vdata, hid_mean_scaled)

    # compute simple weighted derivatives using the layer functions
    scale = 2
    scale_func = partial(be.multiply, be.float_scalar(scale))
    vis_derivs_scaled = rbm.layers[0].derivatives(vdata, [hid_mean_scaled],
                                                  [rbm.connections[0].W(trans=True)],
                                                  weighting_function=scale_func)

    hid_derivs_scaled = rbm.layers[1].derivatives(hid_mean, [vdata_scaled],
                                                  [rbm.connections[0].W()],
                                                  weighting_function=scale_func)

    weight_derivs_scaled = rbm.connections[0].weights.derivatives(vdata, hid_mean_scaled,
                                                                  weighting_function=scale_func)

    assert be.allclose(d_visible_loc, vis_derivs[0].loc), \
        "derivative of visible loc wrong in onehot-onehot rbm"

    assert be.allclose(d_hidden_loc, hid_derivs[0].loc), \
        "derivative of hidden loc wrong in onehot-onehot rbm"

    assert be.allclose(d_W, weight_derivs[0].matrix), \
        "derivative of weights wrong in onehot-onehot rbm"

    assert be.allclose(scale * d_visible_loc, vis_derivs_scaled[0].loc), \
        "weighted derivative of visible loc wrong in onehot-onehot rbm"

    assert be.allclose(scale * d_hidden_loc, hid_derivs_scaled[0].loc), \
        "weighted derivative of hidden loc wrong in onehot-onehot rbm"

    assert be.allclose(scale * d_W, weight_derivs_scaled[0].matrix), \
        "weighted derivative of weights wrong in onehot-onehot rbm"