Example #1
0
def test_ais():
    """
    Test ais computation by comparing the output of estimate_likelihood to
    Russ's code's output for the same parameters.
    """
    try:
        trainset = MNIST(which_set='train')
        testset = MNIST(which_set='test')
    except NoDataPathError:
        raise SkipTest("PYLEARN2_DATA_PATH environment variable not defined")

    nvis = 784
    nhid = 20
    # Random initialization of RBM parameters
    numpy.random.seed(98734)
    w_hid = 10 * numpy.cast[theano.config.floatX](numpy.random.randn(nvis,
                                                                     nhid))
    b_vis = 10 * numpy.cast[theano.config.floatX](numpy.random.randn(nvis))
    b_hid = 10 * numpy.cast[theano.config.floatX](numpy.random.randn(nhid))

    # Initialization of RBM
    visible_layer = BinaryVector(nvis)
    hidden_layer = BinaryVectorMaxPool(detector_layer_dim=nhid, pool_size=1,
                                       layer_name='h', irange=0.1)
    rbm = DBM(100, visible_layer, [hidden_layer], 1)
    rbm.visible_layer.set_biases(b_vis)
    rbm.hidden_layers[0].set_weights(w_hid)
    rbm.hidden_layers[0].set_biases(b_hid)
    rbm.nvis = nvis
    rbm.nhid = nhid

    # Compute real logz and associated train_ll and test_ll using rbm_tools
    v_sample = T.matrix('v_sample')
    h_sample = T.matrix('h_sample')
    W = theano.shared(rbm.hidden_layers[0].get_weights())
    hbias = theano.shared(rbm.hidden_layers[0].get_biases())
    vbias = theano.shared(rbm.visible_layer.get_biases())

    wx_b = T.dot(v_sample, W) + hbias
    vbias_term = T.dot(v_sample, vbias)
    hidden_term = T.sum(T.log(1 + T.exp(wx_b)), axis=1)
    free_energy_v = -hidden_term - vbias_term
    free_energy_v_fn = theano.function(inputs=[v_sample],
                                       outputs=free_energy_v)

    wh_c = T.dot(h_sample, W.T) + vbias
    hbias_term = T.dot(h_sample, hbias)
    visible_term = T.sum(T.log(1 + T.exp(wh_c)), axis=1)
    free_energy_h = -visible_term - hbias_term
    free_energy_h_fn = theano.function(inputs=[h_sample],
                                       outputs=free_energy_h)

    real_logz = rbm_tools.compute_log_z(rbm, free_energy_h_fn)

    real_ais_train_ll = -rbm_tools.compute_nll(rbm,
                                               trainset.get_design_matrix(),
                                               real_logz, free_energy_v_fn)
    real_ais_test_ll = -rbm_tools.compute_nll(rbm, testset.get_design_matrix(),
                                              real_logz, free_energy_v_fn)

    # Compute train_ll, test_ll and logz using dbm_metrics
    train_ll, test_ll, logz = dbm_metrics.estimate_likelihood([W],
                                                              [vbias, hbias],
                                                              trainset,
                                                              testset,
                                                              pos_mf_steps=100)
    assert (real_logz - logz) < 2.0
    assert (real_ais_train_ll - train_ll) < 2.0
    assert (real_ais_test_ll - test_ll) < 2.0
Example #2
0
def test_ais():
    """
    Test ais computation by comparing the output of estimate_likelihood to
    Russ's code's output for the same parameters.
    """
    try:
        # TODO: the one_hot=True is only necessary because one_hot=False is
        # broken, remove it after one_hot=False is fixed.
        trainset = MNIST(which_set='train', one_hot=True)
        testset = MNIST(which_set='test', one_hot=True)
    except NoDataPathError:
        raise SkipTest("PYLEARN2_DATA_PATH environment variable not defined")

    nvis = 784
    nhid = 20
    # Random initialization of RBM parameters
    numpy.random.seed(98734)
    w_hid = 10 * numpy.cast[theano.config.floatX](numpy.random.randn(
        nvis, nhid))
    b_vis = 10 * numpy.cast[theano.config.floatX](numpy.random.randn(nvis))
    b_hid = 10 * numpy.cast[theano.config.floatX](numpy.random.randn(nhid))

    # Initialization of RBM
    visible_layer = BinaryVector(nvis)
    hidden_layer = BinaryVectorMaxPool(detector_layer_dim=nhid,
                                       pool_size=1,
                                       layer_name='h',
                                       irange=0.1)
    rbm = DBM(100, visible_layer, [hidden_layer], 1)
    rbm.visible_layer.set_biases(b_vis)
    rbm.hidden_layers[0].set_weights(w_hid)
    rbm.hidden_layers[0].set_biases(b_hid)
    rbm.nvis = nvis
    rbm.nhid = nhid

    # Compute real logz and associated train_ll and test_ll using rbm_tools
    v_sample = T.matrix('v_sample')
    h_sample = T.matrix('h_sample')
    W = theano.shared(rbm.hidden_layers[0].get_weights())
    hbias = theano.shared(rbm.hidden_layers[0].get_biases())
    vbias = theano.shared(rbm.visible_layer.get_biases())

    wx_b = T.dot(v_sample, W) + hbias
    vbias_term = T.dot(v_sample, vbias)
    hidden_term = T.sum(T.log(1 + T.exp(wx_b)), axis=1)
    free_energy_v = -hidden_term - vbias_term
    free_energy_v_fn = theano.function(inputs=[v_sample],
                                       outputs=free_energy_v)

    wh_c = T.dot(h_sample, W.T) + vbias
    hbias_term = T.dot(h_sample, hbias)
    visible_term = T.sum(T.log(1 + T.exp(wh_c)), axis=1)
    free_energy_h = -visible_term - hbias_term
    free_energy_h_fn = theano.function(inputs=[h_sample],
                                       outputs=free_energy_h)

    real_logz = rbm_tools.compute_log_z(rbm, free_energy_h_fn)

    real_ais_train_ll = -rbm_tools.compute_nll(
        rbm, trainset.get_design_matrix(), real_logz, free_energy_v_fn)
    real_ais_test_ll = -rbm_tools.compute_nll(rbm, testset.get_design_matrix(),
                                              real_logz, free_energy_v_fn)

    # Compute train_ll, test_ll and logz using dbm_metrics
    train_ll, test_ll, logz = dbm_metrics.estimate_likelihood([W],
                                                              [vbias, hbias],
                                                              trainset,
                                                              testset,
                                                              pos_mf_steps=100)
    assert (real_logz - logz) < 2.0
    assert (real_ais_train_ll - train_ll) < 2.0
    assert (real_ais_test_ll - test_ll) < 2.0
    utils.debug.setdebug()

    recon = []
    nlls = []
    for j in range(0, 401):
        avg_rec_error = 0

        for i in range(0, 500, 100):
            rec_error = train_fn(data[i:i+100])
            recon.append(rec_error / 100)
            avg_rec_error = (i*avg_rec_error + rec_error) / (i+100)
        print "Epoch %d: avg_rec_error = %f" % (j+1, avg_rec_error)

        if (j%50)==0:
            log_z = compute_log_z(rbm, free_energy_fn)
            nll = compute_nll(rbm, data, log_z, free_energy_fn)
            nlls.append(nll)
            print "Epoch %d: avg_nll = %f" % (j+1, nll)
    if not args.no_plot:
        import matplotlib.pyplot as plt
        plt.subplot(2, 1, 1)
        plt.plot(range(len(recon)), recon)
        plt.xlim(0, len(recon))
        plt.title('Reconstruction error per minibatch')
        plt.xlabel('minibatch number')
        plt.ylabel('reconstruction error')
        plt.subplot(2, 1, 2)
        plt.plot(range(0, len(nlls) * 50, 50), nlls, '-d')
        plt.xlabel('Epoch')
        plt.ylabel('Average nll per data point')
        plt.show()