Example #1
0
def test_make_symbolic_state():
    """
    Check the shapes of the symbolic states built by three layer types.

    A BinaryVector, a BinaryVectorMaxPool and a Softmax layer are each asked
    for a symbolic state of `num_examples` examples. The states are compiled
    into a single function with no inputs, and every returned array must
    have shape (num_examples, number of units in the layer).
    """
    num_examples = 40
    nvis, nhid, n_classes = 100, 500, 10
    theano_rng = MRG_RandomStreams(2012+11+1)

    # Visible layer state.
    v_layer = BinaryVector(nvis=nvis)
    v_state = v_layer.make_symbolic_state(num_examples=num_examples,
                                          theano_rng=theano_rng)

    # The max-pooling layer yields a pair of states; with pool_size=1 both
    # are expected to have as many columns as detector units.
    h_layer = BinaryVectorMaxPool(detector_layer_dim=nhid,
                                  pool_size=1,
                                  layer_name='h',
                                  irange=0.05,
                                  init_bias=-2.0)
    p_state, h_state = h_layer.make_symbolic_state(num_examples=num_examples,
                                                   theano_rng=theano_rng)

    # Softmax layer state.
    s_layer = Softmax(n_classes=n_classes, layer_name='s', irange=0.05)
    s_state = s_layer.make_symbolic_state(num_examples=num_examples,
                                          theano_rng=theano_rng)

    expected_shapes = [(num_examples, nvis),
                       (num_examples, nhid),
                       (num_examples, nhid),
                       (num_examples, n_classes)]
    sampler = function(inputs=[],
                       outputs=[v_state, p_state, h_state, s_state])

    for sample, expected in zip(sampler(), expected_shapes):
        assert sample.shape == expected
Example #2
0
def test_make_symbolic_state():
    """
    Verify that make_symbolic_state returns states of the right dimensions.

    The visible (BinaryVector), hidden (BinaryVectorMaxPool) and Softmax
    layers each build a symbolic state for 40 examples; after compiling the
    states with `function`, each sampled array is compared against its
    expected (examples, units) shape.
    """
    num_examples = 40
    theano_rng = MRG_RandomStreams(2012+11+1)
    # Every layer receives the same arguments for its state.
    state_kwargs = dict(num_examples=num_examples, theano_rng=theano_rng)

    rval = BinaryVector(nvis=100).make_symbolic_state(**state_kwargs)

    hidden_layer = BinaryVectorMaxPool(detector_layer_dim=500,
                                       pool_size=1,
                                       layer_name='h',
                                       irange=0.05,
                                       init_bias=-2.0)
    p_sample, h_sample = hidden_layer.make_symbolic_state(**state_kwargs)

    softmax_layer = Softmax(n_classes=10, layer_name='s', irange=0.05)
    h_sample_s = softmax_layer.make_symbolic_state(**state_kwargs)

    # Pair every symbolic output with the shape its sampled value must have.
    checks = [
        (rval, (40, 100)),
        (p_sample, (40, 500)),
        (h_sample, (40, 500)),
        (h_sample_s, (40, 10)),
    ]
    f = function(inputs=[],
                 outputs=[variable for variable, _ in checks])

    for value, (_, shape) in zip(f(), checks):
        assert value.shape == shape
Example #3
0
def setup():
    """
    Create the pickle file 'dbm.pkl' holding a simple model.

    The model is a DBM with a 784-unit BinaryVector visible layer (biases
    initialised from the marginals of a binarized 100-example MNIST training
    subset) and two BinaryVectorMaxPool hidden layers of 50 and 10 detector
    units. Before pickling, the model is given a `dataset_yaml_src` string
    describing that dataset and a `layer_to_chains` attribute built with
    `make_layer_to_state(1)`.

    The output file is opened only after the model has been fully built, so
    a failure while constructing the dataset or the model does not leave an
    empty or truncated 'dbm.pkl' behind.
    """
    # The matching pop_load_data is expected to be run by tearDown
    # (not visible from this function).
    control.push_load_data(False)

    dataset = MNIST(which_set='train', start=0, stop=100, binarize=True)
    vis_layer = BinaryVector(nvis=784, bias_from_marginals=dataset)
    hid_layer1 = BinaryVectorMaxPool(layer_name='h1',
                                     pool_size=1,
                                     irange=.05,
                                     init_bias=-2.,
                                     detector_layer_dim=50)
    hid_layer2 = BinaryVectorMaxPool(layer_name='h2',
                                     pool_size=1,
                                     irange=.05,
                                     init_bias=-2.,
                                     detector_layer_dim=10)
    model = DBM(batch_size=20,
                niter=2,
                visible_layer=vis_layer,
                hidden_layers=[hid_layer1, hid_layer2])
    # YAML description of the dataset, stored on the model itself.
    model.dataset_yaml_src = """
!obj:pylearn2.datasets.binarizer.Binarizer {
    raw: !obj:pylearn2.datasets.mnist.MNIST {
        which_set: "train",
        start: 0,
        stop: 100
    }
}
"""
    model.layer_to_chains = model.make_layer_to_state(1)

    # Keep the file open only for the duration of the dump itself.
    with open('dbm.pkl', 'wb') as f:
        cPickle.dump(model, f, protocol=cPickle.HIGHEST_PROTOCOL)
Example #4
0
def make_random_basic_binary_dbm(
        rng,
        pool_size_1,
        num_vis = None,
        num_pool_1 = None,
        num_pool_2 = None,
        pool_size_2 = None,
        center = False
        ):
    """
    Build a DBM with a BinaryVector visible layer and two
    BinaryVectorMaxPool hidden layers ('h1' and 'h2').

    The weights and biases are initialized randomly with somewhat large
    values (i.e., not what you'd want to use for learning): both hidden
    layers use irange=1. and all biases are drawn from
    rng.uniform(-1., 1.).

    rng: A numpy RandomState. Used for every random draw made here.
    pool_size_1: The size of the pools to use in the first layer.
    num_vis: Number of visible units; rng.randint(1, 11) if None.
    num_pool_1: Number of pools in the first hidden layer;
                rng.randint(1, 11) if None.
    num_pool_2: Number of pools in the second hidden layer;
                rng.randint(1, 11) if None.
    pool_size_2: The size of the pools in the second hidden layer;
                 rng.randint(1, 6) if None.
    center: Passed as `center` to every layer and as `recenter` to every
            set_biases call.

    Returns the DBM, built with batch_size=1 and niter=50.
    """

    def pick(value, high):
        # Keep an explicitly supplied value; draw one only when it is missing.
        if value is not None:
            return value
        return rng.randint(1, high)

    def random_biases(size):
        # One bias per unit, cast to the configured float type.
        return rng.uniform(-1., 1., (size,)).astype(config.floatX)

    # The order of these draws determines the values obtained from rng.
    num_vis = pick(num_vis, 11)
    num_pool_1 = pick(num_pool_1, 11)
    num_pool_2 = pick(num_pool_2, 11)
    pool_size_2 = pick(pool_size_2, 6)

    # Detector units per hidden layer = number of pools * pool size.
    detector_dim_1 = num_pool_1 * pool_size_1
    detector_dim_2 = num_pool_2 * pool_size_2

    visible = BinaryVector(num_vis, center=center)
    visible.set_biases(random_biases(num_vis), recenter=center)

    first_hidden = BinaryVectorMaxPool(
            layer_name = 'h1',
            detector_layer_dim = detector_dim_1,
            pool_size = pool_size_1,
            irange = 1.,
            center = center)
    first_hidden.set_biases(random_biases(detector_dim_1), recenter=center)

    second_hidden = BinaryVectorMaxPool(
            layer_name = 'h2',
            detector_layer_dim = detector_dim_2,
            pool_size = pool_size_2,
            irange = 1.,
            center = center)
    second_hidden.set_biases(random_biases(detector_dim_2), recenter=center)

    return DBM(visible_layer = visible,
            hidden_layers = [first_hidden, second_hidden],
            batch_size = 1,
            niter = 50)
Example #5
0
def test_variational_cd():
    """
    Smoke test for VariationalCD.get_gradients on a one-hidden-layer DBM.

    Per the original note, the goal is to verify that VariationalCD works
    well with make_layer_to_symbolic_state (presumably used inside the cost;
    that call is not visible here). The test builds symbolic inputs from the
    cost's data specs and passes if get_gradients returns a pair without
    raising.
    """
    v_layer = BinaryVector(nvis=100)
    h_layer = BinaryVectorMaxPool(detector_layer_dim=500,
                                  pool_size=1,
                                  layer_name='h',
                                  irange=0.05,
                                  init_bias=-2.0)
    model = DBM(visible_layer=v_layer,
                hidden_layers=[h_layer],
                batch_size=100,
                niter=1)

    cost = VariationalCD(num_chains=100, num_gibbs_steps=2)

    # Flatten the (space, source) specification requested by the cost.
    data_specs = cost.get_data_specs(model)
    mapping = DataSpecsMapping(data_specs)
    flat_spaces = mapping.flatten(data_specs[0], return_tuple=True)
    flat_sources = mapping.flatten(data_specs[1], return_tuple=True)

    # One symbolic batch per flattened space, named after its source.
    theano_args = tuple(
        space.make_theano_batch(name='%s' % (source))
        for space, source in safe_zip(flat_spaces, flat_sources))
    # Restore the nesting the cost expects.
    nested_args = mapping.nest(theano_args)

    # The values are not inspected; unpacking only checks a pair comes back.
    gradients, updates = cost.get_gradients(model, nested_args)
Example #6
0
def test_bvmp_make_state():
    """
    Verify that BinaryVectorMaxPool.make_state creates shared variables
    whose values pass check_bvmp_samples.

    For each pool size, a layer with `num_pools` pools gets random biases.
    Its expected means are obtained from one mean field update with a zero
    input and no layer above, and the state drawn by make_state is checked
    against them within tolerance `tol`.
    """

    num_pools = 3
    num_samples = 1000
    tol = .04
    rng = np.random.RandomState([2012,11,1,9])

    class DummyDBM(object):
        # Minimal stand-in for a DBM: it only carries the shared `rng`.
        def __init__(self):
            self.rng = rng

    # pool_size=1 is an important corner case
    for pool_size in (1, 2, 5):
        n = num_pools * pool_size

        layer = BinaryVectorMaxPool(
                detector_layer_dim=n,
                layer_name='h',
                irange=1.,
                pool_size=pool_size)

        # This is just to placate mf_update below
        layer.set_dbm(DummyDBM())
        layer.set_input_space(VectorSpace(1))

        random_biases = rng.uniform(-pool_size, 1., (n,))
        layer.set_biases(random_biases.astype(config.floatX))

        # To find the mean of the samples, we use mean field with an input of 0
        symbolic_mean = layer.mf_update(
                state_below=T.alloc(0., 1, 1),
                state_above=None,
                layer_above=None)
        computed_mean = function([], symbolic_mean)()

        # Only one example was fed in, so keep its single row of each output.
        expected_mean = [component[0,:] for component in computed_mean]

        state = layer.make_state(num_examples=num_samples,
                numpy_rng=rng)
        sampled = [shared.get_value() for shared in state]

        check_bvmp_samples(sampled, num_samples, n, pool_size,
                expected_mean, tol)
Example #7
0
def test_bvmp_make_state():
    """
    Check the samples produced by BinaryVectorMaxPool.make_state.

    The state created by make_state (a sequence of objects read with
    get_value) must pass check_bvmp_samples when compared to the means
    given by a mean field update on an all-zero input.
    """

    num_pools = 3
    num_samples = 1000
    tol = .04
    rng = np.random.RandomState([2012,11,1,9])

    class DummyDBM(object):
        # Only the `rng` attribute is provided; nothing else is set.
        def __init__(self):
            self.rng = rng

    def build_layer(pool_size, n):
        # Create a layer of n detector units and give it random biases.
        bvmp = BinaryVectorMaxPool(
                detector_layer_dim=n,
                layer_name='h',
                irange=1.,
                pool_size=pool_size)
        # This is just to placate mf_update in the caller
        input_space = VectorSpace(1)
        bvmp.set_dbm(DummyDBM())
        bvmp.set_input_space(input_space)
        bvmp.set_biases(
                rng.uniform(-pool_size, 1., (n,)).astype(config.floatX))
        return bvmp

    # pool_size=1 is an important corner case
    for pool_size in [1, 2, 5]:
        n = num_pools * pool_size
        layer = build_layer(pool_size, n)

        # To find the mean of the samples, we use mean field with an input of 0
        mf_outputs = layer.mf_update(
                state_below=T.alloc(0., 1, 1),
                state_above=None,
                layer_above=None)
        mf_values = function([], mf_outputs)()
        # Keep the first (and only) row of every mean field output.
        mean = [mf_value[0,:] for mf_value in mf_values]

        state = layer.make_state(num_examples=num_samples,
                numpy_rng=rng)
        value = [elem.get_value() for elem in state]

        check_bvmp_samples(value, num_samples, n, pool_size, mean, tol)
Example #8
0
def make_random_basic_binary_dbm(
        rng,
        pool_size_1,
        num_vis = None,
        num_pool_1 = None,
        num_pool_2 = None,
        pool_size_2 = None,
        center = False
        ):
    """
    Makes a DBM with BinaryVector for the visible layer,
    and two hidden layers of type BinaryVectorMaxPool.
    The weights and biases are initialized randomly with
    somewhat large values (i.e., not what you'd want to
    use for learning)

    rng: A numpy RandomState; the source of every random size and bias.
    pool_size_1: The size of the pools to use in the first
                 layer.
    num_vis, num_pool_1, num_pool_2: Number of visible units and number of
                 pools in each hidden layer. Each one that is None is
                 replaced by rng.randint(1, 11).
    pool_size_2: The size of the pools to use in the second layer;
                 replaced by rng.randint(1, 6) when None.
    center: Forwarded to each layer's constructor (`center`) and to each
            set_biases call (`recenter`).

    The returned DBM uses batch_size=1 and niter=50.
    """

    # Fill in missing sizes; the draw order below fixes the rng sequence.
    num_vis = rng.randint(1,11) if num_vis is None else num_vis
    num_pool_1 = rng.randint(1,11) if num_pool_1 is None else num_pool_1
    num_pool_2 = rng.randint(1,11) if num_pool_2 is None else num_pool_2
    pool_size_2 = rng.randint(1,6) if pool_size_2 is None else pool_size_2

    vis = BinaryVector(num_vis, center=center)
    vis_biases = rng.uniform(-1., 1., (num_vis,))
    vis.set_biases(vis_biases.astype(config.floatX), recenter=center)

    # (layer name, number of pools, pool size) for each hidden layer,
    # listed from bottom to top.
    hidden_specs = [
        ('h1', num_pool_1, pool_size_1),
        ('h2', num_pool_2, pool_size_2),
    ]

    hiddens = []
    for name, num_pools, pool_size in hidden_specs:
        num_detectors = num_pools * pool_size
        hid = BinaryVectorMaxPool(
                detector_layer_dim = num_detectors,
                pool_size = pool_size,
                layer_name = name,
                center = center,
                irange = 1.)
        hid_biases = rng.uniform(-1., 1., (num_detectors,))
        hid.set_biases(hid_biases.astype(config.floatX), recenter=center)
        hiddens.append(hid)

    dbm = DBM(visible_layer = vis,
            hidden_layers = hiddens,
            batch_size = 1,
            niter = 50)

    return dbm
Example #9
0
def test_ais():
    """
    Test ais computation by comparing the output of estimate_likelihood to
    Russ's code's output for the same parameters.

    An RBM (a DBM with a single hidden layer) with random parameters is
    built, its log partition function and train/test log-likelihoods are
    computed with rbm_tools, and the values returned by
    dbm_metrics.estimate_likelihood must each lie within 2.0 of them (in
    absolute value).

    Raises SkipTest if the MNIST data cannot be located (NoDataPathError).
    """
    try:
        # TODO: the one_hot=True is only necessary because one_hot=False is
        # broken, remove it after one_hot=False is fixed.
        trainset = MNIST(which_set='train', one_hot=True)
        testset = MNIST(which_set='test', one_hot=True)
    except NoDataPathError:
        raise SkipTest("PYLEARN2_DATA_PATH environment variable not defined")

    nvis = 784
    nhid = 20
    # Random initialization of RBM parameters
    numpy.random.seed(98734)
    w_hid = 10 * numpy.cast[theano.config.floatX](numpy.random.randn(
        nvis, nhid))
    b_vis = 10 * numpy.cast[theano.config.floatX](numpy.random.randn(nvis))
    b_hid = 10 * numpy.cast[theano.config.floatX](numpy.random.randn(nhid))

    # Initialization of RBM
    visible_layer = BinaryVector(nvis)
    hidden_layer = BinaryVectorMaxPool(detector_layer_dim=nhid,
                                       pool_size=1,
                                       layer_name='h',
                                       irange=0.1)
    # NOTE(review): positional arguments are presumably (batch_size,
    # visible_layer, hidden_layers, niter) -- confirm against DBM.__init__.
    rbm = DBM(100, visible_layer, [hidden_layer], 1)
    rbm.visible_layer.set_biases(b_vis)
    rbm.hidden_layers[0].set_weights(w_hid)
    rbm.hidden_layers[0].set_biases(b_hid)
    # Extra attributes, presumably read by rbm_tools -- confirm.
    rbm.nvis = nvis
    rbm.nhid = nhid

    # Compute real logz and associated train_ll and test_ll using rbm_tools
    v_sample = T.matrix('v_sample')
    h_sample = T.matrix('h_sample')
    W = theano.shared(rbm.hidden_layers[0].get_weights())
    hbias = theano.shared(rbm.hidden_layers[0].get_biases())
    vbias = theano.shared(rbm.visible_layer.get_biases())

    # Free energy of a visible configuration (hidden units summed out).
    wx_b = T.dot(v_sample, W) + hbias
    vbias_term = T.dot(v_sample, vbias)
    hidden_term = T.sum(T.log(1 + T.exp(wx_b)), axis=1)
    free_energy_v = -hidden_term - vbias_term
    free_energy_v_fn = theano.function(inputs=[v_sample],
                                       outputs=free_energy_v)

    # Free energy of a hidden configuration (visible units summed out).
    wh_c = T.dot(h_sample, W.T) + vbias
    hbias_term = T.dot(h_sample, hbias)
    visible_term = T.sum(T.log(1 + T.exp(wh_c)), axis=1)
    free_energy_h = -visible_term - hbias_term
    free_energy_h_fn = theano.function(inputs=[h_sample],
                                       outputs=free_energy_h)

    real_logz = rbm_tools.compute_log_z(rbm, free_energy_h_fn)

    real_ais_train_ll = -rbm_tools.compute_nll(
        rbm, trainset.get_design_matrix(), real_logz, free_energy_v_fn)
    real_ais_test_ll = -rbm_tools.compute_nll(rbm, testset.get_design_matrix(),
                                              real_logz, free_energy_v_fn)

    # Compute train_ll, test_ll and logz using dbm_metrics
    train_ll, test_ll, logz = dbm_metrics.estimate_likelihood([W],
                                                              [vbias, hbias],
                                                              trainset,
                                                              testset,
                                                              pos_mf_steps=100)
    # Compare absolute differences: a signed difference would also accept
    # an estimate that is arbitrarily larger than the reference value.
    assert abs(real_logz - logz) < 2.0
    assert abs(real_ais_train_ll - train_ll) < 2.0
    assert abs(real_ais_test_ll - test_ll) < 2.0