Example #1
    def test_gradients_auto_vs_manual(self):
        rng = np.random.RandomState(42)

        batch_size = 5
        input_size = 10

        model = iRBM(input_size=input_size,
                     hidden_size=32,
                     beta=1.01,
                     CDk=1,
                     rng=np.random.RandomState(42))

        W = rng.rand(model.hidden_size, model.input_size).astype(theano.config.floatX)
        model.W = theano.shared(value=W, name='W', borrow=True)

        b = rng.rand(model.hidden_size).astype(theano.config.floatX)
        model.b = theano.shared(value=b, name='b', borrow=True)

        c = rng.rand(model.input_size).astype(theano.config.floatX)
        model.c = theano.shared(value=c, name='c', borrow=True)

        params = [model.W, model.b, model.c]
        chain_start = T.matrix('start')
        chain_end = T.matrix('end')

        chain_start_value = (rng.rand(batch_size, input_size) > 0.5).astype(theano.config.floatX)
        chain_end_value = (rng.rand(batch_size, input_size) > 0.5).astype(theano.config.floatX)
        chain_start.tag.test_value = chain_start_value
        chain_end.tag.test_value = chain_end_value

        ### Computing gradients using automatic differentiation ###
        cost = T.mean(model.free_energy(chain_start)) - T.mean(model.free_energy(chain_end))
        gparams_auto = T.grad(cost, params, consider_constant=[chain_end])

        ### Computing gradients manually ###
        h = RBM.sample_h_given_v(model, chain_start, return_probs=True)
        _h = RBM.sample_h_given_v(model, chain_end, return_probs=True)
        icdf = model.icdf_z_given_v(chain_start)
        _icdf = model.icdf_z_given_v(chain_end)

        if model.penalty == "softplus_bi":
            penalty = model.beta * T.nnet.sigmoid(model.b)
        elif model.penalty == "softplus0":
            penalty = model.beta * T.nnet.sigmoid(0)
        else:
            raise ValueError("Unknown penalty term: {0}".format(model.penalty))

        grad_W = (T.dot(chain_end.T, _h*_icdf) - T.dot(chain_start.T, h*icdf)).T / batch_size
        grad_b = T.mean((_h-penalty)*_icdf - (h-penalty)*icdf, axis=0)
        grad_c = T.mean(chain_end - chain_start, axis=0)

        gparams_manual = [grad_W, grad_b, grad_c]
        grad_W.name, grad_b.name, grad_c.name = "grad_W", "grad_b", "grad_c"

        for gparam_auto, gparam_manual in zip(gparams_auto, gparams_manual):
            param1 = gparam_auto.eval({chain_start: chain_start_value, chain_end: chain_end_value})
            param2 = gparam_manual.eval({chain_start: chain_start_value, chain_end: chain_end_value})
            assert_array_almost_equal(param1, param2, err_msg=gparam_manual.name, decimal=5)  # decimal=5 needed for float32
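A note on what this test verifies: relative to the plain-RBM statistics of Example #2 below, each hidden unit's contribution is weighted by the output of `icdf_z_given_v`, which by its name and usage reads as the per-unit tail probability $P(z \ge i \mid v)$ of the model's hidden-layer-size variable $z$, while `penalty` is the derivative of the per-unit penalty term in the free energy ($\beta\,\sigma(b_i)$ in the "softplus_bi" case). Under that reading (an interpretation of this code, not an authoritative statement of the model's notation), the W-gradient being checked is

    \frac{\partial\,\mathrm{cost}}{\partial W_{ij}} = \mathbb{E}\big[\sigma(b_i + W_i v^-)\,P(z \ge i \mid v^-)\,v_j^-\big] - \mathbb{E}\big[\sigma(b_i + W_i v^0)\,P(z \ge i \mid v^0)\,v_j^0\big]

with $v^0$ = chain_start, $v^-$ = chain_end, and expectations taken as batch means; it collapses to the ordinary RBM gradient of Example #2 whenever every weight $P(z \ge i \mid v)$ equals 1.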
Example #2
    def test_gradients_auto_vs_manual(self):
        rng = np.random.RandomState(42)

        batch_size = 5
        input_size = 10

        rbm = RBM(input_size=input_size,
                  hidden_size=32,
                  CDk=1,
                  rng=np.random.RandomState(42))

        W = (rng.rand(rbm.hidden_size, rbm.input_size) > 0.5).astype(theano.config.floatX)
        rbm.W = theano.shared(value=W, name='W', borrow=True)

        b = (rng.rand(rbm.hidden_size) > 0.5).astype(theano.config.floatX)
        rbm.b = theano.shared(value=b, name='b', borrow=True)

        c = (rng.rand(rbm.input_size) > 0.5).astype(theano.config.floatX)
        rbm.c = theano.shared(value=c, name='c', borrow=True)

        params = [rbm.W, rbm.b, rbm.c]
        chain_start = T.matrix('start')
        chain_end = T.matrix('end')

        chain_start_value = (rng.rand(batch_size, input_size) > 0.5).astype(theano.config.floatX)
        chain_end_value = (rng.rand(batch_size, input_size) > 0.5).astype(theano.config.floatX)
        chain_start.tag.test_value = chain_start_value
        chain_end.tag.test_value = chain_end_value

        ### Computing gradients using automatic differentiation ###
        cost = T.mean(rbm.free_energy(chain_start)) - T.mean(rbm.free_energy(chain_end))
        gparams_auto = T.grad(cost, params, consider_constant=[chain_end])

        ### Computing gradients manually ###
        h = rbm.sample_h_given_v(chain_start, return_probs=True)
        _h = rbm.sample_h_given_v(chain_end, return_probs=True)

        grad_W = (T.dot(chain_end.T, _h) - T.dot(chain_start.T, h)).T / batch_size
        grad_b = T.mean(_h - h, axis=0)
        grad_c = T.mean(chain_end - chain_start, axis=0)

        gparams_manual = [grad_W, grad_b, grad_c]
        grad_W.name, grad_b.name, grad_c.name = "grad_W", "grad_b", "grad_c"

        for gparam_auto, gparam_manual in zip(gparams_auto, gparams_manual):
            param1 = gparam_auto.eval({chain_start: chain_start_value, chain_end: chain_end_value})
            param2 = gparam_manual.eval({chain_start: chain_start_value, chain_end: chain_end_value})
            assert_array_almost_equal(param1, param2, err_msg=gparam_manual.name)
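The manual expressions above are the exact gradients of the free-energy cost. For a binary RBM the free energy is

    F(v) = -c^\top v - \sum_i \mathrm{softplus}(b_i + W_i v)

and since $\mathrm{softplus}'(x) = \sigma(x)$,

    \frac{\partial F(v)}{\partial W_{ij}} = -\sigma(b_i + W_i v)\, v_j, \qquad \frac{\partial F(v)}{\partial b_i} = -\sigma(b_i + W_i v), \qquad \frac{\partial F(v)}{\partial c_j} = -v_j.

Because $\sigma(b + Wv)$ is exactly what `sample_h_given_v(..., return_probs=True)` returns, differentiating cost = mean F(chain_start) - mean F(chain_end) yields precisely grad_W, grad_b and grad_c as written above, which is what the assertion checks against Theano's automatic differentiation.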
Example #3
    def test_gradients_auto_vs_manual(self):
        rng = np.random.RandomState(42)

        batch_size = 5
        input_size = 10

        model = oRBM(input_size=input_size,
                     hidden_size=32,
                     CDk=1,
                     rng=np.random.RandomState(42))

        W = rng.rand(model.hidden_size,
                     model.input_size).astype(theano.config.floatX)
        model.W = theano.shared(value=W, name='W', borrow=True)

        b = rng.rand(model.hidden_size).astype(theano.config.floatX)
        model.b = theano.shared(value=b, name='b', borrow=True)

        c = rng.rand(model.input_size).astype(theano.config.floatX)
        model.c = theano.shared(value=c, name='c', borrow=True)

        params = [model.W, model.b, model.c]
        chain_start = T.matrix('start')
        chain_end = T.matrix('end')

        chain_start_value = (rng.rand(batch_size, input_size) > 0.5).astype(
            theano.config.floatX)
        chain_end_value = (rng.rand(batch_size, input_size) > 0.5).astype(
            theano.config.floatX)
        chain_start.tag.test_value = chain_start_value
        chain_end.tag.test_value = chain_end_value

        ### Computing gradients using automatic differentiation ###
        cost = T.mean(model.free_energy(chain_start)) - T.mean(
            model.free_energy(chain_end))
        gparams_auto = T.grad(cost, params, consider_constant=[chain_end])

        ### Computing gradients manually ###
        h = RBM.sample_h_given_v(model, chain_start, return_probs=True)
        _h = RBM.sample_h_given_v(model, chain_end, return_probs=True)
        icdf = model.icdf_z_given_v(chain_start)
        _icdf = model.icdf_z_given_v(chain_end)

        if model.penalty == "softplus_bi":
            penalty = model.beta * T.nnet.sigmoid(model.b)
        elif model.penalty == "softplus0":
            penalty = model.beta * T.nnet.sigmoid(0)
        else:
            raise ValueError("Unknown penalty term: {0}".format(model.penalty))

        grad_W = (T.dot(chain_end.T, _h * _icdf) -
                  T.dot(chain_start.T, h * icdf)).T / batch_size
        grad_b = T.mean((_h - penalty) * _icdf - (h - penalty) * icdf, axis=0)
        grad_c = T.mean(chain_end - chain_start, axis=0)

        gparams_manual = [grad_W, grad_b, grad_c]
        grad_W.name, grad_b.name, grad_c.name = "grad_W", "grad_b", "grad_c"

        for gparam_auto, gparam_manual in zip(gparams_auto, gparams_manual):
            param1 = gparam_auto.eval({
                chain_start: chain_start_value,
                chain_end: chain_end_value
            })
            param2 = gparam_manual.eval({
                chain_start: chain_start_value,
                chain_end: chain_end_value
            })
            assert_array_almost_equal(param1,
                                      param2,
                                      err_msg=gparam_manual.name)
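A side note on the `tag.test_value` assignments that appear in all three tests: they have no effect unless Theano's test-value machinery is switched on, in which case every node is evaluated on the test values while the graph is being built, so shape mismatches are reported at the offending line rather than deep inside a compiled function. A minimal standalone sketch:

import numpy as np
import theano
import theano.tensor as T

theano.config.compute_test_value = 'warn'  # or 'raise' to fail hard

x = T.matrix('x')
x.tag.test_value = np.zeros((5, 10), dtype=theano.config.floatX)
y = T.dot(x, x.T)  # the test value propagates; bad shapes are caught here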
Example #4
def main():
    parser = buildArgsParser()
    args = parser.parse_args()

    # Check that at least one of --view or --save has been given.
    if not args.view and not args.save:
        parser.error("At least one of the following options must be chosen: --view or --save")

    # Get experiment folder
    experiment_path = args.name
    if not os.path.isdir(experiment_path):
        # If not a directory, it must be the name of the experiment.
        experiment_path = pjoin(".", "experiments", args.name)

    if not os.path.isdir(experiment_path):
        parser.error('Cannot find experiment: {0}!'.format(args.name))

    if not os.path.isfile(pjoin(experiment_path, "model.pkl")):
        parser.error('Cannot find model for experiment: {0}!'.format(experiment_path))

    if not os.path.isfile(pjoin(experiment_path, "hyperparams.json")):
        parser.error('Cannot find hyperparams for experiment: {0}!'.format(experiment_path))

    # Load the experiment's hyperparameters
    hyperparams = utils.load_dict_from_json_file(pjoin(experiment_path, "hyperparams.json"))

    with Timer("Loading model"):
        if hyperparams["model"] == "rbm":
            from iRBM.models.rbm import RBM
            model_class = RBM
        elif hyperparams["model"] == "orbm":
            from iRBM.models.orbm import oRBM
            model_class = oRBM
        elif hyperparams["model"] == "irbm":
            from iRBM.models.irbm import iRBM
            model_class = iRBM

        # Load the actual model.
        model = model_class.load(pjoin(experiment_path, "model.pkl"))

    rng = np.random.RandomState(args.seed)

    # Sample from uniform
    # TODO: sample from a Bernoulli distribution parameterized by the visible biases
    chain_start = (rng.rand(args.nb_samples, model.input_size) > 0.5).astype(theano.config.floatX)

    with Timer("Building sampling function"):
        v0 = theano.shared(np.asarray(chain_start, dtype=theano.config.floatX))
        v1 = model.gibbs_step(v0)
        gibbs_step = theano.function([], updates={v0: v1})

        if args.full_gibbs_step:
            print "Using z=K"
            # Use z=K for first Gibbs step.
            from iRBM.models.rbm import RBM
            h0 = RBM.sample_h_given_v(model, v0)
            v1 = RBM.sample_v_given_h(model, h0)
            v0.set_value(v1.eval())

    with Timer("Sampling"):
        for k in range(args.cdk):
            gibbs_step()

    samples = v0.get_value()

    if args.save:
        np.savez(args.out, samples)

    if args.view:
        if hyperparams["dataset"] == "binarized_mnist":
            image_shape = (28, 28)
        elif hyperparams["dataset"] == "caltech101_silhouettes28":
            image_shape = (28, 28)
        else:
            raise ValueError("Unknown dataset: {0}".format(hyperparams["dataset"]))

        data = vizu.concatenate_images(samples, shape=image_shape, border_size=1, clim=(0, 1))
        plt.imshow(data, cmap=plt.cm.gray, interpolation='nearest')
        plt.show()
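For reference, here is a minimal self-contained NumPy sketch of what one `gibbs_step` call amounts to in the plain binary-RBM case (the (hidden_size, input_size) orientation of W and the b/c bias naming follow the tests above; the oRBM/iRBM variants additionally involve the hidden-layer-size variable z, which this sketch omits):

import numpy as np

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

def gibbs_step_numpy(v, W, b, c, rng):
    """One full Gibbs transition v -> h -> v' for a binary RBM."""
    p_h = sigmoid(v.dot(W.T) + b)                        # P(h=1 | v)
    h = (rng.rand(*p_h.shape) < p_h).astype(v.dtype)     # sample h
    p_v = sigmoid(h.dot(W) + c)                          # P(v=1 | h)
    return (rng.rand(*p_v.shape) < p_v).astype(v.dtype)  # sample v'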