Example #1
    def test_verify_AIS(self):
        model = RBM(input_size=self.input_size, hidden_size=self.hidden_size)

        model.W.set_value(self.W)
        model.b.set_value(self.b)
        model.c.set_value(self.c)

        # Brute force
        print "Computing lnZ using brute force (i.e. summing the free energy of all posible $v$)..."
        V = theano.shared(
            value=cartesian([(0, 1)] * self.input_size, dtype=config.floatX))
        brute_force_lnZ = logsumexp(-model.free_energy(V), 0)
        f_brute_force_lnZ = theano.function([], brute_force_lnZ)

        params_bak = [param.get_value() for param in model.parameters]

        print "Approximating lnZ using AIS..."
        import time
        start = time.time()

        experiment_path = tempfile.mkdtemp()
        try:
            result = compute_AIS(model,
                                 M=self.nb_samples,
                                 betas=self.betas,
                                 seed=1234,
                                 experiment_path=experiment_path,
                                 force=True)
            logcummean_Z = result['logcummean_Z']
            logcumstd_Z_down = result['logcumstd_Z_down']
            logcumstd_Z_up = result['logcumstd_Z_up']
            std_lnZ = result['std_lnZ']

            print "{0} sec".format(time.time() - start)

            import pylab as plt
            plt.gca().set_xmargin(0.1)
            plt.errorbar(range(1, self.nb_samples + 1),
                         logcummean_Z,
                         yerr=[std_lnZ, std_lnZ],
                         fmt='or')
            plt.errorbar(range(1, self.nb_samples + 1),
                         logcummean_Z,
                         yerr=[logcumstd_Z_down, logcumstd_Z_up],
                         fmt='ob')
            plt.plot([1, self.nb_samples], [f_brute_force_lnZ()] * 2, '--g')
            plt.ticklabel_format(useOffset=False, axis='y')
            plt.show()
            AIS_logZ = logcummean_Z[-1]

            assert_array_equal(params_bak[0], model.W.get_value())
            assert_array_equal(params_bak[1], model.b.get_value())
            assert_array_equal(params_bak[2], model.c.get_value())

            print "Absolute diff:", np.abs(AIS_logZ - f_brute_force_lnZ())
            assert_almost_equal(AIS_logZ, f_brute_force_lnZ(), decimal=2)
        finally:
            shutil.rmtree(experiment_path)
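For reference, here is a minimal NumPy sketch of the brute-force lnZ baseline this test checks AIS against, assuming the standard binary RBM free energy F(v) = -c·v - sum_j softplus(b_j + W_j·v); the helper names below are illustrative, not part of the library:

import itertools
import numpy as np

def free_energy(V, W, b, c):
    # V: (N, input_size) batch of binary visible vectors; W: (hidden_size, input_size).
    return -V.dot(c) - np.logaddexp(0, V.dot(W.T) + b).sum(axis=1)

def logsumexp(a):
    m = a.max()  # stabilize against overflow
    return m + np.log(np.exp(a - m).sum())

def brute_force_lnZ(W, b, c):
    input_size = W.shape[1]
    # Enumerate all 2^input_size binary visible configurations (small models only).
    V = np.array(list(itertools.product([0, 1], repeat=input_size)), dtype=float)
    return logsumexp(-free_energy(V, W, b, c))

rng = np.random.RandomState(42)
W, b, c = rng.randn(3, 4), rng.randn(3), rng.randn(4)
print(brute_force_lnZ(W, b, c))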
Example #2
    def test_gradients_auto_vs_manual(self):
        rng = np.random.RandomState(42)

        batch_size = 5
        input_size = 10

        model = iRBM(input_size=input_size,
                     hidden_size=32,
                     beta=1.01,
                     CDk=1,
                     rng=np.random.RandomState(42))

        W = rng.rand(model.hidden_size, model.input_size).astype(theano.config.floatX)
        model.W = theano.shared(value=W.astype(theano.config.floatX), name='W', borrow=True)

        b = rng.rand(model.hidden_size).astype(theano.config.floatX)
        model.b = theano.shared(value=b.astype(theano.config.floatX), name='b', borrow=True)

        c = rng.rand(model.input_size).astype(theano.config.floatX)
        model.c = theano.shared(value=c.astype(theano.config.floatX), name='c', borrow=True)

        params = [model.W, model.b, model.c]
        chain_start = T.matrix('start')
        chain_end = T.matrix('end')

        chain_start_value = (rng.rand(batch_size, input_size) > 0.5).astype(theano.config.floatX)
        chain_end_value = (rng.rand(batch_size, input_size) > 0.5).astype(theano.config.floatX)
        chain_start.tag.test_value = chain_start_value
        chain_end.tag.test_value = chain_end_value

        ### Computing gradients using automatic differentiation ###
        cost = T.mean(model.free_energy(chain_start)) - T.mean(model.free_energy(chain_end))
        gparams_auto = T.grad(cost, params, consider_constant=[chain_end])

        ### Computing gradients manually ###
        h = RBM.sample_h_given_v(model, chain_start, return_probs=True)
        _h = RBM.sample_h_given_v(model, chain_end, return_probs=True)
        icdf = model.icdf_z_given_v(chain_start)
        _icdf = model.icdf_z_given_v(chain_end)

        if model.penalty == "softplus_bi":
            penalty = model.beta * T.nnet.sigmoid(model.b)
        elif model.penalty == "softplus0":
            penalty = model.beta * T.nnet.sigmoid(0)

        grad_W = (T.dot(chain_end.T, _h*_icdf) - T.dot(chain_start.T, h*icdf)).T / batch_size
        grad_b = T.mean((_h-penalty)*_icdf - (h-penalty)*icdf, axis=0)
        grad_c = T.mean(chain_end - chain_start, axis=0)

        gparams_manual = [grad_W, grad_b, grad_c]
        grad_W.name, grad_b.name, grad_c.name = "grad_W", "grad_b", "grad_c"

        for gparam_auto, gparam_manual in zip(gparams_auto, gparams_manual):
            param1 = gparam_auto.eval({chain_start: chain_start_value, chain_end: chain_end_value})
            param2 = gparam_manual.eval({chain_start: chain_start_value, chain_end: chain_end_value})
            assert_array_almost_equal(param1, param2, err_msg=gparam_manual.name, decimal=5)  # decimal=5 needed for float32
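The manual gradients in these tests come from differentiating the free energy. For the plain binary RBM, dF(v)/db_j = -sigmoid(b_j + W_j·v), i.e. minus the hidden activation probability returned by sample_h_given_v(..., return_probs=True); the iRBM version above additionally weights by the icdf of z and subtracts the penalty term. A standalone finite-difference check of the plain-RBM identity, assuming the same free-energy form as in the sketch after Example #1:

import numpy as np

def free_energy(v, W, b, c):
    # Standard binary-RBM free energy for a single visible vector.
    return -v.dot(c) - np.logaddexp(0, W.dot(v) + b).sum()

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

rng = np.random.RandomState(42)
W, b, c = rng.randn(3, 4), rng.randn(3), rng.randn(4)
v = (rng.rand(4) > 0.5).astype(float)

analytic = -sigmoid(W.dot(v) + b)  # dF/db, i.e. minus p(h=1 | v)
eps = 1e-6
numeric = np.empty_like(b)
for j in range(len(b)):
    e = np.zeros_like(b)
    e[j] = eps
    numeric[j] = (free_energy(v, W, b + e, c) - free_energy(v, W, b - e, c)) / (2 * eps)

assert np.allclose(analytic, numeric, atol=1e-5)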
Example #3
    def __init__(self,
                 input_size,
                 hidden_size,
                 beta=1,
                 penalty="softplus_bi",
                 *args, **kwargs):

        RBM.__init__(self, input_size, hidden_size, *args, **kwargs)

        self.penalty = penalty
        self.beta = theano.shared(np.array(beta, dtype=theano.config.floatX), name="beta")
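A hedged usage sketch of this constructor, assuming it belongs to oRBM (the "oRBM_version" key in the __getstate__ examples below suggests so); argument values are illustrative, and extra keyword arguments are forwarded to RBM.__init__:

model = oRBM(input_size=784, hidden_size=32, beta=1.01, penalty="softplus_bi")
print(model.beta.get_value())  # beta is stored as a Theano shared variable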
Example #4
    def __init__(self,
                 input_size,
                 hidden_size,
                 beta=1,
                 penalty="softplus_bi",
                 *args,
                 **kwargs):

        RBM.__init__(self, input_size, hidden_size, *args, **kwargs)

        self.penalty = penalty
        self.beta = theano.shared(np.array(beta, dtype=theano.config.floatX),
                                  name="beta")
Example #5
    def get_base_rate(self, base_rate_type="uniform"):
        base_rate, annealable_params = RBM.get_base_rate(self, base_rate_type)
        #annealable_params.append(self.beta)  # Seems to work better without annealing self.beta (see unit tests)

        if base_rate_type == "uniform":
            def compute_lnZ(self):
                # Since the biases and weights are all 0, each of the $2^{input\_size}$
                #  visible states contributes the same hidden-side factor:
                #  $\sum_{z=1}^H \sum_{h \in \{0,1\}^z} \exp(-\beta z \ln(2))$
                r = T.exp((1-self.beta) * T.log(2))  # Common ratio of the geometric series
                lnZ = T.log((r - r**(self.hidden_size+1)) / (1-r))  # Finite geometric sum $\sum_{z=1}^H r^z$
                return (self.input_size * T.log(2) +  # $\ln(2^{input\_size})$
                        lnZ)  # $\ln(\sum_{z=1}^H \sum_{h \in \{0,1\}^z} \exp(-\beta z \ln(2)))$

        elif base_rate_type == "c":
            def compute_lnZ(self):
                # Here the hidden biases and weights are all 0, but the visible biases come from the model.
                r = T.exp((1-self.beta) * T.log(2))  # Common ratio of the geometric series
                lnZ = T.log((r - r**(self.hidden_size+1)) / (1-r))  # Finite geometric sum $\sum_{z=1}^H r^z$
                return (lnZ +  # $ln( \sum_{z=1}^H \sum_{h \in \{0,1\}^z} \exp(-\beta z \ln(2)) )$
                        T.sum(T.nnet.softplus(self.c)))

        elif base_rate_type == "b":
            raise NotImplementedError()

        import types
        base_rate.compute_lnZ = types.MethodType(compute_lnZ, base_rate)

        return base_rate, annealable_params
Example #6
    def get_base_rate(self, base_rate_type="uniform"):
        base_rate, annealable_params = RBM.get_base_rate(self, base_rate_type)
        #annealable_params.append(self.beta)  # Seems to work better without annealing self.beta (see unit tests)

        if base_rate_type == "uniform":
            def compute_lnZ(self):
                # Since the biases and weights are all 0, each of the $2^{input\_size}$
                #  visible states contributes the same hidden-side factor:
                #  $\sum_{z=1}^\infty \sum_{h \in \{0,1\}^z} \exp(-\beta z \ln(2))$
                r = T.exp((1-self.beta) * T.log(2))  # Common ratio of the geometric series
                lnZ = T.log(r / (1-r))  # Limit of the infinite geometric series (requires r < 1, i.e. beta > 1)
                return (self.input_size * T.log(2) +  # $\ln(2^{input\_size})$
                        lnZ)  # $\ln(\sum_{z=1}^\infty \sum_{h \in \{0,1\}^z} \exp(-\beta z \ln(2)))$

        elif base_rate_type == "c":
            def compute_lnZ(self):
                # Here the hidden biases and weights are all 0, but the visible biases come from the model.
                r = T.exp((1-self.beta) * T.log(2))  # Common ratio of the geometric series
                lnZ = T.log(r / (1-r))  # Limit of the infinite geometric series (requires r < 1, i.e. beta > 1)
                return (lnZ +  # $\ln(\sum_{z=1}^\infty \sum_{h \in \{0,1\}^z} \exp(-\beta z \ln(2)))$
                        T.sum(T.nnet.softplus(self.c)))

        elif base_rate_type == "b":
            raise NotImplementedError()

        import types
        base_rate.compute_lnZ = types.MethodType(compute_lnZ, base_rate)

        return base_rate, annealable_params
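Both variants of compute_lnZ rest on the same geometric series with common ratio r = exp((1-beta)·ln 2) = 2^(1-beta): Example #5 keeps the finite sum up to hidden_size, while Example #6 takes the infinite limit r/(1-r), which converges only for r < 1, i.e. beta > 1. A quick standalone check of both closed forms (values below are illustrative):

import numpy as np

beta, H = 1.01, 32  # illustrative values
r = 2.0 ** (1.0 - beta)  # = exp((1 - beta) * ln 2)

finite = sum(r ** z for z in range(1, H + 1))
closed_finite = (r - r ** (H + 1)) / (1 - r)
assert np.isclose(finite, closed_finite)

closed_infinite = r / (1 - r)  # limit as H -> infinity, requires r < 1
print(closed_finite, closed_infinite)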
Example #7
    def setUp(self):
        self.input_size = 4
        self.hidden_size = 3
        self.batch_size = 100

        rng = np.random.RandomState(42)
        self.W = rng.randn(self.hidden_size, self.input_size).astype(config.floatX)
        self.b = rng.randn(self.hidden_size).astype(config.floatX)
        self.c = rng.randn(self.input_size).astype(config.floatX)

        self.model = RBM(input_size=self.input_size,
                         hidden_size=self.hidden_size)

        self.model.W.set_value(self.W)
        self.model.b.set_value(self.b)
        self.model.c.set_value(self.c)
Example #8
    def test_verify_AIS(self):
        model = RBM(input_size=self.input_size,
                    hidden_size=self.hidden_size)

        model.W.set_value(self.W)
        model.b.set_value(self.b)
        model.c.set_value(self.c)

        # Brute force
        print "Computing lnZ using brute force (i.e. summing the free energy of all posible $v$)..."
        V = theano.shared(value=cartesian([(0, 1)] * self.input_size, dtype=config.floatX))
        brute_force_lnZ = logsumexp(-model.free_energy(V), 0)
        f_brute_force_lnZ = theano.function([], brute_force_lnZ)

        params_bak = [param.get_value() for param in model.parameters]

        print "Approximating lnZ using AIS..."
        import time
        start = time.time()

        ais_working_dir = tempfile.mkdtemp()
        try:
            result = compute_AIS(model, M=self.nb_samples, betas=self.betas, seed=1234, ais_working_dir=ais_working_dir, force=True)
            logcummean_Z, logcumstd_Z_down, logcumstd_Z_up = result['logcummean_Z'], result['logcumstd_Z_down'], result['logcumstd_Z_up']
            std_lnZ = result['std_lnZ']

            print "{0} sec".format(time.time() - start)

            import pylab as plt
            plt.gca().set_xmargin(0.1)
            plt.errorbar(range(1, self.nb_samples+1), logcummean_Z, yerr=[std_lnZ, std_lnZ], fmt='or')
            plt.errorbar(range(1, self.nb_samples+1), logcummean_Z, yerr=[logcumstd_Z_down, logcumstd_Z_up], fmt='ob')
            plt.plot([1, self.nb_samples], [f_brute_force_lnZ()]*2, '--g')
            plt.ticklabel_format(useOffset=False, axis='y')
            plt.show()
            AIS_logZ = logcummean_Z[-1]

            assert_array_equal(params_bak[0], model.W.get_value())
            assert_array_equal(params_bak[1], model.b.get_value())
            assert_array_equal(params_bak[2], model.c.get_value())

            print "Absolute diff:", np.abs(AIS_logZ - f_brute_force_lnZ())
            assert_almost_equal(AIS_logZ, f_brute_force_lnZ(), decimal=2)
        finally:
            shutil.rmtree(ais_working_dir)
Example #9
    def test_gradients_auto_vs_manual(self):
        rng = np.random.RandomState(42)

        batch_size = 5
        input_size = 10

        rbm = RBM(input_size=input_size,
                  hidden_size=32,
                  CDk=1,
                  rng=np.random.RandomState(42))

        W = (rng.rand(rbm.hidden_size, rbm.input_size) > 0.5).astype(theano.config.floatX)
        rbm.W = theano.shared(value=W.astype(theano.config.floatX), name='W', borrow=True)

        b = (rng.rand(rbm.hidden_size) > 0.5).astype(theano.config.floatX)
        rbm.b = theano.shared(value=b.astype(theano.config.floatX), name='b', borrow=True)

        c = (rng.rand(rbm.input_size) > 0.5).astype(theano.config.floatX)
        rbm.c = theano.shared(value=c.astype(theano.config.floatX), name='c', borrow=True)

        params = [rbm.W, rbm.b, rbm.c]
        chain_start = T.matrix('start')
        chain_end = T.matrix('end')

        chain_start_value = (rng.rand(batch_size, input_size) > 0.5).astype(theano.config.floatX)
        chain_end_value = (rng.rand(batch_size, input_size) > 0.5).astype(theano.config.floatX)
        chain_start.tag.test_value = chain_start_value
        chain_end.tag.test_value = chain_end_value

        ### Computing gradients using automatic differentiation ###
        cost = T.mean(rbm.free_energy(chain_start)) - T.mean(rbm.free_energy(chain_end))
        gparams_auto = T.grad(cost, params, consider_constant=[chain_end])

        ### Computing gradients manually ###
        h = rbm.sample_h_given_v(chain_start, return_probs=True)
        _h = rbm.sample_h_given_v(chain_end, return_probs=True)

        grad_W = (T.dot(chain_end.T, _h) - T.dot(chain_start.T, h)).T / batch_size
        grad_b = T.mean(_h - h, 0)
        grad_c = T.mean(chain_end - chain_start, 0)

        gparams_manual = [grad_W, grad_b, grad_c]
        grad_W.name, grad_b.name, grad_c.name = "grad_W", "grad_b", "grad_c"

        for gparam_auto, gparam_manual in zip(gparams_auto, gparams_manual):
            param1 = gparam_auto.eval({chain_start: chain_start_value, chain_end: chain_end_value})
            param2 = gparam_manual.eval({chain_start: chain_start_value, chain_end: chain_end_value})
            assert_array_almost_equal(param1, param2, err_msg=gparam_manual.name)
Example #10
    def __getstate__(self):
        state = {}
        state.update(RBM.__getstate__(self))
        state["oRBM_version"] = 1

        # Hyperparameters
        state["beta"] = self.beta.get_value()
        state["penalty"] = self.penalty

        return state
Example #11
    def __getstate__(self):
        state = {}
        state.update(RBM.__getstate__(self))
        state['oRBM_version'] = 1

        # Hyperparameters
        state['beta'] = self.beta.get_value()
        state['penalty'] = self.penalty

        return state
Example #12
class Test_RBM(unittest.TestCase):
    def setUp(self):
        self.input_size = 4
        self.hidden_size = 3
        self.batch_size = 100

        rng = np.random.RandomState(42)
        self.W = rng.randn(self.hidden_size, self.input_size).astype(config.floatX)
        self.b = rng.randn(self.hidden_size).astype(config.floatX)
        self.c = rng.randn(self.input_size).astype(config.floatX)

        self.model = RBM(input_size=self.input_size,
                         hidden_size=self.hidden_size)

        self.model.W.set_value(self.W)
        self.model.b.set_value(self.b)
        self.model.c.set_value(self.c)

    def test_free_energy(self):
        v = T.matrix('v')
        h = T.matrix('h')
        logsumexp_E = theano.function([v, h], -logsumexp(-self.model.E(v, h)))

        v1 = np.random.rand(1, self.input_size).astype(config.floatX)
        H = cartesian([(0, 1)] * self.hidden_size, dtype=config.floatX)
        Fv = logsumexp_E(v1, H)  # Marginalization over $\bh$

        v = T.matrix('v')
        free_energy = theano.function([v], self.model.free_energy(v))
        assert_array_almost_equal(free_energy(v1), [Fv])

        v2 = np.tile(v1, (self.batch_size, 1))
        assert_array_almost_equal(free_energy(v2), [Fv]*self.batch_size)

    def test_marginalize_over_v(self):
        v = T.matrix('v')
        h = T.matrix('h')
        E = theano.function([v, h], -logsumexp(-self.model.E(v, h)))

        h1 = np.random.rand(1, self.hidden_size).astype(config.floatX)
        V = cartesian([(0, 1)] * self.input_size, dtype=config.floatX)
        expected_energy = E(V, h1)

        h = T.matrix('h')
        marginalize_over_v = theano.function([h], self.model.marginalize_over_v(h))
        assert_array_almost_equal(marginalize_over_v(h1), [expected_energy])

        h2 = np.tile(h1, (self.batch_size, 1))
        assert_array_almost_equal(marginalize_over_v(h2), [expected_energy]*self.batch_size)

    def test_compute_lnZ(self):
        v = T.matrix('v')
        h = T.matrix('h')
        lnZ = theano.function([v, h], logsumexp(-self.model.E(v, h)))

        V = cartesian([(0, 1)] * self.input_size, dtype=config.floatX)
        H = cartesian([(0, 1)] * self.hidden_size, dtype=config.floatX)

        lnZ_using_free_energy = theano.function([v], logsumexp(-self.model.free_energy(v)))
        assert_equal(lnZ_using_free_energy(V), lnZ(V, H))

        lnZ_using_marginalize_over_v = theano.function([h], logsumexp(-self.model.marginalize_over_v(h)))
        assert_almost_equal(lnZ_using_marginalize_over_v(H), lnZ(V, H), decimal=6)

    def test_base_rate(self):
        # All binary combinations for V and H.
        V = cartesian([(0, 1)] * self.input_size, dtype=config.floatX)
        H = cartesian([(0, 1)] * self.hidden_size, dtype=config.floatX)

        base_rates = []
        # Add the uniform base rate, i.e. all parameters of the model are set to 0.
        base_rates.append(self.model.get_base_rate())
        # Add the base rate where visible biases are the ones from the model.
        base_rates.append(self.model.get_base_rate('c'))
        # Add the base rate where hidden biases are the ones from the model.
        base_rates.append(self.model.get_base_rate('b'))  # Not implemented

        for base_rate, anneable_params in base_rates:
            base_rate_lnZ = base_rate.compute_lnZ().eval().astype(config.floatX)

            brute_force_lnZ = logsumexp(-base_rate.E(V, H)).eval()
            assert_almost_equal(brute_force_lnZ.astype(config.floatX), base_rate_lnZ, decimal=6)

            theano_lnZ = logsumexp(-base_rate.free_energy(V), axis=0).eval()
            assert_almost_equal(theano_lnZ.astype(config.floatX), base_rate_lnZ, decimal=6)

            theano_lnZ = logsumexp(-base_rate.marginalize_over_v(H)).eval()
            assert_almost_equal(theano_lnZ.astype(config.floatX), base_rate_lnZ, decimal=6)

    @npt.dec.slow
    def test_binomial_from_uniform_cpu(self):
        # Test using numpy
        rng = np.random.RandomState(42)
        probs = rng.rand(10)

        seed = 1337
        nb_samples = 1000000
        rng = np.random.RandomState(seed)
        success1 = np.zeros(len(probs))
        for i in range(nb_samples):
            success1 += rng.binomial(n=1, p=probs)

        rng = np.random.RandomState(seed)
        success2 = np.zeros(len(probs))
        for i in range(nb_samples):
            success2 += (rng.rand(len(probs)) < probs).astype('int')

        success1 = success1 / nb_samples
        success2 = success2 / nb_samples

        assert_array_almost_equal(success1, success2)

        # Test using Theano's default RandomStreams
        theano_rng = RandomStreams(1337)
        rng_bin = theano_rng.binomial(size=probs.shape, n=1, p=probs, dtype=theano.config.floatX)
        success1 = np.zeros(len(probs))
        for i in range(nb_samples):
            success1 += rng_bin.eval()

        theano_rng = RandomStreams(1337)
        rng_bin = theano_rng.uniform(size=probs.shape, dtype=theano.config.floatX) < probs
        success2 = np.zeros(len(probs))
        for i in range(nb_samples):
            success2 += rng_bin.eval()

        assert_array_almost_equal(success1/nb_samples, success2/nb_samples)

        # Test using Theano's sandbox MRG RandomStreams
        theano_rng = MRG_RandomStreams(1337)
        success1 = theano_rng.binomial(size=probs.shape, n=1, p=probs, dtype=theano.config.floatX)

        theano_rng = MRG_RandomStreams(1337)
        success2 = theano_rng.uniform(size=probs.shape, dtype=theano.config.floatX) < probs

        assert_array_equal(success1.eval(), success2.eval())

    def test_gradients_auto_vs_manual(self):
        rng = np.random.RandomState(42)

        batch_size = 5
        input_size = 10

        rbm = RBM(input_size=input_size,
                  hidden_size=32,
                  CDk=1,
                  rng=np.random.RandomState(42))

        W = (rng.rand(rbm.hidden_size, rbm.input_size) > 0.5).astype(theano.config.floatX)
        rbm.W = theano.shared(value=W.astype(theano.config.floatX), name='W', borrow=True)

        b = (rng.rand(rbm.hidden_size) > 0.5).astype(theano.config.floatX)
        rbm.b = theano.shared(value=b.astype(theano.config.floatX), name='b', borrow=True)

        c = (rng.rand(rbm.input_size) > 0.5).astype(theano.config.floatX)
        rbm.c = theano.shared(value=c.astype(theano.config.floatX), name='c', borrow=True)

        params = [rbm.W, rbm.b, rbm.c]
        chain_start = T.matrix('start')
        chain_end = T.matrix('end')

        chain_start_value = (rng.rand(batch_size, input_size) > 0.5).astype(theano.config.floatX)
        chain_end_value = (rng.rand(batch_size, input_size) > 0.5).astype(theano.config.floatX)
        chain_start.tag.test_value = chain_start_value
        chain_end.tag.test_value = chain_end_value

        ### Computing gradients using automatic differentiation ###
        cost = T.mean(rbm.free_energy(chain_start)) - T.mean(rbm.free_energy(chain_end))
        gparams_auto = T.grad(cost, params, consider_constant=[chain_end])

        ### Computing gradients manually ###
        h = rbm.sample_h_given_v(chain_start, return_probs=True)
        _h = rbm.sample_h_given_v(chain_end, return_probs=True)

        grad_W = (T.dot(chain_end.T, _h) - T.dot(chain_start.T, h)).T / batch_size
        grad_b = T.mean(_h - h, 0)
        grad_c = T.mean(chain_end - chain_start, 0)

        gparams_manual = [grad_W, grad_b, grad_c]
        grad_W.name, grad_b.name, grad_c.name = "grad_W", "grad_b", "grad_c"

        for gparam_auto, gparam_manual in zip(gparams_auto, gparams_manual):
            param1 = gparam_auto.eval({chain_start: chain_start_value, chain_end: chain_end_value})
            param2 = gparam_manual.eval({chain_start: chain_start_value, chain_end: chain_end_value})
            assert_array_almost_equal(param1, param2, err_msg=gparam_manual.name)
Example #13
def main():
    parser = buildArgsParser()
    args = parser.parse_args()

    # Check that at least one of --view or --save has been given.
    if not args.view and not args.save:
        parser.error("At least one of the following options must be chosen: --view or --save")

    # Get experiment folder
    experiment_path = args.name
    if not os.path.isdir(experiment_path):
        # If not a directory, it must be the name of the experiment.
        experiment_path = pjoin(".", "experiments", args.name)

    if not os.path.isdir(experiment_path):
        parser.error('Cannot find experiment: {0}!'.format(args.name))

    if not os.path.isfile(pjoin(experiment_path, "model.pkl")):
        parser.error('Cannot find model for experiment: {0}!'.format(experiment_path))

    if not os.path.isfile(pjoin(experiment_path, "hyperparams.json")):
        parser.error('Cannot find hyperparams for experiment: {0}!'.format(experiment_path))

    # Load the experiment's hyperparameters
    hyperparams = utils.load_dict_from_json_file(pjoin(experiment_path, "hyperparams.json"))

    with Timer("Loading model"):
        if hyperparams["model"] == "rbm":
            from iRBM.models.rbm import RBM
            model_class = RBM
        elif hyperparams["model"] == "orbm":
            from iRBM.models.orbm import oRBM
            model_class = oRBM
        elif hyperparams["model"] == "irbm":
            from iRBM.models.irbm import iRBM
            model_class = iRBM

        # Load the actual model.
        model = model_class.load(pjoin(experiment_path, "model.pkl"))

    rng = np.random.RandomState(args.seed)

    # Sample from uniform
    # TODO: sample from a Bernoulli distribution parametrized with the visible biases
    chain_start = (rng.rand(args.nb_samples, model.input_size) > 0.5).astype(theano.config.floatX)

    with Timer("Building sampling function"):
        v0 = theano.shared(np.asarray(chain_start, dtype=theano.config.floatX))
        v1 = model.gibbs_step(v0)
        gibbs_step = theano.function([], updates={v0: v1})

        if args.full_gibbs_step:
            print "Using z=K"
            # Use z=K for first Gibbs step.
            from iRBM.models.rbm import RBM
            h0 = RBM.sample_h_given_v(model, v0)
            v1 = RBM.sample_v_given_h(model, h0)
            v0.set_value(v1.eval())

    with Timer("Sampling"):
        for k in range(args.cdk):
            gibbs_step()

    samples = v0.get_value()

    if args.save:
        np.savez(args.out, samples)

    if args.view:
        if hyperparams["dataset"] == "binarized_mnist":
            image_shape = (28, 28)
        elif hyperparams["dataset"] == "caltech101_silhouettes28":
            image_shape = (28, 28)
        else:
            raise ValueError("Unknown dataset: {0}".format(hyperparams["dataset"]))

        data = vizu.concatenate_images(samples, shape=image_shape, border_size=1, clim=(0, 1))
        plt.imshow(data, cmap=plt.cm.gray, interpolation='nearest')
        plt.show()
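For a plain binary RBM, the transition that model.gibbs_step abstracts is h ~ Bernoulli(sigmoid(Wv + b)) followed by v ~ Bernoulli(sigmoid(W^T h + c)); the oRBM/iRBM variants also involve the number of active hidden units z, which is why the script can force z=K through the base-class methods. A self-contained NumPy sketch under the plain-RBM assumption (all names below are illustrative):

import numpy as np

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

def gibbs_step(v, W, b, c, rng):
    h_probs = sigmoid(v.dot(W.T) + b)                  # p(h=1 | v)
    h = (rng.rand(*h_probs.shape) < h_probs) * 1.0     # sample h
    v_probs = sigmoid(h.dot(W) + c)                    # p(v=1 | h)
    return (rng.rand(*v_probs.shape) < v_probs) * 1.0  # sample v

rng = np.random.RandomState(1234)
W, b, c = rng.randn(16, 28 * 28) * 0.01, np.zeros(16), np.zeros(28 * 28)
v = (rng.rand(10, 28 * 28) > 0.5).astype(float)  # start chains from uniform noise
for _ in range(100):
    v = gibbs_step(v, W, b, c, rng)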
Example #14
    def __setstate__(self, state):
        RBM.__setstate__(self, state)

        # Hyperparameters
        self.beta = theano.shared(state['beta'], name="beta")
        self.penalty = state['penalty']
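Together with __getstate__ above, this implements the standard pickle protocol: __getstate__ exports plain values (including the shared variable's contents), and __setstate__ rebuilds the runtime objects on load. A self-contained sketch of the pattern with a stand-in class (Dummy is hypothetical, not part of the package, and uses a plain float where the real class uses a Theano shared variable):

import pickle

class Dummy(object):
    def __init__(self, beta=1.0, penalty="softplus_bi"):
        self.beta = beta          # stands in for a theano.shared variable
        self.penalty = penalty

    def __getstate__(self):
        return {"oRBM_version": 1, "beta": self.beta, "penalty": self.penalty}

    def __setstate__(self, state):
        self.beta = state["beta"]
        self.penalty = state["penalty"]

model = Dummy(beta=1.01, penalty="softplus0")
clone = pickle.loads(pickle.dumps(model))
assert clone.beta == 1.01 and clone.penalty == "softplus0"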
Example #15
    def test_gradients_auto_vs_manual(self):
        rng = np.random.RandomState(42)

        batch_size = 5
        input_size = 10

        model = oRBM(input_size=input_size,
                     hidden_size=32,
                     CDk=1,
                     rng=np.random.RandomState(42))

        W = rng.rand(model.hidden_size,
                     model.input_size).astype(theano.config.floatX)
        model.W = theano.shared(value=W.astype(theano.config.floatX),
                                name='W',
                                borrow=True)

        b = rng.rand(model.hidden_size).astype(theano.config.floatX)
        model.b = theano.shared(value=b.astype(theano.config.floatX),
                                name='b',
                                borrow=True)

        c = rng.rand(model.input_size).astype(theano.config.floatX)
        model.c = theano.shared(value=c.astype(theano.config.floatX),
                                name='c',
                                borrow=True)

        params = [model.W, model.b, model.c]
        chain_start = T.matrix('start')
        chain_end = T.matrix('end')

        chain_start_value = (rng.rand(batch_size, input_size) > 0.5).astype(
            theano.config.floatX)
        chain_end_value = (rng.rand(batch_size, input_size) > 0.5).astype(
            theano.config.floatX)
        chain_start.tag.test_value = chain_start_value
        chain_end.tag.test_value = chain_end_value

        ### Computing gradients using automatic differentiation ###
        cost = T.mean(model.free_energy(chain_start)) - T.mean(
            model.free_energy(chain_end))
        gparams_auto = T.grad(cost, params, consider_constant=[chain_end])

        ### Computing gradients manually ###
        h = RBM.sample_h_given_v(model, chain_start, return_probs=True)
        _h = RBM.sample_h_given_v(model, chain_end, return_probs=True)
        icdf = model.icdf_z_given_v(chain_start)
        _icdf = model.icdf_z_given_v(chain_end)

        if model.penalty == "softplus_bi":
            penalty = model.beta * T.nnet.sigmoid(model.b)
        elif model.penalty == "softplus0":
            penalty = model.beta * T.nnet.sigmoid(0)

        grad_W = (T.dot(chain_end.T, _h * _icdf) -
                  T.dot(chain_start.T, h * icdf)).T / batch_size
        grad_b = T.mean((_h - penalty) * _icdf - (h - penalty) * icdf, axis=0)
        grad_c = T.mean(chain_end - chain_start, axis=0)

        gparams_manual = [grad_W, grad_b, grad_c]
        grad_W.name, grad_b.name, grad_c.name = "grad_W", "grad_b", "grad_c"

        for gparam_auto, gparam_manual in zip(gparams_auto, gparams_manual):
            param1 = gparam_auto.eval({
                chain_start: chain_start_value,
                chain_end: chain_end_value
            })
            param2 = gparam_manual.eval({
                chain_start: chain_start_value,
                chain_end: chain_end_value
            })
            assert_array_almost_equal(param1,
                                      param2,
                                      err_msg=gparam_manual.name)
Example #16
def model_factory(model_name, input_size, hyperparams):
    # Set learning rate method that will be used.
    if hyperparams["ConstantLearningRate"] is not None:
        infos = hyperparams["ConstantLearningRate"].split()
        lr = float(infos[0])
        lr_method = ConstantLearningRate(lr=lr)
    elif hyperparams["ADAGRAD"] is not None:
        infos = hyperparams["ADAGRAD"].split()
        lr = float(infos[0])
        eps = float(infos[1]) if len(infos) > 1 else 1e-6
        lr_method = ADAGRAD(lr=lr, eps=eps)
    else:
        raise ValueError("The update rule is mandatory!")

    # Set regularization method that will be used.
    regularization_method = NoRegularization()
    if hyperparams["L1Regularization"] is not None and hyperparams[
            "L1Regularization"] != 0:
        lambda_factor = float(hyperparams["L1Regularization"])
        regularization_method = L1Regularization(lambda_factor)
    elif hyperparams["L2Regularization"] is not None and hyperparams[
            "L2Regularization"] != 0:
        lambda_factor = float(hyperparams["L2Regularization"])
        regularization_method = L2Regularization(lambda_factor)

    # Set contrastive divergence method to use.
    CD_method = ContrastiveDivergence()
    if hyperparams["PCD"]:
        CD_method = PersistentCD(input_size,
                                 nb_particles=hyperparams['batch_size'])

    rng = np.random.RandomState(hyperparams["seed"])

    # Build model
    if model_name == "rbm":
        from iRBM.models.rbm import RBM
        model = RBM(input_size=input_size,
                    hidden_size=hyperparams["size"],
                    learning_rate=lr_method,
                    regularization=regularization_method,
                    CD=CD_method,
                    CDk=hyperparams["cdk"],
                    rng=rng)

    elif model_name == "orbm":
        from iRBM.models.orbm import oRBM
        model = oRBM(input_size=input_size,
                     hidden_size=hyperparams["size"],
                     beta=hyperparams["beta"],
                     learning_rate=lr_method,
                     regularization=regularization_method,
                     CD=CD_method,
                     CDk=hyperparams["cdk"],
                     rng=rng)

    elif model_name == "irbm":
        from iRBM.models.irbm import iRBM
        model = iRBM(input_size=input_size,
                     hidden_size=hyperparams["size"],
                     beta=hyperparams["beta"],
                     learning_rate=lr_method,
                     regularization=regularization_method,
                     CD=CD_method,
                     CDk=hyperparams["cdk"],
                     rng=rng)

    return model
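A hypothetical call illustrating the shape of the hyperparams dict this factory expects; the keys mirror the lookups in the function above, with the unused update rule set to None, and the values are purely illustrative:

hyperparams = {
    "ConstantLearningRate": "0.01",  # parsed with .split(), so a string
    "ADAGRAD": None,                 # unused alternative update rule
    "L1Regularization": None,
    "L2Regularization": 0.001,
    "PCD": True,
    "batch_size": 64,
    "seed": 1234,
    "size": 32,
    "beta": 1.01,
    "cdk": 1,
}
model = model_factory("irbm", input_size=784, hyperparams=hyperparams)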
Example #17
class Test_RBM(unittest.TestCase):
    def setUp(self):
        self.input_size = 4
        self.hidden_size = 3
        self.batch_size = 100

        rng = np.random.RandomState(42)
        self.W = rng.randn(self.hidden_size,
                           self.input_size).astype(config.floatX)
        self.b = rng.randn(self.hidden_size).astype(config.floatX)
        self.c = rng.randn(self.input_size).astype(config.floatX)

        self.model = RBM(input_size=self.input_size,
                         hidden_size=self.hidden_size)

        self.model.W.set_value(self.W)
        self.model.b.set_value(self.b)
        self.model.c.set_value(self.c)

    def test_free_energy(self):
        v = T.matrix('v')
        h = T.matrix('h')
        logsumexp_E = theano.function([v, h], -logsumexp(-self.model.E(v, h)))

        v1 = np.random.rand(1, self.input_size).astype(config.floatX)
        H = cartesian([(0, 1)] * self.hidden_size, dtype=config.floatX)
        Fv = logsumexp_E(v1, H)  # Marginalization over $\bh$

        v = T.matrix('v')
        free_energy = theano.function([v], self.model.free_energy(v))
        assert_array_almost_equal(free_energy(v1), [Fv])

        v2 = np.tile(v1, (self.batch_size, 1))
        assert_array_almost_equal(free_energy(v2), [Fv] * self.batch_size)

    def test_marginalize_over_v(self):
        v = T.matrix('v')
        h = T.matrix('h')
        E = theano.function([v, h], -logsumexp(-self.model.E(v, h)))

        h1 = np.random.rand(1, self.hidden_size).astype(config.floatX)
        V = cartesian([(0, 1)] * self.input_size, dtype=config.floatX)
        expected_energy = E(V, h1)

        h = T.matrix('h')
        marginalize_over_v = theano.function([h],
                                             self.model.marginalize_over_v(h))
        assert_array_almost_equal(marginalize_over_v(h1), [expected_energy])

        h2 = np.tile(h1, (self.batch_size, 1))
        assert_array_almost_equal(marginalize_over_v(h2),
                                  [expected_energy] * self.batch_size)

    def test_compute_lnZ(self):
        v = T.matrix('v')
        h = T.matrix('h')
        lnZ = theano.function([v, h], logsumexp(-self.model.E(v, h)))

        V = cartesian([(0, 1)] * self.input_size, dtype=config.floatX)
        H = cartesian([(0, 1)] * self.hidden_size, dtype=config.floatX)

        lnZ_using_free_energy = theano.function(
            [v], logsumexp(-self.model.free_energy(v)))
        assert_equal(lnZ_using_free_energy(V), lnZ(V, H))

        lnZ_using_marginalize_over_v = theano.function(
            [h], logsumexp(-self.model.marginalize_over_v(h)))
        assert_almost_equal(lnZ_using_marginalize_over_v(H),
                            lnZ(V, H),
                            decimal=6)

    def test_base_rate(self):
        # All binary combinations for V and H.
        V = cartesian([(0, 1)] * self.input_size, dtype=config.floatX)
        H = cartesian([(0, 1)] * self.hidden_size, dtype=config.floatX)

        base_rates = []
        # Add the uniform base rate, i.e. all parameters of the model are set to 0.
        base_rates.append(self.model.get_base_rate())
        # Add the base rate where visible biases are the ones from the model.
        base_rates.append(self.model.get_base_rate('c'))
        # Add the base rate where hidden biases are the ones from the model.
        base_rates.append(self.model.get_base_rate('b'))  # Not implemented

        for base_rate, anneable_params in base_rates:
            base_rate_lnZ = base_rate.compute_lnZ().eval().astype(
                config.floatX)

            brute_force_lnZ = logsumexp(-base_rate.E(V, H)).eval()
            assert_almost_equal(brute_force_lnZ.astype(config.floatX),
                                base_rate_lnZ,
                                decimal=6)

            theano_lnZ = logsumexp(-base_rate.free_energy(V), axis=0).eval()
            assert_almost_equal(theano_lnZ.astype(config.floatX),
                                base_rate_lnZ,
                                decimal=6)

            theano_lnZ = logsumexp(-base_rate.marginalize_over_v(H)).eval()
            assert_almost_equal(theano_lnZ.astype(config.floatX),
                                base_rate_lnZ,
                                decimal=6)

    @npt.dec.slow
    def test_binomial_from_uniform_cpu(self):
        # Test using numpy
        rng = np.random.RandomState(42)
        probs = rng.rand(10)

        seed = 1337
        nb_samples = 1000000
        rng = np.random.RandomState(seed)
        success1 = np.zeros(len(probs))
        for i in range(nb_samples):
            success1 += rng.binomial(n=1, p=probs)

        rng = np.random.RandomState(seed)
        success2 = np.zeros(len(probs))
        for i in range(nb_samples):
            success2 += (rng.rand(len(probs)) < probs).astype('int')

        success1 = success1 / nb_samples
        success2 = success2 / nb_samples

        assert_array_almost_equal(success1, success2)

        # Test using Theano's default RandomStreams
        theano_rng = RandomStreams(1337)
        rng_bin = theano_rng.binomial(size=probs.shape,
                                      n=1,
                                      p=probs,
                                      dtype=theano.config.floatX)
        success1 = np.zeros(len(probs))
        for i in range(nb_samples):
            success1 += rng_bin.eval()

        theano_rng = RandomStreams(1337)
        rng_bin = theano_rng.uniform(size=probs.shape,
                                     dtype=theano.config.floatX) < probs
        success2 = np.zeros(len(probs))
        for i in range(nb_samples):
            success2 += rng_bin.eval()

        assert_array_almost_equal(success1 / nb_samples, success2 / nb_samples)

        # Test using Theano's sandbox MRG RandomStreams
        theano_rng = MRG_RandomStreams(1337)
        success1 = theano_rng.binomial(size=probs.shape,
                                       n=1,
                                       p=probs,
                                       dtype=theano.config.floatX)

        theano_rng = MRG_RandomStreams(1337)
        success2 = theano_rng.uniform(size=probs.shape,
                                      dtype=theano.config.floatX) < probs

        assert_array_equal(success1.eval(), success2.eval())

    def test_gradients_auto_vs_manual(self):
        rng = np.random.RandomState(42)

        batch_size = 5
        input_size = 10

        rbm = RBM(input_size=input_size,
                  hidden_size=32,
                  CDk=1,
                  rng=np.random.RandomState(42))

        W = (rng.rand(rbm.hidden_size, rbm.input_size) > 0.5).astype(
            theano.config.floatX)
        rbm.W = theano.shared(value=W.astype(theano.config.floatX),
                              name='W',
                              borrow=True)

        b = (rng.rand(rbm.hidden_size) > 0.5).astype(theano.config.floatX)
        rbm.b = theano.shared(value=b.astype(theano.config.floatX),
                              name='b',
                              borrow=True)

        c = (rng.rand(rbm.input_size) > 0.5).astype(theano.config.floatX)
        rbm.c = theano.shared(value=c.astype(theano.config.floatX),
                              name='c',
                              borrow=True)

        params = [rbm.W, rbm.b, rbm.c]
        chain_start = T.matrix('start')
        chain_end = T.matrix('end')

        chain_start_value = (rng.rand(batch_size, input_size) > 0.5).astype(
            theano.config.floatX)
        chain_end_value = (rng.rand(batch_size, input_size) > 0.5).astype(
            theano.config.floatX)
        chain_start.tag.test_value = chain_start_value
        chain_end.tag.test_value = chain_end_value

        ### Computing gradients using automatic differentiation ###
        cost = T.mean(rbm.free_energy(chain_start)) - T.mean(
            rbm.free_energy(chain_end))
        gparams_auto = T.grad(cost, params, consider_constant=[chain_end])

        ### Computing gradients manually ###
        h = rbm.sample_h_given_v(chain_start, return_probs=True)
        _h = rbm.sample_h_given_v(chain_end, return_probs=True)

        grad_W = (T.dot(chain_end.T, _h) -
                  T.dot(chain_start.T, h)).T / batch_size
        grad_b = T.mean(_h - h, 0)
        grad_c = T.mean(chain_end - chain_start, 0)

        gparams_manual = [grad_W, grad_b, grad_c]
        grad_W.name, grad_b.name, grad_c.name = "grad_W", "grad_b", "grad_c"

        for gparam_auto, gparam_manual in zip(gparams_auto, gparams_manual):
            param1 = gparam_auto.eval({
                chain_start: chain_start_value,
                chain_end: chain_end_value
            })
            param2 = gparam_manual.eval({
                chain_start: chain_start_value,
                chain_end: chain_end_value
            })
            assert_array_almost_equal(param1,
                                      param2,
                                      err_msg=gparam_manual.name)
Example #18
def main():
    parser = buildArgsParser()
    args = parser.parse_args()

    # Check that at least one of --view or --save has been given.
    if not args.view and not args.save:
        parser.error(
            "At least one of the following options must be chosen: --view or --save"
        )

    # Get experiment folder
    experiment_path = args.name
    if not os.path.isdir(experiment_path):
        # If not a directory, it must be the name of the experiment.
        experiment_path = pjoin(".", "experiments", args.name)

    if not os.path.isdir(experiment_path):
        parser.error('Cannot find experiment: {0}!'.format(args.name))

    if not os.path.isfile(pjoin(experiment_path, "model.pkl")):
        parser.error(
            'Cannot find model for experiment: {0}!'.format(experiment_path))

    if not os.path.isfile(pjoin(experiment_path, "hyperparams.json")):
        parser.error('Cannot find hyperparams for experiment: {0}!'.format(
            experiment_path))

    # Load the experiment's hyperparameters
    hyperparams = utils.load_dict_from_json_file(
        pjoin(experiment_path, "hyperparams.json"))

    with Timer("Loading model"):
        if hyperparams["model"] == "rbm":
            from iRBM.models.rbm import RBM
            model_class = RBM
        elif hyperparams["model"] == "orbm":
            from iRBM.models.orbm import oRBM
            model_class = oRBM
        elif hyperparams["model"] == "irbm":
            from iRBM.models.irbm import iRBM
            model_class = iRBM

        # Load the actual model.
        model = model_class.load(pjoin(experiment_path, "model.pkl"))

    rng = np.random.RandomState(args.seed)

    # Sample from uniform
    # TODO: sample from a Bernoulli distribution parametrized with the visible biases
    chain_start = (rng.rand(args.nb_samples, model.input_size) > 0.5).astype(
        theano.config.floatX)

    with Timer("Building sampling function"):
        v0 = theano.shared(np.asarray(chain_start, dtype=theano.config.floatX))
        v1 = model.gibbs_step(v0)
        gibbs_step = theano.function([], updates={v0: v1})

        if args.full_gibbs_step:
            print "Using z=K"
            # Use z=K for first Gibbs step.
            from iRBM.models.rbm import RBM
            h0 = RBM.sample_h_given_v(model, v0)
            v1 = RBM.sample_v_given_h(model, h0)
            v0.set_value(v1.eval())

    with Timer("Sampling"):
        for k in range(args.cdk):
            gibbs_step()

    samples = v0.get_value()

    if args.save:
        np.savez(args.out, samples)

    if args.view:
        if hyperparams["dataset"] == "binarized_mnist":
            image_shape = (28, 28)
        elif hyperparams["dataset"] == "caltech101_silhouettes28":
            image_shape = (28, 28)
        else:
            raise ValueError("Unknown dataset: {0}".format(
                hyperparams["dataset"]))

        data = vizu.concatenate_images(samples,
                                       shape=image_shape,
                                       border_size=1,
                                       clim=(0, 1))
        plt.imshow(data, cmap=plt.cm.gray, interpolation='nearest')
        plt.show()