Example #1
    def test_poisson(self):
        # Test that RandomStreams.poisson generates the same results as numpy

        # Check over two calls to see if the random state is correctly updated.
        random = RandomStreams(utt.fetch_seed())
        fn = function([], random.poisson(lam=5, size=(11, 8)))
        fn_val0 = fn()
        fn_val1 = fn()

        rng_seed = np.random.RandomState(utt.fetch_seed()).randint(2**30)
        rng = np.random.RandomState(int(rng_seed))  # int() is for 32bit
        numpy_val0 = rng.poisson(lam=5, size=(11, 8))
        numpy_val1 = rng.poisson(lam=5, size=(11, 8))

        assert np.all(fn_val0 == numpy_val0)
        assert np.all(fn_val1 == numpy_val1)
Example #2
    def test_poisson(self):
        """Test that RandomStreams.poisson generates the same results as numpy"""
        
        # Check over two calls to see if the random state is correctly updated.
        random = RandomStreams(utt.fetch_seed())
        fn = function([], random.poisson(lam=5, size=(11, 8)))
        fn_val0 = fn()
        fn_val1 = fn()

        rng_seed = numpy.random.RandomState(utt.fetch_seed()).randint(2**30)
        rng = numpy.random.RandomState(int(rng_seed))  # int() is for 32bit
        numpy_val0 = rng.poisson(lam=5, size=(11, 8))
        numpy_val1 = rng.poisson(lam=5, size=(11, 8))

        assert numpy.all(fn_val0 == numpy_val0)
        assert numpy.all(fn_val1 == numpy_val1)
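
Both tests follow the same seeding idiom: the numpy reference generator must
be seeded with a value derived exactly as the test derives it, by drawing
randint(2**30) from a RandomState seeded with utt.fetch_seed(). Below is a
minimal sketch of that idiom in plain numpy (the fixed seed 42 is an
arbitrary stand-in for utt.fetch_seed()):

import numpy as np

seed = 42  # stand-in for utt.fetch_seed()
derived = np.random.RandomState(seed).randint(2**30)
rng = np.random.RandomState(int(derived))  # int() is for 32bit

draw0 = rng.poisson(lam=5, size=(11, 8))
draw1 = rng.poisson(lam=5, size=(11, 8))
# Consecutive draws differ because the generator state advances between
# calls, which is the state update the tests verify on the Theano side.
assert not np.array_equal(draw0, draw1)
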
Example #3
class RBM(Model):
    def __init__(self, v_dim=784, h_dim=500, input_type=InputType.binary,
                 W=None, b_h=None, b_v=None, sigma=None, input_var=None,
                 mrng=None, rng=None, name='', **kwargs):

        name = 'rbm' if name == '' else name
        super(RBM, self).__init__(name=name)

        model_file = kwargs.get('model_file')
        if model_file is not None:
            self.load(model_file)
            self._load_params()

        else:
            # v_dim is the dimension of the visible variable v. v_dim = D
            self.v_dim = v_dim
            # h_dim is the dimension of the hidden variable h. h_dim = H
            self.h_dim = h_dim
            self.input_type = input_type

            seed = np.random.randint(1, 2**30)
            self._rng = RandomStreams(seed) if rng is None else rng
            self._mrng = MRG_RandomStreams(seed) if mrng is None else mrng

            self._build_params(W, b_h, b_v, sigma)

        self.input = input_var if input_var is not None else T.matrix('input')
        if self.input_type == InputType.poisson or self.input_type == InputType.replicated_softmax:
            self.total_count = T.sum(self.input, axis=1, keepdims=True)

    def _load_params(self):
        [self.W, self.b_h, self.b_v] = self.params

    def _build_params(self, W, b_h, b_v, sigma):
        self.params = []

        self.W = W if W is not None else init_weight(self.v_dim, self.h_dim, name=self.name+'-W')
        self.b_h = b_h if b_h is not None else init_bias(self.h_dim, name=self.name+'-b_h')
        self.b_v = b_v if b_v is not None else init_bias(self.v_dim, name=self.name+'-b_v')

        # sigma_v is not considered to be a param
        self.params.extend([self.W, self.b_h, self.b_v])

        # The gaussian case has an additional sigma
        self.sigma_v = None
        if self.input_type == InputType.gaussian:
            print "Your input must be whitened to achieve the desire result."

            if sigma is not None:
                sigma = np.asarray(sigma)
                if sigma.ndim == 0:
                    print "Sigma is set to {} for all input dimensions.".format(sigma)
                    self.sigma_v = theano.shared(sigma * np.ones((self.v_dim, ), dtype=theano.config.floatX))
                    self.sigma_v.name = self.name + "-sigma_v"
                else:
                    assert sigma.ndim == 1 and sigma.shape[0] == self.v_dim, \
                        "Sigma must be a 1D array of length {}".format(self.v_dim)
                    self.sigma_v = theano.shared(sigma)
                    self.sigma_v.name = self.name + "-sigma_v"
            else:
                print "Default value of sigma is 1.0 for all input dimensions."
                self.sigma_v = theano.shared(np.ones(self.v_dim, dtype=theano.config.floatX))
                self.sigma_v.name = self.name + "-sigma_v"

    def print_model_info(self):
        print "\nInfo of model {}".format(self.name)
        print "v_dims: {} | h_dim: {} | input_type: {}".format(self.v_dim, self.h_dim, self.input_type)

    def get_save(self):
        return [self.name, self.v_dim, self.h_dim, self.input_type,
                self._mrng, self._rng, self.params, self.sigma_v]

    def set_load(self, saved_data):
        [self.name, self.v_dim, self.h_dim, self.input_type,
         self._mrng, self._rng, self.params, self.sigma_v] = saved_data

    def score(self, v_data):
        free_fn = theano.function([self.input], self.free_energy(self.input))
        return free_fn(v_data)

    def reconstruct(self, v_data):
        h = self.h_given_v(self.input)
        rv = self.v_given_h(h)
        rec_fn = theano.function([self.input], rv)
        return rec_fn(v_data)

    def reconstruct_from_hidden(self, h_data):
        h = self.input.type('hidden')
        rv = self.v_given_h(h)
        rec_fn = theano.function([h], rv)
        return rec_fn(h_data)

    def encode(self, v_data):
        h_code = self.h_given_v(self.input)
        fn = theano.function([self.input], h_code)
        return fn(v_data)

    # Energy from many v and one h
    def energy(self, v, h):
        v_free = self.v_free_term(v)
        v_bias = self.v_bias_term(v)
        v_weight = self.v_weight_term(v)

        # Sum the pairwise term over hidden units so each row is the energy
        # of one v paired with the single h
        return -(v_free + v_bias + T.sum(v_weight * h, axis=1) + T.dot(h, self.b_h))

    def free_energy(self, v):
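        # For binary hidden units the free energy is
        #   F(v) = -(v_bias + v_free + sum_j log(1 + exp(v_weight_j + b_h_j)));
        # the softplus sum over hidden units is h_term below.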
        v_free = self.v_free_term(v)
        v_bias = self.v_bias_term(v)
        v_weight = self.v_weight_term(v)

        h_term = T.sum(T.log(1 + T.exp(v_weight + self.b_h)), axis=1)
        return -(v_bias + v_free + h_term)

    def v_weight_term(self, v):
        if self.input_type == InputType.gaussian:
            return T.dot(v/(self.sigma_v ** 2), self.W)
        else:
            return T.dot(v, self.W)

    def v_bias_term(self, v):
        # Note that for the gaussian case, the v_bias term is negative
        if self.input_type == InputType.gaussian:
            return -T.sum((v - self.b_v) ** 2 / (2 * self.sigma_v ** 2), axis=1)
        else:
            return T.dot(v, self.b_v)

    def v_free_term(self, v):
        if self.input_type == InputType.poisson:
            return -T.sum(T.gammaln(1 + v), axis=1)
        else:
            return 0

    def rv(self, v):
        h = self.h_given_v(v)
        rv = self.v_given_h(h)
        return rv

    def h_given_v(self, v):
        v_weight = self.v_weight_term(v)
        p_h_v = T.nnet.sigmoid(v_weight + self.b_h)
        return p_h_v

    def v_given_h(self, h):
        if self.input_type == InputType.binary:
            p_v_h = T.nnet.sigmoid(self.b_v + T.dot(h, self.W.T))
            return p_v_h

        elif self.input_type == InputType.gaussian:
            mu_v = self.b_v + T.dot(h, self.W.T)
            return mu_v

        elif self.input_type == InputType.categorical or \
             self.input_type == InputType.replicated_softmax:
            p_v_h = T.nnet.softmax(self.b_v + T.dot(h, self.W.T))
            return p_v_h

        elif self.input_type == InputType.poisson:
            if not hasattr(self, 'total_count') or self.total_count is None:
                raise ValueError('Total count should be set for constrained Poisson')

            unconstrained_lmbd_v = T.exp(self.b_v + T.dot(h, self.W.T))
            lmbd_v = unconstrained_lmbd_v * 1.0 / T.sum(unconstrained_lmbd_v, axis=1, keepdims=True) \
                     * self.total_count
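            # The exponential rates are normalized to sum to one per row and
            # rescaled by total_count, so each reconstructed row preserves the
            # observed total count (the "constrained Poisson" formulation).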
            return lmbd_v

    def sample_h_given_v(self, v0_sample):
        h1_mean = self.h_given_v(v0_sample)
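        # binomial with n=1 is a per-unit Bernoulli draw with p = h1_mean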
        h1_sample = self._mrng.binomial(size=h1_mean.shape, n=1, p=h1_mean,
                                        dtype=theano.config.floatX)
        return [h1_mean, h1_sample]

    def sample_v_given_h(self, h0_sample):
        if self.input_type == InputType.binary:
            v1_mean = self.v_given_h(h0_sample)
            v1_sample = self._mrng.binomial(size=v1_mean.shape, n=1, p=v1_mean,
                                           dtype=theano.config.floatX)
            return [v1_mean, v1_sample]

        elif self.input_type == InputType.gaussian:
            mu_v1 = self.v_given_h(h0_sample)  # Note that mu_v1 is returned

            v1_sample = self._mrng.normal(size=mu_v1.shape, avg=mu_v1, std=self.sigma_v,
                                          dtype=theano.config.floatX)
            return [mu_v1, v1_sample]

        # Note that there is a constraint in the case of Multinomial
        elif self.input_type == InputType.categorical:
            prob_v1 = self.v_given_h(h0_sample)
            # Multinomial with n=1 (It is equal to categorical)
            v1_sample = self._mrng.multinomial(pvals=prob_v1, n=1, dtype=theano.config.floatX)
            return [prob_v1, v1_sample]

        elif self.input_type == InputType.poisson:
            lmbd_v1 = self.v_given_h(h0_sample)
            # We have to use RandomStreams, not MRG_RandomStreams
            v1_sample = self._rng.poisson(size=lmbd_v1.shape, lam=lmbd_v1,
                                          dtype=theano.config.floatX)
            return [lmbd_v1, v1_sample]

        elif self.input_type == InputType.replicated_softmax:
            if not hasattr(self, 'total_count') or self.total_count is None:
                raise ValueError('Total count should be set for replicated Softmax')

            prob_v1 = self.v_given_h(h0_sample)
            # We sample the vocabulary distribution D = total_count times and sum over the D samples
            v1_sample = self._mrng.multinomial(pvals=prob_v1, n=self.total_count, ndim=prob_v1.shape[1])
            return [prob_v1, v1_sample]

    # One step of gibbs sampling
    def gibbs_hvh(self, h0_sample):
        # Here we use v1_stat to indicate that it is the sufficient statistic of v1
        [v1_stat, v1_sample] = self.sample_v_given_h(h0_sample)
        [h1_mean, h1_sample] = self.sample_h_given_v(v1_sample)

        return [v1_stat, v1_sample, h1_mean, h1_sample]

    def gibbs_vhv(self, v0_sample):
        [h1_mean, h1_sample] = self.sample_h_given_v(v0_sample)
        [v1_stat, v1_sample] = self.sample_v_given_h(h1_sample)

        return [h1_mean, h1_sample, v1_stat, v1_sample]

    def run_CD_from_h(self, k, data_h):
        start_h = T.matrix("start_h")
        # [v_stats, v_samples, h_means, h_samples], updates \
        outputs, updates \
            = theano.scan(fn=self.gibbs_hvh, outputs_info=[None, None, None, start_h],
                          n_steps=k, name="gibbs_hvh")
        # Return the last h_sample after k steps (scan's outputs have the
        # step index as their leading axis, so we take the final step)
        CD_fn = theano.function([start_h], outputs=outputs[-1][-1], updates=updates)
        return CD_fn(data_h)

    def run_CD_from_v(self, k, data_v):
        start_v = T.matrix("start_v")
        # [h_means, h_samples, v_stats, v_samples], updates \
        outputs, updates \
            = theano.scan(fn=self.gibbs_vhv, outputs_info=[None, None, None, start_v],
                          n_steps=k, name="gibbs_vhv")
        # Return the last v_sample after k steps
        CD_fn = theano.function([start_v], outputs=outputs[-1][-1], updates=updates)
        return CD_fn(data_v)

    # Return visible variables
    def _gibbs_vhv_to_v_fn(self, steps, persis_v, is_sample=True, name=''):
        [h_means, h_samples, v_stats, v_samples], updates \
            = theano.scan(self.gibbs_vhv,
                          outputs_info=[None, None, None, persis_v],
                          n_steps=steps,  # init_gibbs is used for initialization
                          name='gibbs_vhv')
        updates.update({persis_v: v_samples[-1]})
        if is_sample:
            gibbs_fn = theano.function([], v_samples[-1], updates=updates, name=name)
        else:
            gibbs_fn = theano.function([], v_stats[-1], updates=updates, name=name)
        return gibbs_fn

    # Also return visible variables
    def _gibbs_hvh_to_v_fn(self, steps, persis_h, is_sample=True, name=''):
        [v_stats, v_samples, h_means, h_samples], updates \
            = theano.scan(self.gibbs_hvh,
                          outputs_info=[None, None, None, persis_h],
                          n_steps=steps,  # init_gibbs is used for initialization
                          name='gibbs_hvh')
        updates.update({persis_h: h_samples[-1]})
        if is_sample:
            gibbs_fn = theano.function([], v_samples[-1], updates=updates, name=name)
        else:
            gibbs_fn = theano.function([], v_stats[-1], updates=updates, name=name)
        return gibbs_fn

    def sample_given_data(self, v_data, init_gibbs=1000, betw_gibbs=100, loops=10, is_sample=False):
        print "\nSample data from input using model {}".format(self.name)
        # If the input is 1-dimensional, reshape it to 2 dimensions
        if len(v_data.shape) == 1:
            persis_v = theano.shared(np.asarray(v_data.reshape(1, v_data.shape[0]),
                                                dtype=theano.config.floatX))
        else:
            persis_v = theano.shared(np.asarray(v_data, dtype=theano.config.floatX))

        if init_gibbs > 0:
            init_sampling_fn = self._gibbs_vhv_to_v_fn(init_gibbs, persis_v,
                                                       is_sample=True, name='init_sampling_fn')
        else:
            init_sampling_fn = None

        sample_fn = self._gibbs_vhv_to_v_fn(betw_gibbs, persis_v,
                                            is_sample=is_sample, name='sample_fn')

        rvs_data = []
        if init_sampling_fn is not None:
            init_sampling_fn()
        for idx in range(loops):
            print "Running sampling loop %d" % idx
            rv_data = sample_fn()
            rvs_data.append(rv_data)

        return np.asarray(rvs_data)

    # Sample randomly
    # We start from h and run the Gibbs chain until it reaches equilibrium
    def sample(self, init_gibbs=1000, betw_gibbs=100, n_samples=20, loops=10, is_sample=False):
        print "\nSample random data using model {}".format(self.name)
        persis_h = theano.shared(np.zeros((n_samples, self.h_dim), dtype=theano.config.floatX))

        if init_gibbs > 0:
            init_sampling_fn = self._gibbs_hvh_to_v_fn(init_gibbs, persis_h,
                                                       is_sample=True, name='init_sampling_fn')
        else:
            init_sampling_fn = None
        sample_fn = self._gibbs_hvh_to_v_fn(betw_gibbs, persis_h,
                                            is_sample=is_sample, name='sample_fn')

        rvs_data = []
        if init_sampling_fn is not None:
            init_sampling_fn()
        for idx in range(loops):
            print "Running sampling loop %d" % idx
            rv_data = sample_fn()
            rvs_data.append(rv_data)

        return np.asarray(rvs_data)

    def get_cost_updates(self, lr, k, persis_h, l1, l2, stable_update, store_grad):
        # Run one sample step to get h
        h_mean, h_sample = self.sample_h_given_v(self.input)

        # Run normal CD
        start_h = persis_h if persis_h is not None else h_sample

        [v_stats, v_samples, h_means, h_samples], updates \
            = theano.scan(fn=self.gibbs_hvh, outputs_info=[None, None, None, start_h],
                          n_steps=k, name="gibbs_hvh")

        vk = v_samples[-1]
        v_stat_k = v_stats[-1]

        if persis_h is not None:
            updates[persis_h] = h_samples[-1]

        cost = self.get_viewed_cost(self.input, v_stat_k)
        cost = T.mean(cost)

        # For stable update, use mean value instead of random sampled value
        if stable_update:
            print "\nStable update is set to be True"
            updates = self.params_updates(self.input, v_stat_k, lr, l1, l2, updates, store_grad)
        else:
            print "\nStable update is set to be False"
            updates = self.params_updates(self.input, vk, lr, l1, l2, updates, store_grad)

        return cost, updates

    def get_viewed_cost(self, v0, vk_stat):
        # Binary cross-entropy
        cost = 0
        if self.input_type == InputType.binary:
            clip_vk_stat = T.clip(vk_stat, np.float32(0.000001), np.float32(0.999999))
            cost = -T.sum(v0 * T.log(clip_vk_stat) + (1 - v0) * T.log(1 - clip_vk_stat), axis=1)

        # Sum square error
        elif self.input_type == InputType.gaussian:
            cost = T.sum((v0 - vk_stat) ** 2, axis=1)

        # Categorical cross-entropy
        elif self.input_type == InputType.categorical:
            clip_vk_stat = T.clip(vk_stat, np.float32(0.000001), np.float32(0.999999))
            cost = -T.sum(v0 * T.log(clip_vk_stat), axis=1)

        elif self.input_type == InputType.poisson:
            clip_vk_stat = T.clip(vk_stat, np.float32(0.000001), np.inf)
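            # Negative Poisson log-likelihood: -log p(v0 | lam) =
            #   lam - v0 * log(lam) + log(Gamma(1 + v0)), summed over dimensions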
            cost = -T.sum(-vk_stat + v0 * T.log(clip_vk_stat) - T.gammaln(1 + v0), axis=1)

        if self.input_type == InputType.replicated_softmax:
            clip_vk_stat = T.clip(vk_stat, np.float32(0.000001), np.inf)
            cost = -T.sum((v0 / self.total_count) * T.log(clip_vk_stat), axis=1)

        return cost

    def params_updates(self, v0, vk, lr, l1, l2, updates, store_grad):
        if updates is None:
            updates = OrderedDict()
        if store_grad:
            self.stored_grads = OrderedDict()

        grads = [0 for _ in xrange(len(self.params))]

        o_grads = self.nll_grad_formula(v0, vk)
        grads = [grads[i] + o_grads[i] for i in xrange(len(self.params))]

        if store_grad:
            print "\nGradients over negative log-likelihood are stored in original_grads"
            o_shared_grads, updates = store_grads_in_update(self.params, o_grads, updates)
            self.stored_grads['original_grads'] = o_shared_grads

        if l1 is not None:
            print "Add L1 regularization ({}) to parameter updates".format(l1)
            l1_gW = l1_grad(self.W, l1)
            grads[0] = grads[0] + l1_gW

            if store_grad:
                print "\nGradients over L1 regularization are stored in l1_grads"
                l1_shared_grads, updates = store_grads_in_update([self.W], [l1_gW], updates)
                self.stored_grads['l1_grads'] = l1_shared_grads

        if l2 is not None:
            print "Add L2 regularization ({}) to parameter updates".format(l2)
            l2_gW = l2_grad(self.W, l2)
            grads[0] = grads[0] + l2_gW

            if store_grad:
                print "\nGradients over L2 regularization are stored in l2_grads"
                l2_shared_grads, updates = store_grads_in_update([self.W], [l2_gW], updates)
                self.stored_grads['l2_grads'] = l2_shared_grads

        if store_grad:
            print "\nGradients over total cost are stored in total_grads"
            t_shared_grads, updates = store_grads_in_update(self.params, grads, updates)
            self.stored_grads['total_grads'] = t_shared_grads

        grads = [grad.astype(theano.config.floatX) for grad in grads]

        if self.check_learning_algor():
            params_updates = self.learning_algor(grads, self.params, lr, **self.learning_config)
            updates.update(params_updates)
        else:
            print "\nSimple SGD is used as training algorithm"
            for grad, param in zip(grads, self.params):
                updates[param] = param - grad * lr

        return updates

    def nll_grad_formula(self, v0, vk):
        n_instances = v0.shape[0]

        h0 = self.h_given_v(v0)
        hk = self.h_given_v(vk)
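        # CD-k gradient of the negative log-likelihood, estimated from the
        # data batch (v0, h0) and the end of the Gibbs chain (vk, hk):
        #   dW   ~ <v.T h>_chain - <v.T h>_data
        #   db_h ~ <h>_chain - <h>_data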

        gW = (T.dot(vk.T, hk) - T.dot(v0.T, h0)) / n_instances
        gb_h = T.mean(hk - h0, axis=0)

        if self.input_type == InputType.gaussian:
            gb_v = T.mean((vk - v0) / (self.sigma_v ** 2), axis=0)

            ugz_v = (((vk - self.b_v) ** 2 - 2 * vk * T.dot(hk, self.W.T)) - \
                    ((v0 - self.b_v) ** 2 - 2 * v0 * T.dot(h0, self.W.T))) / (self.sigma_v ** 2)
            gz_v = T.mean(ugz_v, axis=0)

            grads = [gW, gb_h, gb_v, gz_v]

        else:
            gb_v = T.mean(vk - v0, axis=0)
            grads = [gW, gb_h, gb_v]

        return grads

    def nll_grad_theano(self, v0, vk):
        cost = T.mean(self.free_energy(v0)) - T.mean(self.free_energy(vk))
        # consider_constant stops gradients from flowing through vk: it is a
        # sample from the model and must be treated as constant data here
        grads = T.grad(cost, self.params, consider_constant=[vk])
        return grads

    def grad_check(self, data_v0, data_vk):
        # data_v0 and data_vk are numpy arrays
        # data_vk is computed by calling CD-k
        v0 = T.matrix('v0')
        vk = T.matrix('vk')

        theano_grads = self.nll_grad_theano(v0, vk)
        formula_grads = self.nll_grad_formula(v0, vk)

        grad_diffs = []
        for t_grad, f_grad in zip(theano_grads, formula_grads):
            grad_diffs.append(abs(t_grad - f_grad))

        grad_test_fn = theano.function([v0, vk], grad_diffs)

        diffs_results = grad_test_fn(data_v0, data_vk)
        for i in xrange(len(self.params)):
            if self.params[i].name is not None:
                name = self.params[i].name
            else:
                name = ""
            print ("Max " + name + " diffs: {}").format(np.max(diffs_results[i]))
            print ("Min " + name + " diffs: {}").format(np.min(diffs_results[i]))
            print ("Average " + name + " diffs: {}").format(np.mean(diffs_results[i]))
            print
        return diffs_results

    def config_train(self, **kwargs):
        k = kwargs.get('CD_k')
        persis_h_data = kwargs.get('persis_h')
        l1 = kwargs.get('L1')
        l2 = kwargs.get('L2')

        if l1 is None:
            print "L1 should be set to enable sparse weight regularization"
        if l2 is None:
            print "L2 should be set to enable weight decay regularization"

        stable_update = kwargs.get('stable_update')
        if stable_update is None:
            stable_update = False

        store_grad = kwargs.get('store_grad')
        if store_grad is None:
            store_grad = False

        self._build_train(k, persis_h_data, l1, l2, stable_update, store_grad)

    # persis_h_data is a numpy array
    def _build_train(self, k, persis_h_data, l1, l2, stable_update, store_grad):
        print "\nBuild training function of model {}".format(self.name)

        if persis_h_data is not None:
            persis_h = theano.shared(persis_h_data, borrow=True)
        else:
            persis_h = None

        lr = T.scalar('lr')
        cost, updates = self.get_cost_updates(lr, k, persis_h, l1, l2, stable_update, store_grad)
        print "\nBuild computation graph for training function of model {}".format(self.name)
        self.train_fn = theano.function([self.input, lr], cost, updates=updates)

        rv = self.v_given_h(self.h_given_v(self.input))
        test_cost = self.get_viewed_cost(self.input, rv)
        test_cost = T.mean(test_cost)
        print "\nBuild computation graph for validation function of model {}".format(self.name)
        self.valid_fn = theano.function([self.input], test_cost)
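
A minimal driver sketch for the class above (not part of the original source:
the MNIST-sized dimensions, the learning rate, and the iterate_minibatches
helper with its train_data/valid_data arrays are assumptions for illustration):

rbm = RBM(v_dim=784, h_dim=500, input_type=InputType.binary)
rbm.config_train(CD_k=1)  # persis_h, L1, L2, stable_update default to None/False

for epoch in xrange(10):
    for batch in iterate_minibatches(train_data):  # hypothetical helper
        cost = rbm.train_fn(batch, 0.01)  # train_fn takes (input, lr)
    print "Epoch %d, validation cost: %f" % (epoch, rbm.valid_fn(valid_data))
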
Example #4
class VisibleLayer(object):
    def __init__(self, v_dim, h_dim, v_type, mrng=None, rng=None, name=''):

        self.name = name if name != '' else 'v_layer'

        self.v_dim = v_dim
        self.h_dim = h_dim
        self.v_type = v_type

        seed = np.random.randint(1, 2**30)
        self._rng = RandomStreams(seed) if rng is None else rng
        self._mrng = MRG_RandomStreams(seed) if mrng is None else mrng

        self._build_params()

    def set_total_count(self, total_count):
        if self.v_type != InputType.poisson:
            raise ValueError(
                "The input type should be Poisson to set total count")
        self.total_count = total_count

    def _build_params(self):
        # W to connect with hidden layer
        self.params = []
        if self.v_type == InputType.poisson:
            # Use float division: in Python 2, -1 / h_dim would floor to -1
            init_W = np.random.uniform(low=-1.0 / self.h_dim,
                                       high=1.0 / self.h_dim,
                                       size=(self.v_dim, self.h_dim))
            self.W = init_weight(self.v_dim,
                                 self.h_dim,
                                 value=init_W,
                                 name=self.name + '-W')
        else:
            self.W = init_weight(self.v_dim, self.h_dim, name=self.name + '-W')
        self.b_v = init_bias(self.v_dim, name=self.name + '-b_v')

        # For all of binary, gaussian, and categorical
        self.params.extend([self.W, self.b_v])

        # The gaussian case has an additional sigma
        if self.v_type == InputType.gaussian:
            self.sigma_v = T.ones(shape=(self.v_dim, ),
                                  dtype=theano.config.floatX)
            self.sigma_v.name = self.name + "-sigma_v"

    # Results in a vector of shape (n,)
    def v_free_term(self, v):
        if self.v_type == InputType.poisson:
            return -T.sum(T.gammaln(1 + v), axis=1)
        else:
            return 0

    # Results in a vector of shape (n,)
    def v_bias_term(self, v):
        # Note that for gaussian case, the v_bias should be negative
        if self.v_type == InputType.gaussian:
            return -T.sum((v - self.b_v)**2 / (2 * self.sigma_v**2), axis=1)
        else:
            return T.dot(v, self.b_v)

    # Results in a matrix of shape (n, H)
    def v_weight_term(self, v):
        if self.v_type == InputType.gaussian:
            return T.dot((v / (self.sigma_v**2)), self.W)
        else:
            return T.dot(v, self.W)

    # Supports binary, gaussian, categorical, and poisson
    def v_given_h(self, h):
        if self.v_type == InputType.binary:
            p_v_h = T.nnet.sigmoid(self.b_v + T.dot(h, self.W.T))
            return p_v_h

        elif self.v_type == InputType.gaussian:
            mu_v = self.b_v + T.dot(h, self.W.T)
            return mu_v

        elif self.v_type == InputType.categorical:
            p_v_h = T.nnet.softmax(self.b_v + T.dot(h, self.W.T))
            return p_v_h

        elif self.v_type == InputType.poisson:
            if not hasattr(self, 'total_count') or self.total_count is None:
                raise ValueError(
                    'Total count should be set for constrained Poisson')

            unconstrained_lmbd_v = T.exp(self.b_v + T.dot(h, self.W.T))
            lmbd_v = unconstrained_lmbd_v * 1.0 / T.sum(unconstrained_lmbd_v, axis=1, keepdims=True) \
                     * self.total_count
            return lmbd_v

    # Supports binary, gaussian, categorical, and poisson
    def sample_v_given_h(self, h0_sample):
        if self.v_type == InputType.binary:
            v1_mean = self.v_given_h(h0_sample)
            v1_sample = self._mrng.binomial(size=v1_mean.shape,
                                            n=1,
                                            p=v1_mean,
                                            dtype=theano.config.floatX)
            return [v1_mean, v1_sample]

        elif self.v_type == InputType.gaussian:
            mu_v1 = self.v_given_h(h0_sample)  # Note that mu_v1 is returned

            v1_sample = self._mrng.normal(size=mu_v1.shape,
                                          avg=mu_v1,
                                          std=self.sigma_v,
                                          dtype=theano.config.floatX)
            return [mu_v1, v1_sample]

        # Note that there is constraint in the case of Multinomial
        elif self.v_type == InputType.categorical:
            prob_v1 = self.v_given_h(h0_sample)
            v1_sample = self._mrng.multinomial(pvals=prob_v1,
                                               n=1,
                                               dtype=theano.config.floatX)
            return [prob_v1, v1_sample]

        elif self.v_type == InputType.poisson:
            lmbd_v1 = self.v_given_h(h0_sample)
            # We have to use RandomStreams, not MRG_RandomStreams
            v1_sample = self._rng.poisson(size=lmbd_v1.shape,
                                          lam=lmbd_v1,
                                          dtype=theano.config.floatX)
            return [lmbd_v1, v1_sample]

    def l1_grad(self, l1):
        gW = l1_grad(self.W, l1)
        return [gW, 0]

    def l2_grad(self, l2):
        gW = l2_grad(self.W, l2)
        return [gW, 0]

    def nll_grad_formula(self, v0, vk, h0, hk):
        n_instances = v0.shape[0]

        gW = (T.dot(vk.T, hk) - T.dot(v0.T, h0)) / n_instances

        if self.v_type == InputType.gaussian:
            gb_v = T.mean((vk - v0) / (self.sigma_v**2), axis=0)
            grads = [gW, gb_v]
        else:
            gb_v = T.mean(vk - v0, axis=0)
            grads = [gW, gb_v]

        return grads

    def get_viewed_cost(self, v0, vk_stat):
        # Binary cross-entropy
        cost = 0
        if self.v_type == InputType.binary:
            # Clip to avoid log(0)
            clip_vk_stat = T.clip(vk_stat, np.float32(0.000001),
                                  np.float32(0.999999))
            cost = -T.sum(v0 * T.log(clip_vk_stat) +
                          (1 - v0) * T.log(1 - clip_vk_stat),
                          axis=1)

        # Sum square error
        elif self.v_type == InputType.gaussian:
            cost = T.sum((v0 - vk_stat)**2, axis=1)

        # Categorical cross-entropy
        elif self.v_type == InputType.categorical:
            clip_vk_stat = T.clip(vk_stat, np.float32(0.000001),
                                  np.float32(0.999999))
            cost = -T.sum(v0 * T.log(clip_vk_stat), axis=1)

        elif self.v_type == InputType.poisson:
            clip_vk_stat = T.clip(vk_stat, np.float32(0.000001), np.inf)
            cost = -T.sum(
                -vk_stat + v0 * T.log(clip_vk_stat) - T.gammaln(1 + v0),
                axis=1)

        return cost

    def get_params(self):
        return self.params
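
A brief usage sketch (assumed, not from the original source): a VisibleLayer
can be wired to a symbolic hidden sample to build a reconstruction function,
for example:

import theano
import theano.tensor as T

layer = VisibleLayer(v_dim=784, h_dim=500, v_type=InputType.binary)
h = T.matrix('h')
v_mean, v_sample = layer.sample_v_given_h(h)
reconstruct_fn = theano.function([h], [v_mean, v_sample])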