Example #1
    def expected_energy(self, V_hat, H_hat):
        """ expected energy of the model under the mean field distribution
            defined by V_hat and H_hat
            alternately, could be expectation of the energy function across
            a batch of examples, where every element of V_hat and H_hat is
            a binary observation
        """


        V_name = make_name(V_hat, 'anon_V_hat')
        assert isinstance(H_hat, (list,tuple))

        H_names = []
        for i in xrange(len(H_hat)):
            H_names.append( make_name(H_hat[i], 'anon_H_hat[%d]' %(i,) ))

        m = V_hat.shape[0]
        m.name = V_name + '.shape[0]'

        assert len(H_hat) == len(self.rbms)

        v = T.mean(V_hat, axis=0)

        v_bias_contrib = T.dot(v, self.bias_vis)

        exp_vh = T.dot(V_hat.T,H_hat[0]) / m

        v_weights_contrib = T.sum(self.W[0] * exp_vh)

        v_weights_contrib.name = 'v_weights_contrib('+V_name+','+H_names[0]+')'

        total = v_bias_contrib + v_weights_contrib

        for i in xrange(len(H_hat) - 1):
            lower_H = H_hat[i]
            low = T.mean(lower_H, axis = 0)
            higher_H = H_hat[i+1]
            exp_lh = T.dot(lower_H.T, higher_H) / m
            lower_bias = self.bias_hid[i]
            W = self.W[i+1]

            lower_bias_contrib = T.dot(low, lower_bias)

            # exp_lh already carries the 1 / m factor, so no further division
            # by m is needed here
            weights_contrib = T.sum(W * exp_lh)

            total = total + lower_bias_contrib + weights_contrib

        highest_bias_contrib = T.dot(T.mean(H_hat[-1],axis=0), self.bias_hid[-1])

        total = total + highest_bias_contrib

        assert len(total.type.broadcastable) == 0

        rval =  - total

        #rval.name = 'dbm_expected_energy('+V_name+','+str(H_names)+')'

        return rval
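Below is a minimal NumPy sketch (not part of the project code) of the quantity the Theano graph above builds, for a DBM with one visible and two hidden layers. All shapes and variable names are illustrative assumptions.

    import numpy as np

    rng = np.random.RandomState(0)
    m, n_v, n_h0, n_h1 = 5, 4, 3, 2           # batch size and layer sizes
    V_hat = rng.uniform(size=(m, n_v))        # mean field over visible units
    H_hat = [rng.uniform(size=(m, n_h0)),     # mean field over hidden layers
             rng.uniform(size=(m, n_h1))]
    bias_vis = rng.randn(n_v)
    bias_hid = [rng.randn(n_h0), rng.randn(n_h1)]
    W = [rng.randn(n_v, n_h0), rng.randn(n_h0, n_h1)]

    # batch-averaged expected energy, term by term, as in expected_energy
    total = V_hat.mean(axis=0).dot(bias_vis)
    total += (V_hat.dot(W[0]) * H_hat[0]).sum(axis=1).mean()
    total += H_hat[0].mean(axis=0).dot(bias_hid[0])
    total += (H_hat[0].dot(W[1]) * H_hat[1]).sum(axis=1).mean()
    total += H_hat[1].mean(axis=0).dot(bias_hid[1])
    expected_energy = -total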
Example #2
    def __call__(self, X, Y=None, X_space=None):
        """
        .. todo::

            WRITEME

        Note that calling this repeatedly will yield the same random numbers each time.
        """
        assert X_space is not None
        self.called = True
        assert X.dtype == config.floatX
        if not hasattr(self, 'seed'):
            self.seed = default_seed
        theano_rng = RandomStreams(self.seed)

        if X.ndim == 2 and self.sync_channels:
            raise NotImplementedError()

        p = self.drop_prob

        if not hasattr(self, 'drop_prob_y') or self.drop_prob_y is None:
            yp = p
        else:
            yp = self.drop_prob_y

        batch_size = X_space.batch_size(X)

        if self.balance:
            flip = theano_rng.binomial(size=(batch_size, ),
                                       p=0.5,
                                       n=1,
                                       dtype=X.dtype)

            yp = flip * (1 - p) + (1 - flip) * p

            dimshuffle_args = ['x'] * X.ndim

            if X.ndim == 2:
                dimshuffle_args[0] = 0
                assert not self.sync_channels
            else:
                dimshuffle_args[X_space.axes.index('b')] = 0
                if self.sync_channels:
                    del dimshuffle_args[X_space.axes.index('c')]

            flip = flip.dimshuffle(*dimshuffle_args)

            p = flip * (1 - p) + (1 - flip) * p

        #size needs to have a fixed length at compile time or the
        #theano random number generator will be angry
        size = [X.shape[i] for i in xrange(X.ndim)]
        if self.sync_channels:
            # a tuple does not support item deletion, so build size as a list
            # and only convert it to a tuple after removing the channel axis
            del size[X_space.axes.index('c')]
        size = tuple(size)

        drop_mask = theano_rng.binomial(size=size, p=p, n=1, dtype=X.dtype)

        X_name = make_name(X, 'anon_X')
        drop_mask.name = 'drop_mask(%s)' % X_name

        if Y is not None:
            assert isinstance(yp, float) or yp.ndim < 2
            drop_mask_Y = theano_rng.binomial(size=(batch_size, ),
                                              p=yp,
                                              n=1,
                                              dtype=X.dtype)
            assert drop_mask_Y.ndim == 1
            Y_name = make_name(Y, 'anon_Y')
            drop_mask_Y.name = 'drop_mask_Y(%s)' % Y_name
            #drop_mask = Print('drop_mask',attrs=['sum'])(drop_mask)
            #drop_mask_Y = Print('drop_mask_Y',attrs=['sum'])(drop_mask_Y)
            return drop_mask, drop_mask_Y

        return drop_mask
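The "balance" branch above deserves a note: a fair coin flip per example swaps the drop probability between p and 1 - p, so roughly half the batch keeps most of its inputs while the other half drops most of them. A small NumPy sketch of that trick (illustrative only; names and shapes are assumptions):

    import numpy as np

    rng = np.random.RandomState(0)
    batch_size, n_features, p = 6, 8, 0.1
    flip = rng.binomial(n=1, p=0.5, size=(batch_size,))
    per_example_p = flip * (1 - p) + (1 - flip) * p        # shape (batch_size,)
    drop_mask = rng.binomial(n=1, p=per_example_p[:, None],
                             size=(batch_size, n_features))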
Example #3
    def expected_energy_batch(self, V_hat, H_hat, no_v_bias = False):
        """ expected energy of the model under the mean field distribution
            defined by V_hat and H_hat
            alternately, could be expectation of the energy function across
            a batch of examples, where every element of V_hat and H_hat is
            a binary observation
            if no_v_bias is True, ignores the contribution from biases on visible units
        """

        warnings.warn("TODO: write unit test verifying expected_energy_batch/m = expected_energy")

        V_name = make_name(V_hat, 'anon_V_hat')
        assert isinstance(H_hat, (list,tuple))

        H_names = []
        for i in xrange(len(H_hat)):
            H_names.append( make_name(H_hat[i], 'anon_H_hat[%d]' %(i,) ))

        assert len(H_hat) == len(self.rbms)

        if no_v_bias:
            v_bias_contrib = 0.
        else:
            v_bias_contrib = T.dot(V_hat, self.bias_vis)


        assert len(V_hat.type.broadcastable) == 2
        assert len(self.W[0].type.broadcastable) == 2
        assert len(H_hat[0].type.broadcastable) == 2

        interm1 = T.dot(V_hat, self.W[0])
        assert len(interm1.type.broadcastable) == 2
        interm2 = interm1 * H_hat[0]
        assert len(interm2.type.broadcastable) == 2

        v_weights_contrib = interm2.sum(axis=1)

        v_weights_contrib.name = 'v_weights_contrib('+V_name+','+H_names[0]+')'
        assert len(v_weights_contrib.type.broadcastable) == 1

        total = v_bias_contrib + v_weights_contrib

        for i in xrange(len(H_hat) - 1):
            lower_H = H_hat[i]
            higher_H = H_hat[i+1]
            #exp_lh = T.dot(lower_H.T, higher_H) / m
            lower_bias = self.bias_hid[i]
            W = self.W[i+1]

            lower_bias_contrib = T.dot(lower_H, lower_bias)

            #weights_contrib = T.sum( W * exp_lh) / m
            weights_contrib = (T.dot(lower_H, W) * higher_H).sum(axis=1)

            cur_contrib = lower_bias_contrib + weights_contrib
            assert len(cur_contrib.type.broadcastable) == 1
            total = total + cur_contrib

        highest_bias_contrib = T.dot(H_hat[-1], self.bias_hid[-1])

        total = total + highest_bias_contrib

        assert len(total.type.broadcastable) == 1

        rval =  - total

        #rval.name = 'dbm_expected_energy('+V_name+','+str(H_names)+')'

        return rval
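A rough NumPy sketch (not an actual unit test from the project) of the check the warning above asks for: the mean of the per-example energies from expected_energy_batch should equal the batch-averaged value from expected_energy. A single hidden layer is used here; all names and shapes are assumptions.

    import numpy as np

    rng = np.random.RandomState(0)
    m, n_v, n_h = 5, 4, 3
    V_hat = rng.uniform(size=(m, n_v))
    H_hat = rng.uniform(size=(m, n_h))
    bias_vis, bias_hid = rng.randn(n_v), rng.randn(n_h)
    W = rng.randn(n_v, n_h)

    # per-example energies, as in expected_energy_batch
    per_example = -(V_hat.dot(bias_vis)
                    + (V_hat.dot(W) * H_hat).sum(axis=1)
                    + H_hat.dot(bias_hid))

    # batch-averaged energy, as in expected_energy
    averaged = -(V_hat.mean(axis=0).dot(bias_vis)
                 + (V_hat.dot(W) * H_hat).sum(axis=1).mean()
                 + H_hat.mean(axis=0).dot(bias_hid))

    assert np.allclose(per_example.mean(), averaged)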
Example #4
File: dbm.py Project: vlb/pylearn
    def expected_energy(self, V_hat, H_hat, Y_hat = None, no_v_bias = False):
        """ expected energy of the model under the mean field distribution
            defined by V_hat and H_hat
            alternately, could be expectation of the energy function across
            a batch of examples, where every element of V_hat and H_hat is
            a binary observation
            if no_v_bias is True, ignores the contribution from biases on visible units
        """

        assert (Y_hat is None) == (self.num_classes == 0)

        V_name = make_name(V_hat, 'anon_V_hat')
        assert isinstance(H_hat, (list,tuple))

        H_names = []
        for i in xrange(len(H_hat)):
            H_names.append( make_name(H_hat[i], 'anon_H_hat[%d]' %(i,) ))

        m = V_hat.shape[0]
        m.name = V_name + '.shape[0]'

        assert len(H_hat) == len(self.rbms)

        v = T.mean(V_hat, axis=0)

        if no_v_bias:
            v_bias_contrib = 0.
        else:
            v_bias_contrib = T.dot(v, self.bias_vis)

        #exp_vh = T.dot(V_hat.T,H_hat[0]) / m

        #v_weights_contrib = T.sum(self.W[0] * exp_vh)

        v_weights_contrib = (T.dot(V_hat, self.W[0]) * H_hat[0]).sum(axis=1).mean()

        v_weights_contrib.name = 'v_weights_contrib('+V_name+','+H_names[0]+')'

        total = v_bias_contrib + v_weights_contrib

        for i in xrange(len(H_hat) - 1):
            lower_H = H_hat[i]
            low = T.mean(lower_H, axis = 0)
            higher_H = H_hat[i+1]
            #exp_lh = T.dot(lower_H.T, higher_H) / m
            lower_bias = self.bias_hid[i]
            W = self.W[i+1]

            lower_bias_contrib = T.dot(low, lower_bias)

            #weights_contrib = T.sum( W * exp_lh) / m
            weights_contrib = (T.dot(lower_H, W) * higher_H).sum(axis=1).mean()

            total = total + lower_bias_contrib + weights_contrib

        highest_bias_contrib = T.dot(T.mean(H_hat[-1],axis=0), self.bias_hid[-1])

        total = total + highest_bias_contrib

        assert len(total.type.broadcastable) == 0

        if Y_hat is not None:
            weights_contrib = (T.dot(H_hat[-1], self.W_class) * Y_hat).sum(axis=1).mean()
            bias_contrib = T.dot(T.mean(Y_hat,axis=0), self.bias_class)
            total = total + weights_contrib + bias_contrib

        rval =  - total

        #rval.name = 'dbm_expected_energy('+V_name+','+str(H_names)+')'

        return rval
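When Y_hat is given, this version adds two class-label terms on top of the topmost hidden layer, mirroring the lines above. A short NumPy illustration (the W_class and bias_class shapes are assumptions):

    import numpy as np

    rng = np.random.RandomState(0)
    m, n_top, n_classes = 5, 3, 4
    H_top = rng.uniform(size=(m, n_top))       # mean field over topmost layer
    Y_hat = rng.uniform(size=(m, n_classes))   # mean field over class labels
    W_class = rng.randn(n_top, n_classes)
    bias_class = rng.randn(n_classes)

    class_weights_contrib = (H_top.dot(W_class) * Y_hat).sum(axis=1).mean()
    class_bias_contrib = Y_hat.mean(axis=0).dot(bias_class)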
Example #5
    def expected_energy_batch(self, V_hat, H_hat, Y_hat = None, no_v_bias = False):
        """ expected energy of the model under the mean field distribution
            defined by V_hat and H_hat
            alternately, could be expectation of the energy function across
            a batch of examples, where every element of V_hat and H_hat is
            a binary observation
            if no_v_bias is True, ignores the contribution from biases on visible units
        """

        warnings.warn("TODO: write unit test verifying expected_energy_batch/m = expected_energy")

        assert (Y_hat is None) == (self.num_classes == 0)

        V_name = make_name(V_hat, 'anon_V_hat')
        assert isinstance(H_hat, (list,tuple))

        H_names = []
        for i in xrange(len(H_hat)):
            H_names.append( make_name(H_hat[i], 'anon_H_hat[%d]' %(i,) ))

        assert len(H_hat) == len(self.rbms)

        if no_v_bias:
            v_bias_contrib = 0.
        else:
            v_bias_contrib = T.dot(V_hat, self.bias_vis)


        assert len(V_hat.type.broadcastable) == 2
        assert len(self.W[0].type.broadcastable) == 2
        assert len(H_hat[0].type.broadcastable) == 2

        interm1 = T.dot(V_hat, self.W[0])
        assert len(interm1.type.broadcastable) == 2
        interm2 = interm1 * H_hat[0]
        assert len(interm2.type.broadcastable) == 2

        v_weights_contrib = interm2.sum(axis=1)

        v_weights_contrib.name = 'v_weights_contrib('+V_name+','+H_names[0]+')'
        assert len(v_weights_contrib.type.broadcastable) == 1

        total = v_bias_contrib + v_weights_contrib

        for i in xrange(len(H_hat) - 1):
            lower_H = H_hat[i]
            higher_H = H_hat[i+1]
            #exp_lh = T.dot(lower_H.T, higher_H) / m
            lower_bias = self.bias_hid[i]
            W = self.W[i+1]

            lower_bias_contrib = T.dot(lower_H, lower_bias)

            #weights_contrib = T.sum( W * exp_lh) / m
            weights_contrib = (T.dot(lower_H, W) * higher_H).sum(axis=1)

            cur_contrib = lower_bias_contrib + weights_contrib
            assert len(cur_contrib.type.broadcastable) == 1
            total = total + cur_contrib

        highest_bias_contrib = T.dot(H_hat[-1], self.bias_hid[-1])

        total = total + highest_bias_contrib

        if Y_hat is not None:
            weights_contrib = (T.dot(H_hat[-1], self.W_class) * Y_hat).sum(axis=1)
            assert weights_contrib.ndim == 1
            bias_contrib = T.dot(Y_hat, self.bias_class)
            assert bias_contrib.ndim == 1
            total = total + weights_contrib + bias_contrib

        assert len(total.type.broadcastable) == 1

        rval =  - total

        #rval.name = 'dbm_expected_energy('+V_name+','+str(H_names)+')'

        return rval
Example #6
    def expected_energy(self, V_hat, H_hat, Y_hat = None, no_v_bias = False):
        """
        .. todo::

            WRITEME properly

        Expected energy of the model under the mean field distribution
        defined by V_hat and H_hat.
        Alternately, this can be read as the expectation of the energy
        function across a batch of examples, where every element of V_hat
        and H_hat is a binary observation.
        If no_v_bias is True, the contribution from the visible unit biases
        is ignored.
        """

        assert (Y_hat is None) == (self.num_classes == 0)

        V_name = make_name(V_hat, 'anon_V_hat')
        assert isinstance(H_hat, (list,tuple))

        H_names = []
        for i in xrange(len(H_hat)):
            H_names.append( make_name(H_hat[i], 'anon_H_hat[%d]' %(i,) ))

        m = V_hat.shape[0]
        m.name = V_name + '.shape[0]'

        assert len(H_hat) == len(self.rbms)

        v = T.mean(V_hat, axis=0)

        if no_v_bias:
            v_bias_contrib = 0.
        else:
            v_bias_contrib = T.dot(v, self.bias_vis)

        #exp_vh = T.dot(V_hat.T,H_hat[0]) / m

        #v_weights_contrib = T.sum(self.W[0] * exp_vh)

        v_weights_contrib = (T.dot(V_hat, self.W[0]) * H_hat[0]).sum(axis=1).mean()

        v_weights_contrib.name = 'v_weights_contrib('+V_name+','+H_names[0]+')'

        total = v_bias_contrib + v_weights_contrib

        for i in xrange(len(H_hat) - 1):
            lower_H = H_hat[i]
            low = T.mean(lower_H, axis = 0)
            higher_H = H_hat[i+1]
            #exp_lh = T.dot(lower_H.T, higher_H) / m
            lower_bias = self.bias_hid[i]
            W = self.W[i+1]

            lower_bias_contrib = T.dot(low, lower_bias)

            #weights_contrib = T.sum( W * exp_lh) / m
            weights_contrib = (T.dot(lower_H, W) * higher_H).sum(axis=1).mean()

            total = total + lower_bias_contrib + weights_contrib

        highest_bias_contrib = T.dot(T.mean(H_hat[-1],axis=0), self.bias_hid[-1])

        total = total + highest_bias_contrib

        assert len(total.type.broadcastable) == 0

        if Y_hat is not None:
            weights_contrib = (T.dot(H_hat[-1], self.W_class) * Y_hat).sum(axis=1).mean()
            bias_contrib = T.dot(T.mean(Y_hat,axis=0), self.bias_class)
            total = total + weights_contrib + bias_contrib

        rval =  - total

        #rval.name = 'dbm_expected_energy('+V_name+','+str(H_names)+')'

        return rval
Example #7
    def __call__(self, X, Y = None, X_space=None):
        """
        Provides the mask for multi-prediction training. A 1 in the mask
        corresponds to a variable that should be used as an input to the
        inference process. A 0 corresponds to a variable that should be
        used as a prediction target of the multi-prediction training
        criterion.

        Parameters
        ----------
        X : Variable
            A batch of input features to mask for multi-prediction training
        Y : Variable
            A batch of input class labels to mask for multi-prediction
            training

        Returns
        -------
        drop_mask : Variable
            A Theano expression for a random binary mask in the same shape as
            `X`
        drop_mask_Y : Variable, only returned if `Y` is not None
            A Theano expression for a random binary mask in the same shape as
            `Y`

        Notes
        -----
        Calling this repeatedly will yield the same random numbers each time.
        """
        assert X_space is not None
        self.called = True
        assert X.dtype == config.floatX
        theano_rng = make_theano_rng(getattr(self, 'seed', None), default_seed,
                                     which_method="binomial")

        if X.ndim == 2 and self.sync_channels:
            raise NotImplementedError()

        p = self.drop_prob

        if not hasattr(self, 'drop_prob_y') or self.drop_prob_y is None:
            yp = p
        else:
            yp = self.drop_prob_y

        batch_size = X_space.batch_size(X)

        if self.balance:
            flip = theano_rng.binomial(
                    size = (batch_size,),
                    p = 0.5,
                    n = 1,
                    dtype = X.dtype)

            yp = flip * (1-p) + (1-flip) * p

            dimshuffle_args = ['x'] * X.ndim

            if X.ndim == 2:
                dimshuffle_args[0] = 0
                assert not self.sync_channels
            else:
                dimshuffle_args[X_space.axes.index('b')] = 0
                if self.sync_channels:
                    del dimshuffle_args[X_space.axes.index('c')]

            flip = flip.dimshuffle(*dimshuffle_args)

            p = flip * (1-p) + (1-flip) * p

        # size needs to have a fixed length at compile time or the
        # theano random number generator will be angry
        size = [X.shape[i] for i in xrange(X.ndim)]
        if self.sync_channels:
            # a tuple does not support item deletion, so build size as a list
            # and only convert it to a tuple after removing the channel axis
            del size[X_space.axes.index('c')]
        size = tuple(size)

        drop_mask = theano_rng.binomial(
                    size = size,
                    p = p,
                    n = 1,
                    dtype = X.dtype)

        X_name = make_name(X, 'anon_X')
        drop_mask.name = 'drop_mask(%s)' % X_name

        if Y is not None:
            assert isinstance(yp, float) or yp.ndim < 2
            drop_mask_Y = theano_rng.binomial(
                    size = (batch_size, ),
                    p = yp,
                    n = 1,
                    dtype = X.dtype)
            assert drop_mask_Y.ndim == 1
            Y_name = make_name(Y, 'anon_Y')
            drop_mask_Y.name = 'drop_mask_Y(%s)' % Y_name
            return drop_mask, drop_mask_Y

        return drop_mask
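For context, a hedged NumPy illustration of how the two returned masks might be applied, following the convention stated in the docstring above (1 selects an input to inference, 0 selects a prediction target). The variable names here are assumptions, not project API:

    import numpy as np

    rng = np.random.RandomState(0)
    X = rng.randn(4, 6)
    drop_mask = rng.binomial(n=1, p=0.5, size=X.shape)

    X_observed = X * drop_mask        # variables fed to the inference process
    target_mask = 1 - drop_mask       # variables the MP criterion must predict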
Example #8
    def __call__(self, X, Y = None, X_space=None):
        """
        .. todo::

            WRITEME

        Note that calling this repeatedly will yield the same random numbers each time.
        """
        assert X_space is not None
        self.called = True
        assert X.dtype == config.floatX
        if not hasattr(self, 'seed'):
            self.seed = default_seed
        theano_rng = RandomStreams(self.seed)

        if X.ndim == 2 and self.sync_channels:
            raise NotImplementedError()

        p = self.drop_prob

        if not hasattr(self, 'drop_prob_y') or self.drop_prob_y is None:
            yp = p
        else:
            yp = self.drop_prob_y

        batch_size = X_space.batch_size(X)

        if self.balance:
            flip = theano_rng.binomial(
                    size = (batch_size,),
                    p = 0.5,
                    n = 1,
                    dtype = X.dtype)

            yp = flip * (1-p) + (1-flip) * p

            dimshuffle_args = ['x'] * X.ndim

            if X.ndim == 2:
                dimshuffle_args[0] = 0
                assert not self.sync_channels
            else:
                dimshuffle_args[X_space.axes.index('b')] = 0
                if self.sync_channels:
                    del dimshuffle_args[X_space.axes.index('c')]

            flip = flip.dimshuffle(*dimshuffle_args)

            p = flip * (1-p) + (1-flip) * p

        #size needs to have a fixed length at compile time or the
        #theano random number generator will be angry
        size = [X.shape[i] for i in xrange(X.ndim)]
        if self.sync_channels:
            # a tuple does not support item deletion, so build size as a list
            # and only convert it to a tuple after removing the channel axis
            del size[X_space.axes.index('c')]
        size = tuple(size)

        drop_mask = theano_rng.binomial(
                    size = size,
                    p = p,
                    n = 1,
                    dtype = X.dtype)

        X_name = make_name(X, 'anon_X')
        drop_mask.name = 'drop_mask(%s)' % X_name

        if Y is not None:
            assert isinstance(yp, float) or yp.ndim < 2
            drop_mask_Y = theano_rng.binomial(
                    size = (batch_size, ),
                    p = yp,
                    n = 1,
                    dtype = X.dtype)
            assert drop_mask_Y.ndim == 1
            Y_name = make_name(Y, 'anon_Y')
            drop_mask_Y.name = 'drop_mask_Y(%s)' % Y_name
            #drop_mask = Print('drop_mask',attrs=['sum'])(drop_mask)
            #drop_mask_Y = Print('drop_mask_Y',attrs=['sum'])(drop_mask_Y)
            return drop_mask, drop_mask_Y

        return drop_mask