def set_input_space(self, space):

        self.input_space = space

        if isinstance(space, VectorSpace):
            self.requires_reformat = False
            self.input_dim = space.dim
        else:
            self.requires_reformat = True
            self.input_dim = space.get_total_dimension()
            self.desired_space = VectorSpace(self.input_dim)

        self.output_space = VectorSpace(self.dim)

        rng = self.mlp.rng

        W = rng.uniform(-self.irange, self.irange, (self.input_dim, self.dim))

        W = sharedX(W)
        W.name = self.layer_name + '_W'

        self.transformer = MatrixMul(W)

        W, = self.transformer.get_params()
        assert W.name is not None
예제 #2
0
    def set_input_space(self, space):
        """ Note: this resets parameters! """

        self.input_space = space

        if isinstance(space, VectorSpace):
            self.requires_reformat = False
            self.input_dim = space.dim
        else:
            self.requires_reformat = True
            self.input_dim = space.get_total_dimension()
            self.desired_space = VectorSpace(self.input_dim)

        self.output_space = VectorSpace(self.dim)

        rng = self.dbm.rng
        if self.irange is not None:
            assert self.sparse_init is None
            W = rng.uniform(-self.irange,
                                 self.irange,
                                 (self.input_dim, self.dim)) * \
                    (rng.uniform(0.,1., (self.input_dim, self.dim))
                     < self.include_prob)
        else:
            assert self.sparse_init is not None
            W = np.zeros((self.input_dim, self.dim))
            W *= self.sparse_stdev

        W = sharedX(W)
        W.name = self.layer_name + '_W'

        self.transformer = MatrixMul(W)

        W ,= self.transformer.get_params()
        assert W.name is not None
예제 #3
0
파일: dpcn.py 프로젝트: EderSantana/mdpcn
    def set_input_space(self, space):
        
        self.input_space = space

        if isinstance(space, VectorSpace):
            self.requires_reformat = False
            self.input_dim = space.dim
        else:
            self.requires_reformat = True
            self.input_dim = space.get_total_dimension()
            self.desired_space = VectorSpace(self.input_dim)

        if self.fprop_code==True:
            self.output_space = VectorSpace(self.dim)
        else:
            self.output_space = VectorSpace(self.input_dim)

        rng = self.mlp.rng
        W = rng.randn(self.input_dim, self.dim)
        self.W = sharedX(W.T, self.layer_name + '_W')
        self.transformer = MatrixMul(self.W)
        self.W, = self.transformer.get_params()
        b = np.zeros((self.input_dim,))
        self.b = sharedX(b, self.layer_name + '_b') # We need both to pass input_dim valid
        X = .001 * rng.randn(self.batch_size, self.dim)
        self.X = sharedX(X, self.layer_name + '_X')
        self._params = [self.W, self.b, self.X]
        self.state_below = T.zeros((self.batch_size, self.input_dim))
예제 #4
0
파일: dbm.py 프로젝트: kfatyas/pylearn
    def set_input_space(self, space):
        """ Note: this resets parameters! """

        self.input_space = space

        if isinstance(space, VectorSpace):
            self.requires_reformat = False
            self.input_dim = space.dim
        else:
            self.requires_reformat = True
            self.input_dim = space.get_total_dimension()
            self.desired_space = VectorSpace(self.input_dim)

        if not (self.detector_layer_dim % self.pool_size == 0):
            raise ValueError(
                "detector_layer_dim = %d, pool_size = %d. Should be divisible but remainder is %d"
                % (self.detector_layer_dim, self.pool_size,
                   self.detector_layer_dim % self.pool_size))

        self.h_space = VectorSpace(self.detector_layer_dim)
        self.pool_layer_dim = self.detector_layer_dim / self.pool_size
        self.output_space = VectorSpace(self.pool_layer_dim)

        rng = self.dbm.rng
        if self.irange is not None:
            assert self.sparse_init is None
            W = rng.uniform(-self.irange,
                                 self.irange,
                                 (self.input_dim, self.detector_layer_dim)) * \
                    (rng.uniform(0.,1., (self.input_dim, self.detector_layer_dim))
                     < self.include_prob)
        else:
            assert self.sparse_init is not None
            W = np.zeros((self.input_dim, self.detector_layer_dim))
            for i in xrange(self.detector_layer_dim):
                for j in xrange(self.sparse_init):
                    idx = rng.randint(0, self.input_dim)
                    while W[idx, i] != 0:
                        idx = rng.randint(0, self.input_dim)
                    W[idx, i] = rng.randn()

        W = sharedX(W)
        W.name = self.layer_name + '_W'

        self.transformer = MatrixMul(W)

        W, = self.transformer.get_params()
        assert W.name is not None
    def set_input_space(self, space):

        self.input_space = space

        if isinstance(space, VectorSpace):
            self.requires_reformat = False
            self.input_dim = space.dim
        else:
            self.requires_reformat = True
            self.input_dim = space.get_total_dimension()
            self.desired_space = VectorSpace(self.input_dim)

        self.output_space = VectorSpace(self.dim)

        rng = self.mlp.rng

        W = rng.uniform(-self.irange,
                        self.irange,
                        (self.input_dim, self.dim))

        W = sharedX(W)
        W.name = self.layer_name + '_W'

        self.transformer = MatrixMul(W)

        W, = self.transformer.get_params()
        assert W.name is not None
예제 #6
0
    def _prepare_generator(self, generator, noise_space,
                           condition_distribution, new_W_irange, input_source):
        noise_dim = noise_space.get_total_dimension()
        condition_dim = self.condition_space.get_total_dimension()

        first_layer = generator.mlp.layers[0]
        pretrain_W, _ = first_layer.get_param_values()

        rng = generator.mlp.rng
        new_W = np.vstack((pretrain_W,
                           rng.uniform(-new_W_irange, new_W_irange,
                                       (condition_dim, pretrain_W.shape[1]))))
        new_W = sharedX(new_W)
        new_W.name = first_layer.get_params()[0].name + '_retrain'

        first_layer.transformer = MatrixMul(new_W)
        first_layer.input_space = CompositeSpace(
            components=[noise_space, self.condition_space])
        generator.mlp.input_space = first_layer.input_space

        # HACK!
        generator.mlp._input_source = input_source

        return ConditionalGenerator(
            generator.mlp,
            input_condition_space=self.condition_space,
            condition_distribution=condition_distribution,
            noise_dim=noise_dim)
예제 #7
0
    def set_input_space(self, space):
        self.input_space = space

        if isinstance(space, VectorSpace):
            self.requires_reformat = False
            self.input_dim = space.dim
        else:
            self.requires_reformat = True
            self.input_dim = space.get_total_dimension()
            self.desired_space = VectorSpace(self.input_dim)

        self.output_space = VectorSpace(self.dim)

        self.rng = self.mlp.rng

        # sanity checking
        assert self.dictionary.input_dim == self.input_dim
        assert self.dictionary.size >= self.dim

        indices = self.rng.permutation(self.dictionary.size)
        indices = indices[:self.dim]
        indices.sort()

        W = self.dictionary.get_subdictionary(indices)

        # dictionary atoms are stored in rows but transformers expect them to
        # be in columns.
        W = sharedX(W.T)
        W.name = self.layer_name + "_W"
        self.transformer = MatrixMul(W)
예제 #8
0
파일: mlp.py 프로젝트: renjupaul/pylearn
    def set_input_space(self, space):
        """ Note: this resets parameters! """

        self.input_space = space

        if isinstance(space, VectorSpace):
            self.requires_reformat = False
            self.input_dim = space.dim
        else:
            self.requires_reformat = True
            self.input_dim = space.get_total_dimension()
            self.desired_space = VectorSpace(self.input_dim)


        if not (self.detector_layer_dim % self.pool_size == 0):
            raise ValueError("detector_layer_dim = %d, pool_size = %d. Should be divisible but remainder is %d" %
                             (self.detector_layer_dim, self.pool_size, self.detector_layer_dim % self.pool_size))

        self.h_space = VectorSpace(self.detector_layer_dim)
        self.pool_layer_dim = self.detector_layer_dim / self.pool_size
        self.output_space = VectorSpace(self.pool_layer_dim)

        rng = self.mlp.rng
        if self.irange is not None:
            assert self.sparse_init is None
            W = rng.uniform(-self.irange,
                            self.irange,
                            (self.input_dim, self.detector_layer_dim)) * \
                (rng.uniform(0.,1., (self.input_dim, self.detector_layer_dim))
                 < self.include_prob)
        else:
            assert self.sparse_init is not None
            W = np.zeros((self.input_dim, self.detector_layer_dim))
            def mask_rejects(idx, i):
                if self.mask_weights is None:
                    return False
                return self.mask_weights[idx, i] == 0.
            for i in xrange(self.detector_layer_dim):
                assert self.sparse_init <= self.input_dim
                for j in xrange(self.sparse_init):
                    idx = rng.randint(0, self.input_dim)
                    while W[idx, i] != 0 or mask_rejects(idx, i):
                        idx = rng.randint(0, self.input_dim)
                    W[idx, i] = rng.randn()
            W *= self.sparse_stdev

        W = sharedX(W)
        W.name = self.layer_name + '_W'

        self.transformer = MatrixMul(W)

        W ,= self.transformer.get_params()
        assert W.name is not None

        if self.mask_weights is not None:
            expected_shape =  (self.input_dim, self.detector_layer_dim)
            if expected_shape != self.mask_weights.shape:
                raise ValueError("Expected mask with shape "+str(expected_shape)+" but got "+str(self.mask_weights.shape))
            self.mask = sharedX(self.mask_weights)
예제 #9
0
파일: adam.py 프로젝트: cc13ny/galatea
class Adam:
    def __init__(self, batch_size, alpha, irange):
        self.alpha = alpha
        self.visible_layer = GaussianConvolutionalVisLayer(rows = 32,cols = 32, channels = 3, init_beta =1., init_mu = 0.)
        self.hidden_layers = [ Softmax(n_classes = 10,
                                            irange = .01) ]
        rng = np.random.RandomState([2012,8,20])
        self.W = MatrixMul( sharedX( rng.uniform(-irange, irange, (108,1600))))
        #make_random_conv2D(irange = .05, input_space = self.visible_layer.get_input_space(),
        #                output_space = Conv2DSpace([27,27],1600),
        #                kernel_shape = (6,6),
        #                batch_size = batch_size)
        self.batch_size = batch_size
        self.hidden_layers[0].dbm = self
        self.hidden_layers[0].set_input_space(Conv2DSpace([2,2],3200))

    def get_params(self):
        return set(self.hidden_layers[0].get_params()).union(self.W.get_params())

    def mf(self, X):
        patches = cifar10neighbs(X,(6,6))
        patches -= patches.mean(axis=1).dimshuffle(0,'x')
        patches /= T.sqrt(T.sqr(patches).sum(axis=1)+10.0).dimshuffle(0,'x')

        Z = self.W.lmul(patches)

        #Z = Print('Z',attrs=['min','mean','max'])(Z)

        Z = T.concatenate((Z,-Z),axis=1)
        Z = multichannel_neibs2imgs(Z, self.batch_size, 27, 27, 3200, 1, 1)
        Z = Z.dimshuffle(0,3,1,2)
        p = max_pool_2d(Z,(14,14),False)
        p = p.dimshuffle(0,1,2,3)
        p = T.maximum(p - self.alpha, 0.)
        #p = Print('p',attrs=['min','mean','max'])(p)
        y = self.hidden_layers[0].mf_update(state_below = p, state_above = None)
        return [ Z, y ]

    def get_weights_topo(self):
        outp, inp, rows, cols = range(4)
        raw = self.W._filters.get_value()
        return np.transpose(raw,(outp,rows,cols,inp))
예제 #10
0
파일: adam.py 프로젝트: cc13ny/galatea
 def __init__(self, batch_size, alpha, irange):
     self.alpha = alpha
     self.visible_layer = GaussianConvolutionalVisLayer(rows = 32,cols = 32, channels = 3, init_beta =1., init_mu = 0.)
     self.hidden_layers = [ Softmax(n_classes = 10,
                                         irange = .01) ]
     rng = np.random.RandomState([2012,8,20])
     self.W = MatrixMul( sharedX( rng.uniform(-irange, irange, (108,1600))))
     #make_random_conv2D(irange = .05, input_space = self.visible_layer.get_input_space(),
     #                output_space = Conv2DSpace([27,27],1600),
     #                kernel_shape = (6,6),
     #                batch_size = batch_size)
     self.batch_size = batch_size
     self.hidden_layers[0].dbm = self
     self.hidden_layers[0].set_input_space(Conv2DSpace([2,2],3200))
예제 #11
0
파일: cdpcn.py 프로젝트: EderSantana/mdpcn
    def set_input_space(self, space):
        
        self.input_space = space

        if isinstance(space, VectorSpace):
            self.requires_reformat = False
            self.input_dim = space.dim
        else:
            self.requires_reformat = True
            self.input_dim = space.get_total_dimension()
            self.desired_space = VectorSpace(self.input_dim)

        if self.fprop_code==True:
            self.output_space = VectorSpace(self.dim)
        else:
            self.output_space = VectorSpace(self.input_dim)

        rng = self.mlp.rng
        W = rng.randn(self.input_dim, self.dim)
        self.W = sharedX(W.T, self.layer_name + '_W')
        self.transformer = MatrixMul(self.W)
        self.W, = self.transformer.get_params()
        b = np.zeros((self.input_dim,))
        self.b = sharedX(b, self.layer_name + '_b') # We need both to pass input_dim valid
        X = .001 * rng.randn(self.batch_size, self.dim)
        self.X = sharedX(X, self.layer_name + '_X')
        S = rng.normal(0, .001, size=(self.batch_size, self.input_dim))
        self.S = sharedX(S, self.layer_name + '_S')
        self._params = [self.W, self.b]
        #self.state_below = T.zeros((self.batch_size, self.input_dim))
        
        cost = self.get_local_cost()
        self.opt = top.Optimizer(self.X, cost,  
                                 method='rmsprop', 
                                 learning_rate=self.lr, momentum=.9)

        self._reconstruction = theano.function([], T.dot(self.X, self.W))
예제 #12
0
파일: cdpcn.py 프로젝트: EderSantana/mdpcn
    def set_input_space(self, space):
        
        self.input_space = space
        assert isinstance(space, CompositeSpace)
        self.input_dim = []
        self.desired_space = []
        for sp in space.components:
            if isinstance(sp, VectorSpace):
                self.requires_reformat = False
                self.input_dim.append(sp.dim)
            else:
                self.requires_reformat = True
                self.input_dim.append(sp.get_total_dimension())
                self.desired_space.append( VectorSpace(self.input_dim[-1]) )

        if self.fprop_code==True:
            self.output_space = VectorSpace(self.dim)
        else:
            #self.output_space = VectorSpace(self.input_dim)
            # TODO: return composite space
            raise NotImplementedError

        rng = self.mlp.rng
        self.W = []
        self.S = []
        self.b = []
        self.transformer = []
        self._params = []
        
        X = .001 * rng.randn(self.batch_size, self.dim)
        self.X = sharedX(X, self.layer_name + '_X')
        
        for c in range(len(self.input_space.components)):
            W = rng.randn(self.input_dim[c], self.dim)
            self.W += [ sharedX(W.T, self.layer_name + '_W' + str(c)) ]
            self.transformer += [ MatrixMul(self.W[c]) ]
            self.W[-1], = self.transformer[-1].get_params()
            b = np.zeros((self.input_dim[c],))
            self.b += [ sharedX(b, self.layer_name + '_b' + str(c)) ] # We need both to pass input_dim valid
            S = rng.normal(0, .001, size=(self.batch_size, self.input_dim[c]))
            self.S += [ sharedX(S, self.layer_name + '_S' + str(c)) ]
            self._params += [self.W[-1], self.b[-1]]
            #self.state_below = T.zeros((self.batch_size, self.input_dim))
            
        cost = self.get_local_cost()
        self.opt = top.Optimizer(self.X, cost,  
                                 method='rmsprop', 
                                 learning_rate=self.lr, momentum=.9)
예제 #13
0
파일: dbm.py 프로젝트: deigen/pylearn
    def set_input_space(self, space):
        """ Note: this resets parameters! """

        self.input_space = space

        if isinstance(space, VectorSpace):
            self.requires_reformat = False
            self.input_dim = space.dim
        else:
            self.requires_reformat = True
            self.input_dim = space.get_total_dimension()
            self.desired_space = VectorSpace(self.input_dim)


        if not (self.detector_layer_dim % self.pool_size == 0):
            raise ValueError("detector_layer_dim = %d, pool_size = %d. Should be divisible but remainder is %d" %
                    (self.detector_layer_dim, self.pool_size, self.detector_layer_dim % self.pool_size))

        self.h_space = VectorSpace(self.detector_layer_dim)
        self.pool_layer_dim = self.detector_layer_dim / self.pool_size
        self.output_space = VectorSpace(self.pool_layer_dim)

        rng = self.dbm.rng
        if self.irange is not None:
            assert self.sparse_init is None
            W = rng.uniform(-self.irange,
                                 self.irange,
                                 (self.input_dim, self.detector_layer_dim)) * \
                    (rng.uniform(0.,1., (self.input_dim, self.detector_layer_dim))
                     < self.include_prob)
        else:
            assert self.sparse_init is not None
            W = np.zeros((self.input_dim, self.detector_layer_dim))
            for i in xrange(self.detector_layer_dim):
                for j in xrange(self.sparse_init):
                    idx = rng.randint(0, self.input_dim)
                    while W[idx, i] != 0:
                        idx = rng.randint(0, self.input_dim)
                    W[idx, i] = rng.randn()

        W = sharedX(W)
        W.name = self.layer_name + '_W'

        self.transformer = MatrixMul(W)

        W ,= self.transformer.get_params()
        assert W.name is not None
예제 #14
0
    def setUpClass(cls):
        cls.test_m = 2

        cls.rng = N.random.RandomState([1, 2, 3])
        cls.nv = 3
        cls.nh = 4

        cls.vW = cls.rng.randn(cls.nv, cls.nh)
        cls.W = sharedX(cls.vW)
        cls.vbv = as_floatX(cls.rng.randn(cls.nv))
        cls.bv = T.as_tensor_variable(cls.vbv)
        cls.bv.tag.test_value = cls.vbv
        cls.vbh = as_floatX(cls.rng.randn(cls.nh))
        cls.bh = T.as_tensor_variable(cls.vbh)
        cls.bh.tag.test_value = cls.bh
        cls.vsigma = as_floatX(cls.rng.uniform(0.1, 5))
        cls.sigma = T.as_tensor_variable(cls.vsigma)
        cls.sigma.tag.test_value = cls.vsigma

        cls.E = GRBM_Type_1(transformer=MatrixMul(cls.W),
                            bias_vis=cls.bv,
                            bias_hid=cls.bh,
                            sigma=cls.sigma)

        cls.V = T.matrix()
        cls.V.tag.test_value = as_floatX(cls.rng.rand(cls.test_m, cls.nv))
        cls.H = T.matrix()
        cls.H.tag.test_value = as_floatX(cls.rng.rand(cls.test_m, cls.nh))

        cls.E_func = function([cls.V, cls.H], cls.E([cls.V, cls.H]))
        cls.F_func = function([cls.V], cls.E.free_energy(cls.V))
        cls.log_P_H_given_V_func = \
            function([cls.H, cls.V], cls.E.log_P_H_given_V(cls.H, cls.V))
        cls.score_func = function([cls.V], cls.E.score(cls.V))

        cls.F_of_V = cls.E.free_energy(cls.V)
        cls.dummy = T.sum(cls.F_of_V)
        cls.negscore = T.grad(cls.dummy, cls.V)
        cls.score = -cls.negscore

        cls.generic_score_func = function([cls.V], cls.score)
예제 #15
0
파일: dbm.py 프로젝트: kfatyas/pylearn
class BinaryVectorMaxPool(HiddenLayer):
    """
        A hidden layer that does max-pooling on binary vectors.
        It has two sublayers, the detector layer and the pooling
        layer. The detector layer is its downward state and the pooling
        layer is its upward state.

        TODO: this layer uses (pooled, detector) as its total state,
              which can be confusing when listing all the states in
              the network left to right. Change this and
              pylearn2.expr.probabilistic_max_pooling to use
              (detector, pooled)
    """
    def __init__(self,
                 detector_layer_dim,
                 pool_size,
                 layer_name,
                 irange=None,
                 sparse_init=None,
                 include_prob=1.0,
                 init_bias=0.):
        """

            include_prob: probability of including a weight element in the set
                    of weights initialized to U(-irange, irange). If not included
                    it is initialized to 0.

        """
        self.__dict__.update(locals())
        del self.self

        self.b = sharedX(np.zeros((self.detector_layer_dim, )) + init_bias,
                         name=layer_name + '_b')

    def set_input_space(self, space):
        """ Note: this resets parameters! """

        self.input_space = space

        if isinstance(space, VectorSpace):
            self.requires_reformat = False
            self.input_dim = space.dim
        else:
            self.requires_reformat = True
            self.input_dim = space.get_total_dimension()
            self.desired_space = VectorSpace(self.input_dim)

        if not (self.detector_layer_dim % self.pool_size == 0):
            raise ValueError(
                "detector_layer_dim = %d, pool_size = %d. Should be divisible but remainder is %d"
                % (self.detector_layer_dim, self.pool_size,
                   self.detector_layer_dim % self.pool_size))

        self.h_space = VectorSpace(self.detector_layer_dim)
        self.pool_layer_dim = self.detector_layer_dim / self.pool_size
        self.output_space = VectorSpace(self.pool_layer_dim)

        rng = self.dbm.rng
        if self.irange is not None:
            assert self.sparse_init is None
            W = rng.uniform(-self.irange,
                                 self.irange,
                                 (self.input_dim, self.detector_layer_dim)) * \
                    (rng.uniform(0.,1., (self.input_dim, self.detector_layer_dim))
                     < self.include_prob)
        else:
            assert self.sparse_init is not None
            W = np.zeros((self.input_dim, self.detector_layer_dim))
            for i in xrange(self.detector_layer_dim):
                for j in xrange(self.sparse_init):
                    idx = rng.randint(0, self.input_dim)
                    while W[idx, i] != 0:
                        idx = rng.randint(0, self.input_dim)
                    W[idx, i] = rng.randn()

        W = sharedX(W)
        W.name = self.layer_name + '_W'

        self.transformer = MatrixMul(W)

        W, = self.transformer.get_params()
        assert W.name is not None

    def get_total_state_space(self):
        return CompositeSpace((self.output_space, self.h_space))

    def get_params(self):
        assert self.b.name is not None
        W, = self.transformer.get_params()
        assert W.name is not None
        return self.transformer.get_params().union([self.b])

    def get_weight_decay(self, coeff):
        if isinstance(coeff, str):
            coeff = float(coeff)
        assert isinstance(coeff, float)
        W, = self.transformer.get_params()
        return coeff * T.sqr(W).sum()

    def get_weights(self):
        if self.requires_reformat:
            # This is not really an unimplemented case.
            # We actually don't know how to format the weights
            # in design space. We got the data in topo space
            # and we don't have access to the dataset
            raise NotImplementedError()
        W, = self.transformer.get_params()
        return W.get_value()

    def set_weights(self, weights):
        W, = self.transformer.get_params()
        W.set_value(weights)

    def set_biases(self, biases):
        self.b.set_value(biases)

    def get_biases(self):
        return self.b.get_value()

    def get_weights_format(self):
        return ('v', 'h')

    def get_weights_view_shape(self):
        total = self.detector_layer_dim
        cols = self.pool_size
        if cols == 1:
            # Let the PatchViewer decidew how to arrange the units
            # when they're not pooled
            raise NotImplementedError()
        # When they are pooled, make each pooling unit have one row
        rows = total / cols
        return rows, cols

    def get_weights_topo(self):

        if not isinstance(self.input_space, Conv2DSpace):
            raise NotImplementedError()

        W, = self.transformer.get_params()

        W = W.T

        W = W.reshape((self.detector_layer_dim, self.input_space.shape[0],
                       self.input_space.shape[1], self.input_space.nchannels))

        W = Conv2DSpace.convert(W, self.input_space.axes, ('b', 0, 1, 'c'))

        return function([], W)()

    def upward_state(self, total_state):
        p, h = total_state
        self.h_space.validate(h)
        self.output_space.validate(p)
        return p

    def downward_state(self, total_state):
        p, h = total_state
        return h

    def get_monitoring_channels_from_state(self, state):

        P, H = state

        rval = {}

        if self.pool_size == 1:
            vars_and_prefixes = [(P, '')]
        else:
            vars_and_prefixes = [(P, 'p_'), (H, 'h_')]

        for var, prefix in vars_and_prefixes:
            v_max = var.max(axis=0)
            v_min = var.min(axis=0)
            v_mean = var.mean(axis=0)
            v_range = v_max - v_min

            for key, val in [('max_max', v_max.max()),
                             ('max_mean', v_max.mean()),
                             ('max_min', v_max.min()),
                             ('min_max', v_min.max()),
                             ('min_mean', v_min.mean()),
                             ('min_max', v_min.max()),
                             ('range_max', v_range.max()),
                             ('range_mean', v_range.mean()),
                             ('range_min', v_range.min()),
                             ('mean_max', v_mean.max()),
                             ('mean_mean', v_mean.mean()),
                             ('mean_min', v_mean.min())]:
                rval[prefix + key] = val

        return rval

    def get_l1_act_cost(self, state, target, coeff, eps=None):
        rval = 0.

        P, H = state
        self.output_space.validate(P)
        self.h_space.validate(H)

        if self.pool_size == 1:
            # If the pool size is 1 then pools = detectors
            # and we should not penalize pools and detectors separately
            assert len(state) == 2
            assert isinstance(target, float)
            assert isinstance(coeff, float)
            _, state = state
            state = [state]
            target = [target]
            coeff = [coeff]
            if eps is None:
                eps = [0.]
            else:
                eps = [eps]
        else:
            assert all([len(elem) == 2 for elem in [state, target, coeff]])
            if eps is None:
                eps = [0., 0.]
            if target[1] < target[0]:
                warnings.warn(
                    "Do you really want to regularize the detector units to be sparser than the pooling units?"
                )

        for s, t, c, e in safe_zip(state, target, coeff, eps):
            assert all([isinstance(elem, float) for elem in [t, c, e]])
            if c == 0.:
                continue
            m = s.mean(axis=0)
            assert m.ndim == 1
            rval += T.maximum(abs(m - t) - e, 0.).mean() * c

        return rval

    def sample(self,
               state_below=None,
               state_above=None,
               layer_above=None,
               theano_rng=None):

        if theano_rng is None:
            raise ValueError(
                "theano_rng is required; it just defaults to None so that it may appear after layer_above / state_above in the list."
            )

        if state_above is not None:
            msg = layer_above.downward_message(state_above)
        else:
            msg = None

        if self.requires_reformat:
            state_below = self.input_space.format_as(state_below,
                                                     self.desired_space)

        z = self.transformer.lmul(state_below) + self.b
        p, h, p_sample, h_sample = max_pool_channels(z, self.pool_size, msg,
                                                     theano_rng)

        return p_sample, h_sample

    def downward_message(self, downward_state):
        rval = self.transformer.lmul_T(downward_state)

        if self.requires_reformat:
            rval = self.desired_space.format_as(rval, self.input_space)

        return rval

    def make_state(self, num_examples, numpy_rng):
        """ Returns a shared variable containing an actual state
           (not a mean field state) for this variable.
        """

        t1 = time.time()

        empty_input = self.h_space.get_origin_batch(num_examples)
        h_state = sharedX(empty_input)

        default_z = T.zeros_like(h_state) + self.b

        theano_rng = MRG_RandomStreams(numpy_rng.randint(2**16))

        p_exp, h_exp, p_sample, h_sample = max_pool_channels(
            z=default_z, pool_size=self.pool_size, theano_rng=theano_rng)

        assert h_sample.dtype == default_z.dtype

        p_state = sharedX(self.output_space.get_origin_batch(num_examples))

        t2 = time.time()

        f = function([], updates={p_state: p_sample, h_state: h_sample})

        t3 = time.time()

        f()

        t4 = time.time()

        print str(self) + '.make_state took', t4 - t1
        print '\tcompose time:', t2 - t1
        print '\tcompile time:', t3 - t2
        print '\texecute time:', t4 - t3

        p_state.name = 'p_sample_shared'
        h_state.name = 'h_sample_shared'

        return p_state, h_state

    def expected_energy_term(self, state, average, state_below, average_below):

        self.input_space.validate(state_below)

        if self.requires_reformat:
            if not isinstance(state_below, tuple):
                for sb in get_debug_values(state_below):
                    if sb.shape[0] != self.dbm.batch_size:
                        raise ValueError(
                            "self.dbm.batch_size is %d but got shape of %d" %
                            (self.dbm.batch_size, sb.shape[0]))
                    assert reduce(lambda x, y: x * y,
                                  sb.shape[1:]) == self.input_dim

            state_below = self.input_space.format_as(state_below,
                                                     self.desired_space)

        downward_state = self.downward_state(state)
        self.h_space.validate(downward_state)

        # Energy function is linear so it doesn't matter if we're averaging or not
        # Specifically, our terms are -u^T W d - b^T d where u is the upward state of layer below
        # and d is the downward state of this layer

        bias_term = T.dot(downward_state, self.b)
        weights_term = (self.transformer.lmul(state_below) *
                        downward_state).sum(axis=1)

        rval = -bias_term - weights_term

        assert rval.ndim == 1

        return rval

    def mf_update(self,
                  state_below,
                  state_above,
                  layer_above=None,
                  double_weights=False,
                  iter_name=None):

        self.input_space.validate(state_below)

        if self.requires_reformat:
            if not isinstance(state_below, tuple):
                for sb in get_debug_values(state_below):
                    if sb.shape[0] != self.dbm.batch_size:
                        raise ValueError(
                            "self.dbm.batch_size is %d but got shape of %d" %
                            (self.dbm.batch_size, sb.shape[0]))
                    assert reduce(lambda x, y: x * y,
                                  sb.shape[1:]) == self.input_dim

            state_below = self.input_space.format_as(state_below,
                                                     self.desired_space)

        if iter_name is None:
            iter_name = 'anon'

        if state_above is not None:
            assert layer_above is not None
            msg = layer_above.downward_message(state_above)
            msg.name = 'msg_from_' + layer_above.layer_name + '_to_' + self.layer_name + '[' + iter_name + ']'
        else:
            msg = None

        if double_weights:
            state_below = 2. * state_below
            state_below.name = self.layer_name + '_' + iter_name + '_2state'
        z = self.transformer.lmul(state_below) + self.b
        if self.layer_name is not None and iter_name is not None:
            z.name = self.layer_name + '_' + iter_name + '_z'
        p, h = max_pool_channels(z, self.pool_size, msg)

        p.name = self.layer_name + '_p_' + iter_name
        h.name = self.layer_name + '_h_' + iter_name

        return p, h
예제 #16
0
파일: dpcn.py 프로젝트: EderSantana/mdpcn
class SparseCodingLayer(Linear):
    
    def __init__(self, batch_size, fprop_code=True, lr=.01, n_steps=10, truncate=-1, *args, **kwargs):
        '''
        Parameters for the optimization/feedforward operation:
        lr      : learning rate
        n_steps : number of steps or uptades of the hidden code
        truncate: truncate the gradient after this number (default -1 which means do not truncate)
        '''
        super(SparseCodingLayer, self).__init__(*args, **kwargs)
        self.batch_size = batch_size
        self.fprop_code = fprop_code
        self.n_steps = n_steps
        self.truncate = truncate
        self.lr = lr
        self._scan_updates = OrderedDict()

    @wraps(Linear.set_input_space)
    def set_input_space(self, space):
        
        self.input_space = space

        if isinstance(space, VectorSpace):
            self.requires_reformat = False
            self.input_dim = space.dim
        else:
            self.requires_reformat = True
            self.input_dim = space.get_total_dimension()
            self.desired_space = VectorSpace(self.input_dim)

        if self.fprop_code==True:
            self.output_space = VectorSpace(self.dim)
        else:
            self.output_space = VectorSpace(self.input_dim)

        rng = self.mlp.rng
        W = rng.randn(self.input_dim, self.dim)
        self.W = sharedX(W.T, self.layer_name + '_W')
        self.transformer = MatrixMul(self.W)
        self.W, = self.transformer.get_params()
        b = np.zeros((self.input_dim,))
        self.b = sharedX(b, self.layer_name + '_b') # We need both to pass input_dim valid
        X = .001 * rng.randn(self.batch_size, self.dim)
        self.X = sharedX(X, self.layer_name + '_X')
        self._params = [self.W, self.b, self.X]
        self.state_below = T.zeros((self.batch_size, self.input_dim))

    def _renormW(self):
        A = self.W.get_value(borrow=True)
        A = np.dot(A.T, np.diag(1./np.sqrt(np.sum(A**2, axis=1)))).T
        self.W.set_value( A )
    
    def get_local_cost(self,state_below):
        er = T.sqr(state_below - T.dot(self.X, self.W)).sum()
        l1 = T.sqrt(T.sqr(self.X) + 1e-6).sum()
        return er + .1 * l1
        
    def get_sparse_code(self, state_below):

        def _optimization_step(Xt, accum, vt, S):
                
            '''
            Note that this is the RMSprop update. 
            Thus, we running gradient updates inside scan (the dream)
            
            TODO: put this a better place.
            I tried to make if a method of self, but I'm not sure how to tell 
            theano.scan that the first argument of the function is a non_sequence
            '''
            
            rho = .9
            momentum = .9
            lr = self.lr
            Y = T.dot(Xt, self.W) #+ self.b
            err = (S - Y) ** 2
            l1 = T.sqrt(Xt**2 + 1e-6)
            cost = err.sum() + .1 * l1.sum()
            gX = T.grad(cost, Xt)
            new_accum = rho * accum + (1-rho) * gX**2
            v = momentum * vt  - lr * gX / T.sqrt(new_accum + 1e-8)
            X = Xt + momentum * v - lr * gX / T.sqrt(new_accum + 1e-8)
            return [X, new_accum, v]

        # Renorm W
        self._renormW()
        
        rng = self.mlp.rng
        #X = rng.randn(self.batch_size, self.dim)
        #self.X = sharedX(X, 'SparseCodingLinear_X')
        '''
        accum = T.zeros_like(self.X)
        vt = T.zeros_like(self.X)
        [Xfinal,_,_], updates = theano.scan(fn=_optimization_step,
                     outputs_info=[self.X, accum, vt], 
                     non_sequences=[state_below], 
                     n_steps=self.n_steps, truncate_gradient=self.truncate)
        
        self._scan_updates.update(updates)

        self.Xout = Xfinal[-1]
        #self.Xout = (2*T.ge(self.Xout, 0.)-1) * T.maximum(abs(self.Xout) - .01, 0.)
        self.state_below = state_below
        #self.local_reconstruction_error = \
        #        ((state_below - T.dot(self.Xout, self.W) - 0*self.b) ** 2).sum() + \
        #                   .1 * T.sqrt(self.Xout**2 + 1e-6).sum()
        '''
        return self.X #out
    
    @wraps(Layer._modify_updates)
    def _modify_updates(self, updates):
        updates.update(self._scan_updates)

    @wraps(Layer.fprop)
    def fprop(self, state_below):
        
        if self.fprop_code == True:
            rval = self.get_sparse_code(state_below)
            rval = T.switch(rval > 0., rval, 0.)
        else:
            # Fprops the filtered input instead
            rval = T.dot(self.get_sparse_code(state_below), self.W)
        
        return rval
        
    @functools.wraps(Layer.get_layer_monitoring_channels)
    def get_layer_monitoring_channels(self, state_below=None,
                                      state=None, targets=None):

        #sc = abs(self.Xout).sum() #Get last local_error get_local_error()
        #le = self.local_reconstruction_error 
        W, = self.transformer.get_params()

        assert W.ndim == 2

        sq_W = T.sqr(W)

        row_norms = T.sqrt(sq_W.sum(axis=1))
        col_norms = T.sqrt(sq_W.sum(axis=0))

        row_norms_min = row_norms.min()
        row_norms_min.__doc__ = ("The smallest norm of any row of the "
                                 "weight matrix W. This is a measure of the "
                                 "least influence any visible unit has.")
        '''
        rval = OrderedDict([('row_norms_min',  row_norms_min),
                            ('row_norms_mean', row_norms.mean()),
                            ('row_norms_max',  row_norms.max()),
                            ('col_norms_min',  col_norms.min()),
                            ('col_norms_mean', col_norms.mean()),
                            ('col_norms_max',  col_norms.max())])#,
                            #('sparse_code_l1_norm', sc.mean())])
        '''
        rval = OrderedDict()

        
        if False:
            #(state is not None) or (state_below is not None):
            if state is None:
                state = self.fprop(state_below)

            P = state
            #if self.pool_size == 1:
            vars_and_prefixes = [(P, '')]
            #else:
            #    vars_and_prefixes = [(P, 'p_')]

            for var, prefix in vars_and_prefixes:
                v_max = var.max(axis=0)
                v_min = var.min(axis=0)
                v_mean = var.mean(axis=0)
                v_range = v_max - v_min

                # max_x.mean_u is "the mean over *u*nits of the max over
                # e*x*amples" The x and u are included in the name because
                # otherwise its hard to remember which axis is which when
                # reading the monitor I use inner.outer
                # rather than outer_of_inner or
                # something like that because I want mean_x.* to appear next to
                # each other in the alphabetical list, as these are commonly
                # plotted together
                for key, val in [('max_x.max_u', v_max.max()),
                                 ('max_x.mean_u', v_max.mean()),
                                 ('max_x.min_u', v_max.min()),
                                 ('min_x.max_u', v_min.max()),
                                 ('min_x.mean_u', v_min.mean()),
                                 ('min_x.min_u', v_min.min()),
                                 ('range_x.max_u', v_range.max()),
                                 ('range_x.mean_u', v_range.mean()),
                                 ('range_x.min_u', v_range.min()),
                                 ('mean_x.max_u', v_mean.max()),
                                 ('mean_x.mean_u', v_mean.mean()),
                                 ('mean_x.min_u', v_mean.min())]:
                    rval[prefix+key] = val
       
       
        return rval    
예제 #17
0
파일: mlp.py 프로젝트: renjupaul/pylearn
class RectifiedLinear(Layer):
    """
        WRITEME
    """

    def __init__(self,
                 dim,
                 layer_name,
                 irange = None,
                 istdev = None,
                 sparse_init = None,
                 sparse_stdev = 1.,
                 include_prob = 1.0,
                 init_bias = 0.,
                 W_lr_scale = None,
                 b_lr_scale = None,
                 mask_weights = None,
                 left_slope = 0.0,
                 copy_input = 0,
                 max_row_norm = None):
        """

            include_prob: probability of including a weight element in the set
            of weights initialized to U(-irange, irange). If not included
            it is initialized to 0.

            """
        self.__dict__.update(locals())
        del self.self

        self.b = sharedX( np.zeros((self.dim,)) + init_bias, name = layer_name + '_b')

    def get_lr_scalers(self):

        if not hasattr(self, 'W_lr_scale'):
            self.W_lr_scale = None

        if not hasattr(self, 'b_lr_scale'):
            self.b_lr_scale = None

        rval = OrderedDict()

        if self.W_lr_scale is not None:
            W, = self.transformer.get_params()
            rval[W] = self.W_lr_scale

        if self.b_lr_scale is not None:
            rval[self.b] = self.b_lr_scale

        return rval

    def set_input_space(self, space):
        """ Note: this resets parameters! """

        self.input_space = space

        if isinstance(space, VectorSpace):
            self.requires_reformat = False
            self.input_dim = space.dim
        else:
            self.requires_reformat = True
            self.input_dim = space.get_total_dimension()
            self.desired_space = VectorSpace(self.input_dim)

        self.output_space = VectorSpace(self.dim + self.copy_input * self.input_dim)

        rng = self.mlp.rng
        if self.irange is not None:
            assert self.istdev is None
            assert self.sparse_init is None
            W = rng.uniform(-self.irange,
                            self.irange,
                            (self.input_dim, self.dim)) * \
                (rng.uniform(0.,1., (self.input_dim, self.dim))
                 < self.include_prob)
        elif self.istdev is not None:
            assert self.sparse_init is None
            W = rng.randn(self.input_dim, self.dim) * self.istdev
        else:
            assert self.sparse_init is not None
            W = np.zeros((self.input_dim, self.dim))
            def mask_rejects(idx, i):
                if self.mask_weights is None:
                    return False
                return self.mask_weights[idx, i] == 0.
            for i in xrange(self.dim):
                assert self.sparse_init <= self.input_dim
                for j in xrange(self.sparse_init):
                    idx = rng.randint(0, self.input_dim)
                    while W[idx, i] != 0 or mask_rejects(idx, i):
                        idx = rng.randint(0, self.input_dim)
                    W[idx, i] = rng.randn()
            W *= self.sparse_stdev

        W = sharedX(W)
        W.name = self.layer_name + '_W'

        self.transformer = MatrixMul(W)

        W ,= self.transformer.get_params()
        assert W.name is not None

        if self.mask_weights is not None:
            expected_shape =  (self.input_dim, self.dim)
            if expected_shape != self.mask_weights.shape:
                raise ValueError("Expected mask with shape "+str(expected_shape)+" but got "+str(self.mask_weights.shape))
            self.mask = sharedX(self.mask_weights)

    def censor_updates(self, updates):

        if self.mask_weights is not None:
            W ,= self.transformer.get_params()
            if W in updates:
                updates[W] = updates[W] * self.mask

        if self.max_row_norm is not None:
            W ,= self.transformer.get_params()
            if W in updates:
                updated_W = updates[W]
                row_norms = T.sqrt(T.sum(T.sqr(updated_W), axis=1))
                desired_norms = T.clip(row_norms, 0, self.max_row_norm)
                updates[W] = updated_W * (desired_norms / (1e-7 + row_norms)).dimshuffle(0, 'x')


    def get_params(self):
        assert self.b.name is not None
        W ,= self.transformer.get_params()
        assert W.name is not None
        rval = self.transformer.get_params()
        assert not isinstance(rval, set)
        rval = list(rval)
        assert self.b not in rval
        rval.append(self.b)
        return rval

    def get_weight_decay(self, coeff):
        if isinstance(coeff, str):
            coeff = float(coeff)
        assert isinstance(coeff, float) or hasattr(coeff, 'dtype')
        W ,= self.transformer.get_params()
        return coeff * T.sqr(W).sum()

    def get_weights(self):
        if self.requires_reformat:
            # This is not really an unimplemented case.
            # We actually don't know how to format the weights
            # in design space. We got the data in topo space
            # and we don't have access to the dataset
            raise NotImplementedError()
        W ,= self.transformer.get_params()
        return W.get_value()

    def set_weights(self, weights):
        W, = self.transformer.get_params()
        W.set_value(weights)

    def set_biases(self, biases):
        self.b.set_value(biases)

    def get_biases(self):
        return self.b.get_value()

    def get_weights_format(self):
        return ('v', 'h')

    def get_weights_topo(self):

        if not isinstance(self.input_space, Conv2DSpace):
            raise NotImplementedError()

        W ,= self.transformer.get_params()

        W = W.T

        W = W.reshape((self.dim, self.input_space.shape[0],
                       self.input_space.shape[1], self.input_space.nchannels))

        W = Conv2DSpace.convert(W, self.input_space.axes, ('b', 0, 1, 'c'))

        return function([], W)()

    def get_monitoring_channels(self):

        W ,= self.transformer.get_params()

        assert W.ndim == 2

        sq_W = T.sqr(W)

        row_norms = T.sqrt(sq_W.sum(axis=1))
        col_norms = T.sqrt(sq_W.sum(axis=0))

        return OrderedDict([
                            ('row_norms_min'  , row_norms.min()),
                            ('row_norms_mean' , row_norms.mean()),
                            ('row_norms_max'  , row_norms.max()),
                            ('col_norms_min'  , col_norms.min()),
                            ('col_norms_mean' , col_norms.mean()),
                            ('col_norms_max'  , col_norms.max()),
                            ])

    def fprop(self, state_below):

        self.input_space.validate(state_below)

        if self.requires_reformat:
            if not isinstance(state_below, tuple):
                for sb in get_debug_values(state_below):
                    if sb.shape[0] != self.dbm.batch_size:
                        raise ValueError("self.dbm.batch_size is %d but got shape of %d" % (self.dbm.batch_size, sb.shape[0]))
                    assert reduce(lambda x,y: x * y, sb.shape[1:]) == self.input_dim

            state_below = self.input_space.format_as(state_below, self.desired_space)

        z = self.transformer.lmul(state_below) + self.b
        if self.layer_name is not None:
            z.name = self.layer_name + '_z'

        p = z * (z > 0.) + self.left_slope * z * (z < 0.)

        if self.copy_input:
            p = T.concatenate((p, state_below), axis=1)

        return p
예제 #18
0
파일: discomax.py 프로젝트: vd114/galatea
class Discomax(Layer):
    """
    A hidden layer that does max pooling over groups of linear
    units. If you use this code in a research project, please
    cite

    "Maxout Networks" Ian J. Goodfellow, David Warde-Farley,
    Mehdi Mirza, Aaron Courville, and Yoshua Bengio. ICML 2013


    Parameters
    ----------
    layer_name : str
        A name for this layer that will be prepended to monitoring channels
        related to this layer. Each layer in an MLP must have a unique
        name.
    num_units : int
        The number of maxout units to use in this layer.
    num_pieces: int
        The number of linear pieces to use in each maxout unit.
    pool_stride : int, optional
        The distance between the start of each max pooling region. Defaults
        to num_pieces, which makes the pooling regions disjoint. If set to
        a smaller number, can do overlapping pools.
    randomize_pools : bool, optional
        If True, does max pooling over randomized subsets of the linear
        responses, rather than over sequential subsets.
    irange : float, optional
        If specified, initializes each weight randomly in
        U(-irange, irange)
    sparse_init : int, optional
        if specified, irange must not be specified.
        This is an integer specifying how many weights to make non-zero.
        All non-zero weights will be initialized randomly in
        N(0, sparse_stdev^2)
    sparse_stdev : float, optional
        WRITEME
    include_prob : float, optional
        probability of including a weight element in the set
        of weights initialized to U(-irange, irange). If not included
        a weight is initialized to 0. This defaults to 1.
    init_bias : float or ndarray, optional
        A value that can be broadcasted to a numpy vector.
        All biases are initialized to this number.
    W_lr_scale: float, optional
        The learning rate on the weights for this layer is multiplied by
        this scaling factor
    b_lr_scale: float, optional
        The learning rate on the biases for this layer is multiplied by
        this scaling factor
    max_col_norm: float, optional
        The norm of each column of the weight matrix is constrained to
        have at most this norm. If unspecified, no constraint. Constraint
        is enforced by re-projection (if necessary) at the end of each
        update.
    max_row_norm: float, optional
        Like max_col_norm, but applied to the rows.
    mask_weights: ndarray, optional
        A binary matrix multiplied by the weights after each update,
        allowing you to restrict their connectivity.
    min_zero: bool, optional
        If true, includes a zero in the set we take a max over for each
        maxout unit. This is equivalent to pooling over rectified
        linear units.
    """
    def __str__(self):
        """
        Returns
        -------
        rval : str
            A string representation of the object. In this case, just the
            class name.
        """
        return "Maxout"

    def __init__(self,
                 layer_name,
                 num_units,
                 num_pieces,
                 pool_stride=None,
                 randomize_pools=False,
                 irange=None,
                 sparse_init=None,
                 sparse_stdev=1.,
                 include_prob=1.0,
                 init_bias=0.,
                 W_lr_scale=None,
                 b_lr_scale=None,
                 max_col_norm=None,
                 max_row_norm=None,
                 mask_weights=None,
                 min_zero=False):

        super(Discomax, self).__init__()

        detector_layer_dim = num_units * num_pieces
        pool_size = num_pieces

        if pool_stride is None:
            pool_stride = pool_size

        self.__dict__.update(locals())
        del self.self

        self.b = sharedX(np.zeros((self.detector_layer_dim, )) + init_bias,
                         name=(layer_name + '_b'))
        self.ofs = sharedX(np.zeros((self.detector_layer_dim, )),
                           name=(layer_name + '_ofs'))

        if max_row_norm is not None:
            raise NotImplementedError()

    @functools.wraps(Model.get_lr_scalers)
    def get_lr_scalers(self):

        if not hasattr(self, 'W_lr_scale'):
            self.W_lr_scale = None

        if not hasattr(self, 'b_lr_scale'):
            self.b_lr_scale = None

        rval = OrderedDict()

        if self.W_lr_scale is not None:
            W, = self.transformer.get_params()
            rval[W] = self.W_lr_scale

        if self.b_lr_scale is not None:
            rval[self.b] = self.b_lr_scale

        return rval

    def set_input_space(self, space):
        """
        Tells the layer to use the specified input space.

        This resets parameters! The weight matrix is initialized with the
        size needed to receive input from this space.

        Parameters
        ----------
        space : Space
            The Space that the input will lie in.
        """

        self.input_space = space

        if isinstance(space, VectorSpace):
            self.requires_reformat = False
            self.input_dim = space.dim
        else:
            self.requires_reformat = True
            self.input_dim = space.get_total_dimension()
            self.desired_space = VectorSpace(self.input_dim)

        if not (0 == (
            (self.detector_layer_dim - self.pool_size) % self.pool_stride)):
            if self.pool_stride == self.pool_size:
                raise ValueError("detector_layer_dim = %d, pool_size = %d. "
                                 "Should be divisible but remainder is %d" %
                                 (self.detector_layer_dim, self.pool_size,
                                  self.detector_layer_dim % self.pool_size))
            raise ValueError()

        self.h_space = VectorSpace(self.detector_layer_dim)
        self.pool_layer_dim = (
            (self.detector_layer_dim - self.pool_size) / self.pool_stride + 1)
        self.output_space = VectorSpace(self.pool_layer_dim)

        rng = self.mlp.rng
        if self.irange is not None:
            assert self.sparse_init is None
            W = rng.uniform(-self.irange,
                            self.irange,
                            (self.input_dim, self.detector_layer_dim)) * \
                (rng.uniform(0., 1., (self.input_dim, self.detector_layer_dim))
                 < self.include_prob)
        else:
            assert self.sparse_init is not None
            W = np.zeros((self.input_dim, self.detector_layer_dim))

            def mask_rejects(idx, i):
                if self.mask_weights is None:
                    return False
                return self.mask_weights[idx, i] == 0.

            for i in xrange(self.detector_layer_dim):
                assert self.sparse_init <= self.input_dim
                for j in xrange(self.sparse_init):
                    idx = rng.randint(0, self.input_dim)
                    while W[idx, i] != 0 or mask_rejects(idx, i):
                        idx = rng.randint(0, self.input_dim)
                    W[idx, i] = rng.randn()
            W *= self.sparse_stdev

        W = sharedX(W)
        W.name = self.layer_name + '_W'

        self.transformer = MatrixMul(W)

        W, = self.transformer.get_params()
        assert W.name is not None

        if not hasattr(self, 'randomize_pools'):
            self.randomize_pools = False

        if self.randomize_pools:
            permute = np.zeros(
                (self.detector_layer_dim, self.detector_layer_dim))
            for j in xrange(self.detector_layer_dim):
                i = rng.randint(self.detector_layer_dim)
                permute[i, j] = 1
            self.permute = sharedX(permute)

        if self.mask_weights is not None:
            expected_shape = (self.input_dim, self.detector_layer_dim)
            if expected_shape != self.mask_weights.shape:
                raise ValueError("Expected mask with shape " +
                                 str(expected_shape) + " but got " +
                                 str(self.mask_weights.shape))
            self.mask = sharedX(self.mask_weights)

    def _modify_updates(self, updates):
        """
        Replaces the values in `updates` if needed to enforce the options set
        in the __init__ method, including `mask_weights` and `max_col_norm`.

        Parameters
        ----------
        updates : OrderedDict
            A dictionary mapping parameters (including parameters not
            belonging to this model) to updated values of those parameters.
            The dictionary passed in contains the updates proposed by the
            learning algorithm. This function modifies the dictionary
            directly. The modified version will be compiled and executed
            by the learning algorithm.
        """

        # Patch old pickle files
        if not hasattr(self, 'mask_weights'):
            self.mask_weights = None

        if self.mask_weights is not None:
            W, = self.transformer.get_params()
            if W in updates:
                updates[W] = updates[W] * self.mask

        if self.max_col_norm is not None:
            assert self.max_row_norm is None
            W, = self.transformer.get_params()
            if W in updates:
                updated_W = updates[W]
                col_norms = T.sqrt(T.sum(T.sqr(updated_W), axis=0))
                desired_norms = T.clip(col_norms, 0, self.max_col_norm)
                updates[W] = updated_W * (desired_norms / (1e-7 + col_norms))

        if self.ofs in updates:
            updates[self.ofs] = T.clip(updates[self.ofs], 0., 1e6)

    @functools.wraps(Model.get_params)
    def get_params(self):
        assert self.b.name is not None
        W, = self.transformer.get_params()
        assert W.name is not None
        rval = self.transformer.get_params()
        assert not isinstance(rval, set)
        rval = list(rval)
        assert self.b not in rval
        rval.append(self.b)
        rval.append(self.ofs)
        return rval

    @functools.wraps(Layer.get_weight_decay)
    def get_weight_decay(self, coeff):
        if isinstance(coeff, str):
            coeff = float(coeff)
        assert isinstance(coeff, float) or hasattr(coeff, 'dtype')
        W, = self.transformer.get_params()
        return coeff * T.sqr(W).sum()

    @functools.wraps(Layer.get_l1_weight_decay)
    def get_l1_weight_decay(self, coeff):
        if isinstance(coeff, str):
            coeff = float(coeff)
        assert isinstance(coeff, float) or hasattr(coeff, 'dtype')
        W, = self.transformer.get_params()
        return coeff * T.abs_(W).sum()

    @functools.wraps(Model.get_weights)
    def get_weights(self):
        if self.requires_reformat:
            # This is not really an unimplemented case.
            # We actually don't know how to format the weights
            # in design space. We got the data in topo space
            # and we don't have access to the dataset
            raise NotImplementedError()
        W, = self.transformer.get_params()
        W = W.get_value()

        if not hasattr(self, 'randomize_pools'):
            self.randomize_pools = False

        if self.randomize_pools:
            warnings.warn("randomize_pools makes get_weights multiply by the "
                          "permutation matrix. If you call set_weights(W) and "
                          "then call get_weights(), the return value will "
                          "WP not W.")
            P = self.permute.get_value()
            return np.dot(W, P)

        return W

    @functools.wraps(Layer.set_weights)
    def set_weights(self, weights):
        W, = self.transformer.get_params()
        W.set_value(weights)

    @functools.wraps(Layer.set_biases)
    def set_biases(self, biases):
        self.b.set_value(biases)

    @functools.wraps(Layer.get_biases)
    def get_biases(self):
        return self.b.get_value()

    @functools.wraps(Model.get_weights_format)
    def get_weights_format(self):
        return ('v', 'h')

    @functools.wraps(Model.get_weights_view_shape)
    def get_weights_view_shape(self):
        total = self.detector_layer_dim
        cols = self.pool_size
        if cols == 1:
            # Let the PatchViewer decide how to arrange the units
            # when they're not pooled
            raise NotImplementedError()
        # When they are pooled, make each pooling unit have one row
        rows = total // cols
        if rows * cols < total:
            rows = rows + 1
        return rows, cols

    @functools.wraps(Model.get_weights_topo)
    def get_weights_topo(self):

        if not isinstance(self.input_space, Conv2DSpace):
            raise NotImplementedError()

        # There was an implementation of this, but it was broken
        raise NotImplementedError()

    @functools.wraps(Layer.get_monitoring_channels)
    def get_monitoring_channels(self):
        warnings.warn("Layer.get_monitoring_channels is " +
                      "deprecated. Use get_layer_monitoring_channels " +
                      "instead. Layer.get_monitoring_channels " +
                      "will be removed on or after september 24th 2014",
                      stacklevel=2)

        W, = self.transformer.get_params()

        assert W.ndim == 2

        sq_W = T.sqr(W)

        row_norms = T.sqrt(sq_W.sum(axis=1))
        col_norms = T.sqrt(sq_W.sum(axis=0))

        row_norms_min = row_norms.min()
        row_norms_min.__doc__ = ("The smallest norm of any row of the "
                                 "weight matrix W. This is a measure of the "
                                 "least influence any visible unit has.")

        return OrderedDict([
            ('row_norms_min', row_norms_min),
            ('row_norms_mean', row_norms.mean()),
            ('row_norms_max', row_norms.max()),
            ('col_norms_min', col_norms.min()),
            ('col_norms_mean', col_norms.mean()),
            ('col_norms_max', col_norms.max()),
        ])

    @functools.wraps(Layer.get_monitoring_channels_from_state)
    def get_monitoring_channels_from_state(self, state):
        warnings.warn("Layer.get_monitoring_channels_from_state is " +
                      "deprecated. Use get_layer_monitoring_channels " +
                      "instead. Layer.get_monitoring_channels_from_state " +
                      "will be removed on or after september 24th 2014",
                      stacklevel=2)

        P = state

        rval = OrderedDict()

        if self.pool_size == 1:
            vars_and_prefixes = [(P, '')]
        else:
            vars_and_prefixes = [(P, 'p_')]

        for var, prefix in vars_and_prefixes:
            v_max = var.max(axis=0)
            v_min = var.min(axis=0)
            v_mean = var.mean(axis=0)
            v_range = v_max - v_min

            # max_x.mean_u is "the mean over *u*nits of the max over
            # e*x*amples" The x and u are included in the name because
            # otherwise its hard to remember which axis is which when reading
            # the monitor I use inner.outer rather than outer_of_inner or
            # something like that because I want mean_x.* to appear next to
            # each other in the alphabetical list, as these are commonly
            # plotted together
            for key, val in [('max_x.max_u', v_max.max()),
                             ('max_x.mean_u', v_max.mean()),
                             ('max_x.min_u', v_max.min()),
                             ('min_x.max_u', v_min.max()),
                             ('min_x.mean_u', v_min.mean()),
                             ('min_x.min_u', v_min.min()),
                             ('range_x.max_u', v_range.max()),
                             ('range_x.mean_u', v_range.mean()),
                             ('range_x.min_u', v_range.min()),
                             ('mean_x.max_u', v_mean.max()),
                             ('mean_x.mean_u', v_mean.mean()),
                             ('mean_x.min_u', v_mean.min())]:
                rval[prefix + key] = val

        return rval

    @functools.wraps(Layer.get_layer_monitoring_channels)
    def get_layer_monitoring_channels(self,
                                      state_below=None,
                                      state=None,
                                      targets=None):

        W, = self.transformer.get_params()

        assert W.ndim == 2

        sq_W = T.sqr(W)

        row_norms = T.sqrt(sq_W.sum(axis=1))
        col_norms = T.sqrt(sq_W.sum(axis=0))

        row_norms_min = row_norms.min()
        row_norms_min.__doc__ = ("The smallest norm of any row of the "
                                 "weight matrix W. This is a measure of the "
                                 "least influence any visible unit has.")

        rval = OrderedDict([
            ('row_norms_min', row_norms_min),
            ('row_norms_mean', row_norms.mean()),
            ('row_norms_max', row_norms.max()),
            ('col_norms_min', col_norms.min()),
            ('col_norms_mean', col_norms.mean()),
            ('col_norms_max', col_norms.max()),
        ])

        if (state is not None) or (state_below is not None):
            if state is None:
                state = self.fprop(state_below)

            P = state
            if self.pool_size == 1:
                vars_and_prefixes = [(P, '')]
            else:
                vars_and_prefixes = [(P, 'p_')]

            for var, prefix in vars_and_prefixes:
                v_max = var.max(axis=0)
                v_min = var.min(axis=0)
                v_mean = var.mean(axis=0)
                v_range = v_max - v_min

                # max_x.mean_u is "the mean over *u*nits of the max over
                # e*x*amples" The x and u are included in the name because
                # otherwise its hard to remember which axis is which when
                # reading the monitor I use inner.outer
                # rather than outer_of_inner or
                # something like that because I want mean_x.* to appear next to
                # each other in the alphabetical list, as these are commonly
                # plotted together
                for key, val in [('max_x.max_u', v_max.max()),
                                 ('max_x.mean_u', v_max.mean()),
                                 ('max_x.min_u', v_max.min()),
                                 ('min_x.max_u', v_min.max()),
                                 ('min_x.mean_u', v_min.mean()),
                                 ('min_x.min_u', v_min.min()),
                                 ('range_x.max_u', v_range.max()),
                                 ('range_x.mean_u', v_range.mean()),
                                 ('range_x.min_u', v_range.min()),
                                 ('mean_x.max_u', v_mean.max()),
                                 ('mean_x.mean_u', v_mean.mean()),
                                 ('mean_x.min_u', v_mean.min())]:
                    rval[prefix + key] = val

        return rval

    @functools.wraps(Layer.fprop)
    def fprop(self, state_below):

        self.input_space.validate(state_below)

        if self.requires_reformat:
            state_below = self.input_space.format_as(state_below,
                                                     self.desired_space)

        z = self.transformer.lmul(state_below) + self.b

        z = T.switch(z > 0., z + self.ofs, z)

        if not hasattr(self, 'randomize_pools'):
            self.randomize_pools = False

        if not hasattr(self, 'pool_stride'):
            self.pool_stride = self.pool_size

        if self.randomize_pools:
            z = T.dot(z, self.permute)

        if not hasattr(self, 'min_zero'):
            self.min_zero = False

        if self.min_zero:
            p = 0.
        else:
            p = None

        last_start = self.detector_layer_dim - self.pool_size
        for i in xrange(self.pool_size):
            cur = z[:, i:last_start + i + 1:self.pool_stride]
            if p is None:
                p = cur
            else:
                p = T.maximum(cur, p)

        p.name = self.layer_name + '_p_'

        return p
예제 #19
0
파일: mlp.py 프로젝트: renjupaul/pylearn
class SoftmaxPool(Layer):
    """
        A hidden layer that uses the softmax function to do
        max pooling over groups of units.
        When the pooling size is 1, this reduces to a standard
        sigmoidal MLP layer.
        """

    def __init__(self,
                 detector_layer_dim,
                 pool_size,
                 layer_name,
                 irange = None,
                 sparse_init = None,
                 sparse_stdev = 1.,
                 include_prob = 1.0,
                 init_bias = 0.,
                 W_lr_scale = None,
                 b_lr_scale = None,
                 mask_weights = None,
        ):
        """

            include_prob: probability of including a weight element in the set
            of weights initialized to U(-irange, irange). If not included
            it is initialized to 0.

            """
        self.__dict__.update(locals())
        del self.self

        self.b = sharedX( np.zeros((self.detector_layer_dim,)) + init_bias, name = layer_name + '_b')

    def get_lr_scalers(self):

        if not hasattr(self, 'W_lr_scale'):
            self.W_lr_scale = None

        if not hasattr(self, 'b_lr_scale'):
            self.b_lr_scale = None

        rval = OrderedDict()

        if self.W_lr_scale is not None:
            W, = self.transformer.get_params()
            rval[W] = self.W_lr_scale

        if self.b_lr_scale is not None:
            rval[self.b] = self.b_lr_scale

        return rval

    def set_input_space(self, space):
        """ Note: this resets parameters! """

        self.input_space = space

        if isinstance(space, VectorSpace):
            self.requires_reformat = False
            self.input_dim = space.dim
        else:
            self.requires_reformat = True
            self.input_dim = space.get_total_dimension()
            self.desired_space = VectorSpace(self.input_dim)


        if not (self.detector_layer_dim % self.pool_size == 0):
            raise ValueError("detector_layer_dim = %d, pool_size = %d. Should be divisible but remainder is %d" %
                             (self.detector_layer_dim, self.pool_size, self.detector_layer_dim % self.pool_size))

        self.h_space = VectorSpace(self.detector_layer_dim)
        self.pool_layer_dim = self.detector_layer_dim / self.pool_size
        self.output_space = VectorSpace(self.pool_layer_dim)

        rng = self.mlp.rng
        if self.irange is not None:
            assert self.sparse_init is None
            W = rng.uniform(-self.irange,
                            self.irange,
                            (self.input_dim, self.detector_layer_dim)) * \
                (rng.uniform(0.,1., (self.input_dim, self.detector_layer_dim))
                 < self.include_prob)
        else:
            assert self.sparse_init is not None
            W = np.zeros((self.input_dim, self.detector_layer_dim))
            def mask_rejects(idx, i):
                if self.mask_weights is None:
                    return False
                return self.mask_weights[idx, i] == 0.
            for i in xrange(self.detector_layer_dim):
                assert self.sparse_init <= self.input_dim
                for j in xrange(self.sparse_init):
                    idx = rng.randint(0, self.input_dim)
                    while W[idx, i] != 0 or mask_rejects(idx, i):
                        idx = rng.randint(0, self.input_dim)
                    W[idx, i] = rng.randn()
            W *= self.sparse_stdev

        W = sharedX(W)
        W.name = self.layer_name + '_W'

        self.transformer = MatrixMul(W)

        W ,= self.transformer.get_params()
        assert W.name is not None

        if self.mask_weights is not None:
            expected_shape =  (self.input_dim, self.detector_layer_dim)
            if expected_shape != self.mask_weights.shape:
                raise ValueError("Expected mask with shape "+str(expected_shape)+" but got "+str(self.mask_weights.shape))
            self.mask = sharedX(self.mask_weights)

    def censor_updates(self, updates):

        # Patch old pickle files
        if not hasattr(self, 'mask_weights'):
            self.mask_weights = None

        if self.mask_weights is not None:
            W ,= self.transformer.get_params()
            if W in updates:
                updates[W] = updates[W] * self.mask

    def get_params(self):
        assert self.b.name is not None
        W ,= self.transformer.get_params()
        assert W.name is not None
        rval = self.transformer.get_params()
        assert not isinstance(rval, set)
        rval = list(rval)
        assert self.b not in rval
        rval.append(self.b)
        return rval

    def get_weight_decay(self, coeff):
        if isinstance(coeff, str):
            coeff = float(coeff)
        assert isinstance(coeff, float) or hasattr(coeff, 'dtype')
        W ,= self.transformer.get_params()
        return coeff * T.sqr(W).sum()

    def get_weights(self):
        if self.requires_reformat:
            # This is not really an unimplemented case.
            # We actually don't know how to format the weights
            # in design space. We got the data in topo space
            # and we don't have access to the dataset
            raise NotImplementedError()
        W ,= self.transformer.get_params()
        return W.get_value()

    def set_weights(self, weights):
        W, = self.transformer.get_params()
        W.set_value(weights)

    def set_biases(self, biases):
        self.b.set_value(biases)

    def get_biases(self):
        return self.b.get_value()

    def get_weights_format(self):
        return ('v', 'h')

    def get_weights_view_shape(self):
        total = self.detector_layer_dim
        cols = self.pool_size
        if cols == 1:
            # Let the PatchViewer decide how to arrange the units
            # when they're not pooled
            raise NotImplementedError()
        # When they are pooled, make each pooling unit have one row
        rows = total / cols
        return rows, cols


    def get_weights_topo(self):

        if not isinstance(self.input_space, Conv2DSpace):
            raise NotImplementedError()

        W ,= self.transformer.get_params()

        W = W.T

        W = W.reshape((self.detector_layer_dim, self.input_space.shape[0],
                       self.input_space.shape[1], self.input_space.nchannels))

        W = Conv2DSpace.convert(W, self.input_space.axes, ('b', 0, 1, 'c'))

        return function([], W)()

    def get_monitoring_channels(self):

        W ,= self.transformer.get_params()

        assert W.ndim == 2

        sq_W = T.sqr(W)

        row_norms = T.sqrt(sq_W.sum(axis=1))
        col_norms = T.sqrt(sq_W.sum(axis=0))

        return OrderedDict([
                            ('row_norms_min'  , row_norms.min()),
                            ('row_norms_mean' , row_norms.mean()),
                            ('row_norms_max'  , row_norms.max()),
                            ('col_norms_min'  , col_norms.min()),
                            ('col_norms_mean' , col_norms.mean()),
                            ('col_norms_max'  , col_norms.max()),
                            ])


    def get_monitoring_channels_from_state(self, state):

        P = state

        rval = OrderedDict()

        if self.pool_size == 1:
            vars_and_prefixes = [ (P,'') ]
        else:
            vars_and_prefixes = [ (P, 'p_') ]

        for var, prefix in vars_and_prefixes:
            v_max = var.max(axis=0)
            v_min = var.min(axis=0)
            v_mean = var.mean(axis=0)
            v_range = v_max - v_min

            # max_x.mean_u is "the mean over *u*nits of the max over e*x*amples"
            # The x and u are included in the name because otherwise its hard
            # to remember which axis is which when reading the monitor
            # I use inner.outer rather than outer_of_inner or something like that
            # because I want mean_x.* to appear next to each other in the alphabetical
            # list, as these are commonly plotted together
            for key, val in [
                             ('max_x.max_u', v_max.max()),
                             ('max_x.mean_u', v_max.mean()),
                             ('max_x.min_u', v_max.min()),
                             ('min_x.max_u', v_min.max()),
                             ('min_x.mean_u', v_min.mean()),
                             ('min_x.min_u', v_min.min()),
                             ('range_x.max_u', v_range.max()),
                             ('range_x.mean_u', v_range.mean()),
                             ('range_x.min_u', v_range.min()),
                             ('mean_x.max_u', v_mean.max()),
                             ('mean_x.mean_u', v_mean.mean()),
                             ('mean_x.min_u', v_mean.min())
                             ]:
                rval[prefix+key] = val

        return rval

    def fprop(self, state_below):

        self.input_space.validate(state_below)

        if self.requires_reformat:
            if not isinstance(state_below, tuple):
                for sb in get_debug_values(state_below):
                    if sb.shape[0] != self.dbm.batch_size:
                        raise ValueError("self.dbm.batch_size is %d but got shape of %d" % (self.dbm.batch_size, sb.shape[0]))
                    assert reduce(lambda x,y: x * y, sb.shape[1:]) == self.input_dim

            state_below = self.input_space.format_as(state_below, self.desired_space)

        z = self.transformer.lmul(state_below) + self.b
        if self.layer_name is not None:
            z.name = self.layer_name + '_z'
        p,h = max_pool_channels(z, self.pool_size)

        p.name = self.layer_name + '_p_'

        return p
예제 #20
0
파일: cdpcn.py 프로젝트: EderSantana/mdpcn
class SparseCodingLayer(Linear):
    
    def __init__(self, batch_size, fprop_code=True, lr=.01, n_steps=10, lbda=0, top_most=False, 
            nonlinearity=RectifierConvNonlinearity(),*args, **kwargs):
        '''
        Compiled version: the sparse code is calulated using 'top' and is not just simbolic.
        Parameters for the optimization/feedforward operation:
        lr      : learning rate
        n_steps : number of steps or uptades of the hidden code
        truncate: truncate the gradient after this number (default -1 which means do not truncate)
        '''
        super(SparseCodingLayer, self).__init__(*args, **kwargs)
        self.batch_size = batch_size
        self.fprop_code = fprop_code
        self.n_steps = n_steps
        self.lr = lr
        self.lbda = lbda
        self.top_most = top_most
        self.nonlin = nonlinearity

    @wraps(Linear.set_input_space)
    def set_input_space(self, space):
        
        self.input_space = space

        if isinstance(space, VectorSpace):
            self.requires_reformat = False
            self.input_dim = space.dim
        else:
            self.requires_reformat = True
            self.input_dim = space.get_total_dimension()
            self.desired_space = VectorSpace(self.input_dim)

        if self.fprop_code==True:
            self.output_space = VectorSpace(self.dim)
        else:
            self.output_space = VectorSpace(self.input_dim)

        rng = self.mlp.rng
        W = rng.randn(self.input_dim, self.dim)
        self.W = sharedX(W.T, self.layer_name + '_W')
        self.transformer = MatrixMul(self.W)
        self.W, = self.transformer.get_params()
        b = np.zeros((self.input_dim,))
        self.b = sharedX(b, self.layer_name + '_b') # We need both to pass input_dim valid
        X = .001 * rng.randn(self.batch_size, self.dim)
        self.X = sharedX(X, self.layer_name + '_X')
        S = rng.normal(0, .001, size=(self.batch_size, self.input_dim))
        self.S = sharedX(S, self.layer_name + '_S')
        self._params = [self.W, self.b]
        #self.state_below = T.zeros((self.batch_size, self.input_dim))
        
        cost = self.get_local_cost()
        self.opt = top.Optimizer(self.X, cost,  
                                 method='rmsprop', 
                                 learning_rate=self.lr, momentum=.9)

        self._reconstruction = theano.function([], T.dot(self.X, self.W))
    
    def get_local_cost(self):
        er = T.sqr(self.S - T.dot(self.X, self.W)).sum()
        l1 = T.sqrt(T.sqr(self.X) + 1e-6).sum()
        top_down = self.get_top_down_flow()
        return er + .1 * l1 + top_down
    
    def update_top_state(self, state_above=None):
        if self.lbda is not 0:
            assert state_above is not None
            self.top_flow.set_value(state_above)     
    
    def get_nonlin_output(self):
        return self.nonlin(self.X)

    def get_top_down_flow(self):
        if self.lbda == 0:
            rval = 0.
        elif self.top_flow == True:
            rval = (self.lbda * (self.top_flow - self.X)**2).sum()
        else:
            out = self.get_nonlin_output()
            rval = (self.lbda * (self.top_flow - out)**2).sum()

        return rval

    def _renormW(self):
        A = self.W.get_value(borrow=True)
        A = np.dot(A.T, np.diag(1./np.sqrt(np.sum(A**2, axis=1)))).T
        self.W.set_value( A )
  
    def get_reconstruction(self):
        return self._reconstruction()

    def get_sparse_code(self, state_below):

        # Renorm W
        self._renormW()

        if hasattr(state_below, 'get_value'):
            #print '!!!! state_below does have get_value'
            self.S.set_value(state_below.get_value(borrow=True))
            self.opt.run(self.n_steps) 
                         

        if isinstance(state_below, np.ndarray):
            self.S.set_value(state_below.astype('float32'))
            self.opt.run(self.n_steps) #, 
            #np.arange(self.batch_size))


        return self.X

    @wraps(Layer.fprop)
    def fprop(self, state_below):
        
        self._renormW()
        rval = self.get_sparse_code(state_below)
        if self.fprop_code == True:
            #rval = T.switch(rval > 0., rval, 0.)
            rval = self.nonlin.apply(rval)
        else:
            # Fprops the filtered input instead
            rval = T.dot(rval, self.W)
        
        return rval
    
    @wraps(Layer.get_params)
    def get_params(self):
        return self.W

    @functools.wraps(Layer.get_layer_monitoring_channels)
    def get_layer_monitoring_channels(self, state_below=None,
                                      state=None, targets=None):

        #sc = abs(self.Xout).sum() #Get last local_error get_local_error()
        #le = self.local_reconstruction_error 
        W, = self.transformer.get_params()

        assert W.ndim == 2

        sq_W = T.sqr(W)

        row_norms = T.sqrt(sq_W.sum(axis=1))
        col_norms = T.sqrt(sq_W.sum(axis=0))

        row_norms_min = row_norms.min()
        row_norms_min.__doc__ = ("The smallest norm of any row of the "
                                 "weight matrix W. This is a measure of the "
                                 "least influence any visible unit has.")
        '''
        rval = OrderedDict([('row_norms_min',  row_norms_min),
                            ('row_norms_mean', row_norms.mean()),
                            ('row_norms_max',  row_norms.max()),
                            ('col_norms_min',  col_norms.min()),
                            ('col_norms_mean', col_norms.mean()),
                            ('col_norms_max',  col_norms.max())])#,
                            #('sparse_code_l1_norm', sc.mean())])
        '''
        rval = OrderedDict()

        
        if False:
            #(state is not None) or (state_below is not None):
            if state is None:
                state = self.fprop(state_below)

            P = state
            #if self.pool_size == 1:
            vars_and_prefixes = [(P, '')]
            #else:
            #    vars_and_prefixes = [(P, 'p_')]

            for var, prefix in vars_and_prefixes:
                v_max = var.max(axis=0)
                v_min = var.min(axis=0)
                v_mean = var.mean(axis=0)
                v_range = v_max - v_min

                # max_x.mean_u is "the mean over *u*nits of the max over
                # e*x*amples" The x and u are included in the name because
                # otherwise its hard to remember which axis is which when
                # reading the monitor I use inner.outer
                # rather than outer_of_inner or
                # something like that because I want mean_x.* to appear next to
                # each other in the alphabetical list, as these are commonly
                # plotted together
                for key, val in [('max_x.max_u', v_max.max()),
                                 ('max_x.mean_u', v_max.mean()),
                                 ('max_x.min_u', v_max.min()),
                                 ('min_x.max_u', v_min.max()),
                                 ('min_x.mean_u', v_min.mean()),
                                 ('min_x.min_u', v_min.min()),
                                 ('range_x.max_u', v_range.max()),
                                 ('range_x.mean_u', v_range.mean()),
                                 ('range_x.min_u', v_range.min()),
                                 ('mean_x.max_u', v_mean.max()),
                                 ('mean_x.mean_u', v_mean.mean()),
                                 ('mean_x.min_u', v_mean.min())]:
                    rval[prefix+key] = val
       
       
        return rval    
    def set_input_space(self, space):

        self.input_space = space

        if not isinstance(space, Conv2DSpace):
            raise BadInputSpaceError("ConvRectifiedLinear.set_input_space "
                                     "expected a Conv2DSpace, got " +
                                     str(space) + " of type " +
                                     str(type(space)))

        rng = self.mlp.rng

        if self.border_mode == 'valid':
            output_shape = [(self.input_space.shape[0]-self.kernel_shape[0]) /
                            self.kernel_stride[0] + 1,
                            (self.input_space.shape[1]-self.kernel_shape[1]) /
                            self.kernel_stride[1] + 1]
        elif self.border_mode == 'full':
            output_shape = [(self.input_space.shape[0]+self.kernel_shape[0]) /
                            self.kernel_stride[0] - 1,
                            (self.input_space.shape[1]+self.kernel_shape[1]) /
                            self.kernel_stride[1] - 1]

        self.detector_space = Conv2DSpace(shape=output_shape,
                                          num_channels=self.output_channels,
                                          axes=('b', 'c', 0, 1))

        if self.irange is not None:
            assert self.sparse_init is None
            self.transformer = conv2d.make_random_conv2D(
                irange=self.irange,
                input_space=self.input_space,
                output_space=self.detector_space,
                kernel_shape=self.kernel_shape,
                batch_size=self.mlp.batch_size,
                subsample=self.kernel_stride,
                border_mode=self.border_mode,
                rng=rng)
        elif self.sparse_init is not None:
            self.transformer = conv2d.make_sparse_random_conv2D(
                num_nonzero=self.sparse_init,
                input_space=self.input_space,
                output_space=self.detector_space,
                kernel_shape=self.kernel_shape,
                batch_size=self.mlp.batch_size,
                subsample=self.kernel_stride,
                border_mode=self.border_mode,
                rng=rng)

        W, = self.transformer.get_params()
        W.name = 'W'
        self.b = sharedX(np.zeros(((self.num_pieces*self.output_channels),)) + self.init_bias)
        self.b.name = 'b'

        print 'Input shape: ', self.input_space.shape
        print 'Detector space: ', self.detector_space.shape

        assert self.pool_type in ['max', 'mean']

        dummy_batch_size = self.mlp.batch_size
        if dummy_batch_size is None:
            dummy_batch_size = 2
        dummy_detector = sharedX(
            self.detector_space.get_origin_batch(dummy_batch_size))
            
            
        #dummy_p = dummy_p.eval()
        self.output_space = Conv2DSpace(shape=[1, 1],
                                        num_channels=self.output_channels,
                                        axes=('b', 'c', 0, 1))
										
        W = rng.uniform(-self.irange,self.irange,(426, (self.num_pieces*self.output_channels)))
        W = sharedX(W)
        W.name = self.layer_name + "_w"
        self.transformer = MatrixMul(W)
		
        print 'Output space: ', self.output_space.shape
예제 #22
0
nv = 3
nh = 4

vW = rng.randn(nv, nh)
W = sharedX(vW)
vbv = as_floatX(rng.randn(nv))
bv = T.as_tensor_variable(vbv)
bv.tag.test_value = vbv
vbh = as_floatX(rng.randn(nh))
bh = T.as_tensor_variable(vbh)
bh.tag.test_value = bh
vsigma = as_floatX(rng.uniform(0.1, 5))
sigma = T.as_tensor_variable(vsigma)
sigma.tag.test_value = vsigma

E = GRBM_Type_1(transformer=MatrixMul(W), bias_vis=bv,
                bias_hid=bh, sigma=sigma)

V = T.matrix()
V.tag.test_value = as_floatX(rng.rand(test_m, nv))
H = T.matrix()
H.tag.test_value = as_floatX(rng.rand(test_m, nh))

E_func = function([V, H], E([V, H]))
F_func = function([V], E.free_energy(V))
log_P_H_given_V_func = function([H, V], E.log_P_H_given_V(H, V))
score_func = function([V], E.score(V))

F_of_V = E.free_energy(V)
dummy = T.sum(F_of_V)
negscore = T.grad(dummy, V)
예제 #23
0
def test_matrixmul():
    """
    Tests matrix multiplication for a range of different
    dtypes. Checks both normal and transpose multiplication
    using randomly generated matrices.
    """
    rng = np.random.RandomState(222)
    dtypes = ['int16', 'int32', 'int64', 'float64', 'float32']
    tensor_x = [
        tensor.wmatrix(),
        tensor.imatrix(),
        tensor.lmatrix(),
        tensor.dmatrix(),
        tensor.fmatrix()
    ]
    np_W, np_x, np_x_T = [], [], []
    for dtype in dtypes:
        if 'int' in dtype:
            np_W.append(
                rng.randint(-10, 10,
                            rng.random_integers(5, size=2)).astype(dtype))
            np_x.append(
                rng.randint(
                    -10, 10,
                    (rng.random_integers(5), np_W[-1].shape[0])).astype(dtype))
            np_x_T.append(
                rng.randint(
                    -10, 10,
                    (rng.random_integers(5), np_W[-1].shape[1])).astype(dtype))
        elif 'float' in dtype:
            np_W.append(
                rng.uniform(-1, 1, rng.random_integers(5,
                                                       size=2)).astype(dtype))
            np_x.append(
                rng.uniform(
                    -10, 10,
                    (rng.random_integers(5), np_W[-1].shape[0])).astype(dtype))
            np_x.append(
                rng.uniform(
                    -10, 10,
                    (rng.random_integers(5), np_W[-1].shape[1])).astype(dtype))
        else:
            assert False

    def sharedW(value, dtype):
        return theano.shared(theano._asarray(value, dtype=dtype))

    tensor_W = [sharedW(W, dtype) for W in np_W]
    matrixmul = [MatrixMul(W) for W in tensor_W]
    assert all(mm.get_params()[0] == W for mm, W in zip(matrixmul, tensor_W))

    fn = [
        theano.function([x], mm.lmul(x)) for x, mm in zip(tensor_x, matrixmul)
    ]
    fn_T = [
        theano.function([x], mm.lmul_T(x))
        for x, mm in zip(tensor_x, matrixmul)
    ]
    for W, x, x_T, f, f_T in zip(np_W, np_x, np_x_T, fn, fn_T):
        np.testing.assert_allclose(f(x), np.dot(x, W))
        np.testing.assert_allclose(f_T(x_T), np.dot(x_T, W.T))
예제 #24
0
class Powerup(Layer):

    def __init__(self,
                 layer_name,
                 num_units,
                 num_pieces,
                 batch_size,
                 pool_stride = None,
                 randomize_pools = False,
                 p_sampling_mode = "normal",
                 irange = None,
                 sparse_init = None,
                 p_mean = 2.0,
                 p_std = 0.005,
                 normalize = False,
                 power_prod=False,
                 sparse_stdev = 1.,
                 include_prob = 1.0,
                 upper_bound = None,
                 init_bias = 0.,
                 relu = False,
                 centered_bias = False,
                 power_activ = "softplus",
                 uniform_p_range = (1.5, 9.0),
                 add_noise = False,
                 post_bias = False,
                 p_lr_scale = None,
                 W_lr_scale = None,
                 b_lr_scale = None,
                 max_col_norm = None,
                 max_row_norm = None,
                 mask_weights = None,
                 min_zero = False):
        """
            layer_name: A name for this layer that will be prepended to
                        monitoring channels related to this layer.
            num_units: The number of maxout units to use in this layer.
            num_pieces: The number of linear pieces to use in each maxout
                        unit.
            pool_stride: The distance between the start of each max pooling
                        region. Defaults to num_pieces, which makes the
                        pooling regions disjoint. If set to a smaller number,
                        can do overlapping pools.
            randomize_pools: Does max pooling over randomized subsets of
                        the linear responses, rather than over sequential
                        subsets.
            irange: if specified, initializes each weight randomly in
                U(-irange, irange)
            sparse_init: if specified, irange must not be specified.
                        This is an integer specifying how many weights to make
                        non-zero. All non-zero weights will be initialized
                        randomly in N(0, sparse_stdev^2)
            include_prob: probability of including a weight element in the set
               of weights initialized to U(-irange, irange). If not included
               a weight is initialized to 0. This defaults to 1.
            init_bias: All biases are initialized to this number
            W_lr_scale: The learning rate on the weights for this layer is
                multiplied by this scaling factor
            b_lr_scale: The learning rate on the biases for this layer is
                multiplied by this scaling factor
            max_col_norm: The norm of each column of the weight matrix is
                constrained to have at most this norm. If unspecified, no
                constraint. Constraint is enforced by re-projection (if
                necessary) at the end of each update.
            max_row_norm: Like max_col_norm, but applied to the rows.
            mask_weights: A binary matrix multiplied by the weights after each
                         update, allowing you to restrict their connectivity.
            min_zero: If true, includes a zero in the set we take a max over
                    for each maxout unit. This is equivalent to pooling over
                    rectified linear units.
        """

        assert p_sampling_mode in ["uniform", "normal"]
        assert power_activ in ["rect", "exp", "softplus", "sqr", "softhalf"]
        assert type(uniform_p_range) == tuple

        detector_layer_dim = num_units * num_pieces
        pool_size = num_pieces

        self.normalize = normalize
        self.uniform_p_range = uniform_p_range

        self.upper_bound = upper_bound
        self.power_prod = power_prod
        self.power_activ = power_activ

        self.centered_bias = centered_bias
        self.p_sampling_mode = p_sampling_mode

        if pool_stride is None:
            pool_stride = pool_size

        self.__dict__.update(locals())

        del self.self

        if self.centered_bias:
            self.c = sharedX(np.zeros((self.detector_layer_dim,)), name=layer_name + "_c")

        if not self.post_bias:
            self.b = sharedX( np.zeros((self.detector_layer_dim,)) + init_bias, name = layer_name + '_b')
        else:
            self.b = sharedX( np.zeros((self.num_units,)) + init_bias, name = layer_name + '_b')

        if self.power_activ == "softplus":
            if self.p_sampling_mode == "uniform":
                self.p = sharedX(self.get_uniform_p_vals())
            else:
                self.p = sharedX(self.get_log_p(mean=p_mean, std=p_std))
        else:
            if self.p_sampling_mode == "uniform":
                self.p = sharedX(self.get_uniform_p_vals())
            else:
                self.p = sharedX(self.get_log_p(mean=p_mean, std=p_std))

        if max_row_norm is not None:
            raise NotImplementedError()

    def get_p_vals(self, mean=None, std=None):
        rng = np.random.RandomState(12435)
        p_vals = abs(rng.normal(loc=mean, scale=std, size=(self.num_units,)))
        return p_vals

    def get_uniform_p_vals(self, min=1.5, max=9):
        """
            Sample the values uniformly such that the initial value of
            softplus(.) + 1 is between min and max.
        """
        rng = np.random.RandomState(12435)
        p_vals = np.log(np.exp(rng.uniform(low=min, high=max, size=(self.num_units,))-1)-1)
        return p_vals

    def get_log_p(self, mean=None, std=None):
        rng = np.random.RandomState(12435)
        assert mean >= 1.0, "Mean should be greater than 1."
        if self.power_activ == "softplus":
            p_vals = np.log(rng.normal(loc=np.exp(mean-1), scale=std, size=(self.num_units,)) - 1)
        elif self.power_activ == "exp":
            p_vals = rng.normal(loc=np.log(mean-1), scale=std, size=(self.num_units,))
        else:
            p_vals = np.sqrt(rng.normal(loc=mean, scale=std, size=(self.num_units,)) - 1)
        #p_vals = np.log(np.exp(rng.normal(loc=mean, scale=std, size=(self.num_units,))-1) - 1)
        return p_vals

    def get_lr_scalers(self):
        if not hasattr(self, 'W_lr_scale'):
            self.W_lr_scale = None

        if not hasattr(self, 'b_lr_scale'):
            self.b_lr_scale = None

        if not hasattr(self, 'p_lr_scale'):
            self.p_lr_scale = None

        rval = OrderedDict()

        if self.W_lr_scale is not None:
            W, = self.transformer.get_params()
            rval[W] = self.W_lr_scale

        if self.b_lr_scale is not None:
            rval[self.b] = self.b_lr_scale

        if self.p_lr_scale is not None:
            rval[self.p] = self.p_lr_scale

        return rval

    def set_input_space(self, space):
        """
        Note: this resets parameters!
        """
        self.input_space = space

        if isinstance(space, VectorSpace):
            self.requires_reformat = False
            self.input_dim = space.dim
        else:
            self.requires_reformat = True
            self.input_dim = space.get_total_dimension()
            self.desired_space = VectorSpace(self.input_dim)

        self.p.name = self.layer_name + "_p"

        if not ((self.detector_layer_dim - self.pool_size) % self.pool_stride == 0):
            if self.pool_stride == self.pool_size:
                raise ValueError("detector_layer_dim = %d, pool_size = %d. Should be divisible but remainder is %d" %
                             (self.detector_layer_dim, self.pool_size, self.detector_layer_dim % self.pool_size))
            raise ValueError()

        self.h_space = VectorSpace(self.detector_layer_dim)
        self.pool_layer_dim = (self.detector_layer_dim - self.pool_size)/ self.pool_stride + 1
        self.output_space = VectorSpace(self.pool_layer_dim)

        rng = self.mlp.rng
        if self.irange is not None:
            assert self.sparse_init is None
            W = rng.uniform(-self.irange,
                            self.irange,
                            (self.input_dim, self.detector_layer_dim)) * \
                (rng.uniform(0.,1., (self.input_dim, self.detector_layer_dim))
                 < self.include_prob)
        else:
            assert self.sparse_init is not None
            W = np.zeros((self.input_dim, self.detector_layer_dim))
            def mask_rejects(idx, i):
                if self.mask_weights is None:
                    return False
                return self.mask_weights[idx, i] == 0.
            for i in xrange(self.detector_layer_dim):
                assert self.sparse_init <= self.input_dim
                for j in xrange(self.sparse_init):
                    idx = rng.randint(0, self.input_dim)
                    while W[idx, i] != 0 or mask_rejects(idx, i):
                        idx = rng.randint(0, self.input_dim)
                    W[idx, i] = rng.randn()
            W *= self.sparse_stdev

        W = sharedX(W)
        W.name = self.layer_name + '_W'

        self.transformer = MatrixMul(W)

        W ,= self.transformer.get_params()
        assert W.name is not None

        if not hasattr(self, 'randomize_pools'):
            self.randomize_pools = False

        if self.randomize_pools:
            permute = np.zeros((self.detector_layer_dim, self.detector_layer_dim))
            for j in xrange(self.detector_layer_dim):
                i = rng.randint(self.detector_layer_dim)
                permute[i,j] = 1
            self.permute = sharedX(permute)

        if self.mask_weights is not None:
            expected_shape =  (self.input_dim, self.detector_layer_dim)
            if expected_shape != self.mask_weights.shape:
                raise ValueError("Expected mask with shape "+str(expected_shape)+" but got "+str(self.mask_weights.shape))
            self.mask = sharedX(self.mask_weights)

    def censor_updates(self, updates):
        # Patch old pickle files
        if not hasattr(self, 'mask_weights'):
            self.mask_weights = None

        if self.mask_weights is not None:
            W ,= self.transformer.get_params()
            if W in updates:
                updates[W] = updates[W] * self.mask

        if self.max_col_norm is not None:
            assert self.max_row_norm is None
            W ,= self.transformer.get_params()
            if W in updates:
                updated_W = updates[W]
                col_norms = T.sqrt(T.sum(T.sqr(updated_W), axis=0))
                desired_norms = T.clip(col_norms, 0, self.max_col_norm)
                updates[W] = updated_W * (desired_norms / (1e-7 + col_norms))

    def get_params(self):
        assert self.b.name is not None
        assert self.p.name is not None
        W ,= self.transformer.get_params()
        assert W.name is not None
        rval = self.transformer.get_params()
        assert not isinstance(rval, set)
        rval = list(rval)
        assert self.b not in rval
        rval.append(self.b)
        assert self.p not in rval
        rval.append(self.p)

        if self.centered_bias:
            assert self.c not in rval
            rval.append(self.c)

        return rval

    def get_weight_decay(self, coeff):
        if isinstance(coeff, str):
            coeff = float(coeff)
        assert isinstance(coeff, float) or hasattr(coeff, 'dtype')
        W ,= self.transformer.get_params()
        return coeff * T.sqr(W).sum()

    def get_p_decay(self, coeff, a):
        if isinstance(coeff, str):
            coeff = float(coeff)
        assert isinstance(coeff, float) or hasattr(coeff, 'dtype')
        return coeff * T.sqr(self.p-a).sum()

    def get_p_mean_decay(self, coeff, a):
        if isinstance(coeff, str):
            coeff = float(coeff)
        assert isinstance(coeff, float) or hasattr(coeff, 'dtype')
        return coeff * T.sqr(T.mean(self.p)-a).sum()


    def get_l1_weight_decay(self, coeff):
        if isinstance(coeff, str):
            coeff = float(coeff)
        assert isinstance(coeff, float) or hasattr(coeff, 'dtype')
        W ,= self.transformer.get_params()
        return coeff * abs(W).sum()

    def get_weights(self):
        if self.requires_reformat:
            # This is not really an unimplemented case.
            # We actually don't know how to format the weights
            # in design space. We got the data in topo space
            # and we don't have access to the dataset
            raise NotImplementedError()

        W ,= self.transformer.get_params()
        W = W.get_value()

        if not hasattr(self, 'randomize_pools'):
            self.randomize_pools = False

        if self.randomize_pools:
            warnings.warn("randomize_pools makes get_weights multiply by the permutation matrix. "
                          "If you call set_weights(W) and then call get_weights(), the return value will "
                          "WP not W.")

            P = self.permute.get_value()
            return np.dot(W,P)

        return W

    def set_power(self, p_val):
        self.p.set_value(p_val)

    def set_weights(self, weights):
        W, = self.transformer.get_params()
        W.set_value(weights)

    def set_biases(self, biases):
        self.b.set_value(biases)

    def get_biases(self):
        return self.b.get_value()

    def get_power(self):
        return self.p.get_value()

    def get_weights_format(self):
        return ('v', 'h')

    def get_weights_view_shape(self):
        total = self.detector_layer_dim
        cols = self.pool_size
        if cols == 1:
            # Let the PatchViewer decide how to arrange the units
            # when they're not pooled
            raise NotImplementedError()
        # When they are pooled, make each pooling unit have one row
        rows = total // cols
        if rows * cols < total:
            rows = rows + 1
        return rows, cols


    def get_weights_topo(self):

        if not isinstance(self.input_space, Conv2DSpace):
            raise NotImplementedError()

        # There was an implementation of this, but it was broken
        raise NotImplementedError()

    def get_monitoring_channels(self):

        W ,= self.transformer.get_params()

        assert W.ndim == 2

        sq_W = T.sqr(W)

        row_norms = T.sqrt(sq_W.sum(axis=1))
        col_norms = T.sqrt(sq_W.sum(axis=0))

        powers = self.p

        monitor_dict = OrderedDict([
                            ('power_min', powers.min()),
                            ('power_mean', powers.mean()),
                            ('power_max', powers.max()),
                            ('power_std', powers.std()),
                            ('b_min', self.b.min()),
                            ('b_mean', self.b.mean()),
                            ('b_max', self.b.max()),
                            ('row_norms_min'  , row_norms.min()),
                            ('row_norms_mean' , row_norms.mean()),
                            ('row_norms_max'  , row_norms.max()),
                            ('col_norms_min'  , col_norms.min()),
                            ('col_norms_mean' , col_norms.mean()),
                            ('col_norms_max'  , col_norms.max()),
                            ])

        if self.centered_bias:
            monitor_dict["c_min"] = self.c.min()
            monitor_dict["c_mean"] = self.c.mean()
            monitor_dict["c_max"] = self.c.max()
            monitor_dict["c_std"] = self.c.std()

        return monitor_dict

    def get_monitoring_channels_from_state(self, state):

        P = state

        rval = OrderedDict()

        if self.pool_size == 1:
            vars_and_prefixes = [ (P,'') ]
        else:
            vars_and_prefixes = [ (P, 'p_') ]

        for var, prefix in vars_and_prefixes:
            v_max = var.max(axis=0)
            v_min = var.min(axis=0)
            v_mean = var.mean(axis=0)
            v_range = v_max - v_min
            # max_x.mean_u is "the mean over *u*nits of the max over e*x*amples"
            # The x and u are included in the name because otherwise its hard
            # to remember which axis is which when reading the monitor
            # I use inner.outer rather than outer_of_inner or something like that
            # because I want mean_x.* to appear next to each other in the alphabetical
            # list, as these are commonly plotted together
            for key, val in [
                             ('max_x.max_u', v_max.max()),
                             ('max_x.mean_u', v_max.mean()),
                             ('max_x.min_u', v_max.min()),
                             ('min_x.max_u', v_min.max()),
                             ('min_x.mean_u', v_min.mean()),
                             ('min_x.min_u', v_min.min()),
                             ('range_x.max_u', v_range.max()),
                             ('range_x.mean_u', v_range.mean()),
                             ('range_x.min_u', v_range.min()),
                             ('mean_x.max_u', v_mean.max()),
                             ('mean_x.mean_u', v_mean.mean()),
                             ('mean_x.min_u', v_mean.min())
                             ]:
                rval[prefix+key] = val
        return rval

    def get_power_activ(self, power_in):
        if self.power_activ == "exp":
            pT = T.exp(power_in) + 1
        elif self.power_activ == "rect":
            pT = T.maximum(power_in, 1)
        elif self.power_activ == "softplus":
            pT = T.nnet.softplus(power_in) + 1
        elif self.power_activ == "softhalf":
            pT = T.log(T.exp(power_in) + 0.5) + 1.0
        elif self.power_activ == "sqr":
            pT = T.sqr(power_in) + 1
        else:
            pT = abs(power_in) + 1
        return pT

    def fprop(self, state_below):
        #Implements (\sum_i^T 1/T |W_i x|^{p_j} )^(1/p_j)
        self.input_space.validate(state_below)
        epsilon = 1e-10

        if self.requires_reformat:
            if not isinstance(state_below, tuple):
                for sb in get_debug_values(state_below):
                    if sb.shape[0] != self.dbm.batch_size:
                        raise ValueError("self.powerup.batch_size is %d but got shape of %d" % (self.mlp.batch_size, sb.shape[0]))
                    assert reduce(lambda x,y: x * y, sb.shape[1:]) == self.input_dim

            state_below = self.input_space.format_as(state_below, self.desired_space)

        if not self.post_bias:
            z = self.transformer.lmul(state_below) + self.b
        else:
            z = self.transformer.lmul(state_below)

        if not hasattr(self, 'randomize_pools'):
            self.randomize_pools = False

        if not hasattr(self, 'pool_stride'):
            self.pool_stride = self.pool_size

        if self.randomize_pools:
            z = T.dot(z, self.permute)

        if not hasattr(self, 'min_zero'):
            self.min_zero = False

        #Reshape the presynaptic activation to a 3D tensor. Such that the first
        #dimension is the batch size, second dimension corresponds to number of
        #hidden units and the third dimension is for the size of the pool.
        z_pools = z.reshape((z.shape[0], self.num_units, self.pool_size))

        #Center the pools
        if self.centered_bias:
            c = self.c.reshape((self.num_units, self.pool_size))
            c = c.dimshuffle('x', 0, 1)
            z_pools = z_pools - c

        #Dimshuffle the p_j for |W_i x|^{p_j}
        power_in = self.p.dimshuffle('x', 0, 'x')
        p_j = self.get_power_activ(power_in)

        if self.relu:
            z_pools = T.maximum(z_pools, 0)
        else:
            z_pools = abs(z_pools)
            #For numerical stability,
            z_pools = T.maximum(z_pools, epsilon)

        z_pools = z_pools**p_j

        if self.normalize:
            z_summed_pools = (1. / self.pool_size) * T.sum(z_pools, axis=2)
        else:
            z_summed_pools = T.sum(z_pools, axis=2)

        #Stabilization for the backprop
        z_summed_pools = T.maximum(z_summed_pools, epsilon)

        #Dimshuffle the p_j for 1/p_j of
        #(\sum_i^T 1/T |W_i x|^{p_j} )^(1/p_j)
        power_in = self.p.dimshuffle('x', 0)
        p_j = self.get_power_activ(power_in)

        z_summed_pools = z_summed_pools**(1./p_j)

        if self.upper_bound is not None:
            z_summed_pools = T.maximum(z_summed_pools, self.upper_bound)

        if self.power_prod:
            a = power_in * z_summed_pools
        else:
            a = z_summed_pools

        if self.post_bias:
            a = a + self.b

        return a

    def stddev_bias(self, x, eps=1e-9, axis=0):
        mu = T.mean(x + eps, axis=axis)
        mu.name = "std_mean"
        var = T.mean((x - mu)**2 + eps)
        var.name = "std_variance"
        stddev = T.sqrt(var)
        return stddev

    def cost_from_cost_matrix(self, cost_matrix):
        return cost_matrix.sum(axis=1).mean()

    def cost_matrix(self, Y, Y_hat):
        return T.sqr(Y - Y_hat)
class WeightedLogNormalLogLikelihood(Layer):

    __metaclass__ = RNNWrapper

    def __init__(self, layer_name, irange=0.0, init_bias=0.):
        super(WeightedLogNormalLogLikelihood, self).__init__()
        self.__dict__.update(locals())
        del self.self
        self.dim = 2

        self.b = sharedX(np.zeros((self.dim, )) + init_bias,
                         name=(layer_name + '_b'))

    @wraps(Layer.set_input_space)
    def set_input_space(self, space):

        self.input_space = space

        if isinstance(space, VectorSpace):
            self.requires_reformat = False
            self.input_dim = space.dim
        else:
            self.requires_reformat = True
            self.input_dim = space.get_total_dimension()
            self.desired_space = VectorSpace(self.input_dim)

        self.output_space = VectorSpace(self.dim)

        rng = self.mlp.rng

        W = rng.uniform(-self.irange, self.irange, (self.input_dim, self.dim))

        W = sharedX(W)
        W.name = self.layer_name + '_W'

        self.transformer = MatrixMul(W)

        W, = self.transformer.get_params()
        assert W.name is not None

    @wraps(Layer.get_params)
    def get_params(self):

        W, = self.transformer.get_params()
        assert W.name is not None
        rval = self.transformer.get_params()
        assert not isinstance(rval, set)
        rval = list(rval)
        assert self.b.name is not None
        assert self.b not in rval
        rval.append(self.b)
        return rval

    @wraps(Layer.get_weights)
    def get_weights(self):

        if self.requires_reformat:
            # This is not really an unimplemented case.
            # We actually don't know how to format the weights
            # in design space. We got the data in topo space
            # and we don't have access to the dataset
            raise NotImplementedError()
        W, = self.transformer.get_params()

        W = W.get_value()

        return W

    @wraps(Layer.set_weights)
    def set_weights(self, weights):

        W, = self.transformer.get_params()
        W.set_value(weights)

    @wraps(Layer.set_biases)
    def set_biases(self, biases):

        self.b.set_value(biases)

    @wraps(Layer.get_biases)
    def get_biases(self):
        """
        .. todo::
            WRITEME
        """
        return self.b.get_value()

    @wraps(Layer.get_weights_format)
    def get_weights_format(self):

        return ('v', 'h')

    @wraps(Layer.get_weights_topo)
    def get_weights_topo(self):

        if not isinstance(self.input_space, Conv2DSpace):
            raise NotImplementedError()

        W, = self.transformer.get_params()

        W = W.T

        W = W.reshape(
            (self.dim, self.input_space.shape[0], self.input_space.shape[1],
             self.input_space.num_channels))

        W = Conv2DSpace.convert(W, self.input_space.axes, ('b', 0, 1, 'c'))

        return function([], W)()

    @wraps(Layer.get_layer_monitoring_channels)
    def get_layer_monitoring_channels(self,
                                      state_below=None,
                                      state=None,
                                      targets=None):
        W, = self.transformer.get_params()

        assert W.ndim == 2

        sq_W = T.sqr(W)

        row_norms = T.sqrt(sq_W.sum(axis=1))
        col_norms = T.sqrt(sq_W.sum(axis=0))

        rval = OrderedDict([
            ('row_norms_min', row_norms.min()),
            ('row_norms_mean', row_norms.mean()),
            ('row_norms_max', row_norms.max()),
            ('col_norms_min', col_norms.min()),
            ('col_norms_mean', col_norms.mean()),
            ('col_norms_max', col_norms.max()),
        ])

        if (state is not None) or (state_below is not None):
            if state is None:
                state = self.fprop(state_below)

            mx = state.max(axis=0)
            mean = state.mean(axis=0)
            mn = state.min(axis=0)
            rg = mx - mn

            rval['range_x_max_u'] = rg.max()
            rval['range_x_mean_u'] = rg.mean()
            rval['range_x_min_u'] = rg.min()

            rval['max_x_max_u'] = mx.max()
            rval['max_x_mean_u'] = mx.mean()
            rval['max_x_min_u'] = mx.min()

            rval['mean_x_max_u'] = mean.max()
            rval['mean_x_mean_u'] = mean.mean()
            rval['mean_x_min_u'] = mean.min()

            rval['min_x_max_u'] = mn.max()
            rval['min_x_mean_u'] = mn.mean()
            rval['min_x_min_u'] = mn.min()

        if targets:
            y_target = targets[:, 0]
            cost_multiplier = targets[:, 1]
            mean = state[:, 0]
            sigma = T.exp(state[:, 1])
            nll = self.logprob(y_target, mean, sigma)
            prob_vector = T.exp(-nll)
            rval['prob'] = (prob_vector * cost_multiplier).sum() / (
                1.0 * cost_multiplier.sum())
            rval['ppl'] = T.exp(
                (nll * cost_multiplier).sum() / (1.0 * cost_multiplier.sum()))
        return rval

    def _linear_part(self, state_below):
        """
        Parameters
        ----------
        state_below : member of input_space
        Returns
        -------
        output : theano matrix
            Affine transformation of state_below
        """
        self.input_space.validate(state_below)

        if self.requires_reformat:
            state_below = self.input_space.format_as(state_below,
                                                     self.desired_space)

        z = self.transformer.lmul(state_below)
        z += self.b

        if self.layer_name is not None:
            z.name = self.layer_name + '_z'

        return z

    @wraps(Layer.fprop)
    def fprop(self, state_below):
        p = self._linear_part(state_below)
        return p

    def logprob(self, y_target, mean, sigma):
        return (((T.log(y_target) - mean)**2 / (2 * sigma**2) +
                 T.log(y_target * sigma * T.sqrt(2 * np.pi))))

    @wraps(Layer.cost)
    def cost(self, Y, Y_hat):
        mean = Y_hat[:, 0]  # + 1.6091597151048114
        sigma = T.exp(Y_hat[:, 1])  # + 0.26165911509618789
        y_target = Y[:, 0]
        cost_multiplier = Y[:, 1]
        return (self.logprob(y_target, mean, sigma) * cost_multiplier).sum() \
            / (1.0 * cost_multiplier.sum())
예제 #26
0
    def set_input_space(self, space):

        self.input_space = space

        if not isinstance(space, Conv2DSpace):
            raise BadInputSpaceError("ConvRectifiedLinear.set_input_space "
                                     "expected a Conv2DSpace, got " +
                                     str(space) + " of type " +
                                     str(type(space)))

        rng = self.mlp.rng

        if self.border_mode == 'valid':
            output_shape = [
                (self.input_space.shape[0] - self.kernel_shape[0]) /
                self.kernel_stride[0] + 1,
                (self.input_space.shape[1] - self.kernel_shape[1]) /
                self.kernel_stride[1] + 1
            ]
        elif self.border_mode == 'full':
            output_shape = [
                (self.input_space.shape[0] + self.kernel_shape[0]) /
                self.kernel_stride[0] - 1,
                (self.input_space.shape[1] + self.kernel_shape[1]) /
                self.kernel_stride[1] - 1
            ]

        self.detector_space = Conv2DSpace(shape=output_shape,
                                          num_channels=self.output_channels,
                                          axes=('b', 'c', 0, 1))

        if self.irange is not None:
            assert self.sparse_init is None
            self.transformer = conv2d.make_random_conv2D(
                irange=self.irange,
                input_space=self.input_space,
                output_space=self.detector_space,
                kernel_shape=self.kernel_shape,
                batch_size=self.mlp.batch_size,
                subsample=self.kernel_stride,
                border_mode=self.border_mode,
                rng=rng)
        elif self.sparse_init is not None:
            self.transformer = conv2d.make_sparse_random_conv2D(
                num_nonzero=self.sparse_init,
                input_space=self.input_space,
                output_space=self.detector_space,
                kernel_shape=self.kernel_shape,
                batch_size=self.mlp.batch_size,
                subsample=self.kernel_stride,
                border_mode=self.border_mode,
                rng=rng)

        W, = self.transformer.get_params()
        W.name = 'W'
        self.b = sharedX(
            np.zeros(((self.num_pieces * self.output_channels), )) +
            self.init_bias)
        self.b.name = 'b'

        print 'Input shape: ', self.input_space.shape
        print 'Detector space: ', self.detector_space.shape

        assert self.pool_type in ['max', 'mean']

        dummy_batch_size = self.mlp.batch_size
        if dummy_batch_size is None:
            dummy_batch_size = 2
        dummy_detector = sharedX(
            self.detector_space.get_origin_batch(dummy_batch_size))

        #dummy_p = dummy_p.eval()
        self.output_space = Conv2DSpace(shape=[400, 1],
                                        num_channels=self.output_channels,
                                        axes=('b', 'c', 0, 1))

        W = rng.uniform(-self.irange, self.irange,
                        (426, (self.num_pieces * self.output_channels)))
        W = sharedX(W)
        W.name = self.layer_name + "_w"
        self.transformer = MatrixMul(W)

        print 'Output space: ', self.output_space.shape
예제 #27
0
    def set_input_space(self, space):
        """ Note: this resets parameters! """

        self.input_space = space

        assert self.gater.get_input_space() == space

        if isinstance(space, VectorSpace):
            self.requires_reformat = False
            self.input_dim = space.dim
        else:
            self.requires_reformat = True
            self.input_dim = space.get_total_dimension()
            self.desired_space = VectorSpace(self.input_dim)

        if not ((self.detector_layer_dim - self.pool_size) % self.pool_stride
                == 0):
            if self.pool_stride == self.pool_size:
                raise ValueError(
                    "detector_layer_dim = %d, pool_size = %d. Should be divisible but remainder is %d"
                    % (self.detector_layer_dim, self.pool_size,
                       self.detector_layer_dim % self.pool_size))
            raise ValueError()

        self.h_space = VectorSpace(self.detector_layer_dim)
        self.pool_layer_dim = (self.detector_layer_dim -
                               self.pool_size) / self.pool_stride + 1
        self.output_space = VectorSpace(self.pool_layer_dim)

        rng = self.mlp.rng
        if self.irange is not None:
            assert self.sparse_init is None
            W = rng.uniform(-self.irange,
                            self.irange,
                            (self.input_dim, self.detector_layer_dim)) * \
                (rng.uniform(0.,1., (self.input_dim, self.detector_layer_dim))
                 < self.include_prob)
        else:
            assert self.sparse_init is not None
            W = np.zeros((self.input_dim, self.detector_layer_dim))

            def mask_rejects(idx, i):
                if self.mask_weights is None:
                    return False
                return self.mask_weights[idx, i] == 0.

            for i in xrange(self.detector_layer_dim):
                assert self.sparse_init <= self.input_dim
                for j in xrange(self.sparse_init):
                    idx = rng.randint(0, self.input_dim)
                    while W[idx, i] != 0 or mask_rejects(idx, i):
                        idx = rng.randint(0, self.input_dim)
                    W[idx, i] = rng.randn()
            W *= self.sparse_stdev

        W = sharedX(W)
        W.name = self.layer_name + '_W'

        self.transformer = MatrixMul(W)

        W, = self.transformer.get_params()
        assert W.name is not None

        if not hasattr(self, 'randomize_pools'):
            self.randomize_pools = False

        if self.randomize_pools:
            permute = np.zeros(
                (self.detector_layer_dim, self.detector_layer_dim))
            for j in xrange(self.detector_layer_dim):
                i = rng.randint(self.detector_layer_dim)
                permute[i, j] = 1
            self.permute = sharedX(permute)

        if self.mask_weights is not None:
            expected_shape = (self.input_dim, self.detector_layer_dim)
            if expected_shape != self.mask_weights.shape:
                raise ValueError("Expected mask with shape " +
                                 str(expected_shape) + " but got " +
                                 str(self.mask_weights.shape))
            self.mask = sharedX(self.mask_weights)
예제 #28
0
파일: discomax.py 프로젝트: cc13ny/galatea
    def set_input_space(self, space):
        """
        Tells the layer to use the specified input space.

        This resets parameters! The weight matrix is initialized with the
        size needed to receive input from this space.

        Parameters
        ----------
        space : Space
            The Space that the input will lie in.
        """

        self.input_space = space

        if isinstance(space, VectorSpace):
            self.requires_reformat = False
            self.input_dim = space.dim
        else:
            self.requires_reformat = True
            self.input_dim = space.get_total_dimension()
            self.desired_space = VectorSpace(self.input_dim)

        if not (0 == ((self.detector_layer_dim - self.pool_size) %
                      self.pool_stride)):
            if self.pool_stride == self.pool_size:
                raise ValueError("detector_layer_dim = %d, pool_size = %d. "
                                 "Should be divisible but remainder is %d" %
                                 (self.detector_layer_dim,
                                  self.pool_size,
                                  self.detector_layer_dim % self.pool_size))
            raise ValueError()

        self.h_space = VectorSpace(self.detector_layer_dim)
        self.pool_layer_dim = ((self.detector_layer_dim - self.pool_size) /
                               self.pool_stride + 1)
        self.output_space = VectorSpace(self.pool_layer_dim)

        rng = self.mlp.rng
        if self.irange is not None:
            assert self.sparse_init is None
            W = rng.uniform(-self.irange,
                            self.irange,
                            (self.input_dim, self.detector_layer_dim)) * \
                (rng.uniform(0., 1., (self.input_dim, self.detector_layer_dim))
                 < self.include_prob)
        else:
            assert self.sparse_init is not None
            W = np.zeros((self.input_dim, self.detector_layer_dim))

            def mask_rejects(idx, i):
                if self.mask_weights is None:
                    return False
                return self.mask_weights[idx, i] == 0.

            for i in xrange(self.detector_layer_dim):
                assert self.sparse_init <= self.input_dim
                for j in xrange(self.sparse_init):
                    idx = rng.randint(0, self.input_dim)
                    while W[idx, i] != 0 or mask_rejects(idx, i):
                        idx = rng.randint(0, self.input_dim)
                    W[idx, i] = rng.randn()
            W *= self.sparse_stdev

        W = sharedX(W)
        W.name = self.layer_name + '_W'

        self.transformer = MatrixMul(W)

        W, = self.transformer.get_params()
        assert W.name is not None

        if not hasattr(self, 'randomize_pools'):
            self.randomize_pools = False

        if self.randomize_pools:
            permute = np.zeros((self.detector_layer_dim,
                                self.detector_layer_dim))
            for j in xrange(self.detector_layer_dim):
                i = rng.randint(self.detector_layer_dim)
                permute[i, j] = 1
            self.permute = sharedX(permute)

        if self.mask_weights is not None:
            expected_shape = (self.input_dim, self.detector_layer_dim)
            if expected_shape != self.mask_weights.shape:
                raise ValueError("Expected mask with shape " +
                                 str(expected_shape) +
                                 " but got " +
                                 str(self.mask_weights.shape))
            self.mask = sharedX(self.mask_weights)
class CpuConvMaxout(Layer):
    """
    .. todo::

        WRITEME
    """
    def __init__(self,
                 output_channels,
                 num_pieces,
                 kernel_shape,
                 pool_shape,
                 pool_stride,
                 layer_name,
                 irange=None,
                 border_mode='valid',
                 sparse_init=None,
                 include_prob=1.0,
                 init_bias=0.,
                 W_lr_scale=None,
                 b_lr_scale=None,
                 left_slope=0.0,
                 max_kernel_norm=None,
                 pool_type='max',
                 detector_normalization=None,
                 output_normalization=None,
                 kernel_stride=(1, 1)):
        """
        .. todo::

            WRITEME properly

         output_channels: The number of output channels the layer should have.
         kernel_shape: The shape of the convolution kernel.
         pool_shape: The shape of the spatial max pooling. A two-tuple of ints.
         pool_stride: The stride of the spatial max pooling. Also must be
                      square.
         layer_name: A name for this layer that will be prepended to
                     monitoring channels related to this layer.
         irange: if specified, initializes each weight randomly in
                 U(-irange, irange)
         border_mode: A string indicating the size of the output:
            full - The output is the full discrete linear convolution of the
                   inputs.
            valid - The output consists only of those elements that do not rely
                    on the zero-padding.(Default)
         include_prob: probability of including a weight element in the set
                       of weights initialized to U(-irange, irange). If not
                       included it is initialized to 0.
         init_bias: All biases are initialized to this number
         W_lr_scale: The learning rate on the weights for this layer is
                     multiplied by this scaling factor
         b_lr_scale: The learning rate on the biases for this layer is
                     multiplied by this scaling factor
         left_slope: **TODO**
         max_kernel_norm: If specifed, each kernel is constrained to have at
                          most this norm.
         pool_type: The type of the pooling operation performed the the
                    convolution. Default pooling type is max-pooling.
         detector_normalization, output_normalization:
              if specified, should be a callable object. the state of the
              network is optionally replaced with normalization(state) at each
              of the 3 points in processing:
                  detector: the maxout units can be normalized prior to the
                            spatial pooling
                  output: the output of the layer, after sptial pooling, can
                          be normalized as well
         kernel_stride: The stride of the convolution kernel. A two-tuple of
                        ints.
        """

        #super(ConvRectifiedLinear, self).__init__()

        if (irange is None) and (sparse_init is None):
            raise AssertionError("You should specify either irange or "
                                 "sparse_init when calling the constructor of "
                                 "ConvRectifiedLinear.")
        elif (irange is not None) and (sparse_init is not None):
            raise AssertionError("You should specify either irange or "
                                 "sparse_init when calling the constructor of "
                                 "ConvRectifiedLinear and not both.")

        self.__dict__.update(locals())
        del self.self

    @wraps(Layer.get_lr_scalers)
    def get_lr_scalers(self):

        if not hasattr(self, 'W_lr_scale'):
            self.W_lr_scale = None

        if not hasattr(self, 'b_lr_scale'):
            self.b_lr_scale = None

        rval = OrderedDict()

        if self.W_lr_scale is not None:
            W, = self.transformer.get_params()
            rval[W] = self.W_lr_scale

        if self.b_lr_scale is not None:
            rval[self.b] = self.b_lr_scale

        return rval

    @wraps(Layer.set_input_space)
    def set_input_space(self, space):

        self.input_space = space

        if not isinstance(space, Conv2DSpace):
            raise BadInputSpaceError("ConvRectifiedLinear.set_input_space "
                                     "expected a Conv2DSpace, got " +
                                     str(space) + " of type " +
                                     str(type(space)))

        rng = self.mlp.rng

        if self.border_mode == 'valid':
            output_shape = [(self.input_space.shape[0]-self.kernel_shape[0]) /
                            self.kernel_stride[0] + 1,
                            (self.input_space.shape[1]-self.kernel_shape[1]) /
                            self.kernel_stride[1] + 1]
        elif self.border_mode == 'full':
            output_shape = [(self.input_space.shape[0]+self.kernel_shape[0]) /
                            self.kernel_stride[0] - 1,
                            (self.input_space.shape[1]+self.kernel_shape[1]) /
                            self.kernel_stride[1] - 1]

        self.detector_space = Conv2DSpace(shape=output_shape,
                                          num_channels=self.output_channels,
                                          axes=('b', 'c', 0, 1))

        if self.irange is not None:
            assert self.sparse_init is None
            self.transformer = conv2d.make_random_conv2D(
                irange=self.irange,
                input_space=self.input_space,
                output_space=self.detector_space,
                kernel_shape=self.kernel_shape,
                batch_size=self.mlp.batch_size,
                subsample=self.kernel_stride,
                border_mode=self.border_mode,
                rng=rng)
        elif self.sparse_init is not None:
            self.transformer = conv2d.make_sparse_random_conv2D(
                num_nonzero=self.sparse_init,
                input_space=self.input_space,
                output_space=self.detector_space,
                kernel_shape=self.kernel_shape,
                batch_size=self.mlp.batch_size,
                subsample=self.kernel_stride,
                border_mode=self.border_mode,
                rng=rng)

        W, = self.transformer.get_params()
        W.name = 'W'
        self.b = sharedX(np.zeros(((self.num_pieces*self.output_channels),)) + self.init_bias)
        self.b.name = 'b'

        print 'Input shape: ', self.input_space.shape
        print 'Detector space: ', self.detector_space.shape

        assert self.pool_type in ['max', 'mean']

        dummy_batch_size = self.mlp.batch_size
        if dummy_batch_size is None:
            dummy_batch_size = 2
        dummy_detector = sharedX(
            self.detector_space.get_origin_batch(dummy_batch_size))
            
            
        #dummy_p = dummy_p.eval()
        self.output_space = Conv2DSpace(shape=[1, 1],
                                        num_channels=self.output_channels,
                                        axes=('b', 'c', 0, 1))
										
        W = rng.uniform(-self.irange,self.irange,(426, (self.num_pieces*self.output_channels)))
        W = sharedX(W)
        W.name = self.layer_name + "_w"
        self.transformer = MatrixMul(W)
		
        print 'Output space: ', self.output_space.shape

    @wraps(Layer.censor_updates)
    def censor_updates(self, updates):
        """
        .. todo::

            WRITEME
        """

        if self.max_kernel_norm is not None:
            W, = self.transformer.get_params()
            if W in updates:
                updated_W = updates[W]
                row_norms = T.sqrt(T.sum(T.sqr(updated_W), axis=(1)))
                desired_norms = T.clip(row_norms, 0, self.max_kernel_norm)
                scales = desired_norms / (1e-7 + row_norms)
                updates[W] = updated_W * scales.dimshuffle(0, 'x')

    @wraps(Layer.get_params)
    def get_params(self):
        """
        .. todo::

            WRITEME
        """
        assert self.b.name is not None
        W, = self.transformer.get_params()
        assert W.name is not None
        rval = self.transformer.get_params()
        assert not isinstance(rval, set)
        rval = list(rval)
        assert self.b not in rval
        rval.append(self.b)
        return rval

    @wraps(Layer.get_weight_decay)
    def get_weight_decay(self, coeff):

        if isinstance(coeff, str):
            coeff = float(coeff)
        assert isinstance(coeff, float) or hasattr(coeff, 'dtype')
        W, = self.transformer.get_params()
        return coeff * T.sqr(W).sum()

    @wraps(Layer.get_l1_weight_decay)
    def get_l1_weight_decay(self, coeff):

        if isinstance(coeff, str):
            coeff = float(coeff)
        assert isinstance(coeff, float) or hasattr(coeff, 'dtype')
        W, = self.transformer.get_params()
        return coeff * abs(W).sum()

    @wraps(Layer.set_weights)
    def set_weights(self, weights):

        W, = self.transformer.get_params()
        W.set_value(weights)

    @wraps(Layer.set_biases)
    def set_biases(self, biases):

        self.b.set_value(biases)

    @wraps(Layer.get_biases)
    def get_biases(self):

        return self.b.get_value()

    @wraps(Layer.get_weights_format)
    def get_weights_format(self):

        return ('v', 'h')

    @wraps(Layer.get_weights_topo)
    def get_weights_topo(self):

        outp, inp, rows, cols = range(4)
        raw = self.transformer._filters.get_value()

        return np.transpose(raw, (outp, rows, cols, inp))

    @wraps(Layer.get_monitoring_channels)
    def get_monitoring_channels(self):

        W, = self.transformer.get_params()

        sq_W = T.sqr(W)

        row_norms = T.sqrt(sq_W.sum(axis=(1)))

        return OrderedDict([('kernel_norms_min',  row_norms.min()),
                            ('kernel_norms_mean', row_norms.mean()),
                            ('kernel_norms_max',  row_norms.max()), ])

    @wraps(Layer.fprop)
    def fprop(self, state_below):

        self.input_space.validate(state_below)
        axes = self.input_space.axes
        #z = self.transformer.lmul(state_below) + self.b
        state_below = state_below.dimshuffle(3,1,2,0)	
        z = self.transformer.lmul(state_below) +self.b
        z = z.dimshuffle(0,3,1,2)

        if self.layer_name is not None:
            z.name = self.layer_name + '_z'

	    #ReLUs
        d = T.maximum(z, 0)
		
        # Max pooling between linear pieces
        # d = None
        # for i in xrange(self.num_pieces):
            # t = z[:,i::self.num_pieces,:,:]
            # if d is None:
                # d = t
            # else:
                # d = T.maximum(d, t)

        self.detector_space.validate(d)

        if not hasattr(self, 'detector_normalization'):
            self.detector_normalization = None

        if self.detector_normalization:
            d = self.detector_normalization(d)

        # NOTE : Custom pooling
        p = d.max(3)[:,:,None,:]

        self.output_space.validate(p)

        if not hasattr(self, 'output_normalization'):
            self.output_normalization = None

        if self.output_normalization:
            p = self.output_normalization(p)

        return p
    def upward_pass(self, inputs):
        """
        Wrapper to fprop functions for PretrainedLayer class

        Parameters
        ----------
        inputs : WRITEME

        Returns
        -------
        WRITEME
        """
        return self.fprop(inputs)
예제 #30
0
파일: discomax.py 프로젝트: cc13ny/galatea
class Discomax(Layer):
    """
    A hidden layer that does max pooling over groups of linear
    units. If you use this code in a research project, please
    cite

    "Maxout Networks" Ian J. Goodfellow, David Warde-Farley,
    Mehdi Mirza, Aaron Courville, and Yoshua Bengio. ICML 2013


    Parameters
    ----------
    layer_name : str
        A name for this layer that will be prepended to monitoring channels
        related to this layer. Each layer in an MLP must have a unique
        name.
    num_units : int
        The number of maxout units to use in this layer.
    num_pieces: int
        The number of linear pieces to use in each maxout unit.
    pool_stride : int, optional
        The distance between the start of each max pooling region. Defaults
        to num_pieces, which makes the pooling regions disjoint. If set to
        a smaller number, can do overlapping pools.
    randomize_pools : bool, optional
        If True, does max pooling over randomized subsets of the linear
        responses, rather than over sequential subsets.
    irange : float, optional
        If specified, initializes each weight randomly in
        U(-irange, irange)
    sparse_init : int, optional
        if specified, irange must not be specified.
        This is an integer specifying how many weights to make non-zero.
        All non-zero weights will be initialized randomly in
        N(0, sparse_stdev^2)
    sparse_stdev : float, optional
        WRITEME
    include_prob : float, optional
        probability of including a weight element in the set
        of weights initialized to U(-irange, irange). If not included
        a weight is initialized to 0. This defaults to 1.
    init_bias : float or ndarray, optional
        A value that can be broadcasted to a numpy vector.
        All biases are initialized to this number.
    W_lr_scale: float, optional
        The learning rate on the weights for this layer is multiplied by
        this scaling factor
    b_lr_scale: float, optional
        The learning rate on the biases for this layer is multiplied by
        this scaling factor
    max_col_norm: float, optional
        The norm of each column of the weight matrix is constrained to
        have at most this norm. If unspecified, no constraint. Constraint
        is enforced by re-projection (if necessary) at the end of each
        update.
    max_row_norm: float, optional
        Like max_col_norm, but applied to the rows.
    mask_weights: ndarray, optional
        A binary matrix multiplied by the weights after each update,
        allowing you to restrict their connectivity.
    min_zero: bool, optional
        If true, includes a zero in the set we take a max over for each
        maxout unit. This is equivalent to pooling over rectified
        linear units.
    """

    def __str__(self):
        """
        Returns
        -------
        rval : str
            A string representation of the object. In this case, just the
            class name.
        """
        return "Maxout"

    def __init__(self,
                 layer_name,
                 num_units,
                 num_pieces,
                 pool_stride=None,
                 randomize_pools=False,
                 irange=None,
                 sparse_init=None,
                 sparse_stdev=1.,
                 include_prob=1.0,
                 init_bias=0.,
                 W_lr_scale=None,
                 b_lr_scale=None,
                 max_col_norm=None,
                 max_row_norm=None,
                 mask_weights=None,
                 min_zero=False):

        super(Discomax, self).__init__()

        detector_layer_dim = num_units * num_pieces
        pool_size = num_pieces

        if pool_stride is None:
            pool_stride = pool_size

        self.__dict__.update(locals())
        del self.self

        self.b = sharedX(np.zeros((self.detector_layer_dim,)) + init_bias,
                         name=(layer_name + '_b'))
        self.ofs = sharedX(np.zeros((self.detector_layer_dim,)),
                         name=(layer_name + '_ofs'))

        if max_row_norm is not None:
            raise NotImplementedError()

    @functools.wraps(Model.get_lr_scalers)
    def get_lr_scalers(self):

        if not hasattr(self, 'W_lr_scale'):
            self.W_lr_scale = None

        if not hasattr(self, 'b_lr_scale'):
            self.b_lr_scale = None

        rval = OrderedDict()

        if self.W_lr_scale is not None:
            W, = self.transformer.get_params()
            rval[W] = self.W_lr_scale

        if self.b_lr_scale is not None:
            rval[self.b] = self.b_lr_scale

        return rval

    def set_input_space(self, space):
        """
        Tells the layer to use the specified input space.

        This resets parameters! The weight matrix is initialized with the
        size needed to receive input from this space.

        Parameters
        ----------
        space : Space
            The Space that the input will lie in.
        """

        self.input_space = space

        if isinstance(space, VectorSpace):
            self.requires_reformat = False
            self.input_dim = space.dim
        else:
            self.requires_reformat = True
            self.input_dim = space.get_total_dimension()
            self.desired_space = VectorSpace(self.input_dim)

        if not (0 == ((self.detector_layer_dim - self.pool_size) %
                      self.pool_stride)):
            if self.pool_stride == self.pool_size:
                raise ValueError("detector_layer_dim = %d, pool_size = %d. "
                                 "Should be divisible but remainder is %d" %
                                 (self.detector_layer_dim,
                                  self.pool_size,
                                  self.detector_layer_dim % self.pool_size))
            raise ValueError()

        self.h_space = VectorSpace(self.detector_layer_dim)
        self.pool_layer_dim = ((self.detector_layer_dim - self.pool_size) /
                               self.pool_stride + 1)
        self.output_space = VectorSpace(self.pool_layer_dim)

        rng = self.mlp.rng
        if self.irange is not None:
            assert self.sparse_init is None
            W = rng.uniform(-self.irange,
                            self.irange,
                            (self.input_dim, self.detector_layer_dim)) * \
                (rng.uniform(0., 1., (self.input_dim, self.detector_layer_dim))
                 < self.include_prob)
        else:
            assert self.sparse_init is not None
            W = np.zeros((self.input_dim, self.detector_layer_dim))

            def mask_rejects(idx, i):
                if self.mask_weights is None:
                    return False
                return self.mask_weights[idx, i] == 0.

            for i in xrange(self.detector_layer_dim):
                assert self.sparse_init <= self.input_dim
                for j in xrange(self.sparse_init):
                    idx = rng.randint(0, self.input_dim)
                    while W[idx, i] != 0 or mask_rejects(idx, i):
                        idx = rng.randint(0, self.input_dim)
                    W[idx, i] = rng.randn()
            W *= self.sparse_stdev

        W = sharedX(W)
        W.name = self.layer_name + '_W'

        self.transformer = MatrixMul(W)

        W, = self.transformer.get_params()
        assert W.name is not None

        if not hasattr(self, 'randomize_pools'):
            self.randomize_pools = False

        if self.randomize_pools:
            permute = np.zeros((self.detector_layer_dim,
                                self.detector_layer_dim))
            for j in xrange(self.detector_layer_dim):
                i = rng.randint(self.detector_layer_dim)
                permute[i, j] = 1
            self.permute = sharedX(permute)

        if self.mask_weights is not None:
            expected_shape = (self.input_dim, self.detector_layer_dim)
            if expected_shape != self.mask_weights.shape:
                raise ValueError("Expected mask with shape " +
                                 str(expected_shape) +
                                 " but got " +
                                 str(self.mask_weights.shape))
            self.mask = sharedX(self.mask_weights)

    def _modify_updates(self, updates):
        """
        Replaces the values in `updates` if needed to enforce the options set
        in the __init__ method, including `mask_weights` and `max_col_norm`.

        Parameters
        ----------
        updates : OrderedDict
            A dictionary mapping parameters (including parameters not
            belonging to this model) to updated values of those parameters.
            The dictionary passed in contains the updates proposed by the
            learning algorithm. This function modifies the dictionary
            directly. The modified version will be compiled and executed
            by the learning algorithm.
        """

        # Patch old pickle files
        if not hasattr(self, 'mask_weights'):
            self.mask_weights = None

        if self.mask_weights is not None:
            W, = self.transformer.get_params()
            if W in updates:
                updates[W] = updates[W] * self.mask

        if self.max_col_norm is not None:
            assert self.max_row_norm is None
            W, = self.transformer.get_params()
            if W in updates:
                updated_W = updates[W]
                col_norms = T.sqrt(T.sum(T.sqr(updated_W), axis=0))
                desired_norms = T.clip(col_norms, 0, self.max_col_norm)
                updates[W] = updated_W * (desired_norms / (1e-7 + col_norms))

        if self.ofs in updates:
            updates[self.ofs] = T.clip(updates[self.ofs], 0., 1e6)

    @functools.wraps(Model.get_params)
    def get_params(self):
        assert self.b.name is not None
        W, = self.transformer.get_params()
        assert W.name is not None
        rval = self.transformer.get_params()
        assert not isinstance(rval, set)
        rval = list(rval)
        assert self.b not in rval
        rval.append(self.b)
        rval.append(self.ofs)
        return rval

    @functools.wraps(Layer.get_weight_decay)
    def get_weight_decay(self, coeff):
        if isinstance(coeff, str):
            coeff = float(coeff)
        assert isinstance(coeff, float) or hasattr(coeff, 'dtype')
        W, = self.transformer.get_params()
        return coeff * T.sqr(W).sum()

    @functools.wraps(Layer.get_l1_weight_decay)
    def get_l1_weight_decay(self, coeff):
        if isinstance(coeff, str):
            coeff = float(coeff)
        assert isinstance(coeff, float) or hasattr(coeff, 'dtype')
        W, = self.transformer.get_params()
        return coeff * T.abs_(W).sum()

    @functools.wraps(Model.get_weights)
    def get_weights(self):
        if self.requires_reformat:
            # This is not really an unimplemented case.
            # We actually don't know how to format the weights
            # in design space. We got the data in topo space
            # and we don't have access to the dataset
            raise NotImplementedError()
        W, = self.transformer.get_params()
        W = W.get_value()

        if not hasattr(self, 'randomize_pools'):
            self.randomize_pools = False

        if self.randomize_pools:
            warnings.warn("randomize_pools makes get_weights multiply by the "
                          "permutation matrix. If you call set_weights(W) and "
                          "then call get_weights(), the return value will "
                          "WP not W.")
            P = self.permute.get_value()
            return np.dot(W, P)

        return W

    @functools.wraps(Layer.set_weights)
    def set_weights(self, weights):
        W, = self.transformer.get_params()
        W.set_value(weights)

    @functools.wraps(Layer.set_biases)
    def set_biases(self, biases):
        self.b.set_value(biases)

    @functools.wraps(Layer.get_biases)
    def get_biases(self):
        return self.b.get_value()

    @functools.wraps(Model.get_weights_format)
    def get_weights_format(self):
        return ('v', 'h')

    @functools.wraps(Model.get_weights_view_shape)
    def get_weights_view_shape(self):
        total = self.detector_layer_dim
        cols = self.pool_size
        if cols == 1:
            # Let the PatchViewer decide how to arrange the units
            # when they're not pooled
            raise NotImplementedError()
        # When they are pooled, make each pooling unit have one row
        rows = total // cols
        if rows * cols < total:
            rows = rows + 1
        return rows, cols

    @functools.wraps(Model.get_weights_topo)
    def get_weights_topo(self):

        if not isinstance(self.input_space, Conv2DSpace):
            raise NotImplementedError()

        # There was an implementation of this, but it was broken
        raise NotImplementedError()

    @functools.wraps(Layer.get_monitoring_channels)
    def get_monitoring_channels(self):
        warnings.warn("Layer.get_monitoring_channels is " +
                      "deprecated. Use get_layer_monitoring_channels " +
                      "instead. Layer.get_monitoring_channels " +
                      "will be removed on or after september 24th 2014",
                      stacklevel=2)

        W, = self.transformer.get_params()

        assert W.ndim == 2

        sq_W = T.sqr(W)

        row_norms = T.sqrt(sq_W.sum(axis=1))
        col_norms = T.sqrt(sq_W.sum(axis=0))

        row_norms_min = row_norms.min()
        row_norms_min.__doc__ = ("The smallest norm of any row of the "
                                 "weight matrix W. This is a measure of the "
                                 "least influence any visible unit has.")

        return OrderedDict([('row_norms_min',  row_norms_min),
                            ('row_norms_mean', row_norms.mean()),
                            ('row_norms_max',  row_norms.max()),
                            ('col_norms_min',  col_norms.min()),
                            ('col_norms_mean', col_norms.mean()),
                            ('col_norms_max',  col_norms.max()), ])

    @functools.wraps(Layer.get_monitoring_channels_from_state)
    def get_monitoring_channels_from_state(self, state):
        warnings.warn("Layer.get_monitoring_channels_from_state is " +
                      "deprecated. Use get_layer_monitoring_channels " +
                      "instead. Layer.get_monitoring_channels_from_state " +
                      "will be removed on or after september 24th 2014",
                      stacklevel=2)

        P = state

        rval = OrderedDict()

        if self.pool_size == 1:
            vars_and_prefixes = [(P, '')]
        else:
            vars_and_prefixes = [(P, 'p_')]

        for var, prefix in vars_and_prefixes:
            v_max = var.max(axis=0)
            v_min = var.min(axis=0)
            v_mean = var.mean(axis=0)
            v_range = v_max - v_min

            # max_x.mean_u is "the mean over *u*nits of the max over
            # e*x*amples" The x and u are included in the name because
            # otherwise its hard to remember which axis is which when reading
            # the monitor I use inner.outer rather than outer_of_inner or
            # something like that because I want mean_x.* to appear next to
            # each other in the alphabetical list, as these are commonly
            # plotted together
            for key, val in [('max_x.max_u', v_max.max()),
                             ('max_x.mean_u', v_max.mean()),
                             ('max_x.min_u', v_max.min()),
                             ('min_x.max_u', v_min.max()),
                             ('min_x.mean_u', v_min.mean()),
                             ('min_x.min_u', v_min.min()),
                             ('range_x.max_u', v_range.max()),
                             ('range_x.mean_u', v_range.mean()),
                             ('range_x.min_u', v_range.min()),
                             ('mean_x.max_u', v_mean.max()),
                             ('mean_x.mean_u', v_mean.mean()),
                             ('mean_x.min_u', v_mean.min())]:
                rval[prefix+key] = val

        return rval

    @functools.wraps(Layer.get_layer_monitoring_channels)
    def get_layer_monitoring_channels(self, state_below=None,
                                      state=None, targets=None):

        W, = self.transformer.get_params()

        assert W.ndim == 2

        sq_W = T.sqr(W)

        row_norms = T.sqrt(sq_W.sum(axis=1))
        col_norms = T.sqrt(sq_W.sum(axis=0))

        row_norms_min = row_norms.min()
        row_norms_min.__doc__ = ("The smallest norm of any row of the "
                                 "weight matrix W. This is a measure of the "
                                 "least influence any visible unit has.")

        rval = OrderedDict([('row_norms_min',  row_norms_min),
                            ('row_norms_mean', row_norms.mean()),
                            ('row_norms_max',  row_norms.max()),
                            ('col_norms_min',  col_norms.min()),
                            ('col_norms_mean', col_norms.mean()),
                            ('col_norms_max',  col_norms.max()), ])

        if (state is not None) or (state_below is not None):
            if state is None:
                state = self.fprop(state_below)

            P = state
            if self.pool_size == 1:
                vars_and_prefixes = [(P, '')]
            else:
                vars_and_prefixes = [(P, 'p_')]

            for var, prefix in vars_and_prefixes:
                v_max = var.max(axis=0)
                v_min = var.min(axis=0)
                v_mean = var.mean(axis=0)
                v_range = v_max - v_min

                # max_x.mean_u is "the mean over *u*nits of the max over
                # e*x*amples" The x and u are included in the name because
                # otherwise its hard to remember which axis is which when
                # reading the monitor I use inner.outer
                # rather than outer_of_inner or
                # something like that because I want mean_x.* to appear next to
                # each other in the alphabetical list, as these are commonly
                # plotted together
                for key, val in [('max_x.max_u', v_max.max()),
                                 ('max_x.mean_u', v_max.mean()),
                                 ('max_x.min_u', v_max.min()),
                                 ('min_x.max_u', v_min.max()),
                                 ('min_x.mean_u', v_min.mean()),
                                 ('min_x.min_u', v_min.min()),
                                 ('range_x.max_u', v_range.max()),
                                 ('range_x.mean_u', v_range.mean()),
                                 ('range_x.min_u', v_range.min()),
                                 ('mean_x.max_u', v_mean.max()),
                                 ('mean_x.mean_u', v_mean.mean()),
                                 ('mean_x.min_u', v_mean.min())]:
                    rval[prefix+key] = val

        return rval

    @functools.wraps(Layer.fprop)
    def fprop(self, state_below):

        self.input_space.validate(state_below)

        if self.requires_reformat:
            state_below = self.input_space.format_as(state_below,
                                                     self.desired_space)

        z = self.transformer.lmul(state_below) + self.b

        z = T.switch(z > 0., z + self.ofs, z)


        if not hasattr(self, 'randomize_pools'):
            self.randomize_pools = False

        if not hasattr(self, 'pool_stride'):
            self.pool_stride = self.pool_size

        if self.randomize_pools:
            z = T.dot(z, self.permute)

        if not hasattr(self, 'min_zero'):
            self.min_zero = False

        if self.min_zero:
            p = 0.
        else:
            p = None

        last_start = self.detector_layer_dim - self.pool_size
        for i in xrange(self.pool_size):
            cur = z[:, i:last_start+i+1:self.pool_stride]
            if p is None:
                p = cur
            else:
                p = T.maximum(cur, p)

        p.name = self.layer_name + '_p_'

        return p
예제 #31
0
파일: dbm.py 프로젝트: deigen/pylearn
class BinaryVectorMaxPool(HiddenLayer):
    """
        A hidden layer that does max-pooling on binary vectors.
        It has two sublayers, the detector layer and the pooling
        layer. The detector layer is its downward state and the pooling
        layer is its upward state.

        TODO: this layer uses (pooled, detector) as its total state,
              which can be confusing when listing all the states in
              the network left to right. Change this and
              pylearn2.expr.probabilistic_max_pooling to use
              (detector, pooled)
    """

    def __init__(self,
             detector_layer_dim,
            pool_size,
            layer_name,
            irange = None,
            sparse_init = None,
            include_prob = 1.0,
            init_bias = 0.):
        """

            include_prob: probability of including a weight element in the set
                    of weights initialized to U(-irange, irange). If not included
                    it is initialized to 0.

        """
        self.__dict__.update(locals())
        del self.self

        self.b = sharedX( np.zeros((self.detector_layer_dim,)) + init_bias, name = layer_name + '_b')

    def set_input_space(self, space):
        """ Note: this resets parameters! """

        self.input_space = space

        if isinstance(space, VectorSpace):
            self.requires_reformat = False
            self.input_dim = space.dim
        else:
            self.requires_reformat = True
            self.input_dim = space.get_total_dimension()
            self.desired_space = VectorSpace(self.input_dim)


        if not (self.detector_layer_dim % self.pool_size == 0):
            raise ValueError("detector_layer_dim = %d, pool_size = %d. Should be divisible but remainder is %d" %
                    (self.detector_layer_dim, self.pool_size, self.detector_layer_dim % self.pool_size))

        self.h_space = VectorSpace(self.detector_layer_dim)
        self.pool_layer_dim = self.detector_layer_dim / self.pool_size
        self.output_space = VectorSpace(self.pool_layer_dim)

        rng = self.dbm.rng
        if self.irange is not None:
            assert self.sparse_init is None
            W = rng.uniform(-self.irange,
                                 self.irange,
                                 (self.input_dim, self.detector_layer_dim)) * \
                    (rng.uniform(0.,1., (self.input_dim, self.detector_layer_dim))
                     < self.include_prob)
        else:
            assert self.sparse_init is not None
            W = np.zeros((self.input_dim, self.detector_layer_dim))
            for i in xrange(self.detector_layer_dim):
                for j in xrange(self.sparse_init):
                    idx = rng.randint(0, self.input_dim)
                    while W[idx, i] != 0:
                        idx = rng.randint(0, self.input_dim)
                    W[idx, i] = rng.randn()

        W = sharedX(W)
        W.name = self.layer_name + '_W'

        self.transformer = MatrixMul(W)

        W ,= self.transformer.get_params()
        assert W.name is not None

    def get_total_state_space(self):
        return CompositeSpace((self.output_space, self.h_space))

    def get_params(self):
        assert self.b.name is not None
        W ,= self.transformer.get_params()
        assert W.name is not None
        return self.transformer.get_params().union([self.b])

    def get_weight_decay(self, coeff):
        if isinstance(coeff, str):
            coeff = float(coeff)
        assert isinstance(coeff, float)
        W ,= self.transformer.get_params()
        return coeff * T.sqr(W).sum()

    def get_weights(self):
        if self.requires_reformat:
            # This is not really an unimplemented case.
            # We actually don't know how to format the weights
            # in design space. We got the data in topo space
            # and we don't have access to the dataset
            raise NotImplementedError()
        W ,= self.transformer.get_params()
        return W.get_value()

    def set_weights(self, weights):
        W, = self.transformer.get_params()
        W.set_value(weights)

    def set_biases(self, biases):
        self.b.set_value(biases)

    def get_biases(self):
        return self.b.get_value()

    def get_weights_format(self):
        return ('v', 'h')

    def get_weights_view_shape(self):
        total = self.detector_layer_dim
        cols = self.pool_size
        if cols == 1:
            # Let the PatchViewer decidew how to arrange the units
            # when they're not pooled
            raise NotImplementedError()
        # When they are pooled, make each pooling unit have one row
        rows = total / cols
        return rows, cols


    def get_weights_topo(self):

        if not isinstance(self.input_space, Conv2DSpace):
            raise NotImplementedError()

        W ,= self.transformer.get_params()

        W = W.T

        W = W.reshape((self.detector_layer_dim, self.input_space.shape[0],
            self.input_space.shape[1], self.input_space.nchannels))

        W = Conv2DSpace.convert(W, self.input_space.axes, ('b', 0, 1, 'c'))

        return function([], W)()

    def upward_state(self, total_state):
        p,h = total_state
        self.h_space.validate(h)
        self.output_space.validate(p)
        return p

    def downward_state(self, total_state):
        p,h = total_state
        return h

    def get_monitoring_channels_from_state(self, state):

        P, H = state

        rval ={}

        if self.pool_size == 1:
            vars_and_prefixes = [ (P,'') ]
        else:
            vars_and_prefixes = [ (P, 'p_'), (H, 'h_') ]

        for var, prefix in vars_and_prefixes:
            v_max = var.max(axis=0)
            v_min = var.min(axis=0)
            v_mean = var.mean(axis=0)
            v_range = v_max - v_min

            for key, val in [
                    ('max_max', v_max.max()),
                    ('max_mean', v_max.mean()),
                    ('max_min', v_max.min()),
                    ('min_max', v_min.max()),
                    ('min_mean', v_min.mean()),
                    ('min_max', v_min.max()),
                    ('range_max', v_range.max()),
                    ('range_mean', v_range.mean()),
                    ('range_min', v_range.min()),
                    ('mean_max', v_mean.max()),
                    ('mean_mean', v_mean.mean()),
                    ('mean_min', v_mean.min())
                    ]:
                rval[prefix+key] = val

        return rval


    def get_l1_act_cost(self, state, target, coeff, eps = None):
        rval = 0.

        P, H = state
        self.output_space.validate(P)
        self.h_space.validate(H)


        if self.pool_size == 1:
            # If the pool size is 1 then pools = detectors
            # and we should not penalize pools and detectors separately
            assert len(state) == 2
            assert isinstance(target, float)
            assert isinstance(coeff, float)
            _, state = state
            state = [state]
            target = [target]
            coeff = [coeff]
            if eps is None:
                eps = [0.]
            else:
                eps = [eps]
        else:
            assert all([len(elem) == 2 for elem in [state, target, coeff]])
            if eps is None:
                eps = [0., 0.]
            if target[1] < target[0]:
                warnings.warn("Do you really want to regularize the detector units to be sparser than the pooling units?")

        for s, t, c, e in safe_zip(state, target, coeff, eps):
            assert all([isinstance(elem, float) for elem in [t, c, e]])
            if c == 0.:
                continue
            m = s.mean(axis=0)
            assert m.ndim == 1
            rval += T.maximum(abs(m-t)-e,0.).mean()*c

        return rval

    def sample(self, state_below = None, state_above = None,
            layer_above = None,
            theano_rng = None):

        if theano_rng is None:
            raise ValueError("theano_rng is required; it just defaults to None so that it may appear after layer_above / state_above in the list.")

        if state_above is not None:
            msg = layer_above.downward_message(state_above)
        else:
            msg = None

        if self.requires_reformat:
            state_below = self.input_space.format_as(state_below, self.desired_space)

        z = self.transformer.lmul(state_below) + self.b
        p, h, p_sample, h_sample = max_pool_channels(z,
                self.pool_size, msg, theano_rng)

        return p_sample, h_sample

    def downward_message(self, downward_state):
        rval = self.transformer.lmul_T(downward_state)

        if self.requires_reformat:
            rval = self.desired_space.format_as(rval, self.input_space)

        return rval

    def make_state(self, num_examples, numpy_rng):
        """ Returns a shared variable containing an actual state
           (not a mean field state) for this variable.
        """

        t1 = time.time()

        empty_input = self.h_space.get_origin_batch(num_examples)
        h_state = sharedX(empty_input)

        default_z = T.zeros_like(h_state) + self.b

        theano_rng = MRG_RandomStreams(numpy_rng.randint(2 ** 16))

        p_exp, h_exp, p_sample, h_sample = max_pool_channels(
                z = default_z,
                pool_size = self.pool_size,
                theano_rng = theano_rng)

        assert h_sample.dtype == default_z.dtype

        p_state = sharedX( self.output_space.get_origin_batch(
            num_examples))

        t2 = time.time()

        f = function([], updates = {
            p_state : p_sample,
            h_state : h_sample
            })

        t3 = time.time()

        f()

        t4 = time.time()

        print str(self)+'.make_state took',t4-t1
        print '\tcompose time:',t2-t1
        print '\tcompile time:',t3-t2
        print '\texecute time:',t4-t3

        p_state.name = 'p_sample_shared'
        h_state.name = 'h_sample_shared'

        return p_state, h_state

    def expected_energy_term(self, state, average, state_below, average_below):

        self.input_space.validate(state_below)

        if self.requires_reformat:
            if not isinstance(state_below, tuple):
                for sb in get_debug_values(state_below):
                    if sb.shape[0] != self.dbm.batch_size:
                        raise ValueError("self.dbm.batch_size is %d but got shape of %d" % (self.dbm.batch_size, sb.shape[0]))
                    assert reduce(lambda x,y: x * y, sb.shape[1:]) == self.input_dim

            state_below = self.input_space.format_as(state_below, self.desired_space)

        downward_state = self.downward_state(state)
        self.h_space.validate(downward_state)

        # Energy function is linear so it doesn't matter if we're averaging or not
        # Specifically, our terms are -u^T W d - b^T d where u is the upward state of layer below
        # and d is the downward state of this layer

        bias_term = T.dot(downward_state, self.b)
        weights_term = (self.transformer.lmul(state_below) * downward_state).sum(axis=1)

        rval = -bias_term - weights_term

        assert rval.ndim == 1

        return rval

    def mf_update(self, state_below, state_above, layer_above = None, double_weights = False, iter_name = None):

        self.input_space.validate(state_below)

        if self.requires_reformat:
            if not isinstance(state_below, tuple):
                for sb in get_debug_values(state_below):
                    if sb.shape[0] != self.dbm.batch_size:
                        raise ValueError("self.dbm.batch_size is %d but got shape of %d" % (self.dbm.batch_size, sb.shape[0]))
                    assert reduce(lambda x,y: x * y, sb.shape[1:]) == self.input_dim

            state_below = self.input_space.format_as(state_below, self.desired_space)

        if iter_name is None:
            iter_name = 'anon'

        if state_above is not None:
            assert layer_above is not None
            msg = layer_above.downward_message(state_above)
            msg.name = 'msg_from_'+layer_above.layer_name+'_to_'+self.layer_name+'['+iter_name+']'
        else:
            msg = None

        if double_weights:
            state_below = 2. * state_below
            state_below.name = self.layer_name + '_'+iter_name + '_2state'
        z = self.transformer.lmul(state_below) + self.b
        if self.layer_name is not None and iter_name is not None:
            z.name = self.layer_name + '_' + iter_name + '_z'
        p,h = max_pool_channels(z, self.pool_size, msg)

        p.name = self.layer_name + '_p_' + iter_name
        h.name = self.layer_name + '_h_' + iter_name

        return p, h
예제 #32
0
class MLP_GatedRectifier(Layer):
    """
    A hidden layer that does max pooling over groups of linear
    units. If you use this code in a research project, please
    cite

    "Maxout Networks" Ian J. Goodfellow, David Warde-Farley,
    Mehdi Mirza, Aaron Courville, and Yoshua Bengio. arXiv 2013
    """
    def __init__(
        self,
        layer_name,
        num_units,
        num_pieces,
        gater=None,
        pool_stride=None,
        randomize_pools=False,
        irange=None,
        sparse_init=None,
        sparse_stdev=1.,
        include_prob=1.0,
        init_bias=0.,
        W_lr_scale=None,
        b_lr_scale=None,
        max_col_norm=None,
        max_row_norm=None,
        mask_weights=None,
    ):
        """
            layer_name: A name for this layer that will be prepended to
                        monitoring channels related to this layer.
            num_units: The number of maxout units to use in this layer.
            num_pieces: The number of linear pieces to use in each maxout
                        unit.
            pool_stride: The distance between the start of each max pooling
                        region. Defaults to num_pieces, which makes the
                        pooling regions disjoint. If set to a smaller number,
                        can do overlapping pools.
            randomize_pools: Does max pooling over randomized subsets of
                        the linear responses, rather than over sequential
                        subsets.
            irange: if specified, initializes each weight randomly in
                U(-irange, irange)
            sparse_init: if specified, irange must not be specified.
                        This is an integer specifying how many weights to make
                        non-zero. All non-zero weights will be initialized
                        randomly in N(0, sparse_stdev^2)
            include_prob: probability of including a weight element in the set
               of weights initialized to U(-irange, irange). If not included
               a weight is initialized to 0. This defaults to 1.
            init_bias: All biases are initialized to this number
            W_lr_scale: The learning rate on the weights for this layer is
                multiplied by this scaling factor
            b_lr_scale: The learning rate on the biases for this layer is
                multiplied by this scaling factor
            max_col_norm: The norm of each column of the weight matrix is
                constrained to have at most this norm. If unspecified, no
                constraint. Constraint is enforced by re-projection (if
                necessary) at the end of each update.
            max_row_norm: Like max_col_norm, but applied to the rows.
            mask_weights: A binary matrix multiplied by the weights after each
                         update, allowing you to restrict their connectivity.
            min_zero: If true, includes a zero in the set we take a max over
                    for each maxout unit. This is equivalent to pooling over
                    rectified linear units.
        """

        detector_layer_dim = num_units * num_pieces
        pool_size = num_pieces

        if pool_stride is None:
            pool_stride = pool_size

        self.__dict__.update(locals())
        del self.self

        self.b = sharedX(np.zeros((self.detector_layer_dim, )) + init_bias,
                         name=layer_name + '_b')

        if max_row_norm is not None:
            raise NotImplementedError()

    def get_lr_scalers(self):

        if not hasattr(self, 'W_lr_scale'):
            self.W_lr_scale = None

        if not hasattr(self, 'b_lr_scale'):
            self.b_lr_scale = None

        rval = OrderedDict()

        if self.W_lr_scale is not None:
            W, = self.transformer.get_params()
            rval[W] = self.W_lr_scale

        if self.b_lr_scale is not None:
            rval[self.b] = self.b_lr_scale

        return rval

    def set_input_space(self, space):
        """ Note: this resets parameters! """

        self.input_space = space

        assert self.gater.get_input_space() == space

        if isinstance(space, VectorSpace):
            self.requires_reformat = False
            self.input_dim = space.dim
        else:
            self.requires_reformat = True
            self.input_dim = space.get_total_dimension()
            self.desired_space = VectorSpace(self.input_dim)

        if not ((self.detector_layer_dim - self.pool_size) % self.pool_stride
                == 0):
            if self.pool_stride == self.pool_size:
                raise ValueError(
                    "detector_layer_dim = %d, pool_size = %d. Should be divisible but remainder is %d"
                    % (self.detector_layer_dim, self.pool_size,
                       self.detector_layer_dim % self.pool_size))
            raise ValueError()

        self.h_space = VectorSpace(self.detector_layer_dim)
        self.pool_layer_dim = (self.detector_layer_dim -
                               self.pool_size) / self.pool_stride + 1
        self.output_space = VectorSpace(self.pool_layer_dim)

        rng = self.mlp.rng
        if self.irange is not None:
            assert self.sparse_init is None
            W = rng.uniform(-self.irange,
                            self.irange,
                            (self.input_dim, self.detector_layer_dim)) * \
                (rng.uniform(0.,1., (self.input_dim, self.detector_layer_dim))
                 < self.include_prob)
        else:
            assert self.sparse_init is not None
            W = np.zeros((self.input_dim, self.detector_layer_dim))

            def mask_rejects(idx, i):
                if self.mask_weights is None:
                    return False
                return self.mask_weights[idx, i] == 0.

            for i in xrange(self.detector_layer_dim):
                assert self.sparse_init <= self.input_dim
                for j in xrange(self.sparse_init):
                    idx = rng.randint(0, self.input_dim)
                    while W[idx, i] != 0 or mask_rejects(idx, i):
                        idx = rng.randint(0, self.input_dim)
                    W[idx, i] = rng.randn()
            W *= self.sparse_stdev

        W = sharedX(W)
        W.name = self.layer_name + '_W'

        self.transformer = MatrixMul(W)

        W, = self.transformer.get_params()
        assert W.name is not None

        if not hasattr(self, 'randomize_pools'):
            self.randomize_pools = False

        if self.randomize_pools:
            permute = np.zeros(
                (self.detector_layer_dim, self.detector_layer_dim))
            for j in xrange(self.detector_layer_dim):
                i = rng.randint(self.detector_layer_dim)
                permute[i, j] = 1
            self.permute = sharedX(permute)

        if self.mask_weights is not None:
            expected_shape = (self.input_dim, self.detector_layer_dim)
            if expected_shape != self.mask_weights.shape:
                raise ValueError("Expected mask with shape " +
                                 str(expected_shape) + " but got " +
                                 str(self.mask_weights.shape))
            self.mask = sharedX(self.mask_weights)

    def censor_updates(self, updates):

        # Patch old pickle files
        if not hasattr(self, 'mask_weights'):
            self.mask_weights = None

        if self.mask_weights is not None:
            W, = self.transformer.get_params()
            if W in updates:
                updates[W] = updates[W] * self.mask

        if self.max_col_norm is not None:
            assert self.max_row_norm is None
            W, = self.transformer.get_params()
            if W in updates:
                updated_W = updates[W]
                col_norms = T.sqrt(T.sum(T.sqr(updated_W), axis=0))
                desired_norms = T.clip(col_norms, 0, self.max_col_norm)
                updates[W] = updated_W * (desired_norms / (1e-7 + col_norms))

    def get_params(self):
        assert self.b.name is not None
        W, = self.transformer.get_params()
        assert W.name is not None
        rval = self.transformer.get_params()
        assert not isinstance(rval, set)
        rval = list(rval)
        assert self.b not in rval
        rval.append(self.b)
        return rval + self.gater.get_params()

    def get_weight_decay(self, coeff):
        if isinstance(coeff, str):
            coeff = float(coeff)
        assert isinstance(coeff, float) or hasattr(coeff, 'dtype')
        W, = self.transformer.get_params()
        return coeff * T.sqr(W).sum()

    def get_l1_weight_decay(self, coeff):
        if isinstance(coeff, str):
            coeff = float(coeff)
        assert isinstance(coeff, float) or hasattr(coeff, 'dtype')
        W, = self.transformer.get_params()
        return coeff * T.abs(W).sum()

    def get_weights(self):

        print 'Which weights? '
        print 'g) gater'
        print 'm) main network'

        x = raw_input()

        if x == 'g':
            return self.gater.get_weights()
        assert x == 'm'

        if self.requires_reformat:
            # This is not really an unimplemented case.
            # We actually don't know how to format the weights
            # in design space. We got the data in topo space
            # and we don't have access to the dataset
            raise NotImplementedError()
        W, = self.transformer.get_params()
        W = W.get_value()

        if not hasattr(self, 'randomize_pools'):
            self.randomize_pools = False

        if self.randomize_pools:
            warnings.warn(
                "randomize_pools makes get_weights multiply by the permutation matrix. "
                "If you call set_weights(W) and then call get_weights(), the return value will "
                "WP not W.")
            P = self.permute.get_value()
            return np.dot(W, P)

        print W.shape

        return W

    def set_weights(self, weights):
        W, = self.transformer.get_params()
        W.set_value(weights)

    def set_biases(self, biases):
        self.b.set_value(biases)

    def get_biases(self):
        return self.b.get_value()

    def get_weights_format(self):
        return ('v', 'h')

    def get_weights_view_shape(self):
        total = self.detector_layer_dim
        cols = self.pool_size
        if cols == 1:
            # Let the PatchViewer decide how to arrange the units
            # when they're not pooled
            raise NotImplementedError()
        # When they are pooled, make each pooling unit have one row
        rows = total // cols
        if rows * cols < total:
            rows = rows + 1
        print total, rows, cols
        return rows, cols

    def get_weights_topo(self):

        if not isinstance(self.input_space, Conv2DSpace):
            raise NotImplementedError()

        # There was an implementation of this, but it was broken
        raise NotImplementedError()

    def get_monitoring_channels(self):

        W, = self.transformer.get_params()

        assert W.ndim == 2

        sq_W = T.sqr(W)

        row_norms = T.sqrt(sq_W.sum(axis=1))
        col_norms = T.sqrt(sq_W.sum(axis=0))

        return OrderedDict([
            ('row_norms_min', row_norms.min()),
            ('row_norms_mean', row_norms.mean()),
            ('row_norms_max', row_norms.max()),
            ('col_norms_min', col_norms.min()),
            ('col_norms_mean', col_norms.mean()),
            ('col_norms_max', col_norms.max()),
        ])

    def get_monitoring_channels_from_state(self, state):

        P = state

        rval = OrderedDict()

        if self.pool_size == 1:
            vars_and_prefixes = [(P, '')]
        else:
            vars_and_prefixes = [(P, 'p_')]

        for var, prefix in vars_and_prefixes:
            v_max = var.max(axis=0)
            v_min = var.min(axis=0)
            v_mean = var.mean(axis=0)
            v_range = v_max - v_min

            # max_x.mean_u is "the mean over *u*nits of the max over e*x*amples"
            # The x and u are included in the name because otherwise its hard
            # to remember which axis is which when reading the monitor
            # I use inner.outer rather than outer_of_inner or something like that
            # because I want mean_x.* to appear next to each other in the alphabetical
            # list, as these are commonly plotted together
            for key, val in [('max_x.max_u', v_max.max()),
                             ('max_x.mean_u', v_max.mean()),
                             ('max_x.min_u', v_max.min()),
                             ('min_x.max_u', v_min.max()),
                             ('min_x.mean_u', v_min.mean()),
                             ('min_x.min_u', v_min.min()),
                             ('range_x.max_u', v_range.max()),
                             ('range_x.mean_u', v_range.mean()),
                             ('range_x.min_u', v_range.min()),
                             ('mean_x.max_u', v_mean.max()),
                             ('mean_x.mean_u', v_mean.mean()),
                             ('mean_x.min_u', v_mean.min())]:
                rval[prefix + key] = val

        return rval

    def fprop(self, state_below):

        self.input_space.validate(state_below)

        if self.requires_reformat:
            if not isinstance(state_below, tuple):
                for sb in get_debug_values(state_below):
                    if sb.shape[0] != self.dbm.batch_size:
                        raise ValueError(
                            "self.dbm.batch_size is %d but got shape of %d" %
                            (self.dbm.batch_size, sb.shape[0]))
                    assert reduce(lambda x, y: x * y,
                                  sb.shape[1:]) == self.input_dim

            state_below = self.input_space.format_as(state_below,
                                                     self.desired_space)

        z = self.transformer.lmul(state_below) + self.b

        if not hasattr(self, 'randomize_pools'):
            self.randomize_pools = False

        if not hasattr(self, 'pool_stride'):
            self.pool_stride = self.pool_size

        if self.randomize_pools:
            z = T.dot(z, self.permute)

        if not hasattr(self, 'min_zero'):
            self.min_zero = False

        z = T.clip(z, 0., 1e30)
        p = None

        gate = self.gater.fprop(state_below)

        last_start = self.detector_layer_dim - self.pool_size
        for i in xrange(self.pool_size):
            cur = z[:, i:last_start + i +
                    1:self.pool_stride] * gate[:, i].dimshuffle(0, 'x')
            if p is None:
                p = cur
            else:
                p = cur + p

        p.name = self.layer_name + '_p_'

        return p
예제 #33
0
파일: rbm.py 프로젝트: mruan/pylearn2
class RBM(Block, Model):
    """
    A base interface for RBMs, implementing the binary-binary case.

    """
    def __init__(self, nvis = None, nhid = None,
            vis_space = None,
            hid_space = None,
            transformer = None,
            irange=0.5, rng=None, init_bias_vis = None,
            init_bias_vis_marginals = None, init_bias_hid=0.0,
            base_lr = 1e-3, anneal_start = None, nchains = 100, sml_gibbs_steps = 1,
            random_patches_src = None,
            monitor_reconstruction = False):

        """
        Construct an RBM object.

        Parameters
        ----------
        nvis : int
            Number of visible units in the model.
            (Specifying this implies that the model acts on a vector,
            i.e. it sets vis_space = pylearn2.space.VectorSpace(nvis) )
        nhid : int
            Number of hidden units in the model.
            (Specifying this implies that the model acts on a vector)
        vis_space:
            A pylearn2.space.Space object describing what kind of vector
            space the RBM acts on. Don't specify if you used nvis / hid
        hid_space:
            A pylearn2.space.Space object describing what kind of vector
            space the RBM's hidden units live in. Don't specify if you used
            nvis / nhid
        init_bias_vis_marginals: either None, or a Dataset to use to initialize
            the visible biases to the inverse sigmoid of the data marginals
        irange : float, optional
            The size of the initial interval around 0 for weights.
        rng : RandomState object or seed
            NumPy RandomState object to use when initializing parameters
            of the model, or (integer) seed to use to create one.
        init_bias_vis : array_like, optional
            Initial value of the visible biases, broadcasted as necessary.
        init_bias_hid : array_like, optional
            initial value of the hidden biases, broadcasted as necessary.
        monitor_reconstruction : if True, will request a monitoring channel to monitor
            reconstruction error
        random_patches_src: Either None, or a Dataset from which to draw random patches
            in order to initialize the weights. Patches will be multiplied by irange

        Parameters for default SML learning rule:

            base_lr : the base learning rate
            anneal_start : number of steps after which to start annealing on a 1/t schedule
            nchains: number of negative chains
            sml_gibbs_steps: number of gibbs steps to take per update

        """

        Model.__init__(self)
        Block.__init__(self)

        if init_bias_vis_marginals is not None:
            assert init_bias_vis is None
            X = init_bias_vis_marginals.X
            assert X.min() >= 0.0
            assert X.max() <= 1.0

            marginals = X.mean(axis=0)

            #rescale the marginals a bit to avoid NaNs
            init_bias_vis = inverse_sigmoid_numpy(.01 + .98 * marginals)


        if init_bias_vis is None:
            init_bias_vis = 0.0

        if rng is None:
            # TODO: global rng configuration stuff.
            rng = numpy.random.RandomState(1001)
        self.rng = rng

        if vis_space is None:
            #if we don't specify things in terms of spaces and a transformer,
            #assume dense matrix multiplication and work off of nvis, nhid
            assert hid_space is None
            assert transformer is None or isinstance(transformer,MatrixMul)
            assert nvis is not None
            assert nhid is not None

            if transformer is None:
                if random_patches_src is None:
                    W = rng.uniform(-irange, irange, (nvis, nhid))
                else:
                    if hasattr(random_patches_src, '__array__'):
                        W = irange * random_patches_src.T
                        assert W.shape == (nvis, nhid)
                    else:
                        #assert type(irange) == type(0.01)
                        #assert irange == 0.01
                        W = irange * random_patches_src.get_batch_design(nhid).T

                self.transformer = MatrixMul(  sharedX(
                        W,
                        name='W',
                        borrow=True
                    )
                )
            else:
                self.transformer = transformer

            self.vis_space = VectorSpace(nvis)
            self.hid_space = VectorSpace(nhid)
        else:
            assert hid_space is not None
            assert transformer is not None
            assert nvis is None
            assert nhid is None

            self.vis_space = vis_space
            self.hid_space = hid_space
            self.transformer = transformer


        try:
            b_vis = self.vis_space.get_origin()
            b_vis += init_bias_vis
        except ValueError:
            raise ValueError("bad shape or value for init_bias_vis")
        self.bias_vis = sharedX(b_vis, name='bias_vis', borrow=True)

        try:
            b_hid = self.hid_space.get_origin()
            b_hid += init_bias_hid
        except ValueError:
            raise ValueError('bad shape or value for init_bias_hid')
        self.bias_hid = sharedX(b_hid, name='bias_hid', borrow=True)

        self.random_patches_src = random_patches_src
        self.register_names_to_del(['random_patches_src'])


        self.__dict__.update(nhid=nhid, nvis=nvis)
        self._params = safe_union(self.transformer.get_params(), [self.bias_vis, self.bias_hid])

        self.base_lr = base_lr
        self.anneal_start = anneal_start
        self.nchains = nchains
        self.sml_gibbs_steps = sml_gibbs_steps

    def get_input_dim(self):
        if not isinstance(self.vis_space, VectorSpace):
            raise TypeError("Can't describe "+str(type(self.vis_space))+" as a dimensionality number.")
        return self.vis_space.dim

    def get_output_dim(self):
        if not isinstance(self.hid_space, VectorSpace):
            raise TypeError("Can't describe "+str(type(self.hid_space))+" as a dimensionality number.")
        return self.hid_space.dim

    def get_input_space(self):
        return self.vis_space

    def get_output_space(self):
        return self.hid_space

    def get_params(self):
        return [param for param in self._params]

    def get_weights(self, borrow=False):

        weights ,= self.transformer.get_params()

        return weights.get_value(borrow=borrow)

    def get_weights_topo(self):
        return self.transformer.get_weights_topo()

    def get_weights_format(self):
        return ['v', 'h']


    def get_monitoring_channels(self, V, Y = None):

        theano_rng = RandomStreams(42)

        #TODO: re-enable this in the case where self.transformer
        #is a matrix multiply
        #norms = theano_norms(self.weights)

        H = self.mean_h_given_v(V)

        h = H.mean(axis=0)

        return { 'bias_hid_min' : T.min(self.bias_hid),
                 'bias_hid_mean' : T.mean(self.bias_hid),
                 'bias_hid_max' : T.max(self.bias_hid),
                 'bias_vis_min' : T.min(self.bias_vis),
                 'bias_vis_mean' : T.mean(self.bias_vis),
                 'bias_vis_max': T.max(self.bias_vis),
                 'h_min' : T.min(h),
                 'h_mean': T.mean(h),
                 'h_max' : T.max(h),
                 #'W_min' : T.min(self.weights),
                 #'W_max' : T.max(self.weights),
                 #'W_norms_min' : T.min(norms),
                 #'W_norms_max' : T.max(norms),
                 #'W_norms_mean' : T.mean(norms),
                'reconstruction_error' : self.reconstruction_error(V, theano_rng) }

    def ml_gradients(self, pos_v, neg_v):
        """
        Get the contrastive gradients given positive and negative phase
        visible units.

        Parameters
        ----------
        pos_v : tensor_like
            Theano symbolic representing a minibatch on the visible units,
            with the first dimension indexing training examples and the second
            indexing data dimensions (usually actual training data).
        neg_v : tensor_like
            Theano symbolic representing a minibatch on the visible units,
            with the first dimension indexing training examples and the second
            indexing data dimensions (usually reconstructions of the data or
            sampler particles from a persistent Markov chain).

        Returns
        -------
        grads : list
            List of Theano symbolic variables representing gradients with
            respect to model parameters, in the same order as returned by
            `params()`.

        Notes
        -----
        `pos_v` and `neg_v` need not have the same first dimension, i.e.
        minibatch size.
        """

        # taking the mean over each term independently allows for different
        # mini-batch sizes in the positive and negative phase.
        ml_cost = (self.free_energy_given_v(pos_v).mean() -
                   self.free_energy_given_v(neg_v).mean())

        grads = tensor.grad(ml_cost, self.get_params(),
                            consider_constant=[pos_v, neg_v])

        return grads


    def train_batch(self, dataset, batch_size):
        """ A default learning rule based on SML """
        self.learn_mini_batch(dataset.get_batch_design(batch_size))
        return True

    def learn_mini_batch(self, X):
        """ A default learning rule based on SML """

        if not hasattr(self, 'learn_func'):
            self.redo_theano()

        rval =  self.learn_func(X)

        return rval

    def redo_theano(self):
        """ Compiles the theano function for the default learning rule """

        init_names = dir(self)

        minibatch = tensor.matrix()

        optimizer = _SGDOptimizer(self, self.base_lr, self.anneal_start)

        sampler = sampler = BlockGibbsSampler(self, 0.5 + np.zeros((self.nchains, self.get_input_dim())), self.rng,
                                                  steps= self.sml_gibbs_steps)


        updates = training_updates(visible_batch=minibatch, model=self,
                                            sampler=sampler, optimizer=optimizer)

        self.learn_func = theano.function([minibatch], updates=updates)

        final_names = dir(self)

        self.register_names_to_del([name for name in final_names if name not in init_names])

    def gibbs_step_for_v(self, v, rng):
        """
        Do a round of block Gibbs sampling given visible configuration

        Parameters
        ----------
        v  : tensor_like
            Theano symbolic representing the hidden unit states for a batch of
            training examples (or negative phase particles), with the first
            dimension indexing training examples and the second indexing data
            dimensions.
        rng : RandomStreams object
            Random number generator to use for sampling the hidden and visible
            units.

        Returns
        -------
        v_sample : tensor_like
            Theano symbolic representing the new visible unit state after one
            round of Gibbs sampling.
        locals : dict
            Contains the following auxiliary state as keys (all symbolics
            except shape tuples):
             * `h_mean`: the returned value from `mean_h_given_v`
             * `h_mean_shape`: shape tuple indicating the size of `h_mean` and
               `h_sample`
             * `h_sample`: the stochastically sampled hidden units
             * `v_mean_shape`: shape tuple indicating the shape of `v_mean` and
               `v_sample`
             * `v_mean`: the returned value from `mean_v_given_h`
             * `v_sample`: the stochastically sampled visible units
        """
        h_mean = self.mean_h_given_v(v)
        assert h_mean.type.dtype == v.type.dtype
        # For binary hidden units
        # TODO: factor further to extend to other kinds of hidden units
        #       (e.g. spike-and-slab)
        h_sample = rng.binomial(size = h_mean.shape, n = 1 , p = h_mean, dtype=h_mean.type.dtype)
        assert h_sample.type.dtype == v.type.dtype
        # v_mean is always based on h_sample, not h_mean, because we don't
        # want h transmitting more than one bit of information per unit.
        v_mean = self.mean_v_given_h(h_sample)
        assert v_mean.type.dtype == v.type.dtype
        v_sample = self.sample_visibles([v_mean], v_mean.shape, rng)
        assert v_sample.type.dtype == v.type.dtype
        return v_sample, locals()

    def sample_visibles(self, params, shape, rng):
        """
        Stochastically sample the visible units given hidden unit
        configurations for a set of training examples.

        Parameters
        ----------
        params : list
            List of the necessary parameters to sample :math:`p(v|h)`. In the
            case of a binary-binary RBM this is a single-element list
            containing the symbolic representing :math:`p(v|h)`, as returned
            by `mean_v_given_h`.

        Returns
        -------
        vprime : tensor_like
            Theano symbolic representing stochastic samples from :math:`p(v|h)`
        """
        v_mean = params[0]
        return as_floatX(rng.uniform(size=shape) < v_mean)

    def input_to_h_from_v(self, v):
        """
        Compute the affine function (linear map plus bias) that serves as
        input to the hidden layer in an RBM.

        Parameters
        ----------
        v  : tensor_like or list of tensor_likes
            Theano symbolic (or list thereof) representing the one or several
            minibatches on the visible units, with the first dimension indexing
            training examples and the second indexing data dimensions.

        Returns
        -------
        a : tensor_like or list of tensor_likes
            Theano symbolic (or list thereof) representing the input to each
            hidden unit for each training example.
        """

        if isinstance(v, tensor.Variable):
            return self.bias_hid + self.transformer.lmul(v)
        else:
            return [self.input_to_h_from_v(vis) for vis in v]

    def input_to_v_from_h(self, h):
        """
        Compute the affine function (linear map plus bias) that serves as
        input to the visible layer in an RBM.

        Parameters
        ----------
        h  : tensor_like or list of tensor_likes
            Theano symbolic (or list thereof) representing the one or several
            minibatches on the hidden units, with the first dimension indexing
            training examples and the second indexing data dimensions.

        Returns
        -------
        a : tensor_like or list of tensor_likes
            Theano symbolic (or list thereof) representing the input to each
            visible unit for each row of h.
        """
        if isinstance(h, tensor.Variable):
            return self.bias_vis + self.transformer.lmul_T(h)
        else:
            return [self.input_to_v_from_h(hid) for hid in h]

    def mean_h_given_v(self, v):
        """
        Compute the mean activation of the hidden units given visible unit
        configurations for a set of training examples.

        Parameters
        ----------
        v  : tensor_like or list of tensor_likes
            Theano symbolic (or list thereof) representing the hidden unit
            states for a batch (or several) of training examples, with the
            first dimension indexing training examples and the second indexing
            data dimensions.

        Returns
        -------
        h : tensor_like or list of tensor_likes
            Theano symbolic (or list thereof) representing the mean
            (deterministic) hidden unit activations given the visible units.
        """
        if isinstance(v, tensor.Variable):
            return nnet.sigmoid(self.input_to_h_from_v(v))
        else:
            return [self.mean_h_given_v(vis) for vis in v]

    def mean_v_given_h(self, h):
        """
        Compute the mean activation of the visibles given hidden unit
        configurations for a set of training examples.

        Parameters
        ----------
        h  : tensor_like or list of tensor_likes
            Theano symbolic (or list thereof) representing the hidden unit
            states for a batch (or several) of training examples, with the
            first dimension indexing training examples and the second indexing
            hidden units.

        Returns
        -------
        vprime : tensor_like or list of tensor_likes
            Theano symbolic (or list thereof) representing the mean
            (deterministic) reconstruction of the visible units given the
            hidden units.
        """
        if isinstance(h, tensor.Variable):
            return nnet.sigmoid(self.input_to_v_from_h(h))
        else:
            return [self.mean_v_given_h(hid) for hid in h]

    def free_energy_given_v(self, v):
        """
        Calculate the free energy of a visible unit configuration by
        marginalizing over the hidden units.

        Parameters
        ----------
        v : tensor_like
            Theano symbolic representing the hidden unit states for a batch of
            training examples, with the first dimension indexing training
            examples and the second indexing data dimensions.

        Returns
        -------
        f : tensor_like
            1-dimensional tensor (vector) representing the free energy
            associated with each row of v.
        """
        sigmoid_arg = self.input_to_h_from_v(v)
        return (-tensor.dot(v, self.bias_vis) -
                 nnet.softplus(sigmoid_arg).sum(axis=1))

    def free_energy(self, V):
        return self.free_energy_given_v(V)


    def free_energy_given_h(self, h):
        """
        Calculate the free energy of a hidden unit configuration by
        marginalizing over the visible units.

        Parameters
        ----------
        h : tensor_like
            Theano symbolic representing the hidden unit states, with the
            first dimension indexing training examples and the second
            indexing data dimensions.

        Returns
        -------
        f : tensor_like
            1-dimensional tensor (vector) representing the free energy
            associated with each row of v.
        """
        sigmoid_arg = self.input_to_v_from_h(h)
        return (-tensor.dot(h, self.bias_hid) -
                nnet.softplus(sigmoid_arg).sum(axis=1))

    def __call__(self, v):
        """
        Forward propagate (symbolic) input through this module, obtaining
        a representation to pass on to layers above.

        This just aliases the `mean_h_given_v()` function for syntactic
        sugar/convenience.
        """
        return self.mean_h_given_v(v)

    def reconstruction_error(self, v, rng):
        """
        Compute the mean-squared error (mean over examples, sum over units)
        across a minibatch after a Gibbs
        step starting from the training data.

        Parameters
        ----------
        v : tensor_like
            Theano symbolic representing the hidden unit states for a batch of
            training examples, with the first dimension indexing training
            examples and the second indexing data dimensions.
        rng : RandomStreams object
            Random number generator to use for sampling the hidden and visible
            units.

        Returns
        -------
        mse : tensor_like
            0-dimensional tensor (essentially a scalar) indicating the mean
            reconstruction error across the minibatch.

        Notes
        -----
        The reconstruction used to assess error samples only the hidden
        units. For the visible units, it uses the conditional mean.
        No sampling of the visible units is done, to reduce noise in the estimate.
        """
        sample, _locals = self.gibbs_step_for_v(v, rng)
        return ((_locals['v_mean'] - v) ** 2).sum(axis=1).mean()
예제 #34
0
class CpuConvMaxout(Layer):
    """
    .. todo::

        WRITEME
    """
    def __init__(self,
                 output_channels,
                 num_pieces,
                 kernel_shape,
                 pool_shape,
                 pool_stride,
                 layer_name,
                 irange=None,
                 border_mode='valid',
                 sparse_init=None,
                 include_prob=1.0,
                 init_bias=0.,
                 W_lr_scale=None,
                 b_lr_scale=None,
                 left_slope=0.0,
                 max_kernel_norm=None,
                 pool_type='max',
                 detector_normalization=None,
                 output_normalization=None,
                 kernel_stride=(1, 1)):
        """
        .. todo::

            WRITEME properly

         output_channels: The number of output channels the layer should have.
         kernel_shape: The shape of the convolution kernel.
         pool_shape: The shape of the spatial max pooling. A two-tuple of ints.
         pool_stride: The stride of the spatial max pooling. Also must be
                      square.
         layer_name: A name for this layer that will be prepended to
                     monitoring channels related to this layer.
         irange: if specified, initializes each weight randomly in
                 U(-irange, irange)
         border_mode: A string indicating the size of the output:
            full - The output is the full discrete linear convolution of the
                   inputs.
            valid - The output consists only of those elements that do not rely
                    on the zero-padding.(Default)
         include_prob: probability of including a weight element in the set
                       of weights initialized to U(-irange, irange). If not
                       included it is initialized to 0.
         init_bias: All biases are initialized to this number
         W_lr_scale: The learning rate on the weights for this layer is
                     multiplied by this scaling factor
         b_lr_scale: The learning rate on the biases for this layer is
                     multiplied by this scaling factor
         left_slope: **TODO**
         max_kernel_norm: If specifed, each kernel is constrained to have at
                          most this norm.
         pool_type: The type of the pooling operation performed the the
                    convolution. Default pooling type is max-pooling.
         detector_normalization, output_normalization:
              if specified, should be a callable object. the state of the
              network is optionally replaced with normalization(state) at each
              of the 3 points in processing:
                  detector: the maxout units can be normalized prior to the
                            spatial pooling
                  output: the output of the layer, after sptial pooling, can
                          be normalized as well
         kernel_stride: The stride of the convolution kernel. A two-tuple of
                        ints.
        """

        #super(ConvRectifiedLinear, self).__init__()

        if (irange is None) and (sparse_init is None):
            raise AssertionError("You should specify either irange or "
                                 "sparse_init when calling the constructor of "
                                 "ConvRectifiedLinear.")
        elif (irange is not None) and (sparse_init is not None):
            raise AssertionError("You should specify either irange or "
                                 "sparse_init when calling the constructor of "
                                 "ConvRectifiedLinear and not both.")

        self.__dict__.update(locals())
        del self.self

    @wraps(Layer.get_lr_scalers)
    def get_lr_scalers(self):

        if not hasattr(self, 'W_lr_scale'):
            self.W_lr_scale = None

        if not hasattr(self, 'b_lr_scale'):
            self.b_lr_scale = None

        rval = OrderedDict()

        if self.W_lr_scale is not None:
            W, = self.transformer.get_params()
            rval[W] = self.W_lr_scale

        if self.b_lr_scale is not None:
            rval[self.b] = self.b_lr_scale

        return rval

    @wraps(Layer.set_input_space)
    def set_input_space(self, space):

        self.input_space = space

        if not isinstance(space, Conv2DSpace):
            raise BadInputSpaceError("ConvRectifiedLinear.set_input_space "
                                     "expected a Conv2DSpace, got " +
                                     str(space) + " of type " +
                                     str(type(space)))

        rng = self.mlp.rng

        if self.border_mode == 'valid':
            output_shape = [
                (self.input_space.shape[0] - self.kernel_shape[0]) /
                self.kernel_stride[0] + 1,
                (self.input_space.shape[1] - self.kernel_shape[1]) /
                self.kernel_stride[1] + 1
            ]
        elif self.border_mode == 'full':
            output_shape = [
                (self.input_space.shape[0] + self.kernel_shape[0]) /
                self.kernel_stride[0] - 1,
                (self.input_space.shape[1] + self.kernel_shape[1]) /
                self.kernel_stride[1] - 1
            ]

        self.detector_space = Conv2DSpace(shape=output_shape,
                                          num_channels=self.output_channels,
                                          axes=('b', 'c', 0, 1))

        if self.irange is not None:
            assert self.sparse_init is None
            self.transformer = conv2d.make_random_conv2D(
                irange=self.irange,
                input_space=self.input_space,
                output_space=self.detector_space,
                kernel_shape=self.kernel_shape,
                batch_size=self.mlp.batch_size,
                subsample=self.kernel_stride,
                border_mode=self.border_mode,
                rng=rng)
        elif self.sparse_init is not None:
            self.transformer = conv2d.make_sparse_random_conv2D(
                num_nonzero=self.sparse_init,
                input_space=self.input_space,
                output_space=self.detector_space,
                kernel_shape=self.kernel_shape,
                batch_size=self.mlp.batch_size,
                subsample=self.kernel_stride,
                border_mode=self.border_mode,
                rng=rng)

        W, = self.transformer.get_params()
        W.name = 'W'
        self.b = sharedX(
            np.zeros(((self.num_pieces * self.output_channels), )) +
            self.init_bias)
        self.b.name = 'b'

        print 'Input shape: ', self.input_space.shape
        print 'Detector space: ', self.detector_space.shape

        assert self.pool_type in ['max', 'mean']

        dummy_batch_size = self.mlp.batch_size
        if dummy_batch_size is None:
            dummy_batch_size = 2
        dummy_detector = sharedX(
            self.detector_space.get_origin_batch(dummy_batch_size))

        #dummy_p = dummy_p.eval()
        self.output_space = Conv2DSpace(shape=[400, 1],
                                        num_channels=self.output_channels,
                                        axes=('b', 'c', 0, 1))

        W = rng.uniform(-self.irange, self.irange,
                        (426, (self.num_pieces * self.output_channels)))
        W = sharedX(W)
        W.name = self.layer_name + "_w"
        self.transformer = MatrixMul(W)

        print 'Output space: ', self.output_space.shape

    @wraps(Layer.censor_updates)
    def censor_updates(self, updates):
        """
        .. todo::

            WRITEME
        """

        if self.max_kernel_norm is not None:
            W, = self.transformer.get_params()
            if W in updates:
                updated_W = updates[W]
                row_norms = T.sqrt(T.sum(T.sqr(updated_W), axis=(1)))
                desired_norms = T.clip(row_norms, 0, self.max_kernel_norm)
                scales = desired_norms / (1e-7 + row_norms)
                updates[W] = updated_W * scales.dimshuffle(0, 'x')

    @wraps(Layer.get_params)
    def get_params(self):
        """
        .. todo::

            WRITEME
        """
        assert self.b.name is not None
        W, = self.transformer.get_params()
        assert W.name is not None
        rval = self.transformer.get_params()
        assert not isinstance(rval, set)
        rval = list(rval)
        assert self.b not in rval
        rval.append(self.b)
        return rval

    @wraps(Layer.get_weight_decay)
    def get_weight_decay(self, coeff):

        if isinstance(coeff, str):
            coeff = float(coeff)
        assert isinstance(coeff, float) or hasattr(coeff, 'dtype')
        W, = self.transformer.get_params()
        return coeff * T.sqr(W).sum()

    @wraps(Layer.get_l1_weight_decay)
    def get_l1_weight_decay(self, coeff):

        if isinstance(coeff, str):
            coeff = float(coeff)
        assert isinstance(coeff, float) or hasattr(coeff, 'dtype')
        W, = self.transformer.get_params()
        return coeff * abs(W).sum()

    @wraps(Layer.set_weights)
    def set_weights(self, weights):

        W, = self.transformer.get_params()
        W.set_value(weights)

    @wraps(Layer.set_biases)
    def set_biases(self, biases):

        self.b.set_value(biases)

    @wraps(Layer.get_biases)
    def get_biases(self):

        return self.b.get_value()

    @wraps(Layer.get_weights_format)
    def get_weights_format(self):

        return ('v', 'h')

    @wraps(Layer.get_weights_topo)
    def get_weights_topo(self):

        outp, inp, rows, cols = range(4)
        raw = self.transformer._filters.get_value()

        return np.transpose(raw, (outp, rows, cols, inp))

    @wraps(Layer.get_monitoring_channels)
    def get_monitoring_channels(self):

        W, = self.transformer.get_params()

        sq_W = T.sqr(W)

        row_norms = T.sqrt(sq_W.sum(axis=(1)))

        return OrderedDict([
            ('kernel_norms_min', row_norms.min()),
            ('kernel_norms_mean', row_norms.mean()),
            ('kernel_norms_max', row_norms.max()),
        ])

    @wraps(Layer.fprop)
    def fprop(self, state_below):

        self.input_space.validate(state_below)
        axes = self.input_space.axes
        #z = self.transformer.lmul(state_below) + self.b
        state_below = state_below.dimshuffle(3, 1, 2, 0)
        z = self.transformer.lmul(state_below) + self.b
        z = z.dimshuffle(0, 3, 1, 2)

        if self.layer_name is not None:
            z.name = self.layer_name + '_z'

#ReLUs
        d = T.maximum(z, 0)

        # Max pooling between linear pieces
        # d = None
        # for i in xrange(self.num_pieces):
        # t = z[:,i::self.num_pieces,:,:]
        # if d is None:
        # d = t
        # else:
        # d = T.maximum(d, t)

        self.detector_space.validate(d)

        if not hasattr(self, 'detector_normalization'):
            self.detector_normalization = None

        if self.detector_normalization:
            d = self.detector_normalization(d)

        # NOTE : Custom pooling
        p = d.max(3)[:, :, :, None]

        self.output_space.validate(p)

        if not hasattr(self, 'output_normalization'):
            self.output_normalization = None

        if self.output_normalization:
            p = self.output_normalization(p)

        return p
예제 #35
0
class RBM(Block, Model):
    """
    A base interface for RBMs, implementing the binary-binary case.

    """
    def __init__(self, nvis = None, nhid = None,
            vis_space = None,
            hid_space = None,
            transformer = None,
            irange=0.5, rng=None, init_bias_vis = None,
            init_bias_vis_marginals = None, init_bias_hid=0.0,
            base_lr = 1e-3, anneal_start = None, nchains = 100, sml_gibbs_steps = 1,
            random_patches_src = None,
            monitor_reconstruction = False):

        """
        Construct an RBM object.

        Parameters
        ----------
        nvis : int
            Number of visible units in the model.
            (Specifying this implies that the model acts on a vector,
            i.e. it sets vis_space = pylearn2.space.VectorSpace(nvis) )
        nhid : int
            Number of hidden units in the model.
            (Specifying this implies that the model acts on a vector)
        vis_space:
            A pylearn2.space.Space object describing what kind of vector
            space the RBM acts on. Don't specify if you used nvis / hid
        hid_space:
            A pylearn2.space.Space object describing what kind of vector
            space the RBM's hidden units live in. Don't specify if you used
            nvis / nhid
        init_bias_vis_marginals: either None, or a Dataset to use to initialize
            the visible biases to the inverse sigmoid of the data marginals
        irange : float, optional
            The size of the initial interval around 0 for weights.
        rng : RandomState object or seed
            NumPy RandomState object to use when initializing parameters
            of the model, or (integer) seed to use to create one.
        init_bias_vis : array_like, optional
            Initial value of the visible biases, broadcasted as necessary.
        init_bias_hid : array_like, optional
            initial value of the hidden biases, broadcasted as necessary.
        monitor_reconstruction : if True, will request a monitoring channel to monitor
            reconstruction error
        random_patches_src: Either None, or a Dataset from which to draw random patches
            in order to initialize the weights. Patches will be multiplied by irange

        Parameters for default SML learning rule:

            base_lr : the base learning rate
            anneal_start : number of steps after which to start annealing on a 1/t schedule
            nchains: number of negative chains
            sml_gibbs_steps: number of gibbs steps to take per update

        """

        Model.__init__(self)
        Block.__init__(self)

        if init_bias_vis_marginals is not None:
            assert init_bias_vis is None
            X = init_bias_vis_marginals.X
            assert X.min() >= 0.0
            assert X.max() <= 1.0

            marginals = X.mean(axis=0)

            #rescale the marginals a bit to avoid NaNs
            init_bias_vis = inverse_sigmoid_numpy(.01 + .98 * marginals)


        if init_bias_vis is None:
            init_bias_vis = 0.0

        if rng is None:
            # TODO: global rng configuration stuff.
            rng = numpy.random.RandomState(1001)
        self.rng = rng

        if vis_space is None:
            #if we don't specify things in terms of spaces and a transformer,
            #assume dense matrix multiplication and work off of nvis, nhid
            assert hid_space is None
            assert transformer is None or isinstance(transformer,MatrixMul)
            assert nvis is not None
            assert nhid is not None

            if transformer is None:
                if random_patches_src is None:
                    W = rng.uniform(-irange, irange, (nvis, nhid))
                else:
                    if hasattr(random_patches_src, '__array__'):
                        W = irange * random_patches_src.T
                        assert W.shape == (nvis, nhid)
                    else:
                        #assert type(irange) == type(0.01)
                        #assert irange == 0.01
                        W = irange * random_patches_src.get_batch_design(nhid).T

                self.transformer = MatrixMul(  sharedX(
                        W,
                        name='W',
                        borrow=True
                    )
                )
            else:
                self.transformer = transformer

            self.vis_space = VectorSpace(nvis)
            self.hid_space = VectorSpace(nhid)
        else:
            assert hid_space is not None
            assert transformer is not None
            assert nvis is None
            assert nhid is None

            self.vis_space = vis_space
            self.hid_space = hid_space
            self.transformer = transformer


        try:
            b_vis = self.vis_space.get_origin()
            b_vis += init_bias_vis
        except ValueError:
            raise ValueError("bad shape or value for init_bias_vis")
        self.bias_vis = sharedX(b_vis, name='bias_vis', borrow=True)

        try:
            b_hid = self.hid_space.get_origin()
            b_hid += init_bias_hid
        except ValueError:
            raise ValueError('bad shape or value for init_bias_hid')
        self.bias_hid = sharedX(b_hid, name='bias_hid', borrow=True)

        self.random_patches_src = random_patches_src
        self.register_names_to_del(['random_patches_src'])


        self.__dict__.update(nhid=nhid, nvis=nvis)
        self._params = safe_union(self.transformer.get_params(), [self.bias_vis, self.bias_hid])

        self.base_lr = base_lr
        self.anneal_start = anneal_start
        self.nchains = nchains
        self.sml_gibbs_steps = sml_gibbs_steps

    def get_input_dim(self):
        if not isinstance(self.vis_space, VectorSpace):
            raise TypeError("Can't describe "+str(type(self.vis_space))+" as a dimensionality number.")
        return self.vis_space.dim

    def get_output_dim(self):
        if not isinstance(self.hid_space, VectorSpace):
            raise TypeError("Can't describe "+str(type(self.hid_space))+" as a dimensionality number.")
        return self.hid_space.dim

    def get_input_space(self):
        return self.vis_space

    def get_output_space(self):
        return self.hid_space

    def get_params(self):
        return [param for param in self._params]

    def get_weights(self, borrow=False):

        weights ,= self.transformer.get_params()

        return weights.get_value(borrow=borrow)

    def get_weights_topo(self):
        return self.transformer.get_weights_topo()

    def get_weights_format(self):
        return ['v', 'h']


    def get_monitoring_channels(self, data):
        V = data
        theano_rng = RandomStreams(42)

        #TODO: re-enable this in the case where self.transformer
        #is a matrix multiply
        #norms = theano_norms(self.weights)

        H = self.mean_h_given_v(V)

        h = H.mean(axis=0)

        return { 'bias_hid_min' : T.min(self.bias_hid),
                 'bias_hid_mean' : T.mean(self.bias_hid),
                 'bias_hid_max' : T.max(self.bias_hid),
                 'bias_vis_min' : T.min(self.bias_vis),
                 'bias_vis_mean' : T.mean(self.bias_vis),
                 'bias_vis_max': T.max(self.bias_vis),
                 'h_min' : T.min(h),
                 'h_mean': T.mean(h),
                 'h_max' : T.max(h),
                 #'W_min' : T.min(self.weights),
                 #'W_max' : T.max(self.weights),
                 #'W_norms_min' : T.min(norms),
                 #'W_norms_max' : T.max(norms),
                 #'W_norms_mean' : T.mean(norms),
                'reconstruction_error' : self.reconstruction_error(V, theano_rng) }

    def get_monitoring_data_specs(self):
        """
        Get the data_specs describing the data for get_monitoring_channel.

        This implementation returns specification corresponding to unlabeled
        inputs.
        """
        return (self.get_input_space(), self.get_input_source())

    def ml_gradients(self, pos_v, neg_v):
        """
        Get the contrastive gradients given positive and negative phase
        visible units.

        Parameters
        ----------
        pos_v : tensor_like
            Theano symbolic representing a minibatch on the visible units,
            with the first dimension indexing training examples and the second
            indexing data dimensions (usually actual training data).
        neg_v : tensor_like
            Theano symbolic representing a minibatch on the visible units,
            with the first dimension indexing training examples and the second
            indexing data dimensions (usually reconstructions of the data or
            sampler particles from a persistent Markov chain).

        Returns
        -------
        grads : list
            List of Theano symbolic variables representing gradients with
            respect to model parameters, in the same order as returned by
            `params()`.

        Notes
        -----
        `pos_v` and `neg_v` need not have the same first dimension, i.e.
        minibatch size.
        """

        # taking the mean over each term independently allows for different
        # mini-batch sizes in the positive and negative phase.
        ml_cost = (self.free_energy_given_v(pos_v).mean() -
                   self.free_energy_given_v(neg_v).mean())

        grads = tensor.grad(ml_cost, self.get_params(),
                            consider_constant=[pos_v, neg_v])

        return grads


    def train_batch(self, dataset, batch_size):
        """ A default learning rule based on SML """
        self.learn_mini_batch(dataset.get_batch_design(batch_size))
        return True

    def learn_mini_batch(self, X):
        """ A default learning rule based on SML """

        if not hasattr(self, 'learn_func'):
            self.redo_theano()

        rval =  self.learn_func(X)

        return rval

    def redo_theano(self):
        """ Compiles the theano function for the default learning rule """

        init_names = dir(self)

        minibatch = tensor.matrix()

        optimizer = _SGDOptimizer(self, self.base_lr, self.anneal_start)

        sampler = sampler = BlockGibbsSampler(self, 0.5 + np.zeros((self.nchains, self.get_input_dim())), self.rng,
                                                  steps= self.sml_gibbs_steps)


        updates = training_updates(visible_batch=minibatch, model=self,
                                            sampler=sampler, optimizer=optimizer)

        self.learn_func = theano.function([minibatch], updates=updates)

        final_names = dir(self)

        self.register_names_to_del([name for name in final_names if name not in init_names])

    def gibbs_step_for_v(self, v, rng):
        """
        Do a round of block Gibbs sampling given visible configuration

        Parameters
        ----------
        v  : tensor_like
            Theano symbolic representing the hidden unit states for a batch of
            training examples (or negative phase particles), with the first
            dimension indexing training examples and the second indexing data
            dimensions.
        rng : RandomStreams object
            Random number generator to use for sampling the hidden and visible
            units.

        Returns
        -------
        v_sample : tensor_like
            Theano symbolic representing the new visible unit state after one
            round of Gibbs sampling.
        locals : dict
            Contains the following auxiliary state as keys (all symbolics
            except shape tuples):
             * `h_mean`: the returned value from `mean_h_given_v`
             * `h_mean_shape`: shape tuple indicating the size of `h_mean` and
               `h_sample`
             * `h_sample`: the stochastically sampled hidden units
             * `v_mean_shape`: shape tuple indicating the shape of `v_mean` and
               `v_sample`
             * `v_mean`: the returned value from `mean_v_given_h`
             * `v_sample`: the stochastically sampled visible units
        """
        h_mean = self.mean_h_given_v(v)
        assert h_mean.type.dtype == v.type.dtype
        # For binary hidden units
        # TODO: factor further to extend to other kinds of hidden units
        #       (e.g. spike-and-slab)
        h_sample = rng.binomial(size = h_mean.shape, n = 1 , p = h_mean, dtype=h_mean.type.dtype)
        assert h_sample.type.dtype == v.type.dtype
        # v_mean is always based on h_sample, not h_mean, because we don't
        # want h transmitting more than one bit of information per unit.
        v_mean = self.mean_v_given_h(h_sample)
        assert v_mean.type.dtype == v.type.dtype
        v_sample = self.sample_visibles([v_mean], v_mean.shape, rng)
        assert v_sample.type.dtype == v.type.dtype
        return v_sample, locals()

    def sample_visibles(self, params, shape, rng):
        """
        Stochastically sample the visible units given hidden unit
        configurations for a set of training examples.

        Parameters
        ----------
        params : list
            List of the necessary parameters to sample :math:`p(v|h)`. In the
            case of a binary-binary RBM this is a single-element list
            containing the symbolic representing :math:`p(v|h)`, as returned
            by `mean_v_given_h`.

        Returns
        -------
        vprime : tensor_like
            Theano symbolic representing stochastic samples from :math:`p(v|h)`
        """
        v_mean = params[0]
        return as_floatX(rng.uniform(size=shape) < v_mean)

    def input_to_h_from_v(self, v):
        """
        Compute the affine function (linear map plus bias) that serves as
        input to the hidden layer in an RBM.

        Parameters
        ----------
        v  : tensor_like or list of tensor_likes
            Theano symbolic (or list thereof) representing the one or several
            minibatches on the visible units, with the first dimension indexing
            training examples and the second indexing data dimensions.

        Returns
        -------
        a : tensor_like or list of tensor_likes
            Theano symbolic (or list thereof) representing the input to each
            hidden unit for each training example.
        """

        if isinstance(v, tensor.Variable):
            return self.bias_hid + self.transformer.lmul(v)
        else:
            return [self.input_to_h_from_v(vis) for vis in v]

    def input_to_v_from_h(self, h):
        """
        Compute the affine function (linear map plus bias) that serves as
        input to the visible layer in an RBM.

        Parameters
        ----------
        h  : tensor_like or list of tensor_likes
            Theano symbolic (or list thereof) representing the one or several
            minibatches on the hidden units, with the first dimension indexing
            training examples and the second indexing data dimensions.

        Returns
        -------
        a : tensor_like or list of tensor_likes
            Theano symbolic (or list thereof) representing the input to each
            visible unit for each row of h.
        """
        if isinstance(h, tensor.Variable):
            return self.bias_vis + self.transformer.lmul_T(h)
        else:
            return [self.input_to_v_from_h(hid) for hid in h]

    def upward_pass(self, v):
        """
        wrapper around mean_h_given_v method.  Called when RBM is accessed
        by mlp.HiddenLayer.
        """
        return self.mean_h_given_v(v)

    def mean_h_given_v(self, v):
        """
        Compute the mean activation of the hidden units given visible unit
        configurations for a set of training examples.

        Parameters
        ----------
        v  : tensor_like or list of tensor_likes
            Theano symbolic (or list thereof) representing the hidden unit
            states for a batch (or several) of training examples, with the
            first dimension indexing training examples and the second indexing
            data dimensions.

        Returns
        -------
        h : tensor_like or list of tensor_likes
            Theano symbolic (or list thereof) representing the mean
            (deterministic) hidden unit activations given the visible units.
        """
        if isinstance(v, tensor.Variable):
            return nnet.sigmoid(self.input_to_h_from_v(v))
        else:
            return [self.mean_h_given_v(vis) for vis in v]

    def mean_v_given_h(self, h):
        """
        Compute the mean activation of the visibles given hidden unit
        configurations for a set of training examples.

        Parameters
        ----------
        h  : tensor_like or list of tensor_likes
            Theano symbolic (or list thereof) representing the hidden unit
            states for a batch (or several) of training examples, with the
            first dimension indexing training examples and the second indexing
            hidden units.

        Returns
        -------
        vprime : tensor_like or list of tensor_likes
            Theano symbolic (or list thereof) representing the mean
            (deterministic) reconstruction of the visible units given the
            hidden units.
        """
        if isinstance(h, tensor.Variable):
            return nnet.sigmoid(self.input_to_v_from_h(h))
        else:
            return [self.mean_v_given_h(hid) for hid in h]

    def free_energy_given_v(self, v):
        """
        Calculate the free energy of a visible unit configuration by
        marginalizing over the hidden units.

        Parameters
        ----------
        v : tensor_like
            Theano symbolic representing the hidden unit states for a batch of
            training examples, with the first dimension indexing training
            examples and the second indexing data dimensions.

        Returns
        -------
        f : tensor_like
            1-dimensional tensor (vector) representing the free energy
            associated with each row of v.
        """
        sigmoid_arg = self.input_to_h_from_v(v)
        return (-tensor.dot(v, self.bias_vis) -
                 nnet.softplus(sigmoid_arg).sum(axis=1))

    def free_energy(self, V):
        return self.free_energy_given_v(V)


    def free_energy_given_h(self, h):
        """
        Calculate the free energy of a hidden unit configuration by
        marginalizing over the visible units.

        Parameters
        ----------
        h : tensor_like
            Theano symbolic representing the hidden unit states, with the
            first dimension indexing training examples and the second
            indexing data dimensions.

        Returns
        -------
        f : tensor_like
            1-dimensional tensor (vector) representing the free energy
            associated with each row of v.
        """
        sigmoid_arg = self.input_to_v_from_h(h)
        return (-tensor.dot(h, self.bias_hid) -
                nnet.softplus(sigmoid_arg).sum(axis=1))

    def __call__(self, v):
        """
        Forward propagate (symbolic) input through this module, obtaining
        a representation to pass on to layers above.

        This just aliases the `mean_h_given_v()` function for syntactic
        sugar/convenience.
        """
        return self.mean_h_given_v(v)

    def reconstruction_error(self, v, rng):
        """
        Compute the mean-squared error (mean over examples, sum over units)
        across a minibatch after a Gibbs
        step starting from the training data.

        Parameters
        ----------
        v : tensor_like
            Theano symbolic representing the hidden unit states for a batch of
            training examples, with the first dimension indexing training
            examples and the second indexing data dimensions.
        rng : RandomStreams object
            Random number generator to use for sampling the hidden and visible
            units.

        Returns
        -------
        mse : tensor_like
            0-dimensional tensor (essentially a scalar) indicating the mean
            reconstruction error across the minibatch.

        Notes
        -----
        The reconstruction used to assess error samples only the hidden
        units. For the visible units, it uses the conditional mean.
        No sampling of the visible units is done, to reduce noise in the estimate.
        """
        sample, _locals = self.gibbs_step_for_v(v, rng)
        return ((_locals['v_mean'] - v) ** 2).sum(axis=1).mean()
예제 #36
0
파일: rbm.py 프로젝트: mruan/pylearn2
    def __init__(self, nvis = None, nhid = None,
            vis_space = None,
            hid_space = None,
            transformer = None,
            irange=0.5, rng=None, init_bias_vis = None,
            init_bias_vis_marginals = None, init_bias_hid=0.0,
            base_lr = 1e-3, anneal_start = None, nchains = 100, sml_gibbs_steps = 1,
            random_patches_src = None,
            monitor_reconstruction = False):

        """
        Construct an RBM object.

        Parameters
        ----------
        nvis : int
            Number of visible units in the model.
            (Specifying this implies that the model acts on a vector,
            i.e. it sets vis_space = pylearn2.space.VectorSpace(nvis) )
        nhid : int
            Number of hidden units in the model.
            (Specifying this implies that the model acts on a vector)
        vis_space:
            A pylearn2.space.Space object describing what kind of vector
            space the RBM acts on. Don't specify if you used nvis / hid
        hid_space:
            A pylearn2.space.Space object describing what kind of vector
            space the RBM's hidden units live in. Don't specify if you used
            nvis / nhid
        init_bias_vis_marginals: either None, or a Dataset to use to initialize
            the visible biases to the inverse sigmoid of the data marginals
        irange : float, optional
            The size of the initial interval around 0 for weights.
        rng : RandomState object or seed
            NumPy RandomState object to use when initializing parameters
            of the model, or (integer) seed to use to create one.
        init_bias_vis : array_like, optional
            Initial value of the visible biases, broadcasted as necessary.
        init_bias_hid : array_like, optional
            initial value of the hidden biases, broadcasted as necessary.
        monitor_reconstruction : if True, will request a monitoring channel to monitor
            reconstruction error
        random_patches_src: Either None, or a Dataset from which to draw random patches
            in order to initialize the weights. Patches will be multiplied by irange

        Parameters for default SML learning rule:

            base_lr : the base learning rate
            anneal_start : number of steps after which to start annealing on a 1/t schedule
            nchains: number of negative chains
            sml_gibbs_steps: number of gibbs steps to take per update

        """

        Model.__init__(self)
        Block.__init__(self)

        if init_bias_vis_marginals is not None:
            assert init_bias_vis is None
            X = init_bias_vis_marginals.X
            assert X.min() >= 0.0
            assert X.max() <= 1.0

            marginals = X.mean(axis=0)

            #rescale the marginals a bit to avoid NaNs
            init_bias_vis = inverse_sigmoid_numpy(.01 + .98 * marginals)


        if init_bias_vis is None:
            init_bias_vis = 0.0

        if rng is None:
            # TODO: global rng configuration stuff.
            rng = numpy.random.RandomState(1001)
        self.rng = rng

        if vis_space is None:
            #if we don't specify things in terms of spaces and a transformer,
            #assume dense matrix multiplication and work off of nvis, nhid
            assert hid_space is None
            assert transformer is None or isinstance(transformer,MatrixMul)
            assert nvis is not None
            assert nhid is not None

            if transformer is None:
                if random_patches_src is None:
                    W = rng.uniform(-irange, irange, (nvis, nhid))
                else:
                    if hasattr(random_patches_src, '__array__'):
                        W = irange * random_patches_src.T
                        assert W.shape == (nvis, nhid)
                    else:
                        #assert type(irange) == type(0.01)
                        #assert irange == 0.01
                        W = irange * random_patches_src.get_batch_design(nhid).T

                self.transformer = MatrixMul(  sharedX(
                        W,
                        name='W',
                        borrow=True
                    )
                )
            else:
                self.transformer = transformer

            self.vis_space = VectorSpace(nvis)
            self.hid_space = VectorSpace(nhid)
        else:
            assert hid_space is not None
            assert transformer is not None
            assert nvis is None
            assert nhid is None

            self.vis_space = vis_space
            self.hid_space = hid_space
            self.transformer = transformer


        try:
            b_vis = self.vis_space.get_origin()
            b_vis += init_bias_vis
        except ValueError:
            raise ValueError("bad shape or value for init_bias_vis")
        self.bias_vis = sharedX(b_vis, name='bias_vis', borrow=True)

        try:
            b_hid = self.hid_space.get_origin()
            b_hid += init_bias_hid
        except ValueError:
            raise ValueError('bad shape or value for init_bias_hid')
        self.bias_hid = sharedX(b_hid, name='bias_hid', borrow=True)

        self.random_patches_src = random_patches_src
        self.register_names_to_del(['random_patches_src'])


        self.__dict__.update(nhid=nhid, nvis=nvis)
        self._params = safe_union(self.transformer.get_params(), [self.bias_vis, self.bias_hid])

        self.base_lr = base_lr
        self.anneal_start = anneal_start
        self.nchains = nchains
        self.sml_gibbs_steps = sml_gibbs_steps
예제 #37
0
nv = 3
nh = 4

vW = rng.randn(nv, nh)
W = sharedX(vW)
vbv = as_floatX(rng.randn(nv))
bv = T.as_tensor_variable(vbv)
bv.tag.test_value = vbv
vbh = as_floatX(rng.randn(nh))
bh = T.as_tensor_variable(vbh)
bh.tag.test_value = bh
vsigma = as_floatX(rng.uniform(0.1, 5))
sigma = T.as_tensor_variable(vsigma)
sigma.tag.test_value = vsigma

E = GRBM_Type_1(transformer=MatrixMul(W),
                bias_vis=bv,
                bias_hid=bh,
                sigma=sigma)

V = T.matrix()
V.tag.test_value = as_floatX(rng.rand(test_m, nv))
H = T.matrix()
H.tag.test_value = as_floatX(rng.rand(test_m, nh))

E_func = function([V, H], E([V, H]))
F_func = function([V], E.free_energy(V))
log_P_H_given_V_func = function([H, V], E.log_P_H_given_V(H, V))
score_func = function([V], E.score(V))

F_of_V = E.free_energy(V)
예제 #38
0
    def __init__(self, nvis = None, nhid = None,
            vis_space = None,
            hid_space = None,
            transformer = None,
            irange=0.5, rng=None, init_bias_vis = None,
            init_bias_vis_marginals = None, init_bias_hid=0.0,
            base_lr = 1e-3, anneal_start = None, nchains = 100, sml_gibbs_steps = 1,
            random_patches_src = None,
            monitor_reconstruction = False):

        """
        Construct an RBM object.

        Parameters
        ----------
        nvis : int
            Number of visible units in the model.
            (Specifying this implies that the model acts on a vector,
            i.e. it sets vis_space = pylearn2.space.VectorSpace(nvis) )
        nhid : int
            Number of hidden units in the model.
            (Specifying this implies that the model acts on a vector)
        vis_space:
            A pylearn2.space.Space object describing what kind of vector
            space the RBM acts on. Don't specify if you used nvis / hid
        hid_space:
            A pylearn2.space.Space object describing what kind of vector
            space the RBM's hidden units live in. Don't specify if you used
            nvis / nhid
        init_bias_vis_marginals: either None, or a Dataset to use to initialize
            the visible biases to the inverse sigmoid of the data marginals
        irange : float, optional
            The size of the initial interval around 0 for weights.
        rng : RandomState object or seed
            NumPy RandomState object to use when initializing parameters
            of the model, or (integer) seed to use to create one.
        init_bias_vis : array_like, optional
            Initial value of the visible biases, broadcasted as necessary.
        init_bias_hid : array_like, optional
            initial value of the hidden biases, broadcasted as necessary.
        monitor_reconstruction : if True, will request a monitoring channel to monitor
            reconstruction error
        random_patches_src: Either None, or a Dataset from which to draw random patches
            in order to initialize the weights. Patches will be multiplied by irange

        Parameters for default SML learning rule:

            base_lr : the base learning rate
            anneal_start : number of steps after which to start annealing on a 1/t schedule
            nchains: number of negative chains
            sml_gibbs_steps: number of gibbs steps to take per update

        """

        Model.__init__(self)
        Block.__init__(self)

        if init_bias_vis_marginals is not None:
            assert init_bias_vis is None
            X = init_bias_vis_marginals.X
            assert X.min() >= 0.0
            assert X.max() <= 1.0

            marginals = X.mean(axis=0)

            #rescale the marginals a bit to avoid NaNs
            init_bias_vis = inverse_sigmoid_numpy(.01 + .98 * marginals)


        if init_bias_vis is None:
            init_bias_vis = 0.0

        if rng is None:
            # TODO: global rng configuration stuff.
            rng = numpy.random.RandomState(1001)
        self.rng = rng

        if vis_space is None:
            #if we don't specify things in terms of spaces and a transformer,
            #assume dense matrix multiplication and work off of nvis, nhid
            assert hid_space is None
            assert transformer is None or isinstance(transformer,MatrixMul)
            assert nvis is not None
            assert nhid is not None

            if transformer is None:
                if random_patches_src is None:
                    W = rng.uniform(-irange, irange, (nvis, nhid))
                else:
                    if hasattr(random_patches_src, '__array__'):
                        W = irange * random_patches_src.T
                        assert W.shape == (nvis, nhid)
                    else:
                        #assert type(irange) == type(0.01)
                        #assert irange == 0.01
                        W = irange * random_patches_src.get_batch_design(nhid).T

                self.transformer = MatrixMul(  sharedX(
                        W,
                        name='W',
                        borrow=True
                    )
                )
            else:
                self.transformer = transformer

            self.vis_space = VectorSpace(nvis)
            self.hid_space = VectorSpace(nhid)
        else:
            assert hid_space is not None
            assert transformer is not None
            assert nvis is None
            assert nhid is None

            self.vis_space = vis_space
            self.hid_space = hid_space
            self.transformer = transformer


        try:
            b_vis = self.vis_space.get_origin()
            b_vis += init_bias_vis
        except ValueError:
            raise ValueError("bad shape or value for init_bias_vis")
        self.bias_vis = sharedX(b_vis, name='bias_vis', borrow=True)

        try:
            b_hid = self.hid_space.get_origin()
            b_hid += init_bias_hid
        except ValueError:
            raise ValueError('bad shape or value for init_bias_hid')
        self.bias_hid = sharedX(b_hid, name='bias_hid', borrow=True)

        self.random_patches_src = random_patches_src
        self.register_names_to_del(['random_patches_src'])


        self.__dict__.update(nhid=nhid, nvis=nvis)
        self._params = safe_union(self.transformer.get_params(), [self.bias_vis, self.bias_hid])

        self.base_lr = base_lr
        self.anneal_start = anneal_start
        self.nchains = nchains
        self.sml_gibbs_steps = sml_gibbs_steps
예제 #39
0
class IsingHidden(HiddenLayer):
    """

    A hidden layer with h being a vector in {-1, 1}^dim,
    implementing the energy function term

    -v^T Wh -b^T h

    where W and b are parameters of this layer, and v is
    the upward state of the layer below

    """

    def __init__(self,
            dim,
            layer_name,
            irange = None,
            sparse_init = None,
            sparse_stdev = 1.,
            include_prob = 1.0,
            init_bias = 0.,
            W_lr_scale = None,
            b_lr_scale = None,
            max_col_norm = None):
        """

            include_prob: probability of including a weight element in the set
                    of weights initialized to U(-irange, irange). If not included
                    it is initialized to 0.

        """
        self.__dict__.update(locals())
        del self.self

        self.b = sharedX( np.zeros((self.dim,)) + init_bias, name = layer_name + '_b')

    def get_lr_scalers(self):

        if not hasattr(self, 'W_lr_scale'):
            self.W_lr_scale = None

        if not hasattr(self, 'b_lr_scale'):
            self.b_lr_scale = None

        rval = OrderedDict()

        if self.W_lr_scale is not None:
            W, = self.transformer.get_params()
            rval[W] = self.W_lr_scale

        if self.b_lr_scale is not None:
            rval[self.b] = self.b_lr_scale

        return rval

    def set_input_space(self, space):
        """ Note: this resets parameters! """

        self.input_space = space

        if isinstance(space, VectorSpace):
            self.requires_reformat = False
            self.input_dim = space.dim
        else:
            self.requires_reformat = True
            self.input_dim = space.get_total_dimension()
            self.desired_space = VectorSpace(self.input_dim)

        self.output_space = VectorSpace(self.dim)

        rng = self.dbm.rng
        if self.irange is not None:
            assert self.sparse_init is None
            W = rng.uniform(-self.irange,
                                 self.irange,
                                 (self.input_dim, self.dim)) * \
                    (rng.uniform(0.,1., (self.input_dim, self.dim))
                     < self.include_prob)
        else:
            assert self.sparse_init is not None
            W = np.zeros((self.input_dim, self.dim))
            W *= self.sparse_stdev

        W = sharedX(W)
        W.name = self.layer_name + '_W'

        self.transformer = MatrixMul(W)

        W ,= self.transformer.get_params()
        assert W.name is not None

    def censor_updates(self, updates):

        if self.max_col_norm is not None:
            W, = self.transformer.get_params()
            if W in updates:
                updated_W = updates[W]
                col_norms = T.sqrt(T.sum(T.sqr(updated_W), axis=0))
                desired_norms = T.clip(col_norms, 0, self.max_col_norm)
                updates[W] = updated_W * (desired_norms / (1e-7 + col_norms))

    def get_total_state_space(self):
        return VectorSpace(self.dim)

    def get_params(self):
        assert self.b.name is not None
        W ,= self.transformer.get_params()
        assert W.name is not None
        rval = self.transformer.get_params()
        assert not isinstance(rval, set)
        rval = list(rval)
        assert self.b not in rval
        rval.append(self.b)
        return rval

    def get_weight_decay(self, coeff):
        if isinstance(coeff, str):
            coeff = float(coeff)
        assert isinstance(coeff, float) or hasattr(coeff, 'dtype')
        W ,= self.transformer.get_params()
        return coeff * T.sqr(W).sum()

    def get_weights(self):
        if self.requires_reformat:
            # This is not really an unimplemented case.
            # We actually don't know how to format the weights
            # in design space. We got the data in topo space
            # and we don't have access to the dataset
            raise NotImplementedError()
        W ,= self.transformer.get_params()
        return W.get_value()

    def set_weights(self, weights):
        W, = self.transformer.get_params()
        W.set_value(weights)

    def set_biases(self, biases, recenter = False):
        self.b.set_value(biases)
        if recenter:
            assert self.center
            if self.pool_size != 1:
                raise NotImplementedError()
            self.offset.set_value(sigmoid_numpy(self.b.get_value()))

    def get_biases(self):
        return self.b.get_value()

    def get_weights_format(self):
        return ('v', 'h')

    def get_weights_topo(self):

        if not isinstance(self.input_space, Conv2DSpace):
            raise NotImplementedError()

        W ,= self.transformer.get_params()

        W = W.T

        W = W.reshape((self.detector_layer_dim, self.input_space.shape[0],
            self.input_space.shape[1], self.input_space.nchannels))

        W = Conv2DSpace.convert(W, self.input_space.axes, ('b', 0, 1, 'c'))

        return function([], W)()

    def upward_state(self, total_state):
        return total_state

    def downward_state(self, total_state):
        return total_state

    def get_monitoring_channels(self):

        W ,= self.transformer.get_params()

        assert W.ndim == 2

        sq_W = T.sqr(W)

        row_norms = T.sqrt(sq_W.sum(axis=1))
        col_norms = T.sqrt(sq_W.sum(axis=0))

        return OrderedDict([
              ('row_norms_min'  , row_norms.min()),
              ('row_norms_mean' , row_norms.mean()),
              ('row_norms_max'  , row_norms.max()),
              ('col_norms_min'  , col_norms.min()),
              ('col_norms_mean' , col_norms.mean()),
              ('col_norms_max'  , col_norms.max()),
            ])

    def get_monitoring_channels_from_state(self, state):

        P = state

        rval = OrderedDict()

        vars_and_prefixes = [ (P,'') ]

        for var, prefix in vars_and_prefixes:
            v_max = var.max(axis=0)
            v_min = var.min(axis=0)
            v_mean = var.mean(axis=0)
            v_range = v_max - v_min

            # max_x.mean_u is "the mean over *u*nits of the max over e*x*amples"
            # The x and u are included in the name because otherwise its hard
            # to remember which axis is which when reading the monitor
            # I use inner.outer rather than outer_of_inner or something like that
            # because I want mean_x.* to appear next to each other in the alphabetical
            # list, as these are commonly plotted together
            for key, val in [
                    ('max_x.max_u', v_max.max()),
                    ('max_x.mean_u', v_max.mean()),
                    ('max_x.min_u', v_max.min()),
                    ('min_x.max_u', v_min.max()),
                    ('min_x.mean_u', v_min.mean()),
                    ('min_x.min_u', v_min.min()),
                    ('range_x.max_u', v_range.max()),
                    ('range_x.mean_u', v_range.mean()),
                    ('range_x.min_u', v_range.min()),
                    ('mean_x.max_u', v_mean.max()),
                    ('mean_x.mean_u', v_mean.mean()),
                    ('mean_x.min_u', v_mean.min())
                    ]:
                rval[prefix+key] = val

        return rval

    def sample(self, state_below = None, state_above = None,
            layer_above = None,
            theano_rng = None):

        if theano_rng is None:
            raise ValueError("theano_rng is required; it just defaults to None so that it may appear after layer_above / state_above in the list.")

        if state_above is not None:
            msg = layer_above.downward_message(state_above)
        else:
            msg = None

        if self.requires_reformat:
            state_below = self.input_space.format_as(state_below, self.desired_space)

        z = self.transformer.lmul(state_below) + self.b

        if msg != None:
            z = z + msg

        on_prob = T.nnet.sigmoid(2. * z)

        samples = theano_rng.binomial(p = on_prob, n=1, size=on_prob.shape, dtype=on_prob.dtype) * 2. - 1.

        return samples

    def downward_message(self, downward_state):
        rval = self.transformer.lmul_T(downward_state)

        if self.requires_reformat:
            rval = self.desired_space.format_as(rval, self.input_space)

        return rval

    def init_mf_state(self):
        raise NotImplementedError("This is just a copy-paste of BVMP")
        # work around theano bug with broadcasted vectors
        z = T.alloc(0., self.dbm.batch_size, self.detector_layer_dim).astype(self.b.dtype) + \
                self.b.dimshuffle('x', 0)
        rval = max_pool_channels(z = z,
                pool_size = self.pool_size)
        return rval

    def make_state(self, num_examples, numpy_rng):
        """ Returns a shared variable containing an actual state
           (not a mean field state) for this variable.
        """
        driver = numpy_rng.uniform(0.,1., (num_examples, self.dim))
        on_prob = sigmoid_numpy(2. * self.b.get_value())
        sample = 2. * (driver < on_prob) - 1.

        rval = sharedX(sample, name = 'v_sample_shared')

        return rval

    def make_symbolic_state(self, num_examples, theano_rng):
        mean = T.nnet.sigmoid(2. * self.b)
        rval = theano_rng.binomial(size=(num_examples, self.nvis), p=mean)
        rval = 2. * (rval) - 1.

        return rval

    def expected_energy_term(self, state, average, state_below, average_below):

        # state = Print('h_state', attrs=['min', 'max'])(state)

        self.input_space.validate(state_below)

        if self.requires_reformat:
            if not isinstance(state_below, tuple):
                for sb in get_debug_values(state_below):
                    if sb.shape[0] != self.dbm.batch_size:
                        raise ValueError("self.dbm.batch_size is %d but got shape of %d" % (self.dbm.batch_size, sb.shape[0]))
                    assert reduce(lambda x,y: x * y, sb.shape[1:]) == self.input_dim

            state_below = self.input_space.format_as(state_below, self.desired_space)

        # Energy function is linear so it doesn't matter if we're averaging or not
        # Specifically, our terms are -u^T W d - b^T d where u is the upward state of layer below
        # and d is the downward state of this layer

        bias_term = T.dot(state, self.b)
        weights_term = (self.transformer.lmul(state_below) * state).sum(axis=1)

        rval = -bias_term - weights_term

        assert rval.ndim == 1

        return rval

    def linear_feed_forward_approximation(self, state_below):
        """
        Used to implement TorontoSparsity. Unclear exactly what properties of it are
        important or how to implement it for other layers.

        Properties it must have:
            output is same kind of data structure (ie, tuple of theano 2-tensors)
            as mf_update

        Properties it probably should have for other layer types:
            An infinitesimal change in state_below or the parameters should cause the same sign of change
            in the output of linear_feed_forward_approximation and in mf_update

            Should not have any non-linearities that cause the gradient to shrink

            Should disregard top-down feedback
        """

        z = self.transformer.lmul(state_below) + self.b

        if self.pool_size != 1:
            # Should probably implement sum pooling for the non-pooled version,
            # but in reality it's not totally clear what the right answer is
            raise NotImplementedError()

        return z, z

    def mf_update(self, state_below, state_above, layer_above = None, double_weights = False, iter_name = None):

        self.input_space.validate(state_below)

        if self.requires_reformat:
            if not isinstance(state_below, tuple):
                for sb in get_debug_values(state_below):
                    if sb.shape[0] != self.dbm.batch_size:
                        raise ValueError("self.dbm.batch_size is %d but got shape of %d" % (self.dbm.batch_size, sb.shape[0]))
                    assert reduce(lambda x,y: x * y, sb.shape[1:]) == self.input_dim

            state_below = self.input_space.format_as(state_below, self.desired_space)

        if iter_name is None:
            iter_name = 'anon'

        if state_above is not None:
            assert layer_above is not None
            msg = layer_above.downward_message(state_above)
            msg.name = 'msg_from_'+layer_above.layer_name+'_to_'+self.layer_name+'['+iter_name+']'
        else:
            msg = None

        if double_weights:
            state_below = 2. * state_below
            state_below.name = self.layer_name + '_'+iter_name + '_2state'
        z = self.transformer.lmul(state_below) + self.b
        if self.layer_name is not None and iter_name is not None:
            z.name = self.layer_name + '_' + iter_name + '_z'
        if msg is not None:
            z = z + msg
        h = T.tanh(z)

        return h
예제 #40
0
파일: maxout.py 프로젝트: bbudescu/pylearn2
class Maxout(Layer):
    """
    A hidden layer that does max pooling over groups of linear
    units. If you use this code in a research project, please
    cite

    "Maxout Networks" Ian J. Goodfellow, David Warde-Farley,
    Mehdi Mirza, Aaron Courville, and Yoshua Bengio. ICML 2013
    """

    def __str__(self):
        return "Maxout"

    def __init__(
        self,
        layer_name,
        num_units,
        num_pieces,
        pool_stride=None,
        randomize_pools=False,
        irange=None,
        sparse_init=None,
        sparse_stdev=1.0,
        include_prob=1.0,
        init_bias=0.0,
        W_lr_scale=None,
        b_lr_scale=None,
        max_col_norm=None,
        max_row_norm=None,
        mask_weights=None,
        min_zero=False,
    ):
        """
            layer_name: A name for this layer that will be prepended to
                        monitoring channels related to this layer.
            num_units: The number of maxout units to use in this layer.
            num_pieces: The number of linear pieces to use in each maxout
                        unit.
            pool_stride: The distance between the start of each max pooling
                        region. Defaults to num_pieces, which makes the
                        pooling regions disjoint. If set to a smaller number,
                        can do overlapping pools.
            randomize_pools: Does max pooling over randomized subsets of
                        the linear responses, rather than over sequential
                        subsets.
            irange: if specified, initializes each weight randomly in
                U(-irange, irange)
            sparse_init: if specified, irange must not be specified.
                        This is an integer specifying how many weights to make
                        non-zero. All non-zero weights will be initialized
                        randomly in N(0, sparse_stdev^2)
            include_prob: probability of including a weight element in the set
               of weights initialized to U(-irange, irange). If not included
               a weight is initialized to 0. This defaults to 1.
            init_bias: All biases are initialized to this number
            W_lr_scale: The learning rate on the weights for this layer is
                multiplied by this scaling factor
            b_lr_scale: The learning rate on the biases for this layer is
                multiplied by this scaling factor
            max_col_norm: The norm of each column of the weight matrix is
                constrained to have at most this norm. If unspecified, no
                constraint. Constraint is enforced by re-projection (if
                necessary) at the end of each update.
            max_row_norm: Like max_col_norm, but applied to the rows.
            mask_weights: A binary matrix multiplied by the weights after each
                         update, allowing you to restrict their connectivity.
            min_zero: If true, includes a zero in the set we take a max over
                    for each maxout unit. This is equivalent to pooling over
                    rectified linear units.
        """

        detector_layer_dim = num_units * num_pieces
        pool_size = num_pieces

        if pool_stride is None:
            pool_stride = pool_size

        self.__dict__.update(locals())
        del self.self

        self.b = sharedX(np.zeros((self.detector_layer_dim,)) + init_bias, name=layer_name + "_b")

        if max_row_norm is not None:
            raise NotImplementedError()

    def get_lr_scalers(self):

        if not hasattr(self, "W_lr_scale"):
            self.W_lr_scale = None

        if not hasattr(self, "b_lr_scale"):
            self.b_lr_scale = None

        rval = OrderedDict()

        if self.W_lr_scale is not None:
            W, = self.transformer.get_params()
            rval[W] = self.W_lr_scale

        if self.b_lr_scale is not None:
            rval[self.b] = self.b_lr_scale

        return rval

    def set_input_space(self, space):
        """ Note: this resets parameters! """

        self.input_space = space

        if isinstance(space, VectorSpace):
            self.requires_reformat = False
            self.input_dim = space.dim
        else:
            self.requires_reformat = True
            self.input_dim = space.get_total_dimension()
            self.desired_space = VectorSpace(self.input_dim)

        if not ((self.detector_layer_dim - self.pool_size) % self.pool_stride == 0):
            if self.pool_stride == self.pool_size:
                raise ValueError(
                    "detector_layer_dim = %d, pool_size = %d. Should be divisible but remainder is %d"
                    % (self.detector_layer_dim, self.pool_size, self.detector_layer_dim % self.pool_size)
                )
            raise ValueError()

        self.h_space = VectorSpace(self.detector_layer_dim)
        self.pool_layer_dim = (self.detector_layer_dim - self.pool_size) / self.pool_stride + 1
        self.output_space = VectorSpace(self.pool_layer_dim)

        rng = self.mlp.rng
        if self.irange is not None:
            assert self.sparse_init is None
            W = rng.uniform(-self.irange, self.irange, (self.input_dim, self.detector_layer_dim)) * (
                rng.uniform(0.0, 1.0, (self.input_dim, self.detector_layer_dim)) < self.include_prob
            )
        else:
            assert self.sparse_init is not None
            W = np.zeros((self.input_dim, self.detector_layer_dim))

            def mask_rejects(idx, i):
                if self.mask_weights is None:
                    return False
                return self.mask_weights[idx, i] == 0.0

            for i in xrange(self.detector_layer_dim):
                assert self.sparse_init <= self.input_dim
                for j in xrange(self.sparse_init):
                    idx = rng.randint(0, self.input_dim)
                    while W[idx, i] != 0 or mask_rejects(idx, i):
                        idx = rng.randint(0, self.input_dim)
                    W[idx, i] = rng.randn()
            W *= self.sparse_stdev

        W = sharedX(W)
        W.name = self.layer_name + "_W"

        self.transformer = MatrixMul(W)

        W, = self.transformer.get_params()
        assert W.name is not None

        if not hasattr(self, "randomize_pools"):
            self.randomize_pools = False

        if self.randomize_pools:
            permute = np.zeros((self.detector_layer_dim, self.detector_layer_dim))
            for j in xrange(self.detector_layer_dim):
                i = rng.randint(self.detector_layer_dim)
                permute[i, j] = 1
            self.permute = sharedX(permute)

        if self.mask_weights is not None:
            expected_shape = (self.input_dim, self.detector_layer_dim)
            if expected_shape != self.mask_weights.shape:
                raise ValueError(
                    "Expected mask with shape " + str(expected_shape) + " but got " + str(self.mask_weights.shape)
                )
            self.mask = sharedX(self.mask_weights)

    def censor_updates(self, updates):

        # Patch old pickle files
        if not hasattr(self, "mask_weights"):
            self.mask_weights = None

        if self.mask_weights is not None:
            W, = self.transformer.get_params()
            if W in updates:
                updates[W] = updates[W] * self.mask

        if self.max_col_norm is not None:
            assert self.max_row_norm is None
            W, = self.transformer.get_params()
            if W in updates:
                updated_W = updates[W]
                col_norms = T.sqrt(T.sum(T.sqr(updated_W), axis=0))
                desired_norms = T.clip(col_norms, 0, self.max_col_norm)
                updates[W] = updated_W * (desired_norms / (1e-7 + col_norms))

    def get_params(self):
        assert self.b.name is not None
        W, = self.transformer.get_params()
        assert W.name is not None
        rval = self.transformer.get_params()
        assert not isinstance(rval, set)
        rval = list(rval)
        assert self.b not in rval
        rval.append(self.b)
        return rval

    def get_weight_decay(self, coeff):
        if isinstance(coeff, str):
            coeff = float(coeff)
        assert isinstance(coeff, float) or hasattr(coeff, "dtype")
        W, = self.transformer.get_params()
        return coeff * T.sqr(W).sum()

    def get_l1_weight_decay(self, coeff):
        if isinstance(coeff, str):
            coeff = float(coeff)
        assert isinstance(coeff, float) or hasattr(coeff, "dtype")
        W, = self.transformer.get_params()
        return coeff * T.abs(W).sum()

    def get_weights(self):
        if self.requires_reformat:
            # This is not really an unimplemented case.
            # We actually don't know how to format the weights
            # in design space. We got the data in topo space
            # and we don't have access to the dataset
            raise NotImplementedError()
        W, = self.transformer.get_params()
        W = W.get_value()

        if not hasattr(self, "randomize_pools"):
            self.randomize_pools = False

        if self.randomize_pools:
            warnings.warn(
                "randomize_pools makes get_weights multiply by the permutation matrix. "
                "If you call set_weights(W) and then call get_weights(), the return value will "
                "WP not W."
            )
            P = self.permute.get_value()
            return np.dot(W, P)

        return W

    def set_weights(self, weights):
        W, = self.transformer.get_params()
        W.set_value(weights)

    def set_biases(self, biases):
        self.b.set_value(biases)

    def get_biases(self):
        return self.b.get_value()

    def get_weights_format(self):
        return ("v", "h")

    def get_weights_view_shape(self):
        total = self.detector_layer_dim
        cols = self.pool_size
        if cols == 1:
            # Let the PatchViewer decide how to arrange the units
            # when they're not pooled
            raise NotImplementedError()
        # When they are pooled, make each pooling unit have one row
        rows = total // cols
        if rows * cols < total:
            rows = rows + 1
        return rows, cols

    def get_weights_topo(self):

        if not isinstance(self.input_space, Conv2DSpace):
            raise NotImplementedError()

        # There was an implementation of this, but it was broken
        raise NotImplementedError()

    def get_monitoring_channels(self):

        W, = self.transformer.get_params()

        assert W.ndim == 2

        sq_W = T.sqr(W)

        row_norms = T.sqrt(sq_W.sum(axis=1))
        col_norms = T.sqrt(sq_W.sum(axis=0))

        return OrderedDict(
            [
                ("row_norms_min", row_norms.min()),
                ("row_norms_mean", row_norms.mean()),
                ("row_norms_max", row_norms.max()),
                ("col_norms_min", col_norms.min()),
                ("col_norms_mean", col_norms.mean()),
                ("col_norms_max", col_norms.max()),
            ]
        )

    def get_monitoring_channels_from_state(self, state):

        P = state

        rval = OrderedDict()

        if self.pool_size == 1:
            vars_and_prefixes = [(P, "")]
        else:
            vars_and_prefixes = [(P, "p_")]

        for var, prefix in vars_and_prefixes:
            v_max = var.max(axis=0)
            v_min = var.min(axis=0)
            v_mean = var.mean(axis=0)
            v_range = v_max - v_min

            # max_x.mean_u is "the mean over *u*nits of the max over e*x*amples"
            # The x and u are included in the name because otherwise its hard
            # to remember which axis is which when reading the monitor
            # I use inner.outer rather than outer_of_inner or something like that
            # because I want mean_x.* to appear next to each other in the alphabetical
            # list, as these are commonly plotted together
            for key, val in [
                ("max_x.max_u", v_max.max()),
                ("max_x.mean_u", v_max.mean()),
                ("max_x.min_u", v_max.min()),
                ("min_x.max_u", v_min.max()),
                ("min_x.mean_u", v_min.mean()),
                ("min_x.min_u", v_min.min()),
                ("range_x.max_u", v_range.max()),
                ("range_x.mean_u", v_range.mean()),
                ("range_x.min_u", v_range.min()),
                ("mean_x.max_u", v_mean.max()),
                ("mean_x.mean_u", v_mean.mean()),
                ("mean_x.min_u", v_mean.min()),
            ]:
                rval[prefix + key] = val

        return rval

    def fprop(self, state_below):

        self.input_space.validate(state_below)

        if self.requires_reformat:
            if not isinstance(state_below, tuple):
                for sb in get_debug_values(state_below):
                    if sb.shape[0] != self.dbm.batch_size:
                        raise ValueError(
                            "self.dbm.batch_size is %d but got shape of %d" % (self.dbm.batch_size, sb.shape[0])
                        )
                    assert reduce(lambda x, y: x * y, sb.shape[1:]) == self.input_dim

            state_below = self.input_space.format_as(state_below, self.desired_space)

        z = self.transformer.lmul(state_below) + self.b

        if not hasattr(self, "randomize_pools"):
            self.randomize_pools = False

        if not hasattr(self, "pool_stride"):
            self.pool_stride = self.pool_size

        if self.randomize_pools:
            z = T.dot(z, self.permute)

        if not hasattr(self, "min_zero"):
            self.min_zero = False

        if self.min_zero:
            p = T.zeros_like(z)
        else:
            p = None

        last_start = self.detector_layer_dim - self.pool_size
        for i in xrange(self.pool_size):
            cur = z[:, i : last_start + i + 1 : self.pool_stride]
            if p is None:
                p = cur
            else:
                p = T.maximum(cur, p)

        p.name = self.layer_name + "_p_"

        return p

    def foo(self, state_below):

        self.input_space.validate(state_below)

        if self.requires_reformat:
            if not isinstance(state_below, tuple):
                for sb in get_debug_values(state_below):
                    if sb.shape[0] != self.dbm.batch_size:
                        raise ValueError(
                            "self.dbm.batch_size is %d but got shape of %d" % (self.dbm.batch_size, sb.shape[0])
                        )
                    assert reduce(lambda x, y: x * y, sb.shape[1:]) == self.input_dim

            state_below = self.input_space.format_as(state_below, self.desired_space)

        z = self.transformer.lmul(state_below) + self.b

        if not hasattr(self, "randomize_pools"):
            self.randomize_pools = False

        if not hasattr(self, "pool_stride"):
            self.pool_stride = self.pool_size

        if self.randomize_pools:
            z = T.dot(z, self.permute)

        if not hasattr(self, "min_zero"):
            self.min_zero = False

        if self.min_zero:
            p = T.zeros_like(z)
        else:
            p = None

        last_start = self.detector_layer_dim - self.pool_size

        pooling_stack = []
        for i in xrange(self.pool_size):
            cur = z[:, i : last_start + i + 1 : self.pool_stride]
            cur = cur.reshape((cur.shape[0], cur.shape[1], 1))
            assert cur.ndim == 3
            pooling_stack.append(cur)
        if self.min_zero:
            pooling_stack.append(T.zeros_like(cur))
        pooling_stack = T.concatenate(pooling_stack, axis=2)
        p = pooling_stack.max(axis=2)
        counts = (T.eq(pooling_stack, p.dimshuffle(0, 1, "x"))).sum(axis=0)

        p.name = self.layer_name + "_p_"

        return p, counts
class WeightedLogNormalLogLikelihood(Layer):

    __metaclass__ = RNNWrapper

    def __init__(self, layer_name, irange=0.0, init_bias=0.):
        super(WeightedLogNormalLogLikelihood, self).__init__()
        self.__dict__.update(locals())
        del self.self
        self.dim = 2

        self.b = sharedX(np.zeros((self.dim,)) + init_bias,
                             name=(layer_name + '_b'))

    @wraps(Layer.set_input_space)
    def set_input_space(self, space):

        self.input_space = space

        if isinstance(space, VectorSpace):
            self.requires_reformat = False
            self.input_dim = space.dim
        else:
            self.requires_reformat = True
            self.input_dim = space.get_total_dimension()
            self.desired_space = VectorSpace(self.input_dim)

        self.output_space = VectorSpace(self.dim)

        rng = self.mlp.rng

        W = rng.uniform(-self.irange,
                        self.irange,
                        (self.input_dim, self.dim))

        W = sharedX(W)
        W.name = self.layer_name + '_W'

        self.transformer = MatrixMul(W)

        W, = self.transformer.get_params()
        assert W.name is not None


    @wraps(Layer.get_params)
    def get_params(self):

        W, = self.transformer.get_params()
        assert W.name is not None
        rval = self.transformer.get_params()
        assert not isinstance(rval, set)
        rval = list(rval)
        assert self.b.name is not None
        assert self.b not in rval
        rval.append(self.b)
        return rval


    @wraps(Layer.get_weights)
    def get_weights(self):

        if self.requires_reformat:
            # This is not really an unimplemented case.
            # We actually don't know how to format the weights
            # in design space. We got the data in topo space
            # and we don't have access to the dataset
            raise NotImplementedError()
        W, = self.transformer.get_params()

        W = W.get_value()

        return W

    @wraps(Layer.set_weights)
    def set_weights(self, weights):

        W, = self.transformer.get_params()
        W.set_value(weights)

    @wraps(Layer.set_biases)
    def set_biases(self, biases):

        self.b.set_value(biases)

    @wraps(Layer.get_biases)
    def get_biases(self):
        """
        .. todo::
            WRITEME
        """
        return self.b.get_value()

    @wraps(Layer.get_weights_format)
    def get_weights_format(self):

        return ('v', 'h')

    @wraps(Layer.get_weights_topo)
    def get_weights_topo(self):

        if not isinstance(self.input_space, Conv2DSpace):
            raise NotImplementedError()

        W, = self.transformer.get_params()

        W = W.T

        W = W.reshape((self.dim, self.input_space.shape[0],
                       self.input_space.shape[1],
                       self.input_space.num_channels))

        W = Conv2DSpace.convert(W, self.input_space.axes, ('b', 0, 1, 'c'))

        return function([], W)()

    @wraps(Layer.get_layer_monitoring_channels)
    def get_layer_monitoring_channels(self, state_below=None,
                                      state=None, targets=None):
        W, = self.transformer.get_params()

        assert W.ndim == 2

        sq_W = T.sqr(W)

        row_norms = T.sqrt(sq_W.sum(axis=1))
        col_norms = T.sqrt(sq_W.sum(axis=0))

        rval = OrderedDict([('row_norms_min',  row_norms.min()),
                            ('row_norms_mean', row_norms.mean()),
                            ('row_norms_max',  row_norms.max()),
                            ('col_norms_min',  col_norms.min()),
                            ('col_norms_mean', col_norms.mean()),
                            ('col_norms_max',  col_norms.max()), ])

        if (state is not None) or (state_below is not None):
            if state is None:
                state = self.fprop(state_below)

            mx = state.max(axis=0)
            mean = state.mean(axis=0)
            mn = state.min(axis=0)
            rg = mx - mn

            rval['range_x_max_u'] = rg.max()
            rval['range_x_mean_u'] = rg.mean()
            rval['range_x_min_u'] = rg.min()

            rval['max_x_max_u'] = mx.max()
            rval['max_x_mean_u'] = mx.mean()
            rval['max_x_min_u'] = mx.min()

            rval['mean_x_max_u'] = mean.max()
            rval['mean_x_mean_u'] = mean.mean()
            rval['mean_x_min_u'] = mean.min()

            rval['min_x_max_u'] = mn.max()
            rval['min_x_mean_u'] = mn.mean()
            rval['min_x_min_u'] = mn.min()

        if targets:
            y_target = targets[:, 0]
            cost_multiplier = targets[:, 1]
            mean = state[:, 0]
            sigma = T.exp(state[:, 1])
            nll = self.logprob(y_target, mean, sigma)
            prob_vector = T.exp(-nll)
            rval['prob'] = (prob_vector * cost_multiplier).sum() / (1.0 * cost_multiplier.sum())
            rval['ppl'] = T.exp((nll* cost_multiplier).sum() / (1.0 * cost_multiplier.sum()))
        return rval

    def _linear_part(self, state_below):
        """
        Parameters
        ----------
        state_below : member of input_space
        Returns
        -------
        output : theano matrix
            Affine transformation of state_below
        """
        self.input_space.validate(state_below)

        if self.requires_reformat:
            state_below = self.input_space.format_as(state_below,
                                                     self.desired_space)

        z = self.transformer.lmul(state_below)
        z += self.b

        if self.layer_name is not None:
            z.name = self.layer_name + '_z'

        return z

    @wraps(Layer.fprop)
    def fprop(self, state_below):
        p = self._linear_part(state_below)
        return p


    def logprob(self, y_target, mean, sigma):
        return (((T.log(y_target) - mean) ** 2 / (2 * sigma ** 2) + T.log(y_target * sigma * T.sqrt(2 * np.pi))))

    @wraps(Layer.cost)
    def cost(self, Y, Y_hat):
        mean = Y_hat[:, 0] #+ 1.6091597151048114
        sigma = T.exp(Y_hat[:, 1]) #+ 0.26165911509618789
        y_target = Y[:, 0]
        cost_multiplier = Y[:, 1]
        return (self.logprob(y_target, mean, sigma) * cost_multiplier).sum() / (1.0 * cost_multiplier.sum())