Example no. 1
1
    def _init_params(self):
        # Left weight matrix
        self.W_hh = theano.shared(
            self.init_fn(self.n_hids, self.n_hids, self.sparsity, self.scale, rng=self.rng), name="W_%s" % self.name
        )
        self.params = [self.W_hh]
        # Right weight matrix
        self.U_hh = theano.shared(
            self.init_fn(self.n_hids, self.n_hids, self.sparsity, self.scale, rng=self.rng), name="U_%s" % self.name
        )
        self.params += [self.U_hh]
        # Bias
        self.b_hh = theano.shared(self.bias_fn(self.n_hids, self.bias_scale, self.rng), name="b_%s" % self.name)
        self.params += [self.b_hh]
        # gaters
        # if self.conv_mode == "conv":
        self.GW_hh = theano.shared(numpy.float32(0.01 * self.rng.randn(self.n_hids, 3)), name="GW_%s" % self.name)
        self.params += [self.GW_hh]
        self.GU_hh = theano.shared(numpy.float32(0.01 * self.rng.randn(self.n_hids, 3)), name="GU_%s" % self.name)
        self.params += [self.GU_hh]
        self.Gb_hh = theano.shared(self.bias_fn(3, self.bias_scale, self.rng), name="Gb_%s" % self.name)
        self.params += [self.Gb_hh]

        self.params_grad_scale = [self.grad_scale for x in self.params]
        self.restricted_params = [x for x in self.params]
        if self.weight_noise:
            self.nW_hh = theano.shared(self.W_hh.get_value() * 0, name="noise_" + self.W_hh.name)
            self.nU_hh = theano.shared(self.U_hh.get_value() * 0, name="noise_" + self.U_hh.name)
            self.nb_hh = theano.shared(self.b_hh.get_value() * 0, name="noise_" + self.b_hh.name)
            self.noise_params = [self.nW_hh, self.nU_hh, self.nb_hh]
            self.noise_params_shape_fn = [constant_shape(x.get_value().shape) for x in self.noise_params]
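
Every snippet in this section calls a few helpers that the excerpts never define (constant_shape, bias_fn, init_fn). A minimal sketch of the first two, inferred from their call sites; the real definitions may differ:

import numpy

def constant_shape(shape):
    # Wrap a fixed shape in a callable: the noise machinery above stores one
    # such callable per noise parameter instead of the shape itself.
    return lambda *args, **kwargs: shape

def init_bias(size, bias_scale, rng):
    # A plausible bias_fn matching the (size, scale, rng) call signature seen
    # above: a constant vector filled with bias_scale.
    return (bias_scale * numpy.ones((size,))).astype('float32')
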
Example no. 2
0
    def _init_params(self):
        self.W_hhs = []
        self.W_shortp = []
        for dx in xrange(self.n_layers):
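            # Circular index: layer dx's recurrent weights map from the previous
            # layer's width, so layer 0 wraps around to the last layer's width.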
            W_hh = self.init_fn[dx](self.n_hids[(dx-1)%self.n_layers],
                                        self.n_hids[dx],
                                        self.sparsity[dx],
                                        self.scale[dx],
                                        rng=self.rng)
            self.W_hhs.append(theano.shared(value=W_hh, name="W%d_%s" %
                                       (dx,self.name)))

            if dx > 0:
                W_shp = self.init_fn[dx](self.n_hids[self.n_layers-1],
                                         self.n_hids[dx],
                                         self.sparsity[dx],
                                         self.scale[dx],
                                         rng=self.rng)
                self.W_shortp.append(theano.shared(value=W_shp,
                                               name='W_s%d_%s'%(dx,self.name)))
        self.params = [x for x in self.W_hhs] + [x for x in self.W_shortp]

        self.params_grad_scale = [self.grad_scale for x in self.params]
        self.restricted_params = [x for x in self.params]

        if self.weight_noise:
            self.nW_hhs = [theano.shared(x.get_value()*0, name='noise_'+x.name) for x in self.W_hhs]
            self.nW_shortp = [theano.shared(x.get_value()*0, name='noise_'+x.name) for x in self.W_shortp]

            self.noise_params = [x for x in self.nW_hhs] + [x for x in self.nW_shortp]
            self.noise_params_shape_fn = [constant_shape(x.get_value().shape) for x in self.noise_params]
Example no. 3
0
 def _init_params(self):
     self.W_hhs = []
     self.b_hhs = []
     for dx in xrange(self.n_layers):
         W_hh = self.init_fn[dx](self.n_hids[(dx - 1) % self.n_layers],
                                 self.n_hids[dx],
                                 self.sparsity[dx],
                                 self.scale[dx],
                                 rng=self.rng)
         self.W_hhs.append(
             theano.shared(value=W_hh, name="W%d_%s" % (dx, self.name)))
         if dx < self.n_layers - 1:
             self.b_hhs.append(
                 theano.shared(self.bias_fn[dx](self.n_hids[dx],
                                                self.bias_scale[dx],
                                                self.rng),
                               name='b%d_%s' % (dx, self.name)))
     self.params = [x for x in self.W_hhs] + [x for x in self.b_hhs]
     self.params_grad_scale = [self.grad_scale for x in self.params]
     self.restricted_params = [x for x in self.params]
     if self.weight_noise:
         self.nW_hhs = [
             theano.shared(x.get_value() * 0, name='noise_' + x.name)
             for x in self.W_hhs
         ]
         self.nb_hhs = [
             theano.shared(x.get_value() * 0, name='noise_' + x.name)
             for x in self.b_hhs
         ]
         self.noise_params = [x for x in self.nW_hhs
                              ] + [x for x in self.nb_hhs]
         self.noise_params_shape_fn = [
             constant_shape(x.get_value().shape) for x in self.noise_params
         ]
Example no. 4
0
 def _init_params(self):
     self.W_hh = theano.shared(self.init_fn(self.n_hids,
                                            self.n_hids,
                                            self.sparsity,
                                            self.scale,
                                            rng=self.rng),
                               name="W_%s" % self.name)
     self.params = [self.W_hh]
     if self.gating:
         self.G_hh = theano.shared(self.init_fn(self.n_hids,
                                                self.n_hids,
                                                self.sparsity,
                                                self.scale,
                                                rng=self.rng),
                                   name="G_%s" % self.name)
         self.params.append(self.G_hh)
     if self.reseting:
         self.R_hh = theano.shared(self.init_fn(self.n_hids,
                                                self.n_hids,
                                                self.sparsity,
                                                self.scale,
                                                rng=self.rng),
                                   name="R_%s" % self.name)
         self.params.append(self.R_hh)
     self.params_grad_scale = [self.grad_scale for x in self.params]
     self.restricted_params = [x for x in self.params]
     if self.weight_noise:
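         # NB: this block assumes gating is enabled; self.G_hh exists only when
         # self.gating is true, and self.R_hh never gets a noise counterpart.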
         self.nW_hh = theano.shared(self.W_hh.get_value() * 0,
                                    name='noise_' + self.W_hh.name)
         self.nG_hh = theano.shared(self.G_hh.get_value() * 0,
                                    name='noise_' + self.G_hh.name)
         self.noise_params = [self.nW_hh, self.nG_hh]
         self.noise_params_shape_fn = [
             constant_shape(x.get_value().shape) for x in self.noise_params
         ]
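
A minimal sketch (an assumption, not necessarily this library's exact mechanism) of how the noise parameters set up above are typically consumed: during training the noise shared variables are resampled and added to the weights when the recurrent graph is built, and zeroed at evaluation time.

import theano.tensor as TT

def effective_weight(W, nW, use_noise):
    # use_noise is a plain Python flag in this sketch; the real code may gate
    # the noise term symbolically instead.
    return W + nW if use_noise else W

# E.g. one recurrent pre-activation, with hypothetical x_t, h_tm1, W_in, b_hh:
# h_t = TT.tanh(TT.dot(x_t, W_in)
#               + TT.dot(h_tm1, effective_weight(W_hh, nW_hh, True)) + b_hh)
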
Example no. 5
0
 def _init_params(self):
     self.W_hh = theano.shared(
             self.init_fn(self.n_hids,
             self.n_hids,
             self.sparsity,
             self.scale,
             rng=self.rng),
             name="W_%s"%self.name)
     self.params = [self.W_hh]
     if self.gating:
         self.G_hh = theano.shared(
                 self.init_fn(self.n_hids,
                 self.n_hids,
                 self.sparsity,
                 self.scale,
                 rng=self.rng),
                 name="G_%s"%self.name)
         self.params.append(self.G_hh)
     if self.reseting:
         self.R_hh = theano.shared(
                 self.init_fn(self.n_hids,
                 self.n_hids,
                 self.sparsity,
                 self.scale,
                 rng=self.rng),
                 name="R_%s"%self.name)
         self.params.append(self.R_hh)
     self.params_grad_scale = [self.grad_scale for x in self.params]
     self.restricted_params = [x for x in self.params]
     if self.weight_noise:
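         # NB: as in Example no. 4, self.G_hh is only defined when self.gating is true.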
         self.nW_hh = theano.shared(self.W_hh.get_value()*0, name='noise_'+self.W_hh.name)
         self.nG_hh = theano.shared(self.G_hh.get_value()*0, name='noise_'+self.G_hh.name)
         self.noise_params = [self.nW_hh,self.nG_hh]
         self.noise_params_shape_fn = [constant_shape(x.get_value().shape)
                         for x in self.noise_params]
Example no. 6
0
    def _init_params(self):
        # Left weight matrix
        self.W_hh = theano.shared(
                self.init_fn(self.n_hids,
                self.n_hids,
                self.sparsity,
                self.scale,
                rng=self.rng),
                name="W_%s"%self.name)
        self.params = [self.W_hh]
        # Right weight matrix
        self.U_hh = theano.shared(
                self.init_fn(self.n_hids,
                self.n_hids,
                self.sparsity,
                self.scale,
                rng=self.rng),
                name="U_%s"%self.name)
        self.params += [self.U_hh]
        # Bias
        self.b_hh = theano.shared(
            self.bias_fn(self.n_hids,
                self.bias_scale,
                self.rng),
            name='b_%s' %self.name)
        self.params += [self.b_hh]
        # gaters
        self.GW_hh = theano.shared(
                numpy.float32(0.01 * self.rng.randn(self.n_hids, 3)),
                name="GW_%s"%self.name)
        self.params += [self.GW_hh]
        self.GU_hh = theano.shared(
                numpy.float32(0.01 * self.rng.randn(self.n_hids, 3)),
                name="GU_%s"%self.name)
        self.params += [self.GU_hh]
        self.Gb_hh = theano.shared(
            self.bias_fn(3,
                self.bias_scale,
                self.rng),
            name='Gb_%s' %self.name)
        self.params += [self.Gb_hh]

        self.params_grad_scale = [self.grad_scale for x in self.params]
        self.restricted_params = [x for x in self.params]
        if self.weight_noise:
            self.nW_hh = theano.shared(self.W_hh.get_value()*0, name='noise_'+self.W_hh.name)
            self.nU_hh = theano.shared(self.U_hh.get_value()*0, name='noise_'+self.U_hh.name)
            self.nb_hh = theano.shared(self.b_hh.get_value()*0, name='noise_'+self.b_hh.name)
            self.noise_params = [self.nW_hh,self.nU_hh,self.nb_hh]
            self.noise_params_shape_fn = [constant_shape(x.get_value().shape)
                            for x in self.noise_params]
Example no. 7
0
    def _init_params(self):
        """
        Initialize the parameters of the layer, either by using sparse initialization or small
        isotropic noise.
        """
        self.W_ems = []
        self.b_ems = []
        if self.rank_n_approx:
            W_em1 = self.init_fn[0](self.n_in,
                                 self.rank_n_approx,
                                 self.sparsity[0],
                                 self.scale[0],
                                 self.rng)
            W_em2 = self.init_fn[0](self.rank_n_approx,
                                 self.n_hids[0],
                                 self.sparsity[0],
                                 self.scale[0],
                                 self.rng)
            self.W_em1 = theano.shared(W_em1,
                                       name='W1_0_%s'%self.name)
            self.W_em2 = theano.shared(W_em2,
                                       name='W2_0_%s'%self.name)
            self.W_ems = [self.W_em1, self.W_em2]

        else:
            W_em = self.init_fn[0](self.n_in,
                                self.n_hids[0],
                                self.sparsity[0],
                                self.scale[0],
                                self.rng)
            self.W_em = theano.shared(W_em,
                                      name='W_0_%s'%self.name)
            self.W_ems = [self.W_em]

        self.b_em = theano.shared(
            self.bias_fn[0](self.n_hids[0], self.bias_scale[0],self.rng),
            name='b_0_%s'%self.name)
        self.b_ems = [self.b_em]

        for dx in xrange(1, self.n_layers):
            W_em = self.init_fn[dx](self.n_hids[dx-1],# / self.pieces[dx],
                                self.n_hids[dx],
                                self.sparsity[dx],
                                self.scale[dx],
                                self.rng)
            W_em = theano.shared(W_em,
                                      name='W_%d_%s'%(dx,self.name))
            self.W_ems += [W_em]

            b_em = theano.shared(
                self.bias_fn[dx](self.n_hids[dx], self.bias_scale[dx],self.rng),
                name='b_%d_%s'%(dx,self.name))
            self.b_ems += [b_em]

        self.params = [x for x in self.W_ems]

        if self.learn_bias and self.learn_bias!='last':
            self.params = [x for x in self.W_ems] + [x for x in self.b_ems]
        elif self.learn_bias == 'last':
            self.params = [x for x in self.W_ems] + [x for x in
                                                     self.b_ems][:-1]
        self.params_grad_scale = [self._grad_scale for x in self.params]
        if self.weight_noise:
            self.nW_ems = [theano.shared(x.get_value()*0, name='noise_'+x.name) for x in self.W_ems]
            self.nb_ems = [theano.shared(x.get_value()*0, name='noise_'+x.name) for x in self.b_ems]

            self.noise_params = [x for x in self.nW_ems] + [x for x in self.nb_ems]
            self.noise_params_shape_fn = [constant_shape(x.get_value().shape)
                            for x in self.noise_params]
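
The docstring names two schemes: sparse initialization and small isotropic noise. A minimal sketch of what the per-layer init_fn(nin, nout, sparsity, scale, rng) callables could look like; the exact conventions (e.g. how a non-positive sparsity is treated) are assumptions:

import numpy

def sample_weights_classic(nin, nout, sparsity, scale, rng):
    # Small isotropic noise: dense Gaussian initialization with std scale.
    return (scale * rng.randn(nin, nout)).astype('float32')

def sample_weights_sparse(nin, nout, sparsity, scale, rng):
    # Sparse initialization: each output unit receives exactly `sparsity`
    # nonzero incoming weights, drawn from a Gaussian with std scale.
    W = numpy.zeros((nin, nout), dtype='float32')
    for k in range(nout):
        idx = rng.permutation(nin)[:sparsity]
        W[idx, k] = (scale * rng.randn(sparsity)).astype('float32')
    return W
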
Example no. 8
0
    def _init_params(self):
        """
        Initialize the parameters of the layer, either by using sparse initialization or small
        isotropic noise.
        """
        if self.rank_n_approx:
            W_em1 = self.init_fn(self.nin,
                                 self.rank_n_approx,
                                 self.sparsity,
                                 self.scale,
                                 self.rng)
            W_em2 = self.init_fn(self.rank_n_approx,
                                 self.nout,
                                 self.sparsity,
                                 self.scale,
                                 self.rng)
            self.W_em1 = theano.shared(W_em1,
                                       name='W1_%s' % self.name)
            self.W_em2 = theano.shared(W_em2,
                                       name='W2_%s' % self.name)
            self.b_em = theano.shared(
                numpy.zeros((self.nout,), dtype=theano.config.floatX),
                name='b_%s' % self.name)
            self.params += [self.W_em1, self.W_em2, self.b_em]
            self.myparams = []  # [self.W_em1, self.W_em2, self.b_em]
            if self.weight_noise:
                self.nW_em1 = theano.shared(W_em1 * 0.,
                                            name='noise_W1_%s' % self.name)
                self.nW_em2 = theano.shared(W_em2 * 0.,
                                            name='noise_W2_%s' % self.name)
                self.nb_em = theano.shared(self.b_em.get_value() * 0.,
                                           name='noise_b_%s' % self.name)
                self.noise_params = [self.nW_em1, self.nW_em2, self.nb_em]
                self.noise_params_shape_fn = [
                    constant_shape(x.get_value().shape)
                    for x in self.noise_params]

        else:
            W_em = self.init_fn(self.nin,
                                self.nout,
                                self.sparsity,
                                self.scale,
                                self.rng)
            self.W_em = theano.shared(W_em,
                                      name='W_%s' % self.name)
            self.b_em = theano.shared(
                numpy.zeros((self.nout,), dtype=theano.config.floatX),
                name='b_%s' % self.name)
            self.add_wghs = []
            self.n_add_wghs = []
            if self.additional_inputs:
                for pos, sz in enumerate(self.additional_inputs):
                    W_add = self.init_fn(sz,
                                         self.nout,
                                         self.sparsity,
                                         self.scale,
                                         self.rng)
                    self.add_wghs += [theano.shared(W_add,
                                                    name='W_add%d_%s' % (pos, self.name))]
                    if self.weight_noise:
                        self.n_add_wghs += [theano.shared(W_add * 0.,
                                                          name='noise_W_add%d_%s' % (pos,
                                                                                     self.name))]

            self.params += [self.W_em, self.b_em] + self.add_wghs
            self.myparams = []  # [self.W_em, self.b_em] + self.add_wghs
            if self.weight_noise:
                self.nW_em = theano.shared(W_em * 0.,
                                           name='noise_W_%s' % self.name)
                self.nb_em = theano.shared(numpy.zeros((self.nout,),
                                                       dtype=theano.config.floatX),
                                           name='noise_b_%s' % self.name)
                self.noise_params = [self.nW_em, self.nb_em] + self.n_add_wghs
                self.noise_params_shape_fn = [
                    constant_shape(x.get_value().shape)
                    for x in self.noise_params]
Example no. 9
0
    def _init_params(self):
        """
        Initialize the parameters of the layer, either by using sparse initialization or small
        isotropic noise.
        """
        if self.rank_n_approx:
            W_em1 = self.init_fn(self.nin, self.rank_n_approx, self.sparsity,
                                 self.scale, self.rng)
            W_em2 = self.init_fn(self.rank_n_approx, self.nout, self.sparsity,
                                 self.scale, self.rng)
            self.W_em1 = theano.shared(W_em1, name='W1_%s' % self.name)
            self.W_em2 = theano.shared(W_em2, name='W2_%s' % self.name)
            self.b_em = theano.shared(numpy.zeros((self.nout, ),
                                                  dtype=theano.config.floatX),
                                      name='b_%s' % self.name)
            self.params += [self.W_em1, self.W_em2, self.b_em]
            self.myparams = []  #[self.W_em1, self.W_em2, self.b_em]
            if self.weight_noise:
                self.nW_em1 = theano.shared(W_em1 * 0.,
                                            name='noise_W1_%s' % self.name)
                self.nW_em2 = theano.shared(W_em2 * 0.,
                                            name='noise_W2_%s' % self.name)
                self.nb_em = theano.shared(self.b_em.get_value() * 0.,
                                           name='noise_b_%s' % self.name)
                self.noise_params = [self.nW_em1, self.nW_em2, self.nb_em]
                self.noise_params_shape_fn = [
                    constant_shape(x.get_value().shape)
                    for x in self.noise_params
                ]

        else:
            W_em = self.init_fn(self.nin, self.nout, self.sparsity, self.scale,
                                self.rng)
            self.W_em = theano.shared(W_em, name='W_%s' % self.name)
            self.b_em = theano.shared(numpy.zeros((self.nout, ),
                                                  dtype=theano.config.floatX),
                                      name='b_%s' % self.name)
            self.add_wghs = []
            self.n_add_wghs = []
            if self.additional_inputs:
                for pos, sz in enumerate(self.additional_inputs):
                    W_add = self.init_fn(sz, self.nout, self.sparsity,
                                         self.scale, self.rng)
                    self.add_wghs += [
                        theano.shared(W_add,
                                      name='W_add%d_%s' % (pos, self.name))
                    ]
                    if self.weight_noise:
                        self.n_add_wghs += [
                            theano.shared(W_add * 0.,
                                          name='noise_W_add%d_%s' %
                                          (pos, self.name))
                        ]

            self.params += [self.W_em, self.b_em] + self.add_wghs
            self.myparams = []  #[self.W_em, self.b_em] + self.add_wghs
            if self.weight_noise:
                self.nW_em = theano.shared(W_em * 0.,
                                           name='noise_W_%s' % self.name)
                self.nb_em = theano.shared(numpy.zeros(
                    (self.nout, ), dtype=theano.config.floatX),
                                           name='noise_b_%s' % self.name)
                self.noise_params = [self.nW_em, self.nb_em] + self.n_add_wghs
                self.noise_params_shape_fn = [
                    constant_shape(x.get_value().shape)
                    for x in self.noise_params
                ]
Example no. 10
0
    def _init_params(self):
        """
        Initialize the parameters of the layer, either by using sparse initialization or small
        isotropic noise.
        """
        self.W_ems = []
        self.b_ems = []
        if self.rank_n_approx:
            W_em1 = self.init_fn[0](self.n_in, self.rank_n_approx,
                                    self.sparsity[0], self.scale[0], self.rng)
            W_em2 = self.init_fn[0](self.rank_n_approx, self.n_hids[0],
                                    self.sparsity[0], self.scale[0], self.rng)
            self.W_em1 = theano.shared(W_em1, name='W1_0_%s' % self.name)
            self.W_em2 = theano.shared(W_em2, name='W2_0_%s' % self.name)
            self.W_ems = [self.W_em1, self.W_em2]

        else:
            W_em = self.init_fn[0](self.n_in, self.n_hids[0], self.sparsity[0],
                                   self.scale[0], self.rng)
            self.W_em = theano.shared(W_em, name='W_0_%s' % self.name)
            self.W_ems = [self.W_em]

        self.b_em = theano.shared(self.bias_fn[0](self.n_hids[0],
                                                  self.bias_scale[0],
                                                  self.rng),
                                  name='b_0_%s' % self.name)
        self.b_ems = [self.b_em]

        for dx in xrange(1, self.n_layers):
            W_em = self.init_fn[dx](self.n_hids[dx - 1] / self.pieces[dx],
                                    self.n_hids[dx], self.sparsity[dx],
                                    self.scale[dx], self.rng)
            W_em = theano.shared(W_em, name='W_%d_%s' % (dx, self.name))
            self.W_ems += [W_em]

            b_em = theano.shared(self.bias_fn[dx](self.n_hids[dx],
                                                  self.bias_scale[dx],
                                                  self.rng),
                                 name='b_%d_%s' % (dx, self.name))
            self.b_ems += [b_em]

        self.params = [x for x in self.W_ems]

        if self.learn_bias and self.learn_bias != 'last':
            self.params = [x for x in self.W_ems] + [x for x in self.b_ems]
        elif self.learn_bias == 'last':
            self.params = [x for x in self.W_ems] + [x
                                                     for x in self.b_ems][:-1]
        self.params_grad_scale = [self._grad_scale for x in self.params]
        if self.weight_noise:
            self.nW_ems = [
                theano.shared(x.get_value() * 0, name='noise_' + x.name)
                for x in self.W_ems
            ]
            self.nb_ems = [
                theano.shared(x.get_value() * 0, name='noise_' + x.name)
                for x in self.b_ems
            ]

            self.noise_params = [x for x in self.nW_ems
                                 ] + [x for x in self.nb_ems]
            self.noise_params_shape_fn = [
                constant_shape(x.get_value().shape) for x in self.noise_params
            ]
Example no. 11
0
    def _init_params(self):
        """
        Initialize the parameters of the layer, either by using sparse initialization or small
        isotropic noise.
        """
        self.W_ems = []
        self.b_ems = []
        if self.rank_n_approx:
            W_em1 = self.init_fn[0](self.n_in,
                                 self.rank_n_approx,
                                 self.sparsity[0],
                                 self.scale[0],
                                 self.rng)
            W_em2 = self.init_fn[0](self.rank_n_approx,
                                 self.n_hids[0],
                                 self.sparsity[0],
                                 self.scale[0],
                                 self.rng)
            self.W_em1 = theano.shared(W_em1,
                                       name='W1_0_%s'%self.name)
            self.W_em2 = theano.shared(W_em2,
                                       name='W2_0_%s'%self.name)
            self.W_ems = [self.W_em1, self.W_em2]

        else:
            print 'embedding create'
            n_in = self.n_in
            if self.shared_embed:
                print 'word embedding shared', self.shared_embed.shape.eval()
                n_in = self.n_in - self.shared_embed.shape.eval()[0]
                print 'now word size', n_in

            W_em = self.init_fn[0](n_in,
                                self.n_hids[0],
                                self.sparsity[0],
                                self.scale[0],
                                self.rng)
            self.W_em = theano.shared(W_em,
                                      name='W_0_%s'%self.name)
            if self.shared_embed:
                self.prev_W_em = self.W_em
                self.W_em = TT.concatenate([self.shared_embed, self.W_em], axis=0)
            self.W_ems = [self.W_em]

        self.b_em = theano.shared(
            self.bias_fn[0](self.n_hids[0], self.bias_scale[0],self.rng),
            name='b_0_%s'%self.name)
        self.b_ems = [self.b_em]

        assert self.n_layers == 1

        for dx in xrange(1, self.n_layers):
            W_em = self.init_fn[dx](self.n_hids[dx-1] / self.pieces[dx],
                                self.n_hids[dx],
                                self.sparsity[dx],
                                self.scale[dx],
                                self.rng)
            W_em = theano.shared(W_em,
                                      name='W_%d_%s'%(dx,self.name))
            self.W_ems += [W_em]

            b_em = theano.shared(
                self.bias_fn[dx](self.n_hids[dx], self.bias_scale[dx],self.rng),
                name='b_%d_%s'%(dx,self.name))
            self.b_ems += [b_em]

        self.params = [x for x in self.W_ems]


        if self.shared_embed:
            self.params = [self.shared_embed, self.prev_W_em, self.b_ems[0]]
            self.params_grad_scale = [self._grad_scale for x in self.params]
            return
        
        if self.learn_bias and self.learn_bias!='last':
            self.params = [x for x in self.W_ems] + [x for x in self.b_ems]
        elif self.learn_bias == 'last':
            self.params = [x for x in self.W_ems] + [x for x in
                                                     self.b_ems][:-1]
        self.params_grad_scale = [self._grad_scale for x in self.params]
        if self.weight_noise:
            self.nW_ems = [theano.shared(x.get_value()*0, name='noise_'+x.name) for x in self.W_ems]
            self.nb_ems = [theano.shared(x.get_value()*0, name='noise_'+x.name) for x in self.b_ems]

            self.noise_params = [x for x in self.nW_ems] + [x for x in self.nb_ems]
            self.noise_params_shape_fn = [constant_shape(x.get_value().shape)
                            for x in self.noise_params]
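
The shared_embed branch above keeps a handle on the trainable block (prev_W_em) before overwriting W_em: after TT.concatenate, W_em is a symbolic expression rather than a shared variable, so only shared_embed and prev_W_em can appear among the trainable weights in self.params. A standalone sketch of that pattern, with illustrative names and sizes:

import numpy
import theano
import theano.tensor as TT

rng = numpy.random.RandomState(123)
# Pre-trained rows (shared across models) plus newly trainable rows.
shared_part = theano.shared(rng.randn(5000, 128).astype('float32'))
train_part = theano.shared(rng.randn(2000, 128).astype('float32'))
# Full lookup table: a symbolic (7000, 128) concatenation of both blocks.
W_full = TT.concatenate([shared_part, train_part], axis=0)
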
Example no. 12
0
    def _init_params(self):
        """
        Initialize the parameters of the layer, either by using sparse initialization or small
        isotropic noise.
        """
        self.W_ems = []
        #self.Q_ems = []
        #self.R_ems = []
        #self.b_ems = []

        # if you need rank_n_approx add it here
        Q_em = self.init_fn[0](self.n_in,
                   self.n_hids[0],
                   self.sparsity[0],
                   self.scale[0],
                   self.rng)
        Q_em = theano.shared(Q_em, name='Q_0_%s'%self.name)
        #self.Q_ems = [Q_em]

        R_em = self.init_fn[0](self.n_in,
                   self.n_hids[0],
                   self.sparsity[0],
                   self.scale[0],
                   self.rng)
        R_em = theano.shared(R_em, name='R_0_%s'%self.name)
        #self.R_ems = [R_em]

        self.W_ems = [Q_em, R_em]

        bq_em = theano.shared(
            self.bias_fn[0](self.n_hids[0], self.bias_scale[0],self.rng),
            name='b_q_0_%s'%self.name)
        br_em = theano.shared(
            self.bias_fn[0](self.n_hids[0], self.bias_scale[0],self.rng),
            name='b_r_0_%s'%self.name)
        self.b_ems = [bq_em, br_em]

        for dx in xrange(1, self.n_layers):
            # W_em = self.init_fn[dx](self.n_hids[dx-1] / self.pieces[dx], ...)
            Q_em = self.init_fn[dx](self.n_hids[dx-1],
                                self.n_hids[dx],
                                self.sparsity[dx],
                                self.scale[dx],
                                self.rng)
            Q_em = theano.shared(Q_em,
                                      name='Q_%d_%s'%(dx,self.name))
            R_em = self.init_fn[dx](self.n_hids[dx-1],
                                self.n_hids[dx],
                                self.sparsity[dx],
                                self.scale[dx],
                                self.rng)
            R_em = theano.shared(R_em,
                                      name='R_%d_%s'%(dx,self.name))
            self.W_ems += [Q_em, R_em]

            bq_em = theano.shared(
                self.bias_fn[dx](self.n_hids[dx], self.bias_scale[dx],self.rng),
                name='b_q_%d_%s'%(dx,self.name))

            br_em = theano.shared(
                self.bias_fn[dx](self.n_hids[dx], self.bias_scale[dx],self.rng),
                name='b_r_%d_%s'%(dx,self.name))

            self.b_ems += [bq_em, br_em]

        self.params = [x for x in self.W_ems]

        if self.learn_bias and self.learn_bias!='last':
            self.params = [x for x in self.W_ems] + [x for x in self.b_ems]
        elif self.learn_bias == 'last':
            self.params = [x for x in self.W_ems] + [x for x in self.b_ems][:-1]

        self.params_grad_scale = [self._grad_scale for x in self.params]
        if self.weight_noise:
            self.nW_ems = [theano.shared(x.get_value()*0, name='noise_'+x.name) for x in self.W_ems]
            self.nb_ems = [theano.shared(x.get_value()*0, name='noise_'+x.name) for x in self.b_ems]

            self.noise_params = [x for x in self.nW_ems] + [x for x in self.nb_ems]
            self.noise_params_shape_fn = [constant_shape(x.get_value().shape)
                            for x in self.noise_params]