def _init_params(self):
    # Left weight matrix
    self.W_hh = theano.shared(
        self.init_fn(self.n_hids, self.n_hids, self.sparsity,
                     self.scale, rng=self.rng),
        name="W_%s" % self.name)
    self.params = [self.W_hh]
    # Right weight matrix
    self.U_hh = theano.shared(
        self.init_fn(self.n_hids, self.n_hids, self.sparsity,
                     self.scale, rng=self.rng),
        name="U_%s" % self.name)
    self.params += [self.U_hh]
    # Bias
    self.b_hh = theano.shared(
        self.bias_fn(self.n_hids, self.bias_scale, self.rng),
        name="b_%s" % self.name)
    self.params += [self.b_hh]
    # gaters
    # if self.conv_mode == "conv":
    self.GW_hh = theano.shared(
        numpy.float32(0.01 * self.rng.randn(self.n_hids, 3)),
        name="GW_%s" % self.name)
    self.params += [self.GW_hh]
    self.GU_hh = theano.shared(
        numpy.float32(0.01 * self.rng.randn(self.n_hids, 3)),
        name="GU_%s" % self.name)
    self.params += [self.GU_hh]
    self.Gb_hh = theano.shared(
        self.bias_fn(3, self.bias_scale, self.rng),
        name="Gb_%s" % self.name)
    self.params += [self.Gb_hh]
    self.params_grad_scale = [self.grad_scale for x in self.params]
    self.restricted_params = [x for x in self.params]
    if self.weight_noise:
        self.nW_hh = theano.shared(self.W_hh.get_value() * 0,
                                   name="noise_" + self.W_hh.name)
        self.nU_hh = theano.shared(self.U_hh.get_value() * 0,
                                   name="noise_" + self.U_hh.name)
        self.nb_hh = theano.shared(self.b_hh.get_value() * 0,
                                   name="noise_" + self.b_hh.name)
        self.noise_params = [self.nW_hh, self.nU_hh, self.nb_hh]
        self.noise_params_shape_fn = [constant_shape(x.get_value().shape)
                                      for x in self.noise_params]
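# Note: `constant_shape` is assumed to be a small project helper that freezes
# the shape of each noise buffer for later use (e.g. when resampling noise).
# A minimal sketch of such a helper, under that assumption; the real helper
# in the codebase may differ:
def constant_shape(shape):
    # Return a callable that always yields the captured shape, so
    # noise_params_shape_fn[i]() gives the shape of noise_params[i].
    return lambda *args, **kwargs: shape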
def _init_params(self):
    self.W_hhs = []
    self.W_shortp = []
    for dx in xrange(self.n_layers):
        W_hh = self.init_fn[dx](self.n_hids[(dx - 1) % self.n_layers],
                                self.n_hids[dx],
                                self.sparsity[dx],
                                self.scale[dx],
                                rng=self.rng)
        self.W_hhs.append(theano.shared(value=W_hh,
                                        name="W%d_%s" % (dx, self.name)))
        if dx > 0:
            W_shp = self.init_fn[dx](self.n_hids[self.n_layers - 1],
                                     self.n_hids[dx],
                                     self.sparsity[dx],
                                     self.scale[dx],
                                     rng=self.rng)
            self.W_shortp.append(theano.shared(value=W_shp,
                                               name='W_s%d_%s' % (dx, self.name)))
    self.params = [x for x in self.W_hhs] + [x for x in self.W_shortp]
    self.params_grad_scale = [self.grad_scale for x in self.params]
    self.restricted_params = [x for x in self.params]
    if self.weight_noise:
        self.nW_hhs = [theano.shared(x.get_value() * 0, name='noise_' + x.name)
                       for x in self.W_hhs]
        self.nW_shortp = [theano.shared(x.get_value() * 0, name='noise_' + x.name)
                          for x in self.W_shortp]
        self.noise_params = [x for x in self.nW_hhs] + [x for x in self.nW_shortp]
        self.noise_params_shape_fn = [constant_shape(x.get_value().shape)
                                      for x in self.noise_params]
def _init_params(self):
    self.W_hhs = []
    self.b_hhs = []
    for dx in xrange(self.n_layers):
        W_hh = self.init_fn[dx](self.n_hids[(dx - 1) % self.n_layers],
                                self.n_hids[dx],
                                self.sparsity[dx],
                                self.scale[dx],
                                rng=self.rng)
        self.W_hhs.append(theano.shared(value=W_hh,
                                        name="W%d_%s" % (dx, self.name)))
        if dx < self.n_layers - 1:
            self.b_hhs.append(theano.shared(
                self.bias_fn[dx](self.n_hids[dx],
                                 self.bias_scale[dx],
                                 self.rng),
                name='b%d_%s' % (dx, self.name)))
    self.params = [x for x in self.W_hhs] + [x for x in self.b_hhs]
    self.params_grad_scale = [self.grad_scale for x in self.params]
    self.restricted_params = [x for x in self.params]
    if self.weight_noise:
        self.nW_hhs = [theano.shared(x.get_value() * 0, name='noise_' + x.name)
                       for x in self.W_hhs]
        self.nb_hhs = [theano.shared(x.get_value() * 0, name='noise_' + x.name)
                       for x in self.b_hhs]
        self.noise_params = [x for x in self.nW_hhs] + [x for x in self.nb_hhs]
        self.noise_params_shape_fn = [constant_shape(x.get_value().shape)
                                      for x in self.noise_params]
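# `bias_fn` is assumed to build a bias vector of a given size. A minimal,
# illustrative sketch matching the bias_fn(nout, bias_scale, rng) call sites
# above (the project's real bias samplers may differ):
def init_bias(nout, bias_scale, rng):
    # Constant bias vector scaled by bias_scale; a real helper might instead
    # sample small random values around that scale.
    return bias_scale * numpy.ones((nout,), dtype=theano.config.floatX)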
def _init_params(self):
    self.W_hh = theano.shared(
        self.init_fn(self.n_hids, self.n_hids, self.sparsity,
                     self.scale, rng=self.rng),
        name="W_%s" % self.name)
    self.params = [self.W_hh]
    if self.gating:
        self.G_hh = theano.shared(
            self.init_fn(self.n_hids, self.n_hids, self.sparsity,
                         self.scale, rng=self.rng),
            name="G_%s" % self.name)
        self.params.append(self.G_hh)
    if self.reseting:
        self.R_hh = theano.shared(
            self.init_fn(self.n_hids, self.n_hids, self.sparsity,
                         self.scale, rng=self.rng),
            name="R_%s" % self.name)
        self.params.append(self.R_hh)
    self.params_grad_scale = [self.grad_scale for x in self.params]
    self.restricted_params = [x for x in self.params]
    if self.weight_noise:
        # Noise is only created for W_hh and the gate G_hh; this assumes
        # gating is enabled, and no noise is added to the reset weights R_hh.
        self.nW_hh = theano.shared(self.W_hh.get_value() * 0,
                                   name='noise_' + self.W_hh.name)
        self.nG_hh = theano.shared(self.G_hh.get_value() * 0,
                                   name='noise_' + self.G_hh.name)
        self.noise_params = [self.nW_hh, self.nG_hh]
        self.noise_params_shape_fn = [constant_shape(x.get_value().shape)
                                      for x in self.noise_params]
def _init_params(self):
    # Left weight matrix
    self.W_hh = theano.shared(
        self.init_fn(self.n_hids, self.n_hids, self.sparsity,
                     self.scale, rng=self.rng),
        name="W_%s" % self.name)
    self.params = [self.W_hh]
    # Right weight matrix
    self.U_hh = theano.shared(
        self.init_fn(self.n_hids, self.n_hids, self.sparsity,
                     self.scale, rng=self.rng),
        name="U_%s" % self.name)
    self.params += [self.U_hh]
    # Bias
    self.b_hh = theano.shared(
        self.bias_fn(self.n_hids, self.bias_scale, self.rng),
        name='b_%s' % self.name)
    self.params += [self.b_hh]
    # gaters
    self.GW_hh = theano.shared(
        numpy.float32(0.01 * self.rng.randn(self.n_hids, 3)),
        name="GW_%s" % self.name)
    self.params += [self.GW_hh]
    self.GU_hh = theano.shared(
        numpy.float32(0.01 * self.rng.randn(self.n_hids, 3)),
        name="GU_%s" % self.name)
    self.params += [self.GU_hh]
    self.Gb_hh = theano.shared(
        self.bias_fn(3, self.bias_scale, self.rng),
        name='Gb_%s' % self.name)
    self.params += [self.Gb_hh]
    self.params_grad_scale = [self.grad_scale for x in self.params]
    self.restricted_params = [x for x in self.params]
    if self.weight_noise:
        self.nW_hh = theano.shared(self.W_hh.get_value() * 0,
                                   name='noise_' + self.W_hh.name)
        self.nU_hh = theano.shared(self.U_hh.get_value() * 0,
                                   name='noise_' + self.U_hh.name)
        self.nb_hh = theano.shared(self.b_hh.get_value() * 0,
                                   name='noise_' + self.b_hh.name)
        self.noise_params = [self.nW_hh, self.nU_hh, self.nb_hh]
        self.noise_params_shape_fn = [constant_shape(x.get_value().shape)
                                      for x in self.noise_params]
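# The noise buffers created above are typically added to the corresponding
# weights when the recurrent graph is built. A hedged sketch of that pattern,
# assuming a `use_noise` flag; the names here are illustrative only, not the
# project's actual API:
def _noisy(self, param, noise, use_noise):
    # Return param + noise while training with weight noise, else param.
    if use_noise and self.weight_noise:
        return param + noise
    return param
# e.g. W = self._noisy(self.W_hh, self.nW_hh, use_noise) inside the step
# function, so the clean weights are used at sampling/decoding time.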
def _init_params(self): """ Initialize the parameters of the layer, either by using sparse initialization or small isotropic noise. """ self.W_ems = [] self.b_ems = [] if self.rank_n_approx: W_em1 = self.init_fn[0](self.n_in, self.rank_n_approx, self.sparsity[0], self.scale[0], self.rng) W_em2 = self.init_fn[0](self.rank_n_approx, self.n_hids[0], self.sparsity[0], self.scale[0], self.rng) self.W_em1 = theano.shared(W_em1, name='W1_0_%s'%self.name) self.W_em2 = theano.shared(W_em2, name='W2_0_%s'%self.name) self.W_ems = [self.W_em1, self.W_em2] else: W_em = self.init_fn[0](self.n_in, self.n_hids[0], self.sparsity[0], self.scale[0], self.rng) self.W_em = theano.shared(W_em, name='W_0_%s'%self.name) self.W_ems = [self.W_em] self.b_em = theano.shared( self.bias_fn[0](self.n_hids[0], self.bias_scale[0],self.rng), name='b_0_%s'%self.name) self.b_ems = [self.b_em] for dx in xrange(1, self.n_layers): W_em = self.init_fn[dx](self.n_hids[dx-1],# / self.pieces[dx], self.n_hids[dx], self.sparsity[dx], self.scale[dx], self.rng) W_em = theano.shared(W_em, name='W_%d_%s'%(dx,self.name)) self.W_ems += [W_em] b_em = theano.shared( self.bias_fn[dx](self.n_hids[dx], self.bias_scale[dx],self.rng), name='b_%d_%s'%(dx,self.name)) self.b_ems += [b_em] self.params = [x for x in self.W_ems] if self.learn_bias and self.learn_bias!='last': self.params = [x for x in self.W_ems] + [x for x in self.b_ems] elif self.learn_bias == 'last': self.params = [x for x in self.W_ems] + [x for x in self.b_ems][:-1] self.params_grad_scale = [self._grad_scale for x in self.params] if self.weight_noise: self.nW_ems = [theano.shared(x.get_value()*0, name='noise_'+x.name) for x in self.W_ems] self.nb_ems = [theano.shared(x.get_value()*0, name='noise_'+x.name) for x in self.b_ems] self.noise_params = [x for x in self.nW_ems] + [x for x in self.nb_ems] self.noise_params_shape_fn = [constant_shape(x.get_value().shape) for x in self.noise_params]
def _init_params(self): """ Initialize the parameters of the layer, either by using sparse initialization or small isotropic noise. """ if self.rank_n_approx: W_em1 = self.init_fn(self.nin, self.rank_n_approx, self.sparsity, self.scale, self.rng) W_em2 = self.init_fn(self.rank_n_approx, self.nout, self.sparsity, self.scale, self.rng) self.W_em1 = theano.shared(W_em1, name='W1_%s' % self.name) self.W_em2 = theano.shared(W_em2, name='W2_%s' % self.name) self.b_em = theano.shared( numpy.zeros((self.nout,), dtype=theano.config.floatX), name='b_%s' % self.name) self.params += [self.W_em1, self.W_em2, self.b_em] self.myparams = [] # [self.W_em1, self.W_em2, self.b_em] if self.weight_noise: self.nW_em1 = theano.shared(W_em1 * 0., name='noise_W1_%s' % self.name) self.nW_em2 = theano.shared(W_em * 0., name='noise_W2_%s' % self.name) self.nb_em = theano.shared(b_em * 0., name='noise_b_%s' % self.name) self.noise_params = [self.nW_em1, self.nW_em2, self.nb_em] self.noise_params_shape_fn = [ constant_shape(x.get_value().shape) for x in self.noise_params] else: W_em = self.init_fn(self.nin, self.nout, self.sparsity, self.scale, self.rng) self.W_em = theano.shared(W_em, name='W_%s' % self.name) self.b_em = theano.shared( numpy.zeros((self.nout,), dtype=theano.config.floatX), name='b_%s' % self.name) self.add_wghs = [] self.n_add_wghs = [] if self.additional_inputs: for pos, sz in enumerate(self.additional_inputs): W_add = self.init_fn(sz, self.nout, self.sparsity, self.scale, self.rng) self.add_wghs += [theano.shared(W_add, name='W_add%d_%s' % (pos, self.name))] if self.weight_noise: self.n_add_wghs += [theano.shared(W_add * 0., name='noise_W_add%d_%s' % (pos, self.name))] self.params += [self.W_em, self.b_em] + self.add_wghs self.myparams = [] # [self.W_em, self.b_em] + self.add_wghs if self.weight_noise: self.nW_em = theano.shared(W_em * 0., name='noise_W_%s' % self.name) self.nb_em = theano.shared(numpy.zeros((self.nout,), dtype=theano.config.floatX), name='noise_b_%s' % self.name) self.noise_params = [self.nW_em, self.nb_em] + self.n_add_wghs self.noise_params_shape_fn = [ constant_shape(x.get_value().shape) for x in self.noise_params]
def _init_params(self): """ Initialize the parameters of the layer, either by using sparse initialization or small isotropic noise. """ if self.rank_n_approx: W_em1 = self.init_fn(self.nin, self.rank_n_approx, self.sparsity, self.scale, self.rng) W_em2 = self.init_fn(self.rank_n_approx, self.nout, self.sparsity, self.scale, self.rng) self.W_em1 = theano.shared(W_em1, name='W1_%s' % self.name) self.W_em2 = theano.shared(W_em2, name='W2_%s' % self.name) self.b_em = theano.shared(numpy.zeros((self.nout, ), dtype=theano.config.floatX), name='b_%s' % self.name) self.params += [self.W_em1, self.W_em2, self.b_em] self.myparams = [] #[self.W_em1, self.W_em2, self.b_em] if self.weight_noise: self.nW_em1 = theano.shared(W_em1 * 0., name='noise_W1_%s' % self.name) self.nW_em2 = theano.shared(W_em * 0., name='noise_W2_%s' % self.name) self.nb_em = theano.shared(b_em * 0., name='noise_b_%s' % self.name) self.noise_params = [self.nW_em1, self.nW_em2, self.nb_em] self.noise_params_shape_fn = [ constant_shape(x.get_value().shape) for x in self.noise_params ] else: W_em = self.init_fn(self.nin, self.nout, self.sparsity, self.scale, self.rng) self.W_em = theano.shared(W_em, name='W_%s' % self.name) self.b_em = theano.shared(numpy.zeros((self.nout, ), dtype=theano.config.floatX), name='b_%s' % self.name) self.add_wghs = [] self.n_add_wghs = [] if self.additional_inputs: for pos, sz in enumerate(self.additional_inputs): W_add = self.init_fn(sz, self.nout, self.sparsity, self.scale, self.rng) self.add_wghs += [ theano.shared(W_add, name='W_add%d_%s' % (pos, self.name)) ] if self.weight_noise: self.n_add_wghs += [ theano.shared(W_add * 0., name='noise_W_add%d_%s' % (pos, self.name)) ] self.params += [self.W_em, self.b_em] + self.add_wghs self.myparams = [] #[self.W_em, self.b_em] + self.add_wghs if self.weight_noise: self.nW_em = theano.shared(W_em * 0., name='noise_W_%s' % self.name) self.nb_em = theano.shared(numpy.zeros( (self.nout, ), dtype=theano.config.floatX), name='noise_b_%s' % self.name) self.noise_params = [self.nW_em, self.nb_em] + self.n_add_wghs self.noise_params_shape_fn = [ constant_shape(x.get_value().shape) for x in self.noise_params ]
def _init_params(self): """ Initialize the parameters of the layer, either by using sparse initialization or small isotropic noise. """ self.W_ems = [] self.b_ems = [] if self.rank_n_approx: W_em1 = self.init_fn[0](self.n_in, self.rank_n_approx, self.sparsity[0], self.scale[0], self.rng) W_em2 = self.init_fn[0](self.rank_n_approx, self.n_hids[0], self.sparsity[0], self.scale[0], self.rng) self.W_em1 = theano.shared(W_em1, name='W1_0_%s' % self.name) self.W_em2 = theano.shared(W_em2, name='W2_0_%s' % self.name) self.W_ems = [self.W_em1, self.W_em2] else: W_em = self.init_fn[0](self.n_in, self.n_hids[0], self.sparsity[0], self.scale[0], self.rng) self.W_em = theano.shared(W_em, name='W_0_%s' % self.name) self.W_ems = [self.W_em] self.b_em = theano.shared(self.bias_fn[0](self.n_hids[0], self.bias_scale[0], self.rng), name='b_0_%s' % self.name) self.b_ems = [self.b_em] for dx in xrange(1, self.n_layers): W_em = self.init_fn[dx](self.n_hids[dx - 1] / self.pieces[dx], self.n_hids[dx], self.sparsity[dx], self.scale[dx], self.rng) W_em = theano.shared(W_em, name='W_%d_%s' % (dx, self.name)) self.W_ems += [W_em] b_em = theano.shared(self.bias_fn[dx](self.n_hids[dx], self.bias_scale[dx], self.rng), name='b_%d_%s' % (dx, self.name)) self.b_ems += [b_em] self.params = [x for x in self.W_ems] if self.learn_bias and self.learn_bias != 'last': self.params = [x for x in self.W_ems] + [x for x in self.b_ems] elif self.learn_bias == 'last': self.params = [x for x in self.W_ems] + [x for x in self.b_ems][:-1] self.params_grad_scale = [self._grad_scale for x in self.params] if self.weight_noise: self.nW_ems = [ theano.shared(x.get_value() * 0, name='noise_' + x.name) for x in self.W_ems ] self.nb_ems = [ theano.shared(x.get_value() * 0, name='noise_' + x.name) for x in self.b_ems ] self.noise_params = [x for x in self.nW_ems ] + [x for x in self.nb_ems] self.noise_params_shape_fn = [ constant_shape(x.get_value().shape) for x in self.noise_params ]
def _init_params(self): """ Initialize the parameters of the layer, either by using sparse initialization or small isotropic noise. """ self.W_ems = [] self.b_ems = [] if self.rank_n_approx: W_em1 = self.init_fn[0](self.n_in, self.rank_n_approx, self.sparsity[0], self.scale[0], self.rng) W_em2 = self.init_fn[0](self.rank_n_approx, self.n_hids[0], self.sparsity[0], self.scale[0], self.rng) self.W_em1 = theano.shared(W_em1, name='W1_0_%s'%self.name) self.W_em2 = theano.shared(W_em2, name='W2_0_%s'%self.name) self.W_ems = [self.W_em1, self.W_em2] else: print 'embedding create' n_in = self.n_in if self.shared_embed: print 'word ezmbedding shared {}',self.shared_embed.shape.eval() n_in = self.n_in - self.shared_embed.shape.eval()[0] print 'now word size',n_in W_em = self.init_fn[0](n_in, self.n_hids[0], self.sparsity[0], self.scale[0], self.rng) self.W_em = theano.shared(W_em, name='W_0_%s'%self.name) if self.shared_embed: self.prev_W_em = self.W_em self.W_em = TT.concatenate([self.shared_embed, self.W_em], axis=0) self.W_ems = [self.W_em] self.b_em = theano.shared( self.bias_fn[0](self.n_hids[0], self.bias_scale[0],self.rng), name='b_0_%s'%self.name) self.b_ems = [self.b_em] assert self.n_layers == 1 for dx in xrange(1, self.n_layers): W_em = self.init_fn[dx](self.n_hids[dx-1] / self.pieces[dx], self.n_hids[dx], self.sparsity[dx], self.scale[dx], self.rng) W_em = theano.shared(W_em, name='W_%d_%s'%(dx,self.name)) self.W_ems += [W_em] b_em = theano.shared( self.bias_fn[dx](self.n_hids[dx], self.bias_scale[dx],self.rng), name='b_%d_%s'%(dx,self.name)) self.b_ems += [b_em] self.params = [x for x in self.W_ems] if self.shared_embed: self.params = [self.shared_embed, self.prev_W_em, self.b_ems[0]] self.params_grad_scale = [self._grad_scale for x in self.params] return if self.learn_bias and self.learn_bias!='last': self.params = [x for x in self.W_ems] + [x for x in self.b_ems] elif self.learn_bias == 'last': self.params = [x for x in self.W_ems] + [x for x in self.b_ems][:-1] self.params_grad_scale = [self._grad_scale for x in self.params] if self.weight_noise: self.nW_ems = [theano.shared(x.get_value()*0, name='noise_'+x.name) for x in self.W_ems] self.nb_ems = [theano.shared(x.get_value()*0, name='noise_'+x.name) for x in self.b_ems] self.noise_params = [x for x in self.nW_ems] + [x for x in self.nb_ems] self.noise_params_shape_fn = [constant_shape(x.get_value().shape) for x in self.noise_params]
def _init_params(self): """ Initialize the parameters of the layer, either by using sparse initialization or small isotropic noise. """ self.W_ems = [] #self.Q_ems = [] #self.R_ems = [] #self.b_ems = [] # if you need rank_n_approx add it here Q_em = self.init_fn[0](self.n_in, self.n_hids[0], self.sparsity[0], self.scale[0], self.rng) Q_em = theano.shared(Q_em, name='Q_0_%s'%self.name) #self.Q_ems = [Q_em] R_em = self.init_fn[0](self.n_in, self.n_hids[0], self.sparsity[0], self.scale[0], self.rng) R_em = theano.shared(R_em, name='R_0_%s'%self.name) #self.R_ems = [R_em] self.W_ems = [Q_em, R_em] bq_em = theano.shared( self.bias_fn[0](self.n_hids[0], self.bias_scale[0],self.rng), name='b_q_0_%s'%self.name) br_em = theano.shared( self.bias_fn[0](self.n_hids[0], self.bias_scale[0],self.rng), name='b_r_0_%s'%self.name) self.b_ems = [bq_em, br_em] for dx in xrange(1, self.n_layers): #W_em = self.init_fn[dx](self.n_hids[dx-1] / self.pieces[dx], Maryam Q_em = self.init_fn[dx](self.n_hids[dx-1], self.n_hids[dx], self.sparsity[dx], self.scale[dx], self.rng) Q_em = theano.shared(Q_em, name='Q_%d_%s'%(dx,self.name)) R_em = self.init_fn[dx](self.n_hids[dx-1], self.n_hids[dx], self.sparsity[dx], self.scale[dx], self.rng) R_em = theano.shared(R_em, name='R_%d_%s'%(dx,self.name)) self.W_ems += [Q_em, R_em] bq_em = theano.shared( self.bias_fn[dx](self.n_hids[dx], self.bias_scale[dx],self.rng), name='b_q_%d_%s'%(dx,self.name)) br_em = theano.shared( self.bias_fn[dx](self.n_hids[dx], self.bias_scale[dx],self.rng), name='b_r_%d_%s'%(dx,self.name)) self.b_ems += [bq_em, br_em] self.params = [x for x in self.W_ems] if self.learn_bias and self.learn_bias!='last': self.params = [x for x in self.W_ems] + [x for x in self.b_ems] elif self.learn_bias == 'last': self.params = [x for x in self.W_ems] + [x for x in self.b_ems][:-1] self.params_grad_scale = [self._grad_scale for x in self.params] if self.weight_noise: self.nW_ems = [theano.shared(x.get_value()*0, name='noise_'+x.name) for x in self.W_ems] self.nb_ems = [theano.shared(x.get_value()*0, name='noise_'+x.name) for x in self.b_ems] self.noise_params = [x for x in self.nW_ems] + [x for x in self.nb_ems] self.noise_params_shape_fn = [constant_shape(x.get_value().shape) for x in self.noise_params]