# All variants below assume the usual Theano-era imports:
import theano
import theano.tensor as T


# Variant: cost/error taken from attributes precomputed on the model;
# parameter updates come from modelbase.updates_dict.
def compile_train(self, updates_dict=None):
    print 'compiling training function...'

    x = self.x
    y = self.y
    subb_ind = T.iscalar('subb')  # sub-batch index

    # Slice one sub-batch out of the shared whole-batch buffers.
    shared_x = self.shared_x[:, :, :, subb_ind * self.batch_size:
                             (subb_ind + 1) * self.batch_size]
    shared_y = self.shared_y[subb_ind * self.batch_size:
                             (subb_ind + 1) * self.batch_size]

    cost = self.cost
    error = self.error
    # errors_top_5 = self.output_layer.errors_top_x(y)

    self.grads = T.grad(cost, self.params)

    if updates_dict is None:
        from modelbase import updates_dict

    updates_w, updates_v, updates_dv = updates_dict(self.config, self)

    self.train = theano.function([subb_ind], [cost, error],
                                 updates=updates_w,
                                 givens=[(x, shared_x), (y, shared_y)])

    self.get_vel = theano.function([subb_ind], [cost, error],
                                   updates=updates_v,
                                   givens=[(x, shared_x), (y, shared_y)])

    self.descent_vel = theano.function([], [], updates=updates_dv)
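# The variants here fall back on `updates_dict` from `modelbase`, which is
# not shown in this listing. The sketch below is a hypothetical stand-in,
# assuming plain momentum SGD split into the three update lists the callers
# expect: `updates_w` (fused velocity + weight step, used by `train`),
# `updates_v` (velocity only, used by `get_vel`) and `updates_dv` (descend
# along the stored velocity, used by `descent_vel`). The `learning_rate`
# and `momentum` config keys are illustrative, not from the source.
def updates_dict(config, model):
    lr = config.get('learning_rate', 0.01)
    mu = config.get('momentum', 0.9)
    # One velocity buffer per parameter, initialised to zero.
    vels = [theano.shared(p.get_value() * 0.) for p in model.params]
    updates_w, updates_v, updates_dv = [], [], []
    for p, g, v in zip(model.params, model.grads, vels):
        new_v = mu * v - lr * g
        updates_w.append((v, new_v))      # fused path: refresh velocity...
        updates_w.append((p, p + new_v))  # ...and step the weights at once
        updates_v.append((v, new_v))      # phase 1: update the velocity only
        updates_dv.append((p, p + v))     # phase 2: apply the stored velocity
    return updates_w, updates_v, updates_dv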
# Variant: cost/error built from the output layer; optional gradient-norm
# monitoring through cached shared buffers.
def compile_train(self, updates_dict=None):
    print 'compiling training function...'

    x = self.x
    y = self.y
    subb_ind = T.iscalar('subb')  # sub-batch index

    shared_x = self.shared_x[:, :, :, subb_ind * self.batch_size:
                             (subb_ind + 1) * self.batch_size]
    shared_y = self.shared_y[subb_ind * self.batch_size:
                             (subb_ind + 1) * self.batch_size]

    cost = self.output_layer.negative_log_likelihood(y)
    error = self.output_layer.errors(y)
    # errors_top_5 = self.output_layer.errors_top_x(y)

    self.grads = T.grad(cost, self.params)

    if updates_dict is None:
        from modelbase import updates_dict

    updates_w, updates_v, updates_dv = updates_dict(self.config, self)

    if self.config['monitor_grad']:
        # Copy each step's gradients into shared buffers so their norms
        # can be read back after a training call.
        shared_grads = [theano.shared(param_i.get_value() * 0.)
                        for param_i in self.params]
        updates_g = zip(shared_grads, self.grads)
        updates_w += updates_g

        norms = [grad.norm(L=2) for grad in shared_grads]
        # The norms depend only on the cached buffers, so the subb_ind
        # input (and the givens) must be allowed to go unused.
        self.get_norm = theano.function([subb_ind], norms,
                                        givens=[(x, shared_x), (y, shared_y)],
                                        on_unused_input='ignore')

    self.train = theano.function([subb_ind], [cost, error],
                                 updates=updates_w,
                                 givens=[(x, shared_x), (y, shared_y)])

    self.get_vel = theano.function([subb_ind], [cost, error],
                                   updates=updates_v,
                                   givens=[(x, shared_x), (y, shared_y)])

    self.descent_vel = theano.function([], [], updates=updates_dv)
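# A hedged usage sketch for the monitoring variant above: with
# config['monitor_grad'] enabled, `train` also copies the step's gradients
# into `shared_grads` (via `updates_g`), after which `get_norm` reports the
# per-parameter L2 norms. `model` is an illustrative instance name, not
# part of the source.
cost, err = model.train(0)      # one step on sub-batch 0; caches gradients
if model.config['monitor_grad']:
    norms = model.get_norm(0)   # L2 norm of each cached parameter gradient
    print 'gradient norms:', norms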
# Variant: symbolic learning rate fed through givens from shared_lr, so
# the schedule can change between steps without recompiling.
def compile_train(self, updates_dict=None):
    if self.verbose:
        print 'compiling training function...'

    x, y, lr = self.x, self.y, self.lr
    subb_ind = T.iscalar('subb')  # sub-batch index
    # print self.shared_x[:, :, :, subb_ind * self.batch_size:
    #                     (subb_ind + 1) * self.batch_size].shape.eval()

    shared_x = self.shared_x[:, :, :, subb_ind * self.batch_size:
                             (subb_ind + 1) * self.batch_size]
    shared_y = self.shared_y[subb_ind * self.batch_size:
                             (subb_ind + 1) * self.batch_size]
    shared_lr = self.shared_lr

    cost = self.cost
    error = self.errors
    params = self.params
    grads = self.grads

    if updates_dict is None:
        from modelbase import updates_dict

    updates_w, updates_v, updates_dv = updates_dict(self.config, model=self)

    if self.config['monitor_grad']:
        # Norms are taken on the symbolic gradients here, so each call
        # recomputes them from the current sub-batch.
        norms = [grad.norm(L=2) for grad in self.grads]
        self.get_norm = theano.function([subb_ind], norms,
                                        givens=[(x, shared_x), (y, shared_y)])

    self.train = theano.function([subb_ind], [cost, error],
                                 updates=updates_w,
                                 givens=[(x, shared_x), (y, shared_y),
                                         (lr, shared_lr)])

    self.get_vel = theano.function([subb_ind], [cost, error],
                                   updates=updates_v,
                                   givens=[(x, shared_x), (y, shared_y),
                                           (lr, shared_lr)])

    self.descent_vel = theano.function([], [], updates=updates_dv)
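# In the variant above the symbolic learning rate `lr` is bound to
# `self.shared_lr` through `givens`, so a schedule only needs a set_value
# call between steps, with no recompilation. A minimal sketch, assuming
# illustrative names `model`, `n_epochs`, `n_subb` and a step-decay policy
# that is not from the source:
import numpy as np

for epoch in range(n_epochs):
    for i in range(n_subb):
        cost, err = model.train(i)      # lr is read from shared_lr each step
    if (epoch + 1) % 20 == 0:           # e.g. halve the rate every 20 epochs
        old_lr = model.shared_lr.get_value()
        model.shared_lr.set_value(np.float32(old_lr * 0.5))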
# Variant: Lasagne-built graph; the input slice is dimshuffled from c01b
# to bc01 before feeding the network.
def compile_train(self, updates_dict=None):
    print 'compiling training function...'

    import lasagne

    x = self.x
    y = self.y
    subb_ind = T.iscalar('subb')  # sub-batch index

    shared_x = self.shared_x[:, :, :, subb_ind * self.batch_size:
                             (subb_ind + 1) * self.batch_size
                             ].dimshuffle(3, 0, 1, 2)  # c01b to bc01
    shared_y = self.shared_y[subb_ind * self.batch_size:
                             (subb_ind + 1) * self.batch_size]

    # training
    prediction = lasagne.layers.get_output(self.output_layer, x,
                                           deterministic=False)
    loss = lasagne.objectives.categorical_crossentropy(prediction, y).mean()
    error = self.errors(prediction, y)
    # self.output = softmax_layer.p_y_given_x
    # self.cost = softmax_layer.negative_log_likelihood(y) + \
    #     0.3 * aux1.negative_log_likelihood(y) + \
    #     0.3 * aux2.negative_log_likelihood(y)

    self.grads = T.grad(loss, self.params)

    if self.config['train_mode'] == 'cdd':
        if updates_dict is None:
            from modelbase import updates_dict
        updates_w, updates_v, updates_dv = updates_dict(self.config, self)
    else:
        # Pass the shared variable itself (not .get_value()) so later
        # learning-rate changes take effect without recompiling.
        updates_w = lasagne.updates.nesterov_momentum(
            loss, self.params, learning_rate=self.shared_lr,
            momentum=self.mu)

    if self.config['monitor_grad']:
        shared_grads = [theano.shared(param_i.get_value() * 0.)
                        for param_i in self.params]
        updates_g = zip(shared_grads, self.grads)
        if isinstance(updates_w, dict):
            # Lasagne returns an OrderedDict; flatten it before appending.
            updates_w = updates_w.items()
        updates_w += updates_g

        norms = [grad.norm(L=2) for grad in shared_grads]
        self.get_norm = theano.function([subb_ind], norms,
                                        givens=[(x, shared_x), (y, shared_y)],
                                        on_unused_input='ignore')

    self.train = theano.function([subb_ind], [loss, error],
                                 updates=updates_w,
                                 givens=[(x, shared_x), (y, shared_y)])

    if self.config['train_mode'] == 'cdd':
        # The velocity-based functions are only defined in 'cdd' mode,
        # where updates_v and updates_dv exist.
        self.get_vel = theano.function([subb_ind], [loss, error],
                                       updates=updates_v,
                                       givens=[(x, shared_x), (y, shared_y)])
        self.descent_vel = theano.function([], [], updates=updates_dv)
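# A hedged sketch of how the three compiled functions fit together in one
# pass over the sub-batches, assuming 'cdd' splits a momentum step into a
# velocity computation followed by a descent along it (`model` and
# `n_subb` are illustrative names, not from the source):
for i in range(n_subb):
    if model.config['train_mode'] == 'cdd':
        cost, err = model.get_vel(i)   # accumulate velocity from this sub-batch
        model.descent_vel()            # then apply the stored velocity to the weights
    else:
        cost, err = model.train(i)     # single fused update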