import os

import numpy as np
import theano
import theano.tensor as T
import climin
import climin.initialize
import climin.util

# Project-local helpers (DataLoader, trainer, check_create_observations_dir,
# display) are assumed to be imported from the surrounding package.


def train_LR(self, lr):
    trainer.train_LR(self, lr)
    dataloader = DataLoader()
    datasets = dataloader.load_shared_data()
    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # Flat parameter vector for climin: 28*28 weights per class plus one bias
    # for each of the 10 classes, drawn from N(0, 1) and scaled down.
    params = np.empty((28 * 28) * 10 + 10)
    climin.initialize.randomize_normal(params, 0, 1)
    params = params / (28 * 28)
    lr.setParams(params)

    x = lr.x
    y = lr.y
    cost = (
        lr.negative_log_likelihood(y)
        + self.L1_lambda * lr.L1
        + self.L2_lambda * lr.L2_sqr
    )

    # Compiled gradient functions; climin calls these on raw minibatches.
    g_W = T.grad(cost=cost, wrt=lr.W)
    g_b = T.grad(cost=cost, wrt=lr.b)
    g_W_model = theano.function(inputs=[x, y], outputs=g_W)
    g_b_model = theano.function(inputs=[x, y], outputs=g_b)

    batch_size = self.batch_size
    index = T.lscalar()
    test_err_model = theano.function(
        inputs=[index],
        outputs=lr.zeroOneLoss(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size]
        }
    )
    train_err_model = theano.function(
        inputs=[index],
        outputs=lr.zeroOneLoss(y),
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size]
        }
    )
    validate_err_model = theano.function(
        inputs=[index],
        outputs=lr.zeroOneLoss(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]
        }
    )

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] / batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] / batch_size

    def train_error():
        train_losses = [train_err_model(i) for i in xrange(n_train_batches)]
        return np.mean(train_losses)

    def validate_error():
        validation_losses = [validate_err_model(i) for i in xrange(n_valid_batches)]
        return np.mean(validation_losses)

    def test_error():
        test_losses = [test_err_model(i) for i in xrange(n_test_batches)]
        return np.mean(test_losses)

    def d_loss_wrt_pars(parameters, inpt, targets):
        # climin passes the current flat parameter vector plus one minibatch;
        # return the flat gradient of the loss w.r.t. the parameters.
        lr.setParams(parameters)
        gwValue = g_W_model(inpt, targets)
        gbValue = g_b_model(inpt, targets)
        return np.concatenate([gwValue.flatten(), gbValue])

    args = ((i, {}) for i in climin.util.iter_minibatches(
        [train_set_x.eval(), train_set_y.eval()], self.batch_size, [0, 0]))
    opt = climin.rmsprop.RmsProp(params, d_loss_wrt_pars,
                                 step_rate=self.learning_rate,
                                 decay=self.decay,
                                 momentum=self.momentum, args=args)

    validation_frequency = n_train_batches
    directory = check_create_observations_dir()
    self.output_directory = directory
    bestValidationLoss = np.inf
    for info in opt:
        if info['n_iter'] % validation_frequency == 0:
            epoch_no = info['n_iter'] / n_train_batches
            train_err = train_error()
            validation_err = validate_error()
            test_err = test_error()
            self.add_train_data(epoch_no, train_err, validation_err, test_err)
            if epoch_no % 10 == 0:
                repfields_path = os.path.join(
                    directory, "repFields" + str(epoch_no).zfill(3) + '.png')
                W_vals = lr.W.get_value(borrow=True)
                display(W_vals, repfields_path)
            if epoch_no >= self.n_epochs:
                break
            if validation_err < bestValidationLoss:
                bestValidationLoss = validation_err
            # stop once the model starts to overfit: the training error falling
            # well below the validation error signals diverging learning curves
            if epoch_no > 15 and train_err < 0.9 * validation_err:
                break
            print "Epoch no: %d Validation error = %f" % (epoch_no, validation_err * 100)

    trainer.save_errors(self, directory)
    repfields_final_path = os.path.join(directory, "repFields.png")
    W_vals = lr.W.get_value(borrow=True)
    display(W_vals, repfields_final_path)
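# A minimal, self-contained sketch of the climin optimizer protocol used
# above, on a toy quadratic instead of the logistic-regression loss (the
# names fprime/pars are illustrative, not part of the project): climin
# updates the flat parameter array in place and yields one info dict per
# step, which is why the loop above reads info['n_iter'].
def fprime(pars):
    # gradient of f(p) = 0.5 * ||p||^2, minimised at p = 0
    return pars

pars = np.ones(5)
toy_opt = climin.rmsprop.RmsProp(pars, fprime, step_rate=0.1, decay=0.9)
for info in toy_opt:
    if info['n_iter'] >= 100:
        break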
def train_LR(self, lr):
    trainer.train_LR(self, lr)
    dataloader = DataLoader()
    datasets = dataloader.load_shared_data()
    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # compute number of minibatches for training, validation and testing
    batch_size = self.batch_size
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] / batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] / batch_size

    index = T.lscalar()  # index to a [mini]batch
    x = lr.x  # the data is presented as rasterized images
    y = lr.y
    cost = (
        lr.negative_log_likelihood(y)
        + self.L1_lambda * lr.L1
        + self.L2_lambda * lr.L2_sqr
    )
    test_model = theano.function(
        inputs=[index],
        outputs=lr.zeroOneLoss(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size]
        }
    )
    train_err_model = theano.function(
        inputs=[index],
        outputs=lr.zeroOneLoss(y),
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size]
        }
    )
    validate_model = theano.function(
        inputs=[index],
        outputs=lr.zeroOneLoss(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]
        }
    )

    # plain stochastic gradient descent: one update expression per parameter
    g_W = T.grad(cost=cost, wrt=lr.W)
    g_b = T.grad(cost=cost, wrt=lr.b)
    updates = [(lr.W, lr.W - self.learning_rate * g_W),
               (lr.b, lr.b - self.learning_rate * g_b)]
    train_model = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size: (index + 1) * batch_size],
            y: train_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    def train_error():
        train_losses = [train_err_model(i) for i in xrange(n_train_batches)]
        return np.mean(train_losses)

    def validate_error():
        validation_losses = [validate_model(i) for i in xrange(n_valid_batches)]
        return np.mean(validation_losses)

    def test_error():
        test_losses = [test_model(i) for i in xrange(n_test_batches)]
        return np.mean(test_losses)

    print '... training'
    # Train in mini batches
    minEpochs = 4
    validationFrequency = n_train_batches
    iteration = 0
    bestValidationLoss = np.inf
    max_epoch_reached = False
    directory = check_create_observations_dir()
    self.output_directory = directory
    while not max_epoch_reached:
        iteration += 1
        epochNo = iteration / n_train_batches
        batchId = iteration % n_train_batches
        currentCost = train_model(batchId)
        if iteration % validationFrequency == 0:
            validation_err = validate_error()
            test_err = test_error()
            train_err = train_error()
            print "Epoch no: %d Validation Loss = %f" % (epochNo, validation_err * 100)
            self.add_train_data(epochNo, train_err, validation_err, test_err)
            if epochNo % 5 == 0:
                W_vals = lr.W.get_value(borrow=True)
                repfields_path = os.path.join(
                    directory, "repFields" + str(epochNo).zfill(3) + '.png')
                display(W_vals, repfields_path)
            if validation_err < bestValidationLoss:
                bestValidationLoss = validation_err
            # early stopping: give up once the validation error is more than
            # about 0.5% worse than the best value seen so far
            if epochNo > minEpochs and validation_err * 0.995 > bestValidationLoss:
                break
            if epochNo >= self.n_epochs:
                max_epoch_reached = True

    testLoss = test_error()
    print "Iteration %d complete. Cost = %f Best Validation Loss = %f Test Loss = %f" \
        % (iteration, currentCost, bestValidationLoss * 100, testLoss * 100)
    trainer.save_errors(self, directory)
    repfields_final_path = os.path.join(directory, "repFields.png")
    W_vals = lr.W.get_value(borrow=True)
    display(W_vals, repfields_final_path)
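# The early-stopping test above in isolation (a hedged sketch; should_stop is
# a hypothetical helper, the 0.995 threshold and the names come from the loop
# above): training continues only while the current validation error stays
# within roughly 0.5% of the best one, e.g. with best = 0.070 it continues
# while validation_err <= 0.070 / 0.995, about 0.07035.
def should_stop(validation_err, best_validation_loss,
                epoch_no, min_epochs=4, threshold=0.995):
    return epoch_no > min_epochs and validation_err * threshold > best_validation_loss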
def train_NN(self, nn):
    trainer.train_NN(self, nn)
    data_loader = DataLoader()
    datasets = data_loader.load_shared_data()
    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # compute number of minibatches for training, validation and testing
    batch_size = self.batch_size
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] / batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] / batch_size

    index = T.lscalar()  # index to a [mini]batch
    x = nn.input  # the data is presented as rasterized images
    y = T.ivector('y')
    cost = (
        nn.negative_log_likelihood_dropout(y)
        + self.L1_lambda * nn.L1
        + self.L2_lambda * nn.L2
    )
    train_err_model = theano.function(
        inputs=[index],
        outputs=nn.errors(y),
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size]
        }
    )
    test_err_model = theano.function(
        inputs=[index],
        outputs=nn.errors(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size]
        }
    )
    validate_err_model = theano.function(
        inputs=[index],
        outputs=nn.errors(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]
        }
    )

    # one gradient and one SGD update pair per network parameter
    gparams = [T.grad(cost, param) for param in nn.params]
    updates = [
        (param, param - self.learning_rate * gparam)
        for param, gparam in zip(nn.params, gparams)
    ]
    train_model = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size: (index + 1) * batch_size],
            y: train_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    def validate():
        validation_losses = [validate_err_model(i) for i in xrange(n_valid_batches)]
        return np.mean(validation_losses)

    def test():
        test_losses = [test_err_model(i) for i in xrange(n_test_batches)]
        return np.mean(test_losses)

    def train():
        train_losses = [train_err_model(i) for i in xrange(n_train_batches)]
        return np.mean(train_losses)

    print '... training'
    # Train in mini batches
    minEpochs = 4
    validationFrequency = n_train_batches
    iteration = 0
    bestValidationLoss = np.inf
    directory = check_create_observations_dir()
    self.output_directory = directory
    max_epoch_reached = False
    while not max_epoch_reached:
        iteration += 1
        epochNo = (iteration / n_train_batches) + 1
        batchId = iteration % n_train_batches
        currentCost = train_model(batchId)
        if iteration % validationFrequency == 0:
            validation_err = validate()
            train_err = train()
            test_err = test()
            self.add_train_data(epochNo, train_err, validation_err, test_err)
            print "Epoch no: %d Validation Loss = %f" % (epochNo, validation_err * 100)
            if validation_err < bestValidationLoss:
                bestValidationLoss = validation_err
            if epochNo > minEpochs and validation_err * self.early_stopping_threshold > bestValidationLoss:
                break
            if epochNo >= self.n_epochs:
                max_epoch_reached = True

    testLoss = test()
    trainer.save_errors(self, directory)
    repfields_final_path = os.path.join(directory, "repFields.png")
    W_vals = nn.W1.get_value(borrow=True)
    display(W_vals, repfields_final_path)
    print "Iteration %d complete. Cost = %f Best Validation Loss = %f Test Loss = %f" \
        % (iteration, currentCost, bestValidationLoss * 100, testLoss * 100)
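# A self-contained sketch of the givens/index idiom that every compiled model
# above relies on (toy data; the names data/get_sum are illustrative): the
# symbolic index selects one minibatch out of a shared variable inside the
# graph, so no data is copied from Python on each call.
data = theano.shared(np.arange(20, dtype=theano.config.floatX))
demo_index = T.lscalar()
demo_batch_size = 5
demo_x = T.vector('demo_x')
get_sum = theano.function(
    inputs=[demo_index],
    outputs=demo_x.sum(),
    givens={demo_x: data[demo_index * demo_batch_size:(demo_index + 1) * demo_batch_size]}
)
print get_sum(1)  # sums elements 5..9 of the shared vector -> 35.0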
def train_NN(self, nn):
    trainer.train_NN(self, nn)
    data_loader = DataLoader()
    datasets = data_loader.load_shared_data()
    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # compute number of minibatches for training, validation and testing
    batch_size = self.batch_size
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] / batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] / batch_size

    index = T.lscalar()  # index to a [mini]batch
    x = nn.input  # the data is presented as rasterized images
    y = T.ivector('y')
    cost = (
        nn.negative_log_likelihood_dropout(y)
        + self.L1_lambda * nn.L1
        + self.L2_lambda * nn.L2
    )
    test_err_model = theano.function(
        inputs=[index],
        outputs=nn.errors(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size]
        }
    )
    validate_err_model = theano.function(
        inputs=[index],
        outputs=nn.errors(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]
        }
    )

    gparams = [T.grad(cost, param) for param in nn.params]

    # RMSProp state: one shared accumulator of squared gradients per
    # parameter, initialised to ones so the first steps are not inflated.
    mean_square_t = []
    mlp_params = []
    for param in nn.params:
        p = param.get_value(borrow=True)
        acc = theano.shared(
            value=np.ones(p.shape, dtype=theano.config.floatX),
            borrow=True
        )
        mlp_params.append(param)
        mean_square_t.append(acc)

    # r_t = decay * r_{t-1} + (1 - decay) * g_t**2
    new_mean_square_t = [self.decay * mt + (1 - self.decay) * gp ** 2
                         for gp, mt in zip(gparams, mean_square_t)]
    mean_square_update = [(t, t_plus_1)
                          for t, t_plus_1 in zip(mean_square_t, new_mean_square_t)]
    # p_t = p_{t-1} - step_rate * g_t / sqrt(r_t + eps)
    param_update = [(param, param - self.learning_rate * gp / T.sqrt(mt + 1e-8))
                    for param, gp, mt in zip(mlp_params, gparams, new_mean_square_t)]

    train_model = theano.function(
        inputs=[index],
        outputs=cost,
        updates=mean_square_update + param_update,
        givens={
            x: train_set_x[index * batch_size: (index + 1) * batch_size],
            y: train_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )
    train_err_model = theano.function(
        inputs=[index],
        outputs=nn.errors(y),
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size]
        }
    )

    def validate():
        validation_losses = [validate_err_model(i) for i in xrange(n_valid_batches)]
        return np.mean(validation_losses)

    def test():
        test_losses = [test_err_model(i) for i in xrange(n_test_batches)]
        return np.mean(test_losses)

    def train():
        train_losses = [train_err_model(i) for i in xrange(n_train_batches)]
        return np.mean(train_losses)

    print '... training'
    bestValidationLoss = np.inf
    minEpochs = 4
    validationFrequency = n_train_batches
    iteration = 0
    directory = check_create_observations_dir()
    self.output_directory = directory
    max_epoch_reached = False
    while not max_epoch_reached:
        iteration += 1
        epochNo = (iteration / n_train_batches) + 1
        batchId = iteration % n_train_batches
        currentCost = train_model(batchId)
        if iteration % validationFrequency == 0:
            validation_err = validate()
            train_err = train()
            test_err = test()
            self.add_train_data(epochNo, train_err, validation_err, test_err)
            print "Epoch no: %d Validation Loss = %f" % (epochNo, validation_err * 100)
            if validation_err < bestValidationLoss:
                bestValidationLoss = validation_err
            if epochNo > minEpochs and validation_err * self.early_stopping_threshold > bestValidationLoss:
                break
            if epochNo >= self.n_epochs:
                max_epoch_reached = True

    testLoss = test()
    trainer.save_errors(self, directory)
    repfields_final_path = os.path.join(directory, "repFields.png")
    W_vals = nn.W1.get_value(borrow=True)
    display(W_vals, repfields_final_path)
    print "Iteration %d complete. Cost = %f Best Validation Loss = %f Test Loss = %f" \
        % (iteration, currentCost, bestValidationLoss * 100, testLoss * 100)
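# The RMSProp rule built symbolically above, restated as a plain-numpy sketch
# (rmsprop_step is a hypothetical helper; the epsilon constant matches the
# Theano graph above):
#     r_t = decay * r_{t-1} + (1 - decay) * g_t ** 2
#     p_t = p_{t-1} - step_rate * g_t / sqrt(r_t + 1e-8)
def rmsprop_step(p, r, g, step_rate, decay):
    r = decay * r + (1 - decay) * g ** 2
    p = p - step_rate * g / np.sqrt(r + 1e-8)
    return p, r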