def validation(self, dataloader, task_n=''):
    # This might change for other incremental scenarios.
    # This function doesn't distinguish tasks.
    batch_timer = Timer()
    acc = AverageMeter()
    losses = AverageMeter()
    batch_timer.tic()

    orig_mode = self.training
    self.eval()
    with torch.no_grad():  # no gradients are needed during validation
        for i, (input, target, task) in enumerate(dataloader):
            if self.gpu:
                input = input.cuda()
                target = target.cuda()
            output = self.predict(input, task_n)
            loss = self.criterion(output, target, task)
            losses.update(loss, input.size(0))
            # Summarize the performance of all tasks or one task, depending on the dataloader.
            # Calculated over the total number of data points.
            acc = accumulate_acc(output, target, task, acc)
    self.train(orig_mode)

    self.log(' * Val Acc {acc.avg:.3f}, Total time {time:.2f}'.format(
        acc=acc, time=batch_timer.toc()))
    return acc, losses
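# All of the routines in this section lean on a small set of shared utilities
# (Timer, AverageMeter, accumulate_acc) defined elsewhere in the repo. The
# following is a minimal sketch, assuming only the interface these functions
# actually use (tic/toc, update/val/avg/sum/count, and a meter-returning
# accuracy accumulator); the real implementations may differ in detail.
import time

import torch


class Timer:
    """Wall-clock stopwatch: tic() (re)starts it, toc() returns elapsed seconds."""

    def __init__(self):
        self.start = time.time()

    def tic(self):
        self.start = time.time()

    def toc(self):
        return time.time() - self.start


class AverageMeter:
    """Tracks a running sum/count and exposes val/avg, as the loops below expect."""

    def __init__(self):
        self.val, self.sum, self.count, self.avg = 0.0, 0.0, 0, 0.0

    def update(self, val, n=1):
        self.val = float(val)
        self.sum += float(val) * n
        self.count += n
        self.avg = self.sum / self.count


def accumulate_acc(output, target, task, meter):
    # Simplified single-head variant: top-1 accuracy in percent over the batch.
    # The repo's version also handles dict outputs keyed per task; here we only
    # assume a catch-all 'All' head for the dict case.
    logits = output if torch.is_tensor(output) else output['All']
    pred = logits.argmax(dim=1)
    meter.update((pred == target).float().mean().item() * 100, len(target))
    return meter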
def validation(self, dataloader):
    # This function doesn't distinguish tasks.
    batch_timer = Timer()
    acc = AverageMeter()
    batch_timer.tic()

    orig_mode = self.training
    self.eval()
    with torch.no_grad():  # disable autograd for the whole evaluation pass
        for i, (input, target, task) in enumerate(dataloader):
            if self.gpu:
                input = input.cuda()
                target = target.cuda()
            output = self.predict(input)
            # Summarize the performance of all tasks or one task, depending on the dataloader.
            # Calculated over the total number of data points.
            acc = accumulate_acc(output, target, task, acc)
    self.train(orig_mode)

    self.log(' * Val Acc {acc.avg:.3f}, Total time {time:.2f}'.format(
        acc=acc, time=batch_timer.toc()))
    return acc.avg
def validation(self, dataloader, val_name=''):
    # This function doesn't distinguish tasks.
    batch_timer = Timer()
    acc = AverageMeter()
    losses = AverageMeter()
    orth_losses = AverageMeter()
    sp_losses = AverageMeter()
    batch_timer.tic()

    orig_mode = self.model.training
    self.model.eval()
    with torch.no_grad():
        for i, (input, target, task) in enumerate(dataloader):
            if self.gpu:
                input = input.cuda()
                target = target.cuda()
            output = self.predict(input)
            # TODO: this works only for the current SVD criterion; make it more generic.
            loss, orth_loss, sp_loss = self.criterion(output, target, task)
            # Summarize the performance of all tasks or one task, depending on the dataloader.
            # Calculated over the total number of data points.
            losses.update(loss, input.size(0))
            orth_losses.update(orth_loss, input.size(0))
            sp_losses.update(sp_loss, input.size(0))
            acc = accumulate_acc(output, target, task, acc)
    self.model.train(orig_mode)  # restore the mode the model arrived in

    self.log(' * Val Acc {acc.avg:.3f}, Total time {time:.2f}'.format(
        acc=acc, time=batch_timer.toc()))
    return acc, losses, orth_losses, sp_losses
def validation(self, dataloader):
    # This function doesn't distinguish tasks.
    batch_timer = Timer()
    val_acc = AverageMeter()
    losses = AverageMeter()
    batch_timer.tic()

    orig_mode = self.model.training
    self.model.eval()
    with torch.no_grad():
        for i, (inputs, target, task) in enumerate(dataloader):
            if self.config['gpu']:
                inputs = inputs.cuda()
                target = target.cuda()
            output = self.model.forward(inputs)
            loss = self.criterion(output, target, task, regularization=False)
            losses.update(loss, inputs.size(0))
            # The model returns one head per task; detach each head's logits.
            for t in output.keys():
                output[t] = output[t].detach()
            # Summarize the performance of all tasks or one task, depending on the dataloader.
            # Calculated over the total number of data points.
            val_acc = accumulate_acc(output, target, task, val_acc)
    self.model.train(orig_mode)

    self.log(' * Val Acc {acc.avg:.3f}, Total time {time:.2f}'.format(
        acc=val_acc, time=batch_timer.toc()))
    self.log(' * Val loss {loss.avg:.3f}, Total time {time:.2f}'.format(
        loss=losses, time=batch_timer.toc()))
    return val_acc.avg
def train(epoch, train_loader, learner, args):
    # This function optimizes the objective.
    # Initialize all meters
    data_timer = Timer()
    batch_timer = Timer()
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    confusion = Confusion(args.out_dim)

    # Set up the learner's configuration
    print('\n\n==== Epoch:{0} ===='.format(epoch))
    learner.train()
    learner.step_schedule(epoch)

    # The optimization loop
    data_timer.tic()
    batch_timer.tic()
    if args.print_freq > 0:  # Enable to print a mini-log
        print('Itr |Batch time |Data Time |Loss')
    for i, (input, target) in enumerate(train_loader):
        data_time.update(data_timer.toc())  # measure data loading time

        # Prepare the inputs
        if args.use_gpu:
            input = input.cuda()
            target = target.cuda()
        train_target, eval_target = prepare_task_target(input, target, args)

        # Optimization
        loss, output = learner.learn(input, train_target)

        # Update the performance meter
        confusion.add(output, eval_target)

        # Measure elapsed time
        batch_time.update(batch_timer.toc())
        data_timer.toc()

        # Mini-logs
        losses.update(loss, input.size(0))
        if args.print_freq > 0 and ((i % args.print_freq == 0) or (i == len(train_loader) - 1)):
            print('[{0:6d}/{1:6d}]\t'
                  '{batch_time.val:.4f} ({batch_time.avg:.4f})\t'
                  '{data_time.val:.4f} ({data_time.avg:.4f})\t'
                  '{loss.val:.3f} ({loss.avg:.3f})'.format(
                      i, len(train_loader),
                      batch_time=batch_time, data_time=data_time, loss=losses))

    # Loss-specific information
    if args.loss == 'CE':
        print('[Train] ACC: ', confusion.acc())
    elif args.loss in ['KCL', 'MCL']:
        # Save the cluster-to-class mapping in args for use during eval
        args.cluster2Class = confusion.optimal_assignment(train_loader.num_classes)
        if args.out_dim <= 20:  # Avoid printing a large confusion matrix
            confusion.show()
        print('Clustering scores:', confusion.clusterscores())
        print('[Train] ACC: ', confusion.acc())
    elif args.loss == 'DPS':
        confusion.show(width=15,
                       row_labels=['GT_dis-simi', 'GT_simi'],
                       column_labels=['Pred_dis-simi', 'Pred_simi'])
        print('[Train] similar pair f1-score:', confusion.f1score(1))  # f1-score for similar pairs (label 1)
        print('[Train] dissimilar pair f1-score:', confusion.f1score(0))
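# A hypothetical outer loop for train() above; the epoch count is illustrative
# and only train()'s own signature comes from this file.
def fit(train_loader, learner, args, num_epochs=10):
    for epoch in range(num_epochs):
        train(epoch, train_loader, learner, args)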
def learn_batch(self, train_loader, val_loader=None):
    if self.reset_optimizer:  # Reset optimizer before learning each task
        self.log('Optimizer is reset!')
        self.init_optimizer()

    for epoch in range(self.config.nepoch):
        data_timer = Timer()
        batch_timer = Timer()
        batch_time = AverageMeter()
        data_time = AverageMeter()
        losses = AverageMeter()
        acc = AverageMeter()

        # Config the model and optimizer
        self.log('Epoch:{0}'.format(epoch))
        self.model.train()
        if self.config.scheduler:
            self.scheduler.step(epoch)
        for param_group in self.optimizer.param_groups:
            self.log('LR:', param_group['lr'])

        # Learning with mini-batch
        data_timer.tic()
        batch_timer.tic()
        self.log('Itr\t\tTime\t\t Data\t\t Loss\t\tAcc')
        for i, (input, target, task) in enumerate(train_loader):
            data_time.update(data_timer.toc())  # measure data loading time
            if self.gpu:
                input = input.cuda()
                target = target.cuda()

            loss, output = self.update_model(input, target, task)
            input = input.detach()
            target = target.detach()

            # measure accuracy and record loss
            acc = accumulate_acc(output, target, task, acc)
            losses.update(loss, input.size(0))

            batch_time.update(batch_timer.toc())  # measure elapsed time
            data_timer.toc()

            if ((self.config['print_freq'] > 0)
                    and (i % self.config['print_freq'] == 0)) or (i + 1) == len(train_loader):
                self.log('[{0}/{1}]\t'
                         '{batch_time.val:.4f} ({batch_time.avg:.4f})\t'
                         '{data_time.val:.4f} ({data_time.avg:.4f})\t'
                         '{loss.val:.3f} ({loss.avg:.3f})\t'
                         '{acc.val:.2f} ({acc.avg:.2f})'.format(
                             i, len(train_loader),
                             batch_time=batch_time, data_time=data_time,
                             loss=losses, acc=acc))

        self.log(' * Train Acc {acc.avg:.3f}'.format(acc=acc))

        # Evaluate the performance of current task
        if val_loader is not None:
            self.validation(val_loader)
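# Sketch of how a learn_batch()/validation() pair is typically driven across an
# incremental task sequence. `agent` and `task_loaders` are hypothetical
# stand-ins for the objects this repo constructs elsewhere.
def run_incremental(agent, task_loaders):
    for task_name, (train_loader, val_loader) in task_loaders.items():
        print('====== Task {} ======'.format(task_name))
        agent.learn_batch(train_loader, val_loader)
        agent.validation(val_loader)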
def learn_batch(self, train_loader, val_loader=None, curr_global_decoder=None,
                local_vae=None, class_table=None, global_classes_list=None,
                task_id=None, n_codes=None, global_n_codes=None,
                new_task_data_processing='original'):
    if self.reset_optimizer:  # Reset optimizer before learning each task
        self.log('Optimizer is reset!')
        self.init_optimizer()

    print("Classifier: learning new task in '{}' new data processing mode".format(
        new_task_data_processing))
    if new_task_data_processing == 'original':
        process_through_local_vae = False
        train_only_on_generated_data = False
    elif new_task_data_processing == 'original_through_vae':
        process_through_local_vae = True
        train_only_on_generated_data = False
    elif new_task_data_processing == 'generated':
        process_through_local_vae = False
        train_only_on_generated_data = True
    else:
        raise ValueError(
            "'new_task_data_processing' argument is invalid: '{}'. "
            "Valid values are: 'original', 'original_through_vae', 'generated'.".format(
                new_task_data_processing))

    if self.score_generated_images_by_freezed_classifier:
        # Keep a frozen copy of the classifier to produce soft targets for replay.
        frozen_model = copy.deepcopy(self.model)
        frozen_model.eval()

    train_accs = []
    val_accs = []
    for epoch in range(self.config['base_schedule'][-1]):
        data_timer = Timer()
        batch_timer = Timer()
        batch_time = AverageMeter()
        data_time = AverageMeter()
        losses = AverageMeter()
        acc = AverageMeter()

        # Config the model and optimizer
        self.log('Epoch:{0}'.format(epoch))
        self.model.train()
        for param_group in self.optimizer.param_groups:
            self.log('LR:', param_group['lr'])

        # Learning with mini-batch
        data_timer.tic()
        batch_timer.tic()
        self.log('Itr\t\t Time\t\t\t Data\t\t\t Loss\t\t\t Acc')

        current_start = 0
        if train_only_on_generated_data:
            n_tasks_to_generate = task_id + 1
        else:
            n_tasks_to_generate = task_id

        if not train_only_on_generated_data and (task_id == 0):
            starting_points_fixed = np.array([[0]])
        else:
            # Pick a random batch offset into each previous task's codes and pad
            # the shorter permutations so all tasks line up column-wise.
            starting_points = []
            for prev_task_id in range(n_tasks_to_generate):
                starting_points.append(
                    np.random.permutation(
                        np.arange(math.ceil(global_n_codes[prev_task_id]
                                            / train_loader.batch_size))))
            max_len = max(len(repeats) for repeats in starting_points)
            starting_points_fixed = []
            for points in starting_points:
                starting_points_fixed.append(
                    np.pad(points, [0, max_len - len(points)], mode="reflect"))
            starting_points_fixed = np.array(starting_points_fixed)

        for i, (orig_input, orig_target, orig_task) in enumerate(train_loader):
            data_time.update(data_timer.toc())  # measure data loading time
            batch_size = len(orig_task)

            # Generate data so every task is equally represented.
            with torch.no_grad():
                if process_through_local_vae:
                    orig_input, orig_target, _ = vae_utils.generate_current_data(
                        local_vae.decoder, task_id, batch_size, current_start,
                        global_classes_list, n_codes, global_n_codes)

                generate_impl = vae_utils.generate_previous_data
                if train_only_on_generated_data:
                    # generate data from previous tasks and the current one
                    generate_impl = vae_utils.generate_previous_and_current_data
                    # clear original data
                    orig_input, orig_target = torch.Tensor(), torch.Tensor()

                if train_only_on_generated_data or (task_id > 0):
                    gen_input, gen_target_orig, _ = generate_impl(
                        curr_global_decoder, task_id, batch_size,
                        starting_points_fixed[:, current_start] * batch_size,
                        global_classes_list, n_codes, global_n_codes)
                    current_start += 1
                else:
                    gen_input = torch.Tensor()
                    gen_target_orig = torch.Tensor()

                if self.score_generated_images_by_freezed_classifier:
                    if task_id > 0:
                        # Soft targets for replayed samples come from the frozen copy.
                        gen_target = frozen_model.forward(gen_input[:-batch_size])
                        gen_target = gen_target['All']
                        gen_target = F.softmax(gen_target, 1)
                        if train_only_on_generated_data:
                            targets_orig = self.one_hot_targets(
                                gen_target_orig[-batch_size:]).to(local_vae.device)
                            gen_target = torch.cat([gen_target, targets_orig])
                        else:
                            targets_orig = self.one_hot_targets(
                                orig_target).to(local_vae.device)
                            gen_target = torch.cat([gen_target, targets_orig])
                    else:
                        gen_target = gen_target_orig
                        gen_target = self.one_hot_targets(gen_target, self.model.n_classes)
                else:
                    # Note: encode the generated labels here (the original code
                    # referenced an undefined gen_target in this branch).
                    gen_target = self.one_hot_targets(gen_target_orig, self.model.n_classes)
                orig_target = self.one_hot_targets(orig_target, self.model.n_classes)

            if self.gpu:
                orig_input = orig_input.cuda()
                orig_target = orig_target.cuda()
                gen_input = gen_input.cuda()
                gen_target = gen_target.cuda()

            # merge original and generated data
            multi_input = torch.cat((orig_input, gen_input), 0)
            multi_target = torch.cat((orig_target, gen_target), 0)

            # zip and shuffle
            multibatch = list(zip(multi_input, multi_target))
            random.shuffle(multibatch)

            # iterate over batch-size chunks of the merged pool
            multibatch_parted = zip(*(iter(multibatch),) * batch_size)
            for part in multibatch_parted:
                input, target = zip(*part)
                # convert tuples of tensors into one tensor
                input = torch.stack(input)
                target = torch.stack(target)

                loss, output = self.update_model(input, target, None)
                input = input.detach()
                target = target.detach()

                # measure accuracy and record loss
                acc = accumulate_acc(output, target, None, acc)
                losses.update(loss, input.size(0))

                batch_time.update(batch_timer.toc())  # measure elapsed time
                data_timer.toc()

            if ((self.config['base_print_freq'] > 0)
                    and (i % self.config['base_print_freq'] == 0)) or (i + 1) == len(train_loader):
                self.log('[{0}/{1}]\t'
                         '{batch_time.val:.4f} ({batch_time.avg:.4f})\t'
                         '{data_time.val:.4f} ({data_time.avg:.4f})\t'
                         '{loss.val:.3f} ({loss.avg:.3f})\t'
                         '{acc.val:.2f} ({acc.avg:.2f})'.format(
                             i, len(train_loader),
                             batch_time=batch_time, data_time=data_time,
                             loss=losses, acc=acc))

        train_accs.append(acc.avg)
        self.log(' * Train on {} original batches, Acc {acc.avg:.3f}'.format(
            len(train_loader), acc=acc))

        # Evaluate the performance of current task
        if val_loader is not None:
            val_accs.append(self.validation(val_loader))
    print("All epochs ended")
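# learn_batch() above mixes hard labels with soft targets from the frozen
# classifier, so integer labels must be one-hot encoded first. A minimal sketch
# of the assumed one_hot_targets helper (the repo's actual method may differ,
# e.g. in how it infers the class count when none is given):
import torch
import torch.nn.functional as F


def one_hot_targets(targets, n_classes=None):
    targets = targets.long().view(-1)
    if n_classes is None:
        # Fall back to the largest label seen in this batch.
        n_classes = int(targets.max().item()) + 1
    return F.one_hot(targets, num_classes=n_classes).float()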
def train_(self, epochs, finetune=False):
    str_ = 'pretrain'
    if finetune:
        self.switch_finetune()
        str_ = 'finetune'
    self.str_ = str_

    for epoch in range(epochs):
        data_timer = Timer()
        batch_timer = Timer()
        batch_time = AverageMeter()
        data_time = AverageMeter()
        losses = AverageMeter()
        acc = AverageMeter()

        self.model.train()
        self.scheduler.step(epoch)

        # Optionally unfreeze all parameters partway through training.
        if self.config['train_between']:
            if epoch == self.config['schedule'][0]:
                for param in self.model.parameters():
                    param.requires_grad = True
                self.config['weight_decay'] = 5e-4
                self.init_optimizer()
        if self.config['switch_all']:
            if epoch == self.config['switch_all']:
                self.config['weight_decay'] = 5e-3
                for param in self.model.parameters():
                    param.requires_grad = True
                self.init_optimizer()

        for param_group in self.optimizer.param_groups:
            self.log('LR:', param_group['lr'])
        self.log('Itr\t\tTime\t\t Data\t\t Loss\t\tAcc')
        self.log('{0} Epoch:{1}'.format(str_, epoch))

        data_timer.tic()
        batch_timer.tic()
        for i, (input, target) in enumerate(self.train_loader):
            self.model.train()
            data_time.update(data_timer.toc())  # measure data loading time
            if self.gpu:
                input = input.cuda()
                target = target.cuda()

            loss, output = self.update_model(input, target)
            input = input.detach()
            target = target.detach()

            # measure accuracy and record loss
            acc = self.accumulate_acc(output, target, acc)
            losses.update(loss, input.size(0))

            batch_time.update(batch_timer.toc())  # measure elapsed time
            data_timer.toc()

            self.n_iter = epoch * len(self.train_loader) + i
            self.writer.add_scalar(str_ + '/Loss_train', losses.avg, self.n_iter)
            self.writer.add_scalar(str_ + '/Acc_train', acc.avg, self.n_iter)

            self.log('[{0}/{1}]\t'
                     '{batch_time.val:.4f} ({batch_time.avg:.4f})\t'
                     '{data_time.val:.4f} ({data_time.avg:.4f})\t'
                     '{loss.val:.3f} ({loss.avg:.3f})\t'
                     '{acc.val:.2f} ({acc.avg:.2f})'.format(
                         i, len(self.train_loader),
                         batch_time=batch_time, data_time=data_time,
                         loss=losses, acc=acc))

        acc_v, loss_v = self.validation(self.test_loader)
        self.writer.add_scalar(str_ + '/Loss_test', loss_v.avg, self.n_iter)
        self.writer.add_scalar(str_ + '/Acc_test', acc_v.avg, self.n_iter)
        if epoch % self.save_after == 0 and epoch != 0:
            self.save_model(str_ + str(epoch))
def validation(self, test_loader, from_train=1):
    # This might change for other incremental scenarios.
    # This function doesn't distinguish tasks.
    batch_timer = Timer()
    acc = AverageMeter()
    losses = AverageMeter()
    acc_5 = AverageMeter()
    # One meter per class. (Note: [AverageMeter()] * n would alias one meter.)
    n_classes = len(self.train_loader.dataset.class_list)
    acc_class = [AverageMeter() for _ in range(n_classes)]
    acc_class_5 = [AverageMeter() for _ in range(n_classes)]
    batch_timer.tic()

    orig_mode = self.training
    self.eval()
    with torch.no_grad():
        for i, (input, target) in enumerate(test_loader):
            if self.gpu:
                input = input.cuda()
                target = target.cuda()
            output = self.forward(input)
            loss = self.criterion(output, target)
            losses.update(loss, input.size(0))
            # Summarize the performance of all tasks or one task, depending on the dataloader.
            # Calculated over the total number of data points.
            t_acc, acc_class = accuracy(output, target, topk=(1,), avg_meters=acc_class)
            t_acc_5, acc_class_5 = accuracy(output, target, topk=(5,), avg_meters=acc_class_5)
            acc.update(t_acc, len(target))
            acc_5.update(t_acc_5, len(target))

    class_list = self.train_loader.dataset.class_list.inverse
    acc_cl_1 = {}
    acc_cl_5 = {}

    # From the per-class accuracies, build instance-size-based accuracies.
    inst_clss_lst = self.train_loader.dataset.class_inst_list
    for ins_clss_, insts in inst_clss_lst.items():
        cls_sum = sum(acc_class[inst].sum for inst in insts)
        cls_cnt = sum(acc_class[inst].count for inst in insts)
        assert cls_cnt > 0, 'No validation samples for class group {}'.format(ins_clss_)
        inst_avg = cls_sum / cls_cnt
        self.writer.add_scalar(self.str_ + '/Acc_1_{}'.format(ins_clss_),
                               inst_avg, self.n_iter)

        cls_sum_5 = sum(acc_class_5[inst].sum for inst in insts)
        cls_cnt_5 = sum(acc_class_5[inst].count for inst in insts)
        inst_avg_5 = cls_sum_5 / cls_cnt_5
        self.writer.add_scalar(self.str_ + '/Acc_5_{}'.format(ins_clss_),
                               inst_avg_5, self.n_iter)

    for idx, cl_ in class_list.items():
        acc_cl_1[cl_] = [acc_class[idx].avg, acc_class[idx].sum, acc_class[idx].count]
        acc_cl_5[cl_] = [acc_class_5[idx].avg, acc_class_5[idx].sum, acc_class_5[idx].count]

    self.train(orig_mode)
    self.log(' * Val Acc {acc.avg:.3f}, Total time {time:.2f}'.format(
        acc=acc, time=batch_timer.toc()))
    if from_train:
        return acc, losses
    return acc, acc_5, acc_cl_1, acc_cl_5, losses
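# validation() above relies on an accuracy() helper that returns both the batch
# top-k accuracy and the updated per-class meters. A minimal sketch under that
# assumed contract (single top-k value, meters indexed by class id; the repo's
# version may handle multiple k values at once):
def accuracy(output, target, topk=(1,), avg_meters=None):
    k = topk[0]
    _, pred = output.topk(k, dim=1)                   # (batch, k) predicted classes
    correct = pred.eq(target.view(-1, 1)).any(dim=1)  # top-k hit per sample
    if avg_meters is not None:
        for cls in target.unique():
            mask = target == cls
            avg_meters[int(cls)].update(
                correct[mask].float().mean().item() * 100, int(mask.sum()))
    return correct.float().mean().item() * 100, avg_meters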
def learn_batch(self, train_loader, val_loader=None):
    if self.reset_optimizer:  # Reset optimizer before learning each task
        self.log('Optimizer is reset!')
        self.init_optimizer()
    self.model.zero_grad()

    for epoch in range(self.config['schedule'][-1]):
        data_timer = Timer()
        batch_timer = Timer()
        batch_time = AverageMeter()
        data_time = AverageMeter()
        losses = AverageMeter()
        acc = AverageMeter()

        # Config the model and optimizer
        self.log('Epoch:{0}'.format(epoch))
        self.model.train()
        self.scheduler.step(epoch)
        for param_group in self.optimizer.param_groups:
            self.log('LR:', param_group['lr'])

        # Learning with mini-batch
        data_timer.tic()
        batch_timer.tic()
        self.log('Itr\t\tTime\t\t Data\t\t Loss\t\tAcc')
        for i, (inputs_1, inputs_2, inputs_3, target) in enumerate(train_loader):
            # Stack the three text inputs into one 2-D tensor.
            input = torch.stack([inputs_1, inputs_2, inputs_3]).reshape((8, -1))
            task = 'mrpc'
            data_time.update(data_timer.toc())  # measure data loading time
            if self.gpu:
                input = input.cuda()
                target = target.cuda()

            loss, output = self.update_model(input, target, task)
            input = input.detach()
            target = target.detach()

            # measure accuracy and record loss
            acc = accumulate_acc(output, target, task, acc)
            losses.update(loss, input.size(0))

            batch_time.update(batch_timer.toc())  # measure elapsed time
            data_timer.toc()

            if ((self.config['print_freq'] > 0)
                    and (i % self.config['print_freq'] == 0)) or (i + 1) == len(train_loader):
                self.log('[{0}/{1}]\t'
                         '{batch_time.val:.4f} ({batch_time.avg:.4f})\t'
                         '{data_time.val:.4f} ({data_time.avg:.4f})\t'
                         '{loss.val:.3f} ({loss.avg:.3f})\t'
                         '{acc.val:.2f} ({acc.avg:.2f})'.format(
                             i, len(train_loader),
                             batch_time=batch_time, data_time=data_time,
                             loss=losses, acc=acc))

        self.log(' * Train Acc {acc.avg:.3f}'.format(acc=acc))

        # Evaluate the performance of current task
        if val_loader is not None:
            self.validation(val_loader)

    # --- Regularization bookkeeping ---
    # 2. Back up the weights of the current task
    task_param = {}
    for n, p in self.params.items():
        task_param[n] = p.clone().detach()

    # 3. Calculate the importance of weights for the current task
    importance = self.calculate_importance(train_loader)

    # Save the weights and importance of the current task
    self.task_count += 1
    if self.online_reg and len(self.regularization_terms) > 0:
        # Always use only one slot in self.regularization_terms
        self.regularization_terms[1] = {'importance': importance,
                                        'task_param': task_param}
    else:
        # Use a new slot to store the task-specific information
        self.regularization_terms[self.task_count] = {'importance': importance,
                                                      'task_param': task_param}
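# The importance/task_param snapshots saved at the end of learn_batch() above
# are consumed by an EWC-style quadratic penalty at training time. A sketch of
# that use, assuming the same {'importance', 'task_param'} layout (the repo's
# actual penalty lives in its regularized-loss criterion):
def reg_penalty(params, regularization_terms):
    penalty = 0
    for term in regularization_terms.values():
        for n, p in params.items():
            # Penalize drift from the old weights, weighted by importance.
            penalty = penalty + (term['importance'][n]
                                 * (p - term['task_param'][n]) ** 2).sum()
    return penalty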
def learn_batch(self, train_loader, val_loader=None):
    if self.reset_optimizer:  # Reset optimizer before learning each task
        self.log('Optimizer is reset!')
        self.init_optimizer()

    schedule = self.schedule_stack.pop()
    for epoch in range(schedule):
        data_timer = Timer()
        batch_timer = Timer()
        batch_time = AverageMeter()
        data_time = AverageMeter()
        losses = AverageMeter()
        acc = AverageMeter()
        robust_err, robust_loss = -1, -1

        # Config the model and optimizer
        self.log('Epoch:{0}'.format(epoch))
        self.model.train()
        for param_group in self.optimizer.param_groups:
            self.log('LR:', param_group['lr'])

        # Learning with mini-batch
        data_timer.tic()
        batch_timer.tic()
        for i, (inputs, target, task) in enumerate(train_loader):
            data_time.update(data_timer.toc())  # measure data loading time
            if self.gpu:
                inputs = inputs.cuda()
                target = target.cuda()

            loss, robust_err, robust_loss, output = self.update_model(inputs, target, task)
            inputs = inputs.detach()
            target = target.detach()

            self.tb.add_scalar(f"Loss/train - task {self.current_task}", loss, epoch)
            self.tb.add_scalar(f"Robust error/train - task {self.current_task}",
                               robust_err, epoch)

            # measure accuracy and record loss
            acc = accumulate_acc(output, target, task, acc)
            losses.update(loss, inputs.size(0))

            batch_time.update(batch_timer.toc())  # measure elapsed time
            data_timer.toc()

        self.log(' * Train Acc {acc.avg:.3f}, Loss {loss.avg:.3f}'.format(
            loss=losses, acc=acc))
        self.log(f" * robust loss: {robust_loss:.10f} robust error: {robust_err:.10f}")

        # Evaluate the performance of current task
        if val_loader is not None:
            self.validation(val_loader)

        self.scheduler.step()
def learn_batch(self, train_loader, val_loader=None, epochs=(0, 40), task_n=''):
    if epochs[0] == 0:
        # Only on the first call for each task/class: reset the optimizer
        # before incremental learning.
        self.task_num += 1
        if self.reset_optimizer:
            self.log('Optimizer is reset!')
            self.freeze(task_n)
            self.init_optimizer(params=filter(lambda p: p.requires_grad,
                                              self.model.parameters()))

    data_timer = Timer()
    batch_timer = Timer()
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    acc = AverageMeter()

    # One writer for the whole run; re-creating it every epoch would spawn a
    # new event file each time.
    self.writer = SummaryWriter(log_dir="runs/" + self.exp_name)

    for epoch in range(epochs[0], epochs[1]):
        self.scheduler.step(epoch)

        # Config the model and optimizer
        self.log('Epoch:{0}'.format(epoch))
        self.model.train()
        for param_group in self.optimizer.param_groups:
            self.log('LR:', param_group['lr'])

        # Learning with mini-batch
        data_timer.tic()
        batch_timer.tic()
        self.log('Itr\t\tTime\t\t Data\t\t Loss\t\tAcc')
        for i, (input, target, task) in enumerate(train_loader):
            self.n_iter = epoch * len(train_loader) + i + 1
            data_time.update(data_timer.toc())  # measure data loading time
            if self.gpu:
                input = input.cuda()
                target = target.cuda()

            loss, output = self.update_model(input, target, task, task_n)
            input = input.detach()
            target = target.detach()

            # measure accuracy and record loss
            acc = accumulate_acc(output, target, task, acc)
            losses.update(loss, input.size(0))
            self.writer.add_scalar(
                'Run' + str(self.config['run_num']) + '/Loss/train' + task_n,
                losses.avg, self.n_iter)
            self.writer.add_scalar(
                'Run' + str(self.config['run_num']) + '/Accuracy/train' + task_n,
                acc.avg, self.n_iter)

            batch_time.update(batch_timer.toc())  # measure elapsed time
            data_timer.toc()

            if ((self.config['print_freq'] > 0)
                    and (i % self.config['print_freq'] == 0)) or (i + 1) == len(train_loader):
                self.log('[{0}/{1}]\t'
                         '{batch_time.val:.4f} ({batch_time.avg:.4f})\t'
                         '{data_time.val:.4f} ({data_time.avg:.4f})\t'
                         '{loss.val:.3f} ({loss.avg:.3f})\t'
                         '{acc.val:.2f} ({acc.avg:.2f})'.format(
                             i, len(train_loader),
                             batch_time=batch_time, data_time=data_time,
                             loss=losses, acc=acc))

        self.log(' * Train Acc {acc.avg:.3f}'.format(acc=acc))

        # Evaluate the performance of current task
        if val_loader is not None:
            acc_val, loss_val = self.validation(val_loader, task_n)
            self.writer.add_scalar(
                'Run' + str(self.config['run_num']) + '/Loss/test' + task_n,
                loss_val.avg, self.n_iter)
            self.writer.add_scalar(
                'Run' + str(self.config['run_num']) + '/Accuracy/test' + task_n,
                acc_val.avg, self.n_iter)
    self.writer.close()
def learn_batch(self, train_loader, val_loader=None, task_name=None):
    if self.reset_optimizer:  # Reset optimizer before learning each task
        self.log('Optimizer is reset!')
        self.init_optimizer()

    for epoch in range(self.config['schedule'][-1]):
        data_timer = Timer()
        batch_timer = Timer()
        batch_time = AverageMeter()
        data_time = AverageMeter()
        losses = AverageMeter()
        acc = AverageMeter()
        orth_losses = AverageMeter()
        sp_losses = AverageMeter()

        # Config the model and optimizer
        self.log('Epoch:{0}'.format(epoch))
        self.model.train()
        self.scheduler.step(epoch)
        for param_group in self.optimizer.param_groups:
            self.log('LR:', param_group['lr'])

        # Learning with mini-batch
        data_timer.tic()
        batch_timer.tic()
        self.log('Itr\t\tTime\t\t Data\t\t Loss\t\tAcc')
        task_n = 'blah'  # placeholder until the first batch provides the task name
        for i, (input, target, task) in enumerate(train_loader):
            self.n_iter = epoch * len(train_loader) + i + 1
            task_n = task[0]
            data_time.update(data_timer.toc())  # measure data loading time
            if self.gpu:
                input = input.cuda()
                target = target.cuda()

            loss, output, orth_loss, sp_loss = self.update_model(input, target, task, i)
            input = input.detach()
            target = target.detach()

            # measure accuracy and record losses
            acc = accumulate_acc(output, target, task, acc)
            losses.update(loss, input.size(0))
            sp_losses.update(sp_loss, input.size(0))
            orth_losses.update(orth_loss, input.size(0))

            self.writer.add_scalar('/All_Losses/train' + task_n, losses.avg, self.n_iter)
            self.writer.add_scalar('/Orth_Loss/train' + task_n, orth_losses.avg, self.n_iter)
            self.writer.add_scalar('/Sparsity_Loss/train' + task_n, sp_losses.avg, self.n_iter)
            self.writer.add_scalar('/Accuracy/train' + task_n, acc.avg, self.n_iter)

            batch_time.update(batch_timer.toc())  # measure elapsed time
            data_timer.toc()

            if ((self.config['print_freq'] > 0)
                    and (i % self.config['print_freq'] == 0)) or (i + 1) == len(train_loader):
                self.log('[{0}/{1}]\t'
                         '{batch_time.val:.4f} ({batch_time.avg:.4f})\t'
                         '{data_time.val:.4f} ({data_time.avg:.4f})\t'
                         '{loss.val:.3f} ({loss.avg:.3f})\t'
                         '{acc.val:.2f} ({acc.avg:.2f})'.format(
                             i, len(train_loader),
                             batch_time=batch_time, data_time=data_time,
                             loss=losses, acc=acc))

        self.log(' * Train Acc {acc.avg:.3f}'.format(acc=acc))

        # Evaluate the performance of current task
        if val_loader is not None:
            v_acc, v_losses, v_orth_losses, v_sp_losses = self.validation(val_loader)
            self.writer.add_scalar('/All_Losses/val' + task_n, v_losses.avg, self.n_iter)
            self.writer.add_scalar('/Orth_Loss/val' + task_n, v_orth_losses.avg, self.n_iter)
            self.writer.add_scalar('/Sparsity_Loss/val' + task_n, v_sp_losses.avg, self.n_iter)
            self.writer.add_scalar('/Accuracy/val' + task_n, v_acc.avg, self.n_iter)
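# update_model() above additionally returns orthogonality and sparsity terms
# for the SVD-based method. For reference, a common formulation of such
# regularizers (a sketch only; not necessarily this repo's exact definition):
import torch


def orthogonality_loss(W):
    # ||W^T W - I||_F^2 pushes the columns of W toward orthonormality.
    gram = W.t() @ W
    eye = torch.eye(gram.size(0), device=W.device, dtype=W.dtype)
    return ((gram - eye) ** 2).sum()


def sparsity_loss(W):
    return W.abs().mean()  # plain L1-style sparsity penalty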