def train(self, epoch):
    """Run one training epoch over ``self.train_loader``.

    :param epoch: epoch index, used only for progress logging.
    :return: tuple ``(average_loss, accuracy)`` accumulated over the epoch.
    """
    self.model.train()
    train_loss = AverageMeter()
    train_acc = AccuracyMeter()
    for i, (x, y) in enumerate(self.train_loader):
        # NOTE(review): Variable() is a no-op on PyTorch >= 0.4 — kept as-is.
        x = Variable(x)
        y = Variable(y)
        if self.use_cuda:
            x = x.cuda()
            y = y.cuda()
        output = self.model(x)
        loss = F.cross_entropy(output, y)
        self.optimizer.zero_grad()
        loss.backward()
        # Prevent exploding gradients. NOTE(review): torch's clip_grad_norm
        # expects an iterable of parameters, not an optimizer — presumably this
        # is a project-local helper that unpacks param groups; verify.
        clip_grad_norm(self.optimizer, max_norm=1)
        self.optimizer.step()
        train_loss.update(float(loss.data), x.size(0))
        # Predicted class = argmax over logits.
        y_pred = output.data.max(dim=1)[1]
        #correct = int(y_pred.eq(y.data).cpu().sum())
        _, correct, _ = get_accuracy(y.data, y_pred)
        train_acc.update(correct, x.size(0))
        if i % 100 == 0:
            print(
                '\nTrain Epoch/batch| [{}/{}]: Average batch loss:{:.6f},acc: {:.6f}\n'
                .format(epoch, i, train_loss.average, train_acc.accuracy))
    #save_model_checkpoint(self.model,epoch,self.save)
    return train_loss.average, train_acc.accuracy
def train(model, data_loader, optimizer, criterion, device):
    """Train *model* for a single epoch.

    :param model: network to optimize (switched into train mode here).
    :param data_loader: iterable of ``(image, target)`` batches.
    :param optimizer: optimizer stepped once per batch.
    :param criterion: loss applied to ``(output, target)``.
    :param device: device the batches are moved to.
    :return: running average loss over the epoch.
    """
    model.train()
    loss_meter = AverageMeter()
    progress = tqdm(data_loader)
    for batch, labels in progress:
        batch = batch.to(device)
        labels = labels.to(device)
        optimizer.zero_grad()
        predictions = model(batch)
        batch_loss = criterion(predictions, labels)
        batch_loss.backward()
        optimizer.step()
        loss_meter.update(batch_loss.item(), batch.size(0))
        progress.set_description(
            '\ttrain => loss {:.4f}'.format(loss_meter.avg), refresh=True)
    return loss_meter.avg
def train(epoch):
    """Train the (module-level) ``model`` for one epoch.

    :param epoch: epoch index, used for the distributed sampler and logging.

    FIX: removed the dead locals ``correct``, ``preds`` and ``train_labels``,
    which were initialized but never read or updated.
    """
    losses = AverageMeter()
    # switch to train mode
    model.train()
    if args.distribute:
        # Reshuffle shards per epoch in distributed mode.
        train_sampler.set_epoch(epoch)
    for i, (image, label) in enumerate(train_loader):
        rate = get_learning_rate(optimizer)
        image, label = image.cuda(), label.cuda()
        output = model(image)
        loss = criterion(output, label)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        # measure accuracy and record loss
        losses.update(loss.item(), image.size(0))
        if i % args.print_freq == 0 or i == len(train_loader) - 1:
            print('Epoch: [{0}][{1}/{2}]\t'
                  'Rate:{rate}\t'
                  'Loss {loss.val:.5f} ({loss.avg:.5f})\t'.format(
                      epoch, i, len(train_loader), rate=rate, loss=losses))
    return
def validation(self, dataloader):
    """Evaluate the model over *dataloader*; does not distinguish tasks.

    :param dataloader: iterable of ``(inputs, target, task)`` batches.
    :return: average validation accuracy (``val_acc.avg``).

    FIX: the original wrapped only the ``.cuda()`` transfers in
    ``torch.no_grad()``, so the forward pass and loss still built autograd
    graphs during evaluation; the whole loop now runs grad-free.
    """
    batch_timer = Timer()
    val_acc = AverageMeter()
    losses = AverageMeter()
    batch_timer.tic()
    self.model.eval()
    with torch.no_grad():
        for i, (inputs, target, task) in enumerate(dataloader):
            if self.config['gpu']:
                inputs = inputs.cuda()
                target = target.cuda()
            output = self.model.forward(inputs)
            loss = self.criterion(output, target, task, regularization=False)
            losses.update(loss, inputs.size(0))
            for t in output.keys():
                output[t] = output[t].detach()
            # Summarize the performance of all tasks, or 1 task, depends on
            # dataloader. Calculated by total number of data.
            val_acc = accumulate_acc(output, target, task, val_acc)
    self.log(' * Val Acc {acc.avg:.3f}, Total time {time:.2f}'.format(
        acc=val_acc, time=batch_timer.toc()))
    self.log(' * Val loss {loss.avg:.3f}, Total time {time:.2f}'.format(
        loss=losses, time=batch_timer.toc()))
    return val_acc.avg
def validate():
    """Evaluate the (module-level) ``model`` over ``val_loader``.

    :return: tuple ``(val_acc1.avg, losses.avg)``, averaged across workers
        when running distributed.

    BUG FIX: the Horovod-averaged loss was assigned to the typo'd attribute
    ``losses.vag``, so the returned ``losses.avg`` was never averaged across
    workers.
    """
    losses = AverageMeter()
    val_acc1 = AverageMeter()
    # switch to evaluate mode
    model.eval()
    for i, (image, label) in enumerate(val_loader):
        with torch.no_grad():
            image, label = image.cuda(), label.cuda()
            # compute output
            output = model(image)
            loss = criterion(output, label)
            # statistics
            val_acc = accuracy(output, label)
            val_acc1.update(val_acc.item(), image.size(0))
            losses.update(loss.item(), image.size(0))
            if i % args.print_freq == 0 or i == len(val_loader) - 1:
                print('[TEST]: {0}/{1}\tLoss {loss.val:.5f} ({loss.avg:.5f})'.
                      format(i, len(val_loader), loss=losses))
    if args.distribute:
        # Horovod: average metric values across workers.
        val_acc1.avg = metric_average(val_acc1.avg, 'val_acc')
        losses.avg = metric_average(losses.avg, 'losses.avg')
    return val_acc1.avg, losses.avg
def test(data_loader, network, args):
    """Embed every image/caption pair in *data_loader* and score retrieval.

    :return: ``(top1_i2t, top10_i2t, top1_t2i, top10_t2i, avg_batch_time)``.
    """
    timer = AverageMeter()
    # switch to evaluate mode
    network.eval()
    # NOTE(review): capacity assumes batches of at most 64 samples — confirm.
    capacity = 64 * len(data_loader)
    img_bank = torch.zeros((capacity, args.feature_size)).cuda()
    txt_bank = torch.zeros((capacity, args.feature_size)).cuda()
    id_bank = torch.zeros(capacity).cuda()
    cursor = 0
    with torch.no_grad():
        tick = time.time()
        for images, captions, labels, captions_length in data_loader:
            images = images.cuda()
            captions = captions.cuda()
            count = images.shape[0]
            image_embeddings, text_embeddings = network(
                images, captions, captions_length)
            # Append this batch's embeddings/labels to the banks.
            img_bank[cursor:cursor + count] = image_embeddings
            txt_bank[cursor:cursor + count] = text_embeddings
            id_bank[cursor:cursor + count] = labels
            timer.update(time.time() - tick)
            tick = time.time()
            cursor += count
        # Trim unused capacity before scoring.
        img_bank = img_bank[:cursor]
        txt_bank = txt_bank[:cursor]
        id_bank = id_bank[:cursor]
        ac_top1_i2t, ac_top10_i2t, ac_top1_t2i, ac_top10_t2i = compute_topk(
            img_bank, txt_bank, id_bank, id_bank, [1, 10], True)
    return ac_top1_i2t, ac_top10_i2t, ac_top1_t2i, ac_top10_t2i, timer.avg
def test(self, epoch):
    """Evaluate on ``self.test_dataloader``; optionally dump per-sample
    predictions to ``result.txt``. Stores the final accuracy on ``self.acc``.
    """
    self.model.eval()
    top1 = AverageMeter()
    all_result = []
    for batch_idx, data in enumerate(self.test_dataloader):
        images = data['images']
        labels = data['labels']
        images_path = data['images_path']
        if self.use_cuda:
            images = images.cuda()
            labels = labels.cuda()
        outputs = self.model(images)
        prec1 = accuracy(outputs.data, labels.data, topk=(1, ))
        top1.update(prec1[0].detach().item(), images.size(0))
        self.writer.add_scalar('test/acc', top1.val, self.iters)
        if self.args.is_save:
            # Record "path<TAB>label<TAB>prediction<TAB>confidence" per sample.
            probs, preds = outputs.softmax(dim=1).max(dim=1)
            probs = probs.view(-1)
            preds = preds.view(-1)
            for idx in range(images.size(0)):
                all_result.append('{}\t{}\t{}\t{}\n'.format(
                    images_path[idx], labels[idx].item(),
                    preds[idx].item(), probs[idx].item()))
    if self.args.is_save:
        with open('result.txt', 'w') as f:
            f.writelines(all_result)
    self.acc = top1.avg
    print('Test epoch:{}, acc:{}'.format(epoch, top1.avg))
def eval(model, data_loader, criterion, device):
    """Evaluate *model* on *data_loader*.

    NOTE: shadows the builtin ``eval`` — the name is kept for caller
    compatibility.

    :param model: network to evaluate (switched into eval mode here).
    :param data_loader: iterable of ``(image, target)`` batches.
    :param criterion: loss applied to ``(output, target)``.
    :param device: device the batches are moved to.
    :return: tuple ``(avg_loss, list_of_per_batch_iou)``.
    """
    model.eval()
    loss_meter = AverageMeter()
    iou_scores = []
    progress = tqdm(data_loader)
    with torch.no_grad():
        for image, target in progress:
            image = image.to(device)
            target = target.to(device)
            output = model(image)
            # calculate loss
            loss = criterion(output, target)
            # calculate iou from the hard predictions
            pred = output.argmax(1)
            iou_scores.append(get_iou(pred, target))
            loss_meter.update(loss.item(), image.size(0))
            progress.set_description(
                'eval loss {0}'.format(loss.item()), refresh=True)
    progress.write('\teval => loss {:.4f}'.format(loss_meter.avg))
    return loss_meter.avg, iou_scores
def validation(self, dataloader):
    """Evaluate the agent over *dataloader*; does not distinguish tasks.

    :param dataloader: iterable of ``(input, target, task)`` batches.
    :return: average validation accuracy (``acc.avg``).

    FIX: the original wrapped only the ``.cuda()`` transfers in
    ``torch.no_grad()``, so ``self.predict`` still tracked gradients during
    evaluation; the whole loop now runs grad-free.
    """
    batch_timer = Timer()
    acc = AverageMeter()
    batch_timer.tic()
    # Preserve train/eval mode and restore it afterwards.
    orig_mode = self.training
    self.eval()
    with torch.no_grad():
        for i, (input, target, task) in enumerate(dataloader):
            if self.gpu:
                input = input.cuda()
                target = target.cuda()
            output = self.predict(input)
            # Summarize the performance of all tasks, or 1 task, depends on
            # dataloader. Calculated by total number of data.
            acc = accumulate_acc(output, target, task, acc)
    self.train(orig_mode)
    self.log(' * Val Acc {acc.avg:.3f}, Total time {time:.2f}'
             .format(acc=acc, time=batch_timer.toc()))
    return acc.avg
def validation(self, dataloader, task_n=''):
    # this might possibly change for other incremental scenario
    """Evaluate over *dataloader*; does not distinguish tasks.

    :param dataloader: iterable of ``(input, target, task)`` batches.
    :param task_n: task selector forwarded to ``self.predict``.
    :return: tuple of the ``(acc, losses)`` AverageMeters.

    FIXES: ``acc`` was instantiated twice (the first meter was immediately
    discarded); and ``torch.no_grad()`` wrapped only the ``.cuda()`` transfers,
    so prediction and loss still built autograd graphs during evaluation.
    """
    batch_timer = Timer()
    acc = AverageMeter()
    losses = AverageMeter()
    batch_timer.tic()
    # Preserve train/eval mode and restore it afterwards.
    orig_mode = self.training
    self.eval()
    with torch.no_grad():
        for i, (input, target, task) in enumerate(dataloader):
            if self.gpu:
                input = input.cuda()
                target = target.cuda()
            output = self.predict(input, task_n)
            loss = self.criterion(output, target, task)
            losses.update(loss, input.size(0))
            # Summarize the performance of all tasks, or 1 task, depends on
            # dataloader. Calculated by total number of data.
            acc = accumulate_acc(output, target, task, acc)
    self.train(orig_mode)
    self.log(' * Val Acc {acc.avg:.3f}, Total time {time:.2f}'.format(
        acc=acc, time=batch_timer.toc()))
    return acc, losses
def validate(self):
    """Evaluate on ``self.valid_loader``.

    :return: tuple ``(average_loss, accuracy)``.

    BUG FIXES: the summary print referenced an undefined ``epoch`` (NameError
    on every call) and printed the accuracy while labelling it a loss; the
    deprecated ``Variable(..., volatile=True)`` is replaced by a
    ``torch.no_grad()`` scope.
    """
    self.model.eval()
    valid_loss = AverageMeter()
    valid_acc = AccuracyMeter()
    with torch.no_grad():
        for i, (x, y) in enumerate(self.valid_loader):
            x = Variable(x)
            y = Variable(y).long()
            if self.use_cuda:
                x = x.cuda()
                y = y.cuda()
            output = self.model(x)
            loss = F.cross_entropy(output, y)
            valid_loss.update(float(loss.data))
            # Predicted class = argmax over logits.
            y_pred = output.data.max(dim=1)[1]
            correct = int(y_pred.eq(y.data).cpu().sum())
            valid_acc.update(correct, x.size(0))
    print('\nValidation: Average batch loss: {:.6f}, acc: {:.6f}\n'.format(
        valid_loss.average, valid_acc.accuracy))
    return valid_loss.average, valid_acc.accuracy
def eval_accuracies(hypothesis_list, reference_list, mode='valid'):
    """An unofficial evaluation helper.

    Arguments:
        hypothesis_list: A mapping from instance id to predicted sequences
            (each value is a list whose first element is the prediction).
        reference_list: A mapping from instance id to ground truth sequences.
        mode: METEOR is only computed when this equals 'test'.
    Returns:
        Tuple ``(bleu, rouge_l, meteor, precision.avg, recall.avg, f1.avg)``.
    """
    assert sorted(reference_list.keys()) == sorted(hypothesis_list.keys())

    # Compute BLEU
    _, bleu, ind_bleu = google_bleu.corpus_bleu(reference_list,
                                                hypothesis_list)

    # Compute ROGUE
    rouge_l, ind_rogue = Rouge().compute_score(reference_list,
                                               hypothesis_list)

    # Compute METEOR (test mode only — it is expensive).
    meteor = 0
    if mode == 'test':
        meteor, _ = Meteor().compute_score(reference_list, hypothesis_list)

    # Compute F1, Precision, Recall averaged over all instances.
    precision = AverageMeter()
    recall = AverageMeter()
    f1 = AverageMeter()
    for key in reference_list.keys():
        _precision, _recall, _f1 = F1().compute_eval_score(
            hypothesis_list[key][0], reference_list[key])
        precision.update(_precision)
        recall.update(_recall)
        f1.update(_f1)
    return bleu, rouge_l, meteor, precision.avg, recall.avg, f1.avg
def learn_batch(self, train_loader, val_loader=None, curr_global_decoder=None,
                local_vae=None, class_table=None, global_classes_list=None,
                task_id=None, n_codes=None, global_n_codes=None,
                new_task_data_processing='original'):
    """Train the classifier on a new task, optionally replacing or augmenting
    the real batches with samples replayed from VAE decoders.

    :param train_loader: loader of ``(input, target, task)`` for the new task.
    :param val_loader: optional loader, validated after every epoch.
    :param curr_global_decoder: decoder used to replay previous tasks' data.
    :param local_vae: current task's VAE (used by 'original_through_vae').
    :param class_table: unused here — kept for interface compatibility.
    :param global_classes_list: class bookkeeping passed to the generators.
    :param task_id: index of the task being learned.
    :param n_codes: latent-code count for the current task's VAE.
    :param global_n_codes: per-task latent-code counts.
    :param new_task_data_processing: 'original', 'original_through_vae' or
        'generated' — selects how current-task data is obtained.
    """
    if self.reset_optimizer:  # Reset optimizer before learning each task
        self.log('Optimizer is reset!')
        self.init_optimizer()

    print("Classifier: learning new task in '{}' new data processing mode".
          format(new_task_data_processing))

    # Decode the processing mode into two independent flags.
    if new_task_data_processing == 'original':
        process_through_local_vae = False
        train_only_on_generated_data = False
    elif new_task_data_processing == 'original_through_vae':
        process_through_local_vae = True
        train_only_on_generated_data = False
    elif new_task_data_processing == 'generated':
        process_through_local_vae = False
        train_only_on_generated_data = True
    else:
        # NOTE(review): the '{}' placeholder is never filled in — a
        # .format(new_task_data_processing) call appears to be missing.
        raise ValueError(
            "'new_task_data_processing' argument is invalid: '{}'. "
            "Valid values are: 'original', 'original_through_vae', 'generated."
        )

    if self.score_generated_images_by_freezed_classifier:
        # Frozen snapshot of the classifier, used to soft-label replayed images.
        frozen_model = copy.deepcopy(self.model)
        frozen_model.eval()

    train_accs = []
    val_accs = []
    for epoch in range(self.config['base_schedule'][-1]):
        data_timer = Timer()
        batch_timer = Timer()
        batch_time = AverageMeter()
        data_time = AverageMeter()
        losses = AverageMeter()
        acc = AverageMeter()

        # Config the model and optimizer
        self.log('Epoch:{0}'.format(epoch))
        self.model.train()
        for param_group in self.optimizer.param_groups:
            self.log('LR:', param_group['lr'])

        # Learning with mini-batch
        data_timer.tic()
        batch_timer.tic()
        self.log('Itr\t\t Time\t\t\t Data\t\t\t Loss\t\t\t Acc')

        current_start = 0
        # Training purely on generated data must also generate the current
        # task, hence one extra task to replay.
        if train_only_on_generated_data:
            n_tasks_to_generate = task_id + 1
        else:
            n_tasks_to_generate = task_id

        if not train_only_on_generated_data and (task_id == 0):
            # First task with real data: nothing to replay.
            starting_points_fixed = np.array([[0]])
        else:
            # One shuffled sequence of starting-batch indices per replayed
            # task, reflect-padded so all tasks have equal length.
            starting_points = []
            for prev_task_id in range(n_tasks_to_generate):
                starting_points.append(
                    np.random.permutation(
                        np.array(
                            range(
                                math.ceil(global_n_codes[prev_task_id] /
                                          train_loader.batch_size)))))
            max_len = max([len(repeats) for repeats in starting_points])
            starting_points_fixed = []
            for points in starting_points:
                starting_points_fixed.append(
                    np.pad(points, [0, max_len - len(points)],
                           mode="reflect"))
            starting_points_fixed = np.array(starting_points_fixed)

        for i, (orig_input, orig_target,
                orig_task) in enumerate(train_loader):
            data_time.update(data_timer.toc())  # measure data loading time
            batch_size = len(orig_task)
            # generate data so every task is equally represented
            with torch.no_grad():
                if process_through_local_vae:
                    # Replace the real batch by its VAE reconstruction.
                    orig_input, orig_target, _ = vae_utils.generate_current_data(
                        local_vae.decoder, task_id, batch_size,
                        current_start, global_classes_list, n_codes,
                        global_n_codes)

                generate_impl = vae_utils.generate_previous_data
                if train_only_on_generated_data:
                    # generate data from previous tasks and the current one
                    generate_impl = vae_utils.generate_previous_and_current_data
                    # clear original data
                    orig_input, orig_target = torch.Tensor(), torch.Tensor(
                    )

                if train_only_on_generated_data or (task_id > 0):
                    gen_input, gen_target_orig, _ = generate_impl(
                        curr_global_decoder, task_id, batch_size,
                        starting_points_fixed[:, current_start] * batch_size,
                        global_classes_list, n_codes, global_n_codes)
                    current_start += 1
                else:
                    gen_input = torch.Tensor()
                    gen_target_orig = torch.Tensor()

                if self.score_generated_images_by_freezed_classifier:
                    if task_id > 0:
                        # Soft labels from the frozen classifier for replayed
                        # images; the trailing current-task slice keeps hard
                        # one-hot targets.
                        gen_target = frozen_model.forward(
                            gen_input[:-batch_size])
                        gen_target = gen_target['All']
                        gen_target = F.softmax(gen_target, 1)
                        if train_only_on_generated_data:
                            targets_orig = self.one_hot_targets(
                                gen_target_orig[-batch_size:]).to(
                                    local_vae.device)
                            gen_target = torch.cat(
                                [gen_target, targets_orig])
                        else:
                            targets_orig = self.one_hot_targets(
                                orig_target).to(local_vae.device)
                            gen_target = torch.cat(
                                [gen_target, targets_orig])
                    else:
                        gen_target = gen_target_orig
                        gen_target = self.one_hot_targets(
                            gen_target, self.model.n_classes)
                else:
                    # NOTE(review): ``gen_target`` is read before assignment in
                    # this branch — ``gen_target_orig`` was probably intended;
                    # confirm before relying on this mode.
                    gen_target = self.one_hot_targets(
                        gen_target, self.model.n_classes)
            orig_target = self.one_hot_targets(orig_target,
                                               self.model.n_classes)
            if self.gpu:
                orig_input = orig_input.cuda()
                orig_target = orig_target.cuda()
                gen_input = gen_input.cuda()
                gen_target = gen_target.cuda()
            # merge original and generated data
            multi_input = torch.cat((orig_input, gen_input), 0)
            multi_target = torch.cat((orig_target, gen_target), 0)
            # zip and shuffle
            multibatch = list(zip(multi_input, multi_target))
            random.shuffle(multibatch)
            # iterate over batches in multibatch
            multibatch_parted = zip(*(iter(multibatch), ) * batch_size)
            for part in multibatch_parted:
                input, target = zip(*part)
                # convert tuples of tensors into one tensor
                input = torch.stack(input)
                target = torch.stack(target)
                loss, output = self.update_model(input, target, None)
                input = input.detach()
                target = target.detach()
                # measure accuracy and record loss
                acc = accumulate_acc(output, target, None, acc)
                losses.update(loss, input.size(0))
                batch_time.update(
                    batch_timer.toc())  # measure elapsed time
                data_timer.toc()
            if ((self.config['base_print_freq'] > 0) and
                (i % self.config['base_print_freq']
                 == 0)) or (i + 1) == len(train_loader):
                self.log('[{0}/{1}]\t'
                         '{batch_time.val:.4f} ({batch_time.avg:.4f})\t'
                         '{data_time.val:.4f} ({data_time.avg:.4f})\t'
                         '{loss.val:.3f} ({loss.avg:.3f})\t'
                         '{acc.val:.2f} ({acc.avg:.2f})'.format(
                             i,
                             len(train_loader),
                             batch_time=batch_time,
                             data_time=data_time,
                             loss=losses,
                             acc=acc))
        train_accs.append(acc.avg)
        self.log(
            ' * Train on {} original batches, Acc {acc.avg:.3f}'.format(
                len(train_loader), acc=acc))
        # Evaluate the performance of current task
        if val_loader != None:
            val_accs.append(self.validation(val_loader))
    print("All epochs ended")
def train_(self, epochs, finetune=False):
    """Run the pretrain/finetune loop for ``epochs`` epochs.

    :param epochs: number of epochs to run.
    :param finetune: when True, switch into finetune mode and tag all
        logs/checkpoints with 'finetune' instead of 'pretrain'.
    """
    str_ = 'pretrain'
    self.str_ = str_
    if finetune:
        self.switch_finetune()
        str_ = 'finetune'
        self.str_ = str_
    for epoch in range(epochs):
        data_timer = Timer()
        batch_timer = Timer()
        batch_time = AverageMeter()
        data_time = AverageMeter()
        losses = AverageMeter()
        acc = AverageMeter()
        self.model.train()
        # NOTE(review): scheduler stepped with an explicit epoch index —
        # legacy PyTorch scheduler API.
        self.scheduler.step(epoch)
        if self.config['train_between']:
            # From the first scheduled epoch on, unfreeze all parameters and
            # rebuild the optimizer with a new weight decay.
            if epoch == self.config['schedule'][0]:
                for param in self.model.parameters():
                    param.requires_grad = True
                #self.config['lr'] = 0.01
                self.config['weight_decay'] = 5e-4
                self.init_optimizer()
        if self.config['switch_all']:
            # At the configured epoch, unfreeze everything with a stronger
            # weight decay and reinitialize the optimizer.
            if epoch == self.config['switch_all']:
                self.config['weight_decay'] = 5e-3
                for param in self.model.parameters():
                    param.requires_grad = True
                self.init_optimizer()
                #self.config['lr'] = 0.01
        for param_group in self.optimizer.param_groups:
            self.log('LR:', param_group['lr'])
        self.log('Itr\t\tTime\t\t Data\t\t Loss\t\tAcc')
        self.log('{0} Epoch:{1}'.format(str_, epoch))
        data_timer.tic()
        batch_timer.tic()
        for i, (input, target) in enumerate(self.train_loader):
            self.model.train()
            data_time.update(data_timer.toc())  # measure data loading time
            if self.gpu:
                input = input.cuda()
                target = target.cuda()
            loss, output = self.update_model(input, target)
            input = input.detach()
            target = target.detach()
            # measure accuracy and record loss
            acc = self.accumulate_acc(output, target, acc)
            losses.update(loss, input.size(0))
            batch_time.update(batch_timer.toc())  # measure elapsed time
            data_timer.toc()
            # Global step index used for the TensorBoard curves below.
            self.n_iter = (epoch) * len(self.train_loader) + i
            self.writer.add_scalar(str_ + '/Loss_train', losses.avg,
                                   self.n_iter)
            self.writer.add_scalar(str_ + '/Acc_train', acc.avg,
                                   self.n_iter)
            # if ((self.config['print_freq']>0) and (i % self.config['print_freq'] == 0)) or (i+1)==len(train_loader):
            self.log('[{0}/{1}]\t'
                     '{batch_time.val:.4f} ({batch_time.avg:.4f})\t'
                     '{data_time.val:.4f} ({data_time.avg:.4f})\t'
                     '{loss.val:.3f} ({loss.avg:.3f})\t'
                     '{acc.val:.2f} ({acc.avg:.2f})'.format(
                         i,
                         len(self.train_loader),
                         batch_time=batch_time,
                         data_time=data_time,
                         loss=losses,
                         acc=acc))
        # Validate after every epoch and log test-set curves.
        acc_v, loss_v = self.validation(self.test_loader)
        self.writer.add_scalar(str_ + '/Loss_test', loss_v.avg, self.n_iter)
        self.writer.add_scalar(str_ + '/Acc_test', acc_v.avg, self.n_iter)
        # Periodic checkpoint (epoch 0 is skipped).
        if epoch % self.save_after == 0 and epoch != 0:
            self.save_model(str_ + str(epoch))
def validation(self, test_loader, from_train=1):
    # this might possibly change for other incremental scenario
    # This function doesn't distinguish tasks.
    """Evaluate on *test_loader*, tracking overall and per-class top-1/top-5.

    :param test_loader: iterable of ``(input, target)`` batches.
    :param from_train: when truthy, return ``(acc, losses)``; otherwise also
        return top-5 and the per-class accuracy dictionaries.
    """
    batch_timer = Timer()
    acc = AverageMeter()
    losses = AverageMeter()
    acc_5 = AverageMeter()
    # One meter per class; a fresh instance each (a * repeat would alias).
    acc_class = [
        AverageMeter()
        for i in range(len(self.train_loader.dataset.class_list))
    ]  #[AverageMeter()] * len(self.train_loader.dataset.class_list)
    acc_class_5 = [
        AverageMeter()
        for i in range(len(self.train_loader.dataset.class_list))
    ]
    batch_timer.tic()
    # Preserve train/eval mode and restore it afterwards.
    orig_mode = self.training
    self.eval()
    for i, (input, target) in enumerate(test_loader):
        if self.gpu:
            with torch.no_grad():
                input = input.cuda()
                target = target.cuda()
        output = self.forward(input)
        loss = self.criterion(output, target)
        losses.update(loss, input.size(0))
        # Summarize the performance of all tasks, or 1 task, depends on dataloader.
        # Calculated by total number of data.
        t_acc, acc_class = accuracy(
            output, target, topk=(1, ), avg_meters=acc_class
        )  #self.accumulate_acc(output, target, acc)
        t_acc_5, acc_class_5 = accuracy(output,
                                        target,
                                        topk=(5, ),
                                        avg_meters=acc_class_5)
        # import pdb; pdb.set_trace()
        acc.update(t_acc, len(target))
        acc_5.update(t_acc_5, len(target))
    class_list = self.train_loader.dataset.class_list.inverse
    acc_cl_1 = {}
    acc_cl_5 = {}
    #from accuracies obtained create inst size based accuracies
    inst_clss_lst = self.train_loader.dataset.class_inst_list
    # import pdb; pdb.set_trace()
    for ins_clss_, insts in inst_clss_lst.items():
        cls_sum = sum([acc_class[inst].sum for inst in insts])
        cls_cnt = sum([acc_class[inst].count for inst in insts])
        # NOTE(review): leftover debugger trap — if a class group has no
        # samples this drops into pdb, and the division below would raise
        # ZeroDivisionError anyway; should be guarded/skipped instead.
        if cls_cnt == 0:
            import pdb
            pdb.set_trace()
        inst_avg = cls_sum / cls_cnt
        self.writer.add_scalar(self.str_ + '/Acc_1_{}'.format(ins_clss_),
                               inst_avg, self.n_iter)
        cls_sum_5 = sum([acc_class_5[inst].sum for inst in insts])
        cls_cnt_5 = sum([acc_class_5[inst].count for inst in insts])
        inst_avg_5 = cls_sum_5 / cls_cnt_5
        self.writer.add_scalar(self.str_ + '/Acc_5_{}'.format(ins_clss_),
                               inst_avg_5, self.n_iter)
    # Per-class (avg, sum, count) triples keyed by class name.
    for idx, cl_ in class_list.items():
        acc_cl_1[cl_] = [
            acc_class[idx].avg, acc_class[idx].sum, acc_class[idx].count
        ]
        acc_cl_5[cl_] = [
            acc_class_5[idx].avg, acc_class_5[idx].sum,
            acc_class_5[idx].count
        ]
        # self.log(' * Val Acc {acc.avg:.3f} for class {cls}, {acc.sum} / {acc.count} '
        #          .format(acc=acc_class[idx], cls=cl_))
    self.train(orig_mode)
    self.log(' * Val Acc {acc.avg:.3f}, Total time {time:.2f}'.format(
        acc=acc, time=batch_timer.toc()))
    if from_train:
        return acc, losses
    else:
        return acc, acc_5, acc_cl_1, acc_cl_5, losses
def train(epoch, train_loader, network, optimizer, compute_loss, args):
    """Train the cross-modal *network* for one epoch.

    :param epoch: epoch index, used for logging.
    :param train_loader: iterable of (images, captions, labels, lengths).
    :param network: model returning (image_embeddings, text_embeddings).
    :param optimizer: optimizer stepped once per batch.
    :param compute_loss: returns (cmpm, cmpc, total, img_prec, txt_prec, ...).
    :param args: needs ``constraints_images`` / ``constraints_text`` flags.
    :return: ``(avg_loss, avg_batch_time, avg_image_prec, avg_text_prec)``.

    FIX: the loss meter stored the raw loss *tensor*, keeping each batch's
    autograd graph referenced for the whole epoch; it now records the Python
    scalar via ``loss.item()`` (consistent with the sibling train loop).
    """
    batch_time = AverageMeter()
    train_loss = AverageMeter()
    image_pre = AverageMeter()
    text_pre = AverageMeter()
    # switch to train mode
    network.train()
    end = time.time()
    for step, (images, captions, labels,
               captions_length) in enumerate(train_loader):
        images = images.cuda()
        labels = labels.cuda()
        captions = captions.cuda()
        # compute loss
        image_embeddings, text_embeddings = network(images, captions,
                                                    captions_length)
        cmpm_loss, cmpc_loss, loss, image_precision, text_precision, pos_avg_sim, neg_arg_sim = compute_loss(
            image_embeddings, text_embeddings, labels)
        if step % 10 == 0:
            print(
                'epoch:{}, step:{}, cmpm_loss:{:.3f}, cmpc_loss:{:.3f}'.format(
                    epoch, step, cmpm_loss, cmpc_loss))
        # constrain embedding with the same id at the end of one epoch
        if (args.constraints_images or
                args.constraints_text) and step == len(train_loader) - 1:
            con_images, con_text = constraints_loss(train_loader, network,
                                                    args)
            loss += (con_images + con_text)
            print(
                'epoch:{}, step:{}, con_images:{:.3f}, con_text:{:.3f}'.format(
                    epoch, step, con_images, con_text))
        # compute gradient and do ADAM step
        optimizer.zero_grad()
        loss.backward()
        #nn.utils.clip_grad_norm(network.parameters(), 5)
        optimizer.step()
        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()
        train_loss.update(loss.item(), images.shape[0])
        image_pre.update(image_precision, images.shape[0])
        text_pre.update(text_precision, images.shape[0])
    return train_loss.avg, batch_time.avg, image_pre.avg, text_pre.avg
def train(train_loader, encoder, criterion, encoder_optimizer, epoch):
    r"""Performs one epoch's training.

    Arguments
        train_loader: DataLoader for training data
        encoder: encoder model
        criterion: loss layer
        encoder_optimizer: optimizer to update encoder's weights
        epoch: epoch number
    """
    encoder.train()

    batch_time = AverageMeter()  # forward prop. + back prop. time
    data_time = AverageMeter()  # data loading time
    losses = AverageMeter()  # loss (per word decoded)
    accs = AverageMeter()  # acc accuracy

    tick = time.time()
    # Batches
    for i, (imgs, tags) in enumerate(train_loader):
        data_time.update(time.time() - tick)

        # Move to GPU, if available
        images = imgs.to(device)
        targets = tags.to(device)

        # Forward prop. and loss
        logits = encoder(images)
        loss = criterion(logits, targets)

        # Back prop. with gradient clipping, then update weights
        encoder_optimizer.zero_grad()
        loss.backward()
        clip_gradient(encoder_optimizer, grad_clip)
        encoder_optimizer.step()

        # Keep track of metrics
        batch_acc = binary_accuracy(logits, targets)
        losses.update(loss.item())
        accs.update(batch_acc)
        batch_time.update(time.time() - tick)
        tick = time.time()

        # Print status
        if i % print_freq == 0:
            print('Epoch: [{0}][{1}/{2}]\t'
                  'Batch Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'Data Load Time {data_time.val:.3f} ({data_time.avg:.3f})\t'
                  'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                  'Top-5 Accuracy {accs.val:.3f} ({accs.avg:.3f})'.format(
                      epoch, i, len(train_loader), batch_time=batch_time,
                      data_time=data_time, loss=losses, accs=accs))
def train_epoch(self, train_loader, epoch, count_cls_step):
    """Train the classifier for one epoch over *train_loader*.

    :param train_loader: iterable of ``(inputs, target, task)`` batches.
    :param epoch: epoch index, forwarded to the LR scheduler.
    :param count_cls_step: running classifier-step count (incremented locally;
        note the updated value is NOT returned to the caller).
    :return: tuple ``(losses.avg, acc.avg)``.
    """
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    acc = AverageMeter()
    end = time.time()
    for i, (inputs, target, task) in enumerate(train_loader):
        # print("*"*100)
        # print(inputs.mean())
        count_cls_step += 1
        data_time.update(time.time() - end)  # measure data loading time
        if self.config['gpu']:
            inputs = inputs.cuda()
            target = target.cuda()
        output = self.model.forward(inputs)
        loss = self.criterion(output, target, task)
        acc = accumulate_acc(output, target, task, acc)
        self.model_optimizer.zero_grad()
        # NOTE(review): the scheduler is stepped once per *batch* with the
        # epoch index, and before backward() — confirm this matches the
        # intended schedule granularity.
        self.model_scheduler.step(epoch)
        loss.backward()
        self.model_optimizer.step()
        batch_time.update(time.time() - end)
        end = time.time()
        losses.update(loss, inputs.size(0))
        if ((self.config['print_freq'] > 0) and
            (i % self.config['print_freq']
             == 0)) or (i + 1) == len(train_loader):
            self.log('[{0}/{1}]\t'
                     '{batch_time.val:.4f} ({batch_time.avg:.4f})\t'
                     '{data_time.val:.4f} ({data_time.avg:.4f})\t'
                     '{loss.val:.3f} ({loss.avg:.3f})\t'
                     '{acc.val:.2f} ({acc.avg:.2f})'.format(
                         i,
                         len(train_loader),
                         batch_time=batch_time,
                         data_time=data_time,
                         loss=losses,
                         acc=acc))
    self.log(' * Train Acc {acc.avg:.3f}'.format(acc=acc))
    return losses.avg, acc.avg
def training(self, epoch):
    """One PaddlePaddle training epoch; validates afterwards.

    :param epoch: epoch index for logging/VisualDL.
    :return: tuple ``(prec1, prec5, losses)`` where the precisions come from
        the post-epoch validation run.
    """
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()
    self.model.train()
    total = len(self.train_loader)
    for i, (input, target) in tqdm(enumerate(self.train_loader), total=total):
        output = self.model(input)
        loss = self.criterion(output, target)
        # print(output) # Tensor(shape=[256, 1000]
        prec1, prec5 = accuracy(output, target, topk=(1, 5))
        batch = input.shape[0]
        losses.update(loss.numpy()[0], batch)
        top1.update(prec1.numpy()[0], batch)
        top5.update(prec5.numpy()[0], batch)
        self.optimizer.clear_grad()
        loss.backward()
        self.optimizer.step()
        if i % self.cfg.Log_print_freq == 0:
            self.logger.info('Epoch: [{0}][{1}/{2}]\t'
                             'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                             'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                             'Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                                 epoch, i, total, loss=losses,
                                 top1=top1, top5=top5))
    prec1, prec5 = self.validate()
    if self.cfg.visualDL:
        with LogWriter(logdir=self.logDir) as writer:
            # Record scalar metrics for VisualDL.
            writer.add_scalar(tag="loss", step=epoch, value=losses.avg)
            writer.add_scalar(tag="prec1", step=epoch, value=prec1)
            writer.add_scalar(tag="prec5", step=epoch, value=prec5)
    self.logger.info("Epoch {}: prec1: {} prec5: {}".format(
        epoch, prec1, prec5))
    return prec1, prec5, losses
def validate(val_loader, encoder, criterion):
    r"""Performs one epoch's validation.

    Arguments
        val_loader (Generator): DataLoader for validation data.
        encoder (nn.Module): encoder model
        criterion: loss layer
    Returns
        AverageMeter: Accuracy
    """
    encoder.eval()

    batch_time = AverageMeter()
    losses = AverageMeter()
    accs = AverageMeter()

    tick = time.time()
    # explicitly disable gradient calculation to avoid CUDA memory error
    # solves the issue #57
    with torch.no_grad():
        # Batches
        for i, (imgs, tags) in enumerate(val_loader):
            # Move to device, if available
            images = imgs.to(device)
            targets = tags.to(device)

            # Forward prop. and loss
            logits = encoder(images)
            loss = criterion(logits, targets)

            # Keep track of metrics
            losses.update(loss.item())
            accs.update(binary_accuracy(logits, targets))
            batch_time.update(time.time() - tick)
            tick = time.time()

            if i % print_freq == 0:
                print('Validation: [{0}/{1}]\t'
                      'Batch Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                      'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                      'Accuracy {accs.val:.3f} ({accs.avg:.3f})\t'.format(
                          i, len(val_loader), batch_time=batch_time,
                          loss=losses, accs=accs))

        print(
            '\n * LOSS - {loss.avg:.3f}, ACCURACY - {acc.avg:.3f}\n'.format(
                loss=losses, acc=accs))
    return accs
def learn_batch(self, train_loader, val_loader=None):
    """Train for one task's schedule of epochs, then step the LR scheduler.

    :param train_loader: iterable of ``(inputs, target, task)`` batches.
    :param val_loader: optional loader, validated after every epoch.
    """
    if self.reset_optimizer:  # Reset optimizer before learning each task
        self.log('Optimizer is reset!')
        self.init_optimizer()
    # Number of epochs for this task comes off the schedule stack.
    schedule = self.schedule_stack.pop()
    for epoch in range(schedule):
        data_timer = Timer()
        batch_timer = Timer()
        batch_time = AverageMeter()
        data_time = AverageMeter()
        losses = AverageMeter()
        acc = AverageMeter()
        # Sentinels in case the loader is empty.
        robust_err, robust_loss = -1, -1

        # Config the model and optimizer
        self.log('Epoch:{0}'.format(epoch))
        self.model.train()
        for param_group in self.optimizer.param_groups:
            self.log('LR:', param_group['lr'])

        # Learning with mini-batch
        data_timer.tic()
        batch_timer.tic()
        for i, (inputs, target, task) in enumerate(train_loader):
            data_time.update(data_timer.toc())  # measure data loading time
            if self.gpu:
                inputs = inputs.cuda()
                target = target.cuda()
            loss, robust_err, robust_loss, output = self.update_model(
                inputs, target, task)
            inputs = inputs.detach()
            target = target.detach()
            # NOTE(review): TensorBoard step is ``epoch``, so every batch in
            # an epoch writes to the same step — confirm this is intended.
            self.tb.add_scalar(f"Loss/train - task {self.current_task}",
                               loss, epoch)
            self.tb.add_scalar(
                f"Robust error/train - task {self.current_task}",
                robust_err, epoch)

            # measure accuracy and record loss
            acc = accumulate_acc(output, target, task, acc)
            losses.update(loss, inputs.size(0))
            batch_time.update(batch_timer.toc())  # measure elapsed time
            data_timer.toc()

        self.log(' * Train Acc {acc.avg:.3f}, Loss {loss.avg:.3f}'.format(
            loss=losses, acc=acc))
        self.log(
            f" * robust loss: {robust_loss:.10f} robust error: {robust_err:.10f}"
        )
        # self.log(f" * model: {self.model.features_loss_term}")

        # Evaluate the performance of current task
        if val_loader is not None:
            self.validation(val_loader)
        self.scheduler.step()
def test(data_loader, network, args, unique_image):
    """Embed the whole test set (global + local image/text features) and score
    retrieval with ``compute_topk``.

    :param data_loader: iterable of ``(images, captions, labels)`` batches.
    :param network: DataParallel-wrapped model (accessed via ``.module``).
    :param args: needs ``feature_size``, ``part2``, ``part3``.
    :param unique_image: 0/1 mask selecting one gallery entry per identity.
    :return: top-1/5/10 accuracies in both directions plus average batch time.
    """
    batch_time = AverageMeter()
    # switch to evaluate mode
    network.eval()
    # NOTE(review): capacity assumes batches of at most 64 samples — confirm.
    max_size = 64 * len(data_loader)
    global_img_feat_bank = torch.zeros((max_size, args.feature_size)).cuda()
    global_text_feat_bank = torch.zeros((max_size, args.feature_size)).cuda()
    local_img_query_bank = torch.zeros(
        (max_size, args.part2 + args.part3 + 1, args.feature_size)).cuda()
    local_img_value_bank = torch.zeros(
        (max_size, args.part2 + args.part3 + 1, args.feature_size)).cuda()
    # 98 + 2 + 1: token capacity for the local text features — TODO confirm
    # this matches the tokenizer's max sequence length.
    local_text_key_bank = torch.zeros(
        (max_size, 98 + 2 + 1, args.feature_size)).cuda()
    local_text_value_bank = torch.zeros(
        (max_size, 98 + 2 + 1, args.feature_size)).cuda()
    labels_bank = torch.zeros(max_size).cuda()
    length_bank = torch.zeros(max_size, dtype=torch.long).cuda()
    index = 0
    with torch.no_grad():
        end = time.time()
        for images, captions, labels in data_loader:
            # Split each caption on punctuation into n_sep clauses, padding
            # with "[PAD]" when a caption has fewer clauses.
            sep_captions = []
            n_sep = 2
            for i, c in enumerate(captions):
                c = re.split(r'[;,!?.]', c)
                if len(c) > n_sep or len(c) == n_sep:
                    sep_captions = sep_captions + c[0:n_sep]
                else:
                    pad_length = n_sep - len(c)
                    padding = ["[PAD]" for j in range(pad_length)]
                    sep_captions = sep_captions + c + padding
            tokens, segments, input_masks, caption_length = network.module.language_model.pre_process(
                captions)
            sep_tokens, sep_segments, sep_input_masks, sep_caption_length = network.module.language_model.pre_process(
                sep_captions)
            tokens = tokens.cuda()
            segments = segments.cuda()
            input_masks = input_masks.cuda()
            caption_length = caption_length.cuda()
            sep_tokens = sep_tokens.cuda()
            sep_segments = sep_segments.cuda()
            sep_input_masks = sep_input_masks.cuda()
            images = images.cuda()
            labels = labels.cuda()
            interval = images.shape[0]
            # Part indices are NOT shuffled at test time (unlike training).
            p2 = [i for i in range(args.part2)]
            p3 = [i for i in range(args.part3)]
            global_img_feat, global_text_feat, local_img_query, local_img_value, local_text_key, local_text_value = network(
                images, tokens, segments, input_masks, sep_tokens,
                sep_segments, sep_input_masks, n_sep, p2, p3, stage='train')
            # Append this batch's features to the banks.
            global_img_feat_bank[index:index + interval] = global_img_feat
            global_text_feat_bank[index:index + interval] = global_text_feat
            local_img_query_bank[index:index + interval, :, :] = local_img_query
            local_img_value_bank[index:index + interval, :, :] = local_img_value
            local_text_key_bank[index:index + interval, :, :] = local_text_key
            local_text_value_bank[index:index + interval, :, :] = local_text_value
            labels_bank[index:index + interval] = labels
            length_bank[index:index + interval] = caption_length
            batch_time.update(time.time() - end)
            end = time.time()
            index = index + interval
        # Trim unused capacity before scoring.
        global_img_feat_bank = global_img_feat_bank[:index]
        global_text_feat_bank = global_text_feat_bank[:index]
        local_img_query_bank = local_img_query_bank[:index]
        local_img_value_bank = local_img_value_bank[:index]
        local_text_key_bank = local_text_key_bank[:index]
        local_text_value_bank = local_text_value_bank[:index]
        labels_bank = labels_bank[:index]
        length_bank = length_bank[:index]
        # Boolean mask: keep one gallery image per identity.
        unique_image = torch.tensor(unique_image) == 1
        global_result, local_result, result = compute_topk(
            global_img_feat_bank[unique_image],
            local_img_query_bank[unique_image],
            local_img_value_bank[unique_image], global_text_feat_bank,
            local_text_key_bank, local_text_value_bank, length_bank,
            labels_bank[unique_image], labels_bank, args, [1, 5, 10], True)
        ac_top1_i2t, ac_top5_i2t, ac_top10_i2t, ac_top1_t2i, ac_top5_t2i, ac_top10_t2i = result
    return ac_top1_i2t, ac_top5_i2t, ac_top10_i2t, ac_top1_t2i, ac_top5_t2i, ac_top10_t2i, batch_time.avg
def train(epoch, train_loader, network, optimizer, compute_loss, args, co_location_loss=None):
    """Run one training epoch of the image-text matching network.

    Args:
        epoch: current epoch index (used for logging only).
        train_loader: yields (images, captions, labels) batches.
        network: DataParallel-wrapped model; `.module.language_model` tokenizes.
        optimizer: stepped once per batch.
        compute_loss: callable returning the loss terms and precision stats.
        args: namespace with part2/part3 counts and constraint flags.
        co_location_loss: unused here; kept for interface compatibility.

    Returns:
        (train_loss.avg, batch_time.avg, image_pre.avg, text_pre.avg)
    """
    batch_time = AverageMeter()
    train_loss = AverageMeter()
    image_pre = AverageMeter()
    text_pre = AverageMeter()

    # switch to train mode
    network.train()

    end = time.time()
    for step, (images, captions, labels) in enumerate(train_loader):
        # Split every caption into n_sep word-chunks of roughly equal length.
        sep_captions = []
        n_sep = 2
        for i, c in enumerate(captions):
            c = c.split()
            s = math.floor(len(c) / n_sep)
            start = 0
            for j in range(0, n_sep):
                # BUG FIX: the original test was `j == n_sep`, which is never
                # true for j in range(0, n_sep), so the remainder branch was
                # dead and trailing words were silently dropped whenever
                # len(c) is not divisible by n_sep. The last chunk now takes
                # everything that remains.
                if j == n_sep - 1:
                    sep_c = c[start:]
                else:
                    sep_c = c[start:min(start + s, len(c))]
                sep_captions.append(' '.join(sep_c))
                start += s

        tokens, segments, input_masks, caption_length = network.module.language_model.pre_process(captions)
        sep_tokens, sep_segments, sep_input_masks, sep_caption_length = network.module.language_model.pre_process(sep_captions)

        tokens = tokens.cuda()
        segments = segments.cuda()
        input_masks = input_masks.cuda()
        caption_length = caption_length.cuda()
        sep_tokens = sep_tokens.cuda()
        sep_segments = sep_segments.cuda()
        sep_input_masks = sep_input_masks.cuda()
        images = images.cuda()
        labels = labels.cuda()

        # Randomly permute local part indices each step (training-time only).
        p2 = [i for i in range(args.part2)]
        p3 = [i for i in range(args.part3)]
        random.shuffle(p2)
        random.shuffle(p3)

        global_img_feat, global_text_feat, local_img_query, local_img_value, local_text_key, local_text_value = network(
            images, tokens, segments, input_masks, sep_tokens, sep_segments, sep_input_masks, n_sep, p2, p3, stage='train')

        cmpm_loss, cmpc_loss, cont_loss, loss, image_precision, text_precision, pos_avg_sim, neg_arg_sim, local_pos_avg_sim, local_neg_avg_sim = compute_loss(
            global_img_feat, global_text_feat, local_img_query, local_img_value, local_text_key, local_text_value, caption_length, labels)

        if step % 10 == 0:
            print('epoch:{}, step:{}, cmpm_loss:{:.3f}, cmpc_loss:{:.3f}, cont_loss:{:.3f}, pos_sim_avg:{:.3f}, neg_sim_avg:{:.3f}, lpos_sim_avg:{:.3f}, lneg_sim_avg:{:.3f}'.
                  format(epoch, step, cmpm_loss, cmpc_loss, cont_loss, pos_avg_sim, neg_arg_sim, local_pos_avg_sim, local_neg_avg_sim))

        # constrain embedding with the same id at the end of one epoch
        if (args.constraints_images or args.constraints_text) and step == len(train_loader) - 1:
            con_images, con_text = constraints_loss(train_loader, network, args)
            loss += (con_images + con_text)
            print('epoch:{}, step:{}, con_images:{:.3f}, con_text:{:.3f}'.format(epoch, step, con_images.item(), con_text.item()))

        # compute gradient and do ADAM step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        # All meters are weighted by the batch size.
        train_loss.update(loss.item(), images.shape[0])
        image_pre.update(image_precision, images.shape[0])
        text_pre.update(text_precision, images.shape[0])

    return train_loss.avg, batch_time.avg, image_pre.avg, text_pre.avg
def learn_batch(self, site_name, train_loader, val_loader=None): if self.config['reset_optimizer']: self.log('Optimizer is reset!') self.init_optimizer() else: self.optimizer.param_groups[0]['lr'] = self.config['lr'] for self.epoch in range(self.config['epoches']): losses_seg = AverageMeter() losses_regression = AverageMeter() losses_embedding = AverageMeter() losses = AverageMeter() self.model.train() self.scheduler.step() with tqdm(total=len(train_loader), desc='Epoch %d/%d' % (self.epoch+1,self.config['epoches']), unit='batch') \ as pbar: for batch in train_loader: imgs, gts = batch['img'], batch['gt'] if self.gpu: imgs, gts = imgs.cuda(), gts.cuda() loss_seg, loss_regression, loss_embedding, loss = self.update_model(imgs, gts) losses_seg.update(loss_seg, imgs.shape[0]) losses_regression.update(loss_regression, imgs.shape[0]) losses_embedding.update(loss_embedding, imgs.shape[0]) losses.update(loss, imgs.shape[0]) pbar.set_postfix({'loss_seg': '{0:.4f}'.format(losses_seg.val), 'loss_regression': '{0:.4f}'.format(losses_regression.val), 'loss_embedding': '{0:.4f}'.format(losses_embedding.val), 'loss':'{0:.4f}'.format(losses.val), 'lr':'{0:.5f}'.format(self.optimizer.param_groups[0]['lr']) }) pbar.update(1) self.log(' * Train Epoch: {epoch:n}, ' 'LearningRate {lr:.5f}, SegLoss {losses_seg.avg:.4f}, RegressionLoss {losses_regression.avg:.4f}, ' 'EmbeddingLoss {losses_embedding.avg:.4f}, ' 'Loss {losses.avg:.4f}'.format(epoch=self.epoch + 1, lr=self.optimizer.param_groups[0]['lr'], losses_seg=losses_seg, losses_regression=losses_regression, losses_embedding=losses_embedding, losses=losses)) if (self.epoch + 1) % 1 == 0: model_dir = os.path.join(self.exp_dir, site_name) os.makedirs(model_dir, exist_ok=True) self.save_model(filename=os.path.join(model_dir,'Epoch_%d_Dice_%.4f' % (self.epoch + 1, val_dice)))
def learn_batch(self, train_loader, val_loader=None):
    """Train on the current task, then snapshot parameters and their
    importance into self.regularization_terms for continual learning."""
    if self.reset_optimizer:  # Reset optimizer before learning each task
        self.log('Optimizer is reset!')
        self.init_optimizer()
    self.model.zero_grad()
    # epoch_iterator = tqdm(train_loader, desc="Iteration", disable=False)
    # global_step = 0
    # epochs_trained = 0
    # steps_trained_in_current_epoch = 0
    # train_iterator = trange(
    #     epochs_trained, int(self.config['schedule'][-1]), desc="Epoch", disable=False,
    # )
    # for _ in train_iterator:
    for epoch in range(self.config['schedule'][-1]):
        data_timer = Timer()
        batch_timer = Timer()
        batch_time = AverageMeter()
        data_time = AverageMeter()
        losses = AverageMeter()
        acc = AverageMeter()

        # Config the model and optimizer
        self.log('Epoch:{0}'.format(epoch))
        self.model.train()
        self.scheduler.step(epoch)
        for param_group in self.optimizer.param_groups:
            self.log('LR:', param_group['lr'])

        # Learning with mini-batch
        data_timer.tic()
        batch_timer.tic()
        self.log('Itr\t\tTime\t\t Data\t\t Loss\t\tAcc')
        for i, (inputs_1, inputs_2, inputs_3, target) in enumerate(train_loader):
            #changed here for creating 2d tensor
            # NOTE(review): the reshape hard-codes a leading dim of 8 and the
            # task is hard-coded to 'mrpc' — confirm both against the loader.
            input = torch.stack([inputs_1, inputs_2, inputs_3]).reshape(
                (8, -1))
            task = 'mrpc'
            data_time.update(data_timer.toc())  # measure data loading time
            if self.gpu:
                input = input.cuda()
                target = target.cuda()
            loss, output = self.update_model(input, target, task)
            input = input.detach()
            target = target.detach()

            # measure accuracy and record loss
            acc = accumulate_acc(output, target, task, acc)
            losses.update(loss, input.size(0))
            batch_time.update(batch_timer.toc())  # measure elapsed time
            data_timer.toc()

            if ((self.config['print_freq'] > 0) and (i % self.config['print_freq'] == 0)) or (i + 1) == len(train_loader):
                self.log('[{0}/{1}]\t'
                         '{batch_time.val:.4f} ({batch_time.avg:.4f})\t'
                         '{data_time.val:.4f} ({data_time.avg:.4f})\t'
                         '{loss.val:.3f} ({loss.avg:.3f})\t'
                         '{acc.val:.2f} ({acc.avg:.2f})'.format(
                    i, len(train_loader), batch_time=batch_time,
                    data_time=data_time, loss=losses, acc=acc))

        self.log(' * Train Acc {acc.avg:.3f}'.format(acc=acc))

        # Evaluate the performance of current task
        if val_loader != None:
            self.validation(val_loader)

    #from regularization
    # 2.Backup the weight of current task
    task_param = {}
    for n, p in self.params.items():
        task_param[n] = p.clone().detach()

    # 3.Calculate the importance of weights for current task
    importance = self.calculate_importance(train_loader)

    # Save the weight and importance of weights of current task
    self.task_count += 1
    if self.online_reg and len(self.regularization_terms) > 0:
        # Always use only one slot in self.regularization_terms
        self.regularization_terms[1] = {
            'importance': importance,
            'task_param': task_param
        }
    else:
        # Use a new slot to store the task-specific information
        self.regularization_terms[self.task_count] = {
            'importance': importance,
            'task_param': task_param
        }
def learn_batch(self, site_name, train_loader, val_loader=None): if self.config['reset_optimizer']: self.log('Optimizer is reset!') self.init_optimizer() else: self.optimizer.param_groups[0]['lr'] = self.config['lr'] for self.epoch in range(self.config['epoches']): losses_bce = AverageMeter() losses_dice = AverageMeter() losses = AverageMeter() self.model.train() self.scheduler.step() with tqdm(total=len(train_loader), desc='Epoch %d/%d' % (self.epoch+1,self.config['epoches']), unit='batch') \ as pbar: for batch in train_loader: imgs, gts = batch['img'], batch['gt'] if self.gpu: imgs, gts = imgs.cuda(), gts.cuda() loss_bce, loss_dice, loss, outs = self.update_model( imgs, gts) imgs = imgs.detach() gts = gts.detach() losses_bce.update(loss_bce, imgs.size(0)) losses_dice.update(loss_dice, imgs.size(0)) losses.update(loss, imgs.size(0)) pbar.set_postfix({ 'bce_loss': '{0:.4f}'.format(losses_bce.val), 'dice_loss': '{0:.4f}'.format(losses_dice.val), 'loss': '{0:.4f}'.format(losses.val), 'lr': '{0:.5f}'.format(self.optimizer.param_groups[0]['lr']) }) pbar.update(1) # print result in each epoch self.log( ' * Train Epoch: {epoch:n}, ' 'LearningRate {lr:.5f}, BCELoss {losses_bce.avg:.4f}, DiceLoss {losses_dice.avg:.4f}, ' 'Loss {losses.avg:.4f}'.format( epoch=self.epoch + 1, lr=self.optimizer.param_groups[0]['lr'], losses_bce=losses_bce, losses_dice=losses_dice, losses=losses)) # save model if (self.epoch + 1) % 1 == 0: model_dir = os.path.join(self.exp_dir, site_name) os.makedirs(model_dir, exist_ok=True) self.save_model(filename=os.path.join( model_dir, 'Epoch_%d_Dice_%.4f' % (self.epoch + 1, val_dice)))
def learn_batch(self, train_loader, val_loader=None): if self.reset_optimizer: # Reset optimizer before learning each task self.log('Optimizer is reset !') self.init_optimizer() # for epoch in range(self.config['schedule'][-1]): for epoch in range(self.config.nepoch): data_timer = Timer() batch_timer = Timer() batch_time = AverageMeter() data_time = AverageMeter() losses = AverageMeter() acc = AverageMeter() # Config the model and optimizer self.log('Epoch:{0}'.format(epoch)) self.model.train() if self.config.scheduler : self.scheduler.step(epoch) for param_group in self.optimizer.param_groups: self.log('LR:',param_group['lr']) # Learning with mini-batch data_timer.tic() batch_timer.tic() self.log('Itr\t\tTime\t\t Data\t\t Loss\t\tAcc') for i, (input, target, task) in enumerate(train_loader): data_time.update(data_timer.toc()) # measure data loading time if self.gpu: input = input.cuda() target = target.cuda() loss, output = self.update_model(input, target, task) input = input.detach() target = target.detach() # measure accuracy and record loss acc = accumulate_acc(output, target, task, acc) losses.update(loss, input.size(0)) batch_time.update(batch_timer.toc()) # measure elapsed time data_timer.toc() # Add wandb logging # log_dict = dict() # wandb.log(log_dict) if ((self.config['print_freq']>0) and (i % self.config['print_freq'] == 0)) or (i+1)==len(train_loader): self.log('[{0}/{1}]\t' '{batch_time.val:.4f} ({batch_time.avg:.4f})\t' '{data_time.val:.4f} ({data_time.avg:.4f})\t' '{loss.val:.3f} ({loss.avg:.3f})\t' '{acc.val:.2f} ({acc.avg:.2f})'.format( i, len(train_loader), batch_time=batch_time, data_time=data_time, loss=losses, acc=acc)) self.log(' * Train Acc {acc.avg:.3f}'.format(acc=acc)) # Evaluate the performance of current task if val_loader != None: self.validation(val_loader)
def validate(self): losses = AverageMeter() top1 = AverageMeter() top5 = AverageMeter() # switch to evaluate mode self.model.eval() for i, (input, target) in enumerate(self.val_loader): output = self.model(input) loss = self.criterion(output, target) # measure accuracy and record loss prec1, prec5 = accuracy(output, target, topk=(1, 5)) losses.update(loss.numpy()[0], input.shape[0]) top1.update(prec1.numpy()[0], input.shape[0]) top5.update(prec5.numpy()[0], input.shape[0]) self.logger.info(' * Prec@1 {top1.avg:.3f} Prec@5 {top5.avg:.3f}' .format(top1=top1, top5=top5)) return top1.avg, top5.avg
def train(epoch, train_loader, learner, args):
    """Optimize the learner for one epoch and print loss-specific metrics
    (classification accuracy, clustering scores, or pairwise f1-scores,
    depending on args.loss)."""
    # This function optimize the objective
    # Initialize all meters
    data_timer = Timer()
    batch_timer = Timer()
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    confusion = Confusion(args.out_dim)

    # Setup learner's configuration
    print('\n\n==== Epoch:{0} ===='.format(epoch))
    learner.train()
    learner.step_schedule(epoch)

    # The optimization loop
    data_timer.tic()
    batch_timer.tic()
    if args.print_freq > 0:  # Enable to print mini-log
        print('Itr |Batch time |Data Time |Loss')
    for i, (input, target) in enumerate(train_loader):
        data_time.update(data_timer.toc())  # measure data loading time
        # Prepare the inputs
        if args.use_gpu:
            input = input.cuda()
            target = target.cuda()
        # Map raw targets to the loss-specific train/eval targets.
        train_target, eval_target = prepare_task_target(input, target, args)

        # Optimization
        loss, output = learner.learn(input, train_target)

        # Update the performance meter
        confusion.add(output, eval_target)

        # Measure elapsed time
        batch_time.update(batch_timer.toc())
        data_timer.toc()

        # Mini-Logs
        losses.update(loss, input.size(0))
        if args.print_freq > 0 and ((i % args.print_freq == 0) or (i == len(train_loader) - 1)):
            print('[{0:6d}/{1:6d}]\t'
                  '{batch_time.val:.4f} ({batch_time.avg:.4f})\t'
                  '{data_time.val:.4f} ({data_time.avg:.4f})\t'
                  '{loss.val:.3f} ({loss.avg:.3f})'.format(
                i, len(train_loader), batch_time=batch_time,
                data_time=data_time, loss=losses))

    # Loss-specific information
    if args.loss == 'CE':
        print('[Train] ACC: ', confusion.acc())
    elif args.loss in ['KCL', 'MCL']:
        args.cluster2Class = confusion.optimal_assignment(
            train_loader.num_classes
        )  # Save the mapping in args to use in eval
        if args.out_dim <= 20:  # Avoid to print a large confusion matrix
            confusion.show()
        print('Clustering scores:', confusion.clusterscores())
        print('[Train] ACC: ', confusion.acc())
    elif args.loss == 'DPS':
        confusion.show(width=15,
                       row_labels=['GT_dis-simi', 'GT_simi'],
                       column_labels=['Pred_dis-simi', 'Pred_simi'])
        print('[Train] similar pair f1-score:',
              confusion.f1score(1))  # f1-score for similar pair (label:1)
        print('[Train] dissimilar pair f1-score:', confusion.f1score(0))
def reset_bn(model: nn.Module,
             data_loader,
             sync=False,
             backend="ddp",
             progress_bar=False) -> None:
    """Recalibrate the running statistics of every BatchNorm layer in `model`
    by forwarding `data_loader` through a temporary copy of the model.

    Args:
        model: network whose BN running_mean/running_var buffers are rewritten
            in place at the end.
        data_loader: yields (images, _) batches used to estimate statistics.
        sync: if True, average per-batch statistics across distributed workers.
        backend: only "ddp" synchronization is implemented.
        progress_bar: show a tqdm bar while iterating.
    """
    bn_mean = {}
    bn_var = {}

    # Work on a deep copy so the original model's forwards stay untouched.
    tmp_model = copy.deepcopy(model)
    for name, m in tmp_model.named_modules():
        if isinstance(m, _BatchNorm):
            bn_mean[name] = AverageMeter()
            bn_var[name] = AverageMeter()

            def new_forward(bn, mean_est, var_est):
                # Build a replacement forward that records this batch's
                # mean/var into the meters and normalizes with them, instead
                # of using the module's own running buffers.
                def lambda_forward(x):
                    x = x.contiguous()
                    if sync:
                        batch_mean = (x.mean(0, keepdim=True).mean(
                            2, keepdim=True).mean(3, keepdim=True))  # 1, C, 1, 1
                        if backend == "ddp":
                            # Gather per-worker means, then average them.
                            batch_mean = ddp_reduce_tensor(batch_mean, reduce="cat")
                        else:
                            raise NotImplementedError
                        batch_mean = torch.mean(batch_mean, dim=0, keepdim=True)

                        batch_var = (x - batch_mean) * (x - batch_mean)
                        batch_var = (batch_var.mean(0, keepdim=True).mean(
                            2, keepdim=True).mean(3, keepdim=True))
                        if backend == "ddp":
                            batch_var = ddp_reduce_tensor(batch_var, reduce="cat")
                        else:
                            raise NotImplementedError
                        batch_var = torch.mean(batch_var, dim=0, keepdim=True)
                    else:
                        batch_mean = (x.mean(0, keepdim=True).mean(
                            2, keepdim=True).mean(3, keepdim=True))  # 1, C, 1, 1
                        batch_var = (x - batch_mean) * (x - batch_mean)
                        batch_var = (batch_var.mean(0, keepdim=True).mean(
                            2, keepdim=True).mean(3, keepdim=True))

                    batch_mean = torch.squeeze(batch_mean)
                    batch_var = torch.squeeze(batch_var)

                    # Accumulate estimates weighted by the batch size.
                    mean_est.update(batch_mean.data, x.size(0))
                    var_est.update(batch_var.data, x.size(0))

                    # bn forward using calculated mean & var
                    # NOTE(review): weight/bias are sliced to the batch's
                    # channel count — presumably to support elastic/slimmed
                    # widths where the BN module is wider than the activation.
                    _feature_dim = batch_mean.shape[0]
                    return F.batch_norm(
                        x,
                        batch_mean,
                        batch_var,
                        bn.weight[:_feature_dim],
                        bn.bias[:_feature_dim],
                        False,
                        0.0,
                        bn.eps,
                    )

                return lambda_forward

            m.forward = new_forward(m, bn_mean[name], bn_var[name])

    # skip if there is no batch normalization layers in the network
    if len(bn_mean) == 0:
        return

    tmp_model.eval()
    with torch.no_grad():
        with tqdm(total=len(data_loader),
                  desc="reset bn",
                  disable=(not progress_bar)) as t:
            for images, _ in data_loader:
                images = images.cuda()
                tmp_model(images)
                t.set_postfix({
                    "batch_size": images.size(0),
                    "image_size": images.size(2),
                })
                t.update()

    # Copy the accumulated statistics back into the ORIGINAL model's buffers.
    for name, m in model.named_modules():
        if name in bn_mean and bn_mean[name].count > 0:
            feature_dim = bn_mean[name].avg.size(0)
            assert isinstance(m, _BatchNorm)
            m.running_mean.data[:feature_dim].copy_(bn_mean[name].avg)
            m.running_var.data[:feature_dim].copy_(bn_var[name].avg)