def _train_epoch(self):
    """Run one training epoch, then evaluate on the val and test splits.

    The training target is selected by ``self.args.extra``:
      - 'all_noisy_samples':  train on the (possibly corrupted) noisy labels
      - 'all_samples_gt':     train on the ground-truth labels (oracle)
      - 'only_clean_samples': train on noisy labels, but mask the per-sample
        loss so only samples whose noisy label equals the gt label contribute

    Returns:
        dict with averaged train loss / accuracies plus val/test loss and
        top-1 accuracy.
    """
    self.model.train()
    losses = AverageMeter()
    Ntop1 = AverageMeter()  # top-1/5 accuracy w.r.t. the noisy labels
    Ntop5 = AverageMeter()
    Ctop1 = AverageMeter()  # top-1/5 accuracy w.r.t. the clean (gt) labels
    Ctop5 = AverageMeter()
    for batch_idx, (inputs, noisy_labels, soft_labels, gt_labels, index) in enumerate(self.train_loader):
        inputs, noisy_labels, soft_labels, gt_labels = (
            inputs.cuda(), noisy_labels.cuda(), soft_labels.cuda(), gt_labels.cuda())
        # True where the noisy label happens to equal the ground truth.
        clean_flag = noisy_labels.eq(gt_labels)
        outputs = self.model(inputs)
        # NOTE(review): self.train_criterion must return a per-sample loss
        # (reduction='none') for the clean-sample masking below to work —
        # TODO confirm how it is constructed.
        if self.args.extra == 'all_noisy_samples':
            loss = self.train_criterion(outputs[0], noisy_labels).mean()
        elif self.args.extra == 'all_samples_gt':
            loss = self.train_criterion(outputs[0], gt_labels).mean()
        elif self.args.extra == 'only_clean_samples':
            loss = (self.train_criterion(outputs[0], noisy_labels) * clean_flag).mean()
        else:
            # Previously an unknown mode fell through the if/elif chain and
            # raised a confusing NameError on `loss`; fail fast instead.
            raise ValueError('unknown args.extra mode: {!r}'.format(self.args.extra))
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()
        Nprec1, Nprec5 = accuracy(outputs[0], noisy_labels, topk=(1, 5))
        Cprec1, Cprec5 = accuracy(outputs[0], gt_labels, topk=(1, 5))
        losses.update(loss.item(), inputs.size(0))
        Ntop1.update(Nprec1.item(), inputs.size(0))
        Ntop5.update(Nprec5.item(), inputs.size(0))
        Ctop1.update(Cprec1.item(), inputs.size(0))
        Ctop5.update(Cprec5.item(), inputs.size(0))
    val_loss, val_acc1, _ = self._val_epoch()
    test_loss, test_acc1, _ = self._test_epoch()
    log = {'train_loss': losses.avg,
           'train_N_acc_1': Ntop1.avg,
           'train_C_acc_1': Ctop1.avg,
           'val_loss': val_loss,
           'val_acc_1': val_acc1,
           'test_loss': test_loss,
           'test_acc_1': test_acc1}
    return log
if __name__ == '__main__':
    # Evaluate a trained gaze-estimation checkpoint on the validation split.
    filename = 'gaze_estimation.pt'
    model = GazeEstimationModel()
    model.load_state_dict(torch.load(filename))
    model = model.to(device)
    model.eval()  # eval mode: disables dropout, freezes batchnorm statistics
    val_loader = torch.utils.data.DataLoader(GazeEstimationDataset('val'), batch_size=1, shuffle=False, num_workers=num_workers)
    criterion = nn.SmoothL1Loss()
    losses = AverageMeter()    # combined loss (presumably; tail is truncated)
    l_losses = AverageMeter()  # look-vector loss — TODO confirm, see note below
    p_losses = AverageMeter()  # pupil-size loss — TODO confirm
    # Batches
    for (img, lbl_look_vec, lbl_pupil_size) in tqdm(val_loader):
        # Move to GPU, if available
        img = img.to(device)
        lbl_look_vec = lbl_look_vec.float().to(device)  # [N, 3]
        lbl_pupil_size = lbl_pupil_size.float().to(device)  # [N, 1]
        # Forward prop. (inference only, no gradients)
        with torch.no_grad():
            out_look_vec, out_pupil_size = model(img)  # embedding => [N, 3]
        # Calculate loss
        # NOTE(review): the chunk appears truncated here — `criterion` and
        # the meters above are never used in the visible code.
def valid_epoch(model, valid_dataloader, lam, print_freq=40, classify_model=None):
    """Run one validation epoch of the noise-conditioned reconstruction model.

    For each batch the reconstruction `model` maps (images, noise) to
    `outputs`; the frozen `classify_model` then scores both the raw images
    (y1) and the reconstructions (y2).  Loss = pixel L2 (cal_loss2) +
    perception loss between the two classifier outputs (cal_loss).

    Args:
        model: reconstruction network, called as model(images, noise=...).
        valid_dataloader: yields (images, labels, noise) batches.
        lam: unused in this function (kept for caller compatibility).
        print_freq: log and dump sample images every `print_freq` batches.
        classify_model: classifier used for the perception loss.

    Returns:
        (avg batch time, avg loss, avg classifier error on reconstructions)
    """
    batch_time = AverageMeter()
    losses = AverageMeter()
    error1 = AverageMeter()  # classifier error on the raw images
    error2 = AverageMeter()  # classifier error on the reconstructions
    end = time.time()
    model.eval()
    cost = cal_loss
    classify_model.eval()
    # Per-channel std/mean on GPU, used by convert() to de-normalise images.
    custdv, cumean = torch.tensor(stdv).cuda(), torch.tensor(mean).cuda()
    for i, (images, labels, noise) in enumerate(valid_dataloader):
        # global `args`: args.phi scales the injected noise.
        images, labels, noise = images.cuda(), labels.cuda(
        ), noise.cuda().float() * args.phi
        outputs = model(images, noise=noise)
        with torch.no_grad():
            y1 = classify_model(images).detach()
            y2 = classify_model(outputs)
        # l2: pixel loss input-vs-reconstruction; perception_loss: distance
        # between classifier responses on clean vs reconstructed images.
        l2, perception_loss = cal_loss2(images, outputs).mean(), cost(y2, y1).mean()
        if i % print_freq == 0:
            print('l2={},perception_loss={}'.format(l2, perception_loss))
        loss = l2 + perception_loss
        batch_size = labels.size(0)
        losses.update(loss.item(), batch_size)
        y1 = y1.max(1)[1]  # predicted class ids
        y2 = y2.max(1)[1]
        error1.update(
            torch.ne(y1.cpu(), labels.cpu()).float().sum().item() / batch_size,
            batch_size)
        error2.update(
            torch.ne(y2.cpu(), labels.cpu()).float().sum().item() / batch_size,
            batch_size)
        batch_time.update(time.time() - end)
        end = time.time()
        if i % print_freq == 0:
            # Dump de-normalised samples to disk for visual inspection.
            # NOTE(review): every image of the batch is written to the SAME
            # filename, so only the last one survives — confirm intended.
            ims, ims1 = convert(outputs, cumean, custdv, images)
            for im in ims1:
                im = im * 255.
                im = np.rint(im).astype(np.uint8)
                cv2.imwrite(
                    str(i // print_freq % print_freq) + "valid.jpg", im)
            for im in ims:
                im = im * 255.
                im = np.rint(im).astype(np.uint8)
                cv2.imwrite(
                    str(i // print_freq % print_freq) + "validrec.jpg", im)
            res = '\t'.join([
                'valid:',
                'Iter: [%d/%d]' % (i + 1, len(valid_dataloader)),
                'Time %.3f (%.3f)' % (batch_time.val, batch_time.avg),
                'Loss %.4f (%.4f)' % (losses.val, losses.avg),
                'Error %.4f (%.4f)' % (error2.val, error2.avg),
                'raw Error %.4f (%.4f)' % (error1.val, error1.avg)
            ])
            print(res)
    return batch_time.avg, losses.avg, error2.avg
pin_memory=pin_memory) print("Initializing model: {}".format(arch)) model = models.init_model(name=arch, num_classes=len(classes), is_trained=trained) print("Model size: {:.5f}M".format( sum(p.numel() for p in model.parameters()) / 1000000.0)) # print(model) print("Initializing optimizer: {}".format(optim)) optimizer = init_optim(optim, model.parameters(), learning_rate, weight_decay, momentum) if use_gpu: model = nn.DataParallel(model).cuda() model.train() losses = AverageMeter() for batch_idx, tuple_i in enumerate(valid_loader): data, target = tuple_i data = Variable(torch.FloatTensor(data).cuda(), requires_grad=True) target = Variable(torch.FloatTensor(target).cuda()) output = model(data) print(output.shape) break
def train_model(train_loader, model, vgg, criterion, optimizer, epoch, tb_writer):
    """One training epoch for an image-inpainting model with VGG losses.

    Total loss = weighted sum of: hole loss (criterion inside the mask),
    valid loss (criterion outside the mask), style loss (Gram matrices of
    5 VGG feature levels), content loss (4th VGG level), and a total
    variation term over the hole region.  The r*_weight / *_Loss_weight
    factors are module-level globals.

    Args:
        train_loader: yields dicts with 'hole_img', 'ori_img', 'mask'.
        model: inpainting generator.
        vgg: feature extractor returning a list of 5 feature levels.
        criterion: elementwise reconstruction loss (per the comment, L1).
        optimizer / epoch / tb_writer: standard training plumbing.
    """
    losses = AverageMeter()
    hole_losses = AverageMeter()
    valid_losses = AverageMeter()
    style_losses = AverageMeter()
    content_losses = AverageMeter()
    tv_losses = AverageMeter()
    # Per-level style-loss meters (one per VGG feature level).
    s1 = AverageMeter()
    s2 = AverageMeter()
    s3 = AverageMeter()
    s4 = AverageMeter()
    s5 = AverageMeter()
    # ensure model is in train mode
    model.train()
    pbar = tqdm(train_loader)
    for i, data in enumerate(pbar):
        inputs = data['hole_img'].float()
        labels = data['ori_img'].float()
        ori_img = labels.clone()
        # mask: 1 for the hole and 0 for others
        masks = data['mask'].float()
        inputs = inputs.to(config.device)
        labels = labels.to(config.device)
        masks = masks.to(config.device)
        ori_img = ori_img.to(config.device)
        # pass this batch through our model and get y_pred
        outputs = model(inputs)
        # use five different level features, each are extracted after down-sampling
        targets = vgg(ori_img)
        features = vgg(outputs)
        # get content and style loss, accumulated sample-by-sample
        content_loss = 0
        style_loss = 0
        now_style_loss = [0.0, 0.0, 0.0, 0.0, 0.0]
        for k in range(inputs.size(0)):
            # Content: squared distance at the 4th feature level.
            content_loss += torch.sum((features[3][k] - targets[3][k]) ** 2) / 2
            targets_gram = [gram_matrix(f[k]) for f in targets]
            features_gram = [gram_matrix(f[k]) for f in features]
            for j in range(len(targets_gram)):
                # NOTE(review): now_style_loss[j] is overwritten (not summed)
                # for each sample k, so the per-level meters below record only
                # the last sample of the batch — confirm intended.
                now_style_loss[j] = torch.sum((features_gram[j] - targets_gram[j]) ** 2)
                style_loss = style_loss + now_style_loss[j]
        style_loss /= inputs.size(0)
        content_loss /= inputs.size(0)
        style_losses.update(style_loss.item(), inputs.size(0))
        content_losses.update(content_loss.item(), inputs.size(0))
        # update loss metric
        # suppose criterion is L1 loss
        hole_loss = criterion(outputs * masks, labels * masks)
        valid_loss = criterion(outputs * (1 - masks), labels * (1 - masks))
        hole_losses.update(hole_loss.item(), inputs.size(0))
        valid_losses.update(valid_loss.item(), inputs.size(0))
        write_avgs([s1, s2, s3, s4, s5], now_style_loss)
        # get total variation loss over the hole region
        outputs_hole = outputs * masks
        targets_hole = labels * masks
        # NOTE(review): classic TV loss compares the output with itself; here
        # shifted *output* pixels are compared against *target* pixels —
        # confirm this cross-image form is intended.
        tv_loss = torch.sum(torch.abs(outputs_hole[:, :, :, 1:] - targets_hole[:, :, :, :-1])) \
            + torch.sum(torch.abs(outputs_hole[:, :, 1:, :] - targets_hole[:, :, :-1, :]))
        tv_loss /= inputs.size(0)
        tv_losses.update(tv_loss.item(), inputs.size(0))
        # total loss (weights are module-level globals)
        loss = hole_loss * rHole_Loss_weight + valid_loss * rValid_Loss_weight + \
            style_loss * rStyle_Loss_weight + content_loss * rContent_Loss_weight + \
            tv_loss * rTv_Loss_weight
        losses.update(loss.item(), inputs.size(0))
        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        pbar.set_description("EPOCH[{}][{}/{}]".format(epoch, i, len(train_loader)))
        pbar.set_postfix(loss="LOSS:{:.4f}".format(losses.avg))
    # Per-epoch tensorboard summaries.
    tb_writer.add_scalar('train/epoch_loss', losses.avg, epoch)
    tb_writer.add_scalar('train/hole_loss', hole_losses.avg * Hole_Loss_weight, epoch)
    tb_writer.add_scalar('train/valid_loss', valid_losses.avg * Valid_Loss_weight, epoch)
    tb_writer.add_scalar('train/style_loss', style_losses.avg * Style_Loss_weight, epoch)
    tb_writer.add_scalar('train/content_loss', content_losses.avg * Content_Loss_weight, epoch)
    tb_writer.add_scalar('train/tv_loss', tv_losses.avg * Tv_Loss_weight, epoch)
    write_tensor(perceptual_style_name, [s1, s2, s3, s4, s5], epoch, tb_writer)
    torch.cuda.empty_cache()
    return
def train(train_loader, M_loader, model, criterion, optimizer, epoch, log, args, avg_norm):
    """Train for one epoch with gradient accumulation (`args.steps`
    micro-batches per optimizer step), a polynomial LR schedule with warmup,
    and optional layer-wise preconditioning via compute_M() (args.lw / LARS).

    Returns:
        (top-1 avg, top-5 avg, loss avg) over the epoch.
    """
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()
    # switch to train mode
    model.train()
    num_batches = len(train_loader) // args.steps  # optimizer steps per epoch
    warmup_steps = num_batches * args.warmup
    total_steps = num_batches * args.epochs
    f_time = 0  # cumulative forward time
    b_time = 0  # cumulative backward/step time
    d_time = 0  # cumulative data-loading time
    start_time = time.time()
    if epoch == 0:
        if args.lw:
            compute_M(model, criterion, optimizer, M_loader, avg_norm, iters=args.M_iters)
    optimizer.zero_grad()
    for i, (inputs, target) in enumerate(train_loader):
        # Update the LR once per accumulated step, not per micro-batch.
        if (epoch * len(train_loader) + i) % args.steps == 0:
            poly_lr_rate((epoch * len(train_loader) + i) // args.steps,
                         warmup_steps, total_steps, optimizer,
                         args.learning_rate * args.batch_size * args.steps / 128)
        if args.use_cuda:
            # BUG FIX: `async` became a reserved keyword in Python 3.7; the
            # PyTorch argument was renamed to `non_blocking` in 0.4.
            inputs = inputs.cuda(non_blocking=True)
            target = target.cuda()
        input_var = torch.autograd.Variable(inputs)
        target_var = torch.autograd.Variable(target)
        d_time += time.time() - start_time
        # compute output
        start_time = time.time()
        output = model(input_var)
        loss = criterion(output, target_var)
        # measure accuracy and record loss
        prec1, prec5 = accuracy(output.data, target, topk=(1, 5))
        losses.update(loss.data.item(), inputs.size(0))
        top1.update(prec1.item(), inputs.size(0))
        top5.update(prec5.item(), inputs.size(0))
        f_time += time.time() - start_time
        # compute gradient and do SGD step
        start_time = time.time()
        loss.backward()
        if (epoch * len(train_loader) + i + 1) % args.steps == 0:
            if args.lars:
                optimizer.step(avg_norm=avg_norm)
            else:
                optimizer.step()
            if args.lw and epoch < args.lw_epochs:
                compute_M(model, criterion, optimizer, M_loader, avg_norm, iters=args.M_iters)
                optimizer.eta = args.lw_eta
            else:
                avg_norm = []
                optimizer.eta = args.eta
            optimizer.zero_grad()
        b_time += time.time() - start_time
        start_time = time.time()
    return top1.avg, top5.avg, losses.avg
def train(epoch, model, optimizer, scheduler, criterion, train_loader, config, writer, AT):
    """One training epoch driven by a nested config dict.

    Supports mixup, multistep/cosine LR schedules, tensorboard logging,
    an 'SD' squared-logit penalty, and optional PGD adversarial training
    (AT=True).  Updates the module-level `global_step` counter.
    """
    global global_step
    run_config = config['run_config']
    optim_config = config['optim_config']
    data_config = config['data_config']
    logger.info('Train {}'.format(epoch))
    model.train()
    loss_meter = AverageMeter()
    accuracy_meter = AverageMeter()
    start = time.time()
    for step, (data, targets) in enumerate(train_loader):
        global_step += 1
        if data_config['use_mixup']:
            # mixup produces soft (mixed one-hot) targets; hard labels are
            # recovered below for the accuracy computation.
            data, targets = mixup(data, targets, data_config['mixup_alpha'],
                                  data_config['n_classes'])
        if run_config['tensorboard_train_images']:
            if step == 0:
                image = torchvision.utils.make_grid(data,
                                                    normalize=True,
                                                    scale_each=True)
                writer.add_image('Train/Image', image, epoch)
        # NOTE(review): the scheduler is stepped on EVERY batch; for
        # 'multistep' this calls step(epoch - 1) repeatedly within an epoch —
        # confirm the scheduler tolerates that.
        if optim_config['scheduler'] == 'multistep':
            scheduler.step(epoch - 1)
        elif optim_config['scheduler'] == 'cosine':
            scheduler.step()
        if run_config['tensorboard']:
            if optim_config['scheduler'] != 'none':
                lr = scheduler.get_lr()[0]
            else:
                lr = optim_config['base_lr']
            writer.add_scalar('Train/LearningRate', lr, global_step)
        if run_config['use_gpu']:
            data = data.cuda()
            targets = targets.cuda()
        optimizer.zero_grad()
        if AT:
            # all for the attack: PGD operates in pixel space, so
            # un-normalise with these statistics, attack, then re-normalise.
            mean = torch.FloatTensor(
                np.array([0.4914, 0.4822, 0.4465])[None, :, None, None]).cuda()
            std = torch.FloatTensor(
                np.array([0.2470, 0.2435, 0.2616])[None, :, None, None]).cuda()
            data = data.mul_(std).add_(mean)
            atk = torchattacks.PGD(model, eps=5 / 255, alpha=0.5 / 255, steps=10)
            data = atk(data, targets)
            data = data.sub_(mean).div_(std)
            # end of attack
        outputs = model(data)
        loss = criterion(outputs, targets)
        # SD: add a penalty on the squared logits, scaled by config['SD'].
        if optim_config['SD'] != 0.0:
            loss += (outputs**2).mean() * optim_config['SD']
        loss.backward()
        optimizer.step()
        _, preds = torch.max(outputs, dim=1)
        loss_ = loss.item()
        if data_config['use_mixup']:
            # Hard labels from the mixed one-hot targets, for accuracy only.
            _, targets = targets.max(dim=1)
        correct_ = preds.eq(targets).sum().item()
        num = data.size(0)
        accuracy = correct_ / num
        loss_meter.update(loss_, num)
        accuracy_meter.update(accuracy, num)
        if run_config['tensorboard']:
            writer.add_scalar('Train/RunningLoss', loss_, global_step)
            writer.add_scalar('Train/RunningAccuracy', accuracy, global_step)
        if step % 100 == 0:
            logger.info('Epoch {} Step {}/{} '
                        'Loss {:.4f} ({:.4f}) '
                        'Accuracy {:.4f} ({:.4f})'.format(
                            epoch,
                            step,
                            len(train_loader),
                            loss_meter.val,
                            loss_meter.avg,
                            accuracy_meter.val,
                            accuracy_meter.avg,
                        ))
    elapsed = time.time() - start
    logger.info('Elapsed {:.2f}'.format(elapsed))
    if run_config['tensorboard']:
        writer.add_scalar('Train/Loss', loss_meter.avg, epoch)
        writer.add_scalar('Train/Accuracy', accuracy_meter.avg, epoch)
        writer.add_scalar('Train/Time', elapsed, epoch)
def evaluate(model, valloader, epoch, cfg, index=2):
    """Evaluate a text-to-image retrieval model on the validation set.

    Computes a symmetric in-batch cross-entropy over the similarity matrix
    of the `index`-th (visual, language) embedding pair and checkpoints the
    model whenever top-1 accuracy improves.

    Relies on module globals: tokenizer, best_top1_eval, args, optimizer.
    """
    global best_top1_eval
    print("Test::::")
    model.eval()
    batch_time = AverageMeter('Time', ':6.3f')
    data_time = AverageMeter('Data', ':6.3f')
    losses = AverageMeter('Loss', ':.4e')
    top1_acc = AverageMeter('Acc@1', ':6.2f')
    top5_acc = AverageMeter('Acc@5', ':6.2f')
    progress = ProgressMeter(
        len(valloader), [batch_time, data_time, losses, top1_acc, top5_acc],
        prefix="Test Epoch: [{}]".format(epoch))
    end = time.time()
    with torch.no_grad():
        for batch_idx, batch in enumerate(valloader):
            if cfg.DATA.USE_MOTION:
                image, text, bk, id_car = batch
            else:
                image, text, id_car = batch
            tokens = tokenizer.batch_encode_plus(text,
                                                 padding='longest',
                                                 return_tensors='pt')
            data_time.update(time.time() - end)
            if cfg.DATA.USE_MOTION:
                pairs, logit_scale, cls_logits = model(
                    tokens['input_ids'].cuda(),
                    tokens['attention_mask'].cuda(), image.cuda(), bk.cuda())
            else:
                pairs, logit_scale, cls_logits = model(
                    tokens['input_ids'].cuda(),
                    tokens['attention_mask'].cuda(), image.cuda())
            logit_scale = logit_scale.mean().exp()
            loss = 0
            # Only the `index`-th embedding pair is scored here.
            visual_embeds, lang_embeds = pairs[index]
            # Scaled similarity logits: image->text, and its transpose.
            sim_i_2_t = torch.matmul(torch.mul(logit_scale, visual_embeds),
                                     torch.t(lang_embeds))
            sim_t_2_i = sim_i_2_t.t()
            # In-batch contrastive loss: position k is the positive for k.
            loss_t_2_i = F.cross_entropy(sim_t_2_i,
                                         torch.arange(image.size(0)).cuda())
            loss_i_2_t = F.cross_entropy(sim_i_2_t,
                                         torch.arange(image.size(0)).cuda())
            loss += (loss_t_2_i + loss_i_2_t) / 2
            acc1, acc5 = accuracy(sim_t_2_i,
                                  torch.arange(image.size(0)).cuda(),
                                  topk=(1, 5))
            losses.update(loss.item(), image.size(0))
            top1_acc.update(acc1[0], image.size(0))
            top5_acc.update(acc5[0], image.size(0))
            batch_time.update(time.time() - end)
            end = time.time()
            progress.display(batch_idx)
    # Keep the best-so-far checkpoint by validation top-1.
    if top1_acc.avg > best_top1_eval:
        best_top1_eval = top1_acc.avg
        checkpoint_file = args.name + "/checkpoint_best_eval.pth"
        torch.save(
            {
                "epoch": epoch,
                "state_dict": model.state_dict(),
                "optimizer": optimizer.state_dict()
            }, checkpoint_file)
def validate(args):
    """Run COCO evaluation of an EfficientDet checkpoint.

    Builds the model and eval bench, iterates the requested annotation
    split, collects detections in COCO result format, dumps them to
    `args.results`, and (for non-test splits) runs COCOeval.

    Returns:
        The list of COCO-format detection dicts.
    """
    args.pretrained = args.pretrained or not args.checkpoint
    args.prefetcher = not args.no_prefetcher
    # create model
    config = get_efficientdet_config(args.model)
    model = EfficientDet(config)
    if args.checkpoint:
        load_checkpoint(model, args.checkpoint)
    param_count = sum([m.numel() for m in model.parameters()])
    logging.info('Model %s created, param count: %d' % (args.model, param_count))
    bench = DetBenchEval(model, config)
    bench.model = bench.model.cuda()
    if has_amp:
        bench.model = amp.initialize(bench.model, opt_level='O1')
    if args.num_gpu > 1:
        bench.model = torch.nn.DataParallel(bench.model,
                                            device_ids=list(range(
                                                args.num_gpu)))
    # test splits only ship image info, not instance annotations.
    if 'test' in args.anno:
        annotation_path = os.path.join(args.data, 'annotations',
                                       f'image_info_{args.anno}.json')
        image_dir = 'test2017'
    else:
        annotation_path = os.path.join(args.data, 'annotations',
                                       f'instances_{args.anno}.json')
        image_dir = args.anno
    dataset = CocoDetection(os.path.join(args.data, image_dir), annotation_path)
    loader = create_loader(dataset,
                           input_size=config.image_size,
                           batch_size=args.batch_size,
                           use_prefetcher=args.prefetcher,
                           interpolation=args.interpolation,
                           num_workers=args.workers)
    img_ids = []
    results = []
    model.eval()
    batch_time = AverageMeter()
    end = time.time()
    with torch.no_grad():
        for i, (input, target) in enumerate(loader):
            output = bench(input, target['img_id'], target['scale'])
            for batch_out in output:
                for det in batch_out:
                    # det layout (from usage): [img_id, x, y, w, h, score, cls]
                    image_id = int(det[0])
                    score = float(det[5])
                    coco_det = {
                        'image_id': image_id,
                        'bbox': det[1:5].tolist(),
                        'score': score,
                        'category_id': int(det[6]),
                    }
                    img_ids.append(image_id)
                    results.append(coco_det)
            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()
            if i % args.log_freq == 0:
                print(
                    'Test: [{0:>4d}/{1}] '
                    'Time: {batch_time.val:.3f}s ({batch_time.avg:.3f}s, {rate_avg:>7.2f}/s) '
                    .format(
                        i,
                        len(loader),
                        batch_time=batch_time,
                        rate_avg=input.size(0) / batch_time.avg,
                    ))
    # NOTE(review): the file handle from open() is never closed explicitly.
    json.dump(results, open(args.results, 'w'), indent=4)
    if 'test' not in args.anno:
        coco_results = dataset.coco.loadRes(args.results)
        coco_eval = COCOeval(dataset.coco, coco_results, 'bbox')
        coco_eval.params.imgIds = img_ids  # score only ids we've used
        coco_eval.evaluate()
        coco_eval.accumulate()
        coco_eval.summarize()
    return results
def main(config, resume):
    """Train MyNet on the detection dataset described by `config`.

    Standard training loop with optional entropy-confidence weighting
    (use_conf), an optional per-epoch few-shot fine-tuning pass, per-epoch
    validation, best-checkpoint saving, and a MultiStepLR schedule.

    Args:
        config: dict of hyper-parameters and paths (keys read below).
        resume: unused in the visible code — TODO confirm.
    """
    # parameters
    batch_size = config.get('batch_size', 32)
    start_epoch = config['epoch']['start']
    max_epoch = config['epoch']['max']
    lr = config.get('lr', 0.0005)
    use_conf = config.get('use_conf', False)
    ## path: results/models go under save_path/<timestamp>/
    save_path = config['save_path']
    timestamp = datetime.now().strftime(r"%Y-%m-%d_%H-%M-%S")
    save_path = os.path.join(save_path, timestamp)
    result_path = os.path.join(save_path, 'result')
    if not os.path.exists(result_path):
        os.makedirs(result_path)
    model_path = os.path.join(save_path, 'model')
    if not os.path.exists(model_path):
        os.makedirs(model_path)
    # keep a copy of the training script next to the results
    dest = shutil.copy('train.py', save_path)
    print("save to: ", dest)
    ## cuda or cpu
    if config['n_gpu'] == 0 or not torch.cuda.is_available():
        device = torch.device("cpu")
        print("using CPU")
    else:
        device = torch.device("cuda:0")
    ## dataloader
    dataset = Dataset(phase='train', do_augmentations=False)
    data_loader = DataLoader(
        dataset,
        batch_size=int(batch_size),
        num_workers=1,
        shuffle=True,
        drop_last=True,
        pin_memory=True,
    )
    val_dataset = Dataset(phase='val', do_augmentations=False)
    val_data_loader = DataLoader(
        val_dataset,
        batch_size=int(batch_size),
        num_workers=1,
        shuffle=True,
        drop_last=True,
        pin_memory=True,
    )
    ## few shot
    do_few_shot = True
    if do_few_shot:
        fs_dataset = Dataset(
            phase='train',
            do_augmentations=False,
            metafile_path='metadata/detection_train_images.json')
        fs_data_loader = DataLoader(
            fs_dataset,
            batch_size=int(128),
            num_workers=1,
            shuffle=True,
            pin_memory=True,
        )
    ## CNN model
    output_dim = 3
    model = MyNet(output_dim)
    model = model.to(device)
    model.train()
    print(model)
    ## loss: per-sample (reduction='none') so it can be confidence-weighted
    criterion = nn.CrossEntropyLoss(reduction='none')
    ## optimizer (only trainable parameters)
    params = list(filter(lambda p: p.requires_grad, model.parameters()))
    optim_params = {
        'lr': lr,
        'weight_decay': 0,
        'amsgrad': False,
    }
    optimizer = torch.optim.Adam(params, **optim_params)
    lr_params = {
        'milestones': [10],
        'gamma': 0.1,
    }
    lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, **lr_params)
    loss_avg = AverageMeter()
    acc_avg = AverageMeter()
    fs_loss_avg = AverageMeter()
    fs_acc_avg = AverageMeter()
    logger = SimpleLogger(['train_loss', 'train_acc', 'val_loss', 'val_acc'])
    ## loop
    for epoch in range(start_epoch, max_epoch):
        # NOTE(review): only loss_avg is reset per epoch; acc_avg and the
        # fs_* meters accumulate across epochs — confirm intended.
        loss_avg.reset()
        for batch_idx, batch in tqdm(
                enumerate(data_loader),
                total=len(data_loader),
                ncols=80,
                desc=f'training epoch {epoch}',
        ):
            data = batch[0].to(device)
            gt_lbls = batch[1].to(device)
            gt_gt_lbls = batch[2].to(device)
            ## set zerograd
            optimizer.zero_grad()
            ## run forward pass
            out = model(data)  ## logits: [B, NC]; conf: [B, 1]
            preds = torch.max(out, dim=-1)[1]
            weights = model.compute_entropy_weight(out)
            ## compute loss
            class_loss = criterion(out, gt_lbls)  ## [B, 1]
            if use_conf:
                # confidence weighting: scale per-sample loss by weight^2,
                # regularised by (1 - weight)^2.
                loss = (class_loss * (weights**2) + (1 - weights)**2).mean()
            else:
                loss = class_loss.mean()
            ## record (drop_last=True, so batch_size is the true batch size)
            loss_avg.update(loss.item(), batch_size)
            positive = ((gt_lbls == preds) + (gt_gt_lbls > 2)).sum()
            batch_acc = positive.to(torch.float) / batch_size
            acc_avg.update(batch_acc.item(), batch_size)
            ## run backward pass
            loss.backward()
            optimizer.step()  ## update
        ## each epoch
        logger.update(loss_avg.avg, 'train_loss')
        logger.update(acc_avg.avg, 'train_acc')
        print("train loss: ", loss_avg.avg)
        print("train acc: ", acc_avg.avg)
        if do_few_shot and fs_data_loader is not None:
            # Few-shot pass: same objective on the few-shot loader.
            for batch_idx, batch in tqdm(
                    enumerate(fs_data_loader),
                    total=len(fs_data_loader),
                    ncols=80,
                    desc=f'training epoch {epoch}',
            ):
                data = batch[0].to(device)
                gt_lbls = batch[1].to(device)
                gt_gt_lbls = batch[2].to(device)
                ## set zerograd
                optimizer.zero_grad()
                ## run forward pass
                out = model(data)  ## logits: [B, NC]; conf: [B, 1]
                preds = torch.max(out, dim=-1)[1]
                weights = model.compute_entropy_weight(out)
                ## compute loss
                class_loss = criterion(out, gt_lbls)  ## [B, 1]
                if use_conf:
                    loss = (class_loss * (weights**2) + (1 - weights)**2).mean()
                else:
                    loss = class_loss.mean()
                ## record (no drop_last here, so use the actual batch size)
                positive = ((gt_lbls == preds) + (gt_gt_lbls > 2)).sum()
                batch_acc = positive.to(torch.float) / data.shape[0]
                fs_loss_avg.update(loss.item(), data.shape[0])
                fs_acc_avg.update(batch_acc.item(), data.shape[0])
                ## run backward pass
                loss = loss * 1.0
                loss.backward()
                optimizer.step()  ## update
            ## each epoch
            print("fs train loss: ", fs_loss_avg.avg)
            print("fs train acc: ", fs_acc_avg.avg)
        if val_data_loader is not None:
            log = evaluate(model.eval(), val_data_loader, device, use_conf=use_conf)
            model.train()
            logger.update(log['loss'], 'val_loss')
            logger.update(log['acc'], 'val_acc')
            print("val loss: ", log['loss'])
            print("val acc: ", log['acc'])
            best_idx = logger.get_best('val_acc', best='max')
            if best_idx == epoch:
                print('save ckpt')
                ## save ckpt when this epoch is the best so far
                _save_checkpoint(model_path, epoch, model)
        lr_scheduler.step()
        print()
    ## save final model
    _save_checkpoint(model_path, epoch, model)
scheduler = WarmUpLR(lr_scheduler=step_scheduler, warmup_steps=int(1. * cfg.TRAIN.LR.WARMUP_EPOCH * len(trainloader))) if cfg.MODEL.BERT_TYPE == "BERT": tokenizer = BertTokenizer.from_pretrained("bert-base-uncased") elif cfg.MODEL.BERT_TYPE == "ROBERTA": tokenizer = RobertaTokenizer.from_pretrained(cfg.MODEL.BERT_NAME) model.train() global_step = 0 best_top1 = 0. for epoch in range(cfg.TRAIN.EPOCH): evaluate(model, valloader, epoch, cfg, 0) model.train() batch_time = AverageMeter('Time', ':6.3f') data_time = AverageMeter('Data', ':6.3f') losses = AverageMeter('Loss', ':.4e') top1_acc = AverageMeter('Acc@1', ':6.2f') top5_acc = AverageMeter('Acc@5', ':6.2f') progress = ProgressMeter( len(trainloader) * cfg.TRAIN.ONE_EPOCH_REPEAT, [batch_time, data_time, losses, top1_acc, top5_acc], prefix="Epoch: [{}]".format(epoch)) end = time.time() for tmp in range(cfg.TRAIN.ONE_EPOCH_REPEAT): for batch_idx, batch in enumerate(trainloader): if cfg.DATA.USE_MOTION: image, text, bk, id_car = batch else: image, text, id_car = batch
def train(epoch, split):
    """Train for one epoch on loaders[split].

    Legacy Python-2 / PyTorch<0.4 code: uses Variable, `.data[0]`, and
    `dict.iteritems` — do not run under Python 3 without porting.

    Relies on module globals: loaders, model, opt, tb_logger.  Loss,
    recall and precision are tracked per 'gram' (relation branch) and
    flushed to tensorboard every 500 model iterations.
    """
    batch_time = 0
    train_loss = {}
    train_recall = {}
    train_precision = {}
    loader = loaders[split]
    model.train()
    start_time = time.time()
    start = time.time()
    for batch_idx, batch_input in enumerate(loader):
        for key in batch_input.keys():
            if opt.use_gpu:
                batch_input[key] = Variable(batch_input[key].cuda())
            else:
                batch_input[key] = Variable(batch_input[key])
        # Train
        loss, tp_class, fp_class, num_pos_class = model.train_(batch_input)
        batch_time += time.time() - start
        start = time.time()
        # True pos/false pos per branch
        for gram in tp_class.keys():
            # nanmean skips classes with no positives (0/0 -> nan).
            recall = np.nanmean(tp_class[gram].numpy() / num_pos_class[gram].numpy())
            precision = np.nanmean(
                tp_class[gram].numpy() /
                (tp_class[gram].numpy() + fp_class[gram].numpy()))
            if gram not in train_recall.keys():
                train_recall[gram] = AverageMeter()
            if gram not in train_precision.keys():
                train_precision[gram] = AverageMeter()
            if gram not in train_loss.keys():
                train_loss[gram] = AverageMeter()
            train_recall[gram].update(recall, n=batch_input['pair_objects'].size(0))
            train_precision[gram].update(precision, n=batch_input['pair_objects'].size(0))
            train_loss[gram].update(loss[gram].data[0], n=batch_input['pair_objects'].size(0))
        # Loss reg
        if opt.use_analogy:
            if 'reg' not in train_loss.keys():
                train_loss['reg'] = AverageMeter()
            train_loss['reg'].update(loss['reg'].data[0], n=batch_input['pair_objects'].size(0))
        learning_rate = model.optimizer.param_groups[0]['lr']
        if batch_idx % 100 == 0:
            print(
                'Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}\tDone in: {:.2f} sec'
                .format(epoch, batch_idx, len(loader),
                        100. * batch_idx / len(loader),
                        sum(loss.values()).data[0],
                        (time.time() - start_time)))
            start_time = time.time()
        # Record logs in tensorboard
        if model.ite % 500 == 0:
            batch_time /= 500
            total_train_loss = 0
            if opt.use_analogy:
                total_train_loss = train_loss[
                    'sro'].avg + opt.lambda_reg * train_loss['reg'].avg
            else:
                for _, val in train_loss.iteritems():
                    total_train_loss += val.avg
            # Register in logger
            tb_logger[split].log_value('epoch', epoch, model.ite)
            tb_logger[split].log_value('loss', total_train_loss, model.ite)
            tb_logger[split].log_value('batch_time', batch_time, model.ite)
            tb_logger[split].log_value('learning_rate', learning_rate, model.ite)
            tb_logger[split].log_value('weight_decay', opt.weight_decay, model.ite)
            for gram in tp_class.keys():
                tb_logger[split].log_value(gram + '_loss', train_loss[gram].avg, model.ite)
                tb_logger[split].log_value(gram + '_mean_recall',
                                           100. * train_recall[gram].avg, model.ite)
                tb_logger[split].log_value(gram + '_mean_precision',
                                           100. * train_precision[gram].avg, model.ite)
            # Analogy loss
            if opt.use_analogy:
                tb_logger[split].log_value('loss_reg', train_loss['reg'].avg, model.ite)
            batch_time = 0
        model.ite += 1
        # NOTE(review): loss meters are reset every batch (indentation
        # reconstructed from a flattened source), so `.avg` equals the last
        # batch's value when logged above — confirm against the original file.
        for gram in tp_class.keys():
            train_loss[gram].reset()
        if opt.use_analogy:
            train_loss['reg'].reset()
def evaluate(epoch, split):
    """Evaluate one pass over loaders[split], logging per-gram metrics.

    Legacy Python-2 / PyTorch<0.4 code: uses Variable, `.data[0]`,
    `dict.iteritems`, and `np.mean(map(...))` — the latter two break under
    Python 3 without porting.

    Relies on module globals: loaders, model, opt, tb_logger.

    Returns:
        (loss_gram, precision_gram, recall_gram) keyed per gram.  Note
        loss_gram holds float averages while precision_gram / recall_gram
        hold the AverageMeter objects themselves.
    """
    model.eval()
    batch_time = 0
    test_loss = {}
    test_recall = {}
    test_precision = {}
    loader = loaders[split]
    start = time.time()
    for batch_idx, batch_input in enumerate(loader):
        for key in batch_input.keys():
            if opt.use_gpu:
                batch_input[key] = Variable(batch_input[key].cuda())
            else:
                batch_input[key] = Variable(batch_input[key])
        # Eval
        loss, tp_class, fp_class, num_pos_class = model.val_(batch_input)
        batch_time += time.time() - start
        start = time.time()
        # Performance per gram
        for gram in tp_class.keys():
            # nanmean skips classes with no positives (0/0 -> nan).
            recall = np.nanmean(tp_class[gram].numpy() / num_pos_class[gram].numpy())
            precision = np.nanmean(
                tp_class[gram].numpy() /
                (tp_class[gram].numpy() + fp_class[gram].numpy()))
            if gram not in test_recall.keys():
                test_recall[gram] = AverageMeter()
            if gram not in test_precision.keys():
                test_precision[gram] = AverageMeter()
            if gram not in test_loss.keys():
                test_loss[gram] = AverageMeter()
            test_recall[gram].update(recall, n=batch_input['pair_objects'].size(0))
            test_precision[gram].update(precision, n=batch_input['pair_objects'].size(0))
            test_loss[gram].update(loss[gram].data[0], n=batch_input['pair_objects'].size(0))
        # Loss analogy
        if opt.use_analogy:
            if 'reg' not in test_loss.keys():
                test_loss['reg'] = AverageMeter()
            test_loss['reg'].update(loss['reg'].data[0], n=batch_input['pair_objects'].size(0))
    # Save total loss on test
    total_test_loss = 0
    if opt.use_analogy:
        total_test_loss = test_loss[
            'sro'].avg + opt.lambda_reg * test_loss['reg'].avg
    else:
        for _, val in test_loss.iteritems():
            total_test_loss += val.avg
    tb_logger[split].log_value('epoch', epoch, model.ite)
    tb_logger[split].log_value('loss', total_test_loss, model.ite)
    tb_logger[split].log_value('batch_time', batch_time / len(loader), model.ite)
    # Total performance per gram
    recall_gram = {}
    loss_gram = {}
    precision_gram = {}
    recall_gram = {}  # (duplicate initialisation kept verbatim)
    for gram in tp_class.keys():
        tb_logger[split].log_value(gram + '_loss', test_loss[gram].avg, model.ite)
        tb_logger[split].log_value(gram + '_mean_recall',
                                   100. * test_recall[gram].avg, model.ite)
        tb_logger[split].log_value(gram + '_mean_precision',
                                   100. * test_precision[gram].avg, model.ite)
        recall_gram[gram] = test_recall[gram]
        precision_gram[gram] = test_precision[gram]
        loss_gram[gram] = test_loss[gram].avg
    print('{} set: Average loss: {:.4f}, Recall: ({:.0f}%)'.format(split, sum(loss_gram.values()), \
        100. * np.mean(map((lambda x:x.avg), test_recall.values()))))
    for gram in tp_class.keys():
        test_loss[gram].reset()
    if opt.use_analogy:
        test_loss['reg'].reset()
    return loss_gram, precision_gram, recall_gram
dset_name = os.path.split(opt.dataroot)[-1] datafile = os.path.join(opt.dataroot, '..', f'{dset_name}_stats', dset_name) sampler = ImageSampler(netG, opt) get_metrics = prepare_inception_metrics(dataloader, datafile, False, opt.num_inception_images, no_is=False) losses_D = [] losses_G = [] losses_A = [] losses_M = [] losses_F = [] losses_I_mean = [] losses_I_std = [] feature_batches = [] for epoch in range(opt.niter): avg_loss_D = AverageMeter() avg_loss_G = AverageMeter() avg_loss_A = AverageMeter() avg_loss_M = AverageMeter() feature_batch_counter = 0 tbar_batch_counter = 0 for i, data in enumerate(dataloader, 0): # if save_features, save at the beginning of an epoch if opt.feature_save and epoch % opt.feature_save_every == 0 and feature_batch_counter < opt.feature_num_batches: if len(feature_batches) < opt.feature_num_batches: eval_x, eval_y = data eval_x = eval_x.cuda() feature_batches.append((eval_x, eval_y)) # feature for real eval_x, eval_y = feature_batches[feature_batch_counter] with torch.no_grad():
def train_epoch(epoch, data_loader, model, criterion, optimizer, opt, epoch_logger, batch_logger):
    """Train `model` for one epoch, logging per-batch and per-epoch metrics
    and writing a checkpoint every `opt.checkpoint` epochs.

    Args:
        epoch: current (1-based, per the logging arithmetic) epoch number.
        data_loader: yields (inputs, labels) batches; labels are castable
            to int.
        model / criterion / optimizer: standard training objects.
        opt: options namespace (no_cuda, checkpoint, result_path, arch).
        epoch_logger / batch_logger: objects with a .log(dict) method.
    """
    print('train at epoch {}'.format(epoch))
    model.train()
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    accuracies = AverageMeter()
    end_time = time.time()
    for i, (inputs, labels) in enumerate(data_loader):
        data_time.update(time.time() - end_time)
        labels = list(map(int, labels))
        # add a channel dimension at dim 1
        inputs = torch.unsqueeze(inputs, 1)
        inputs = inputs.type(torch.FloatTensor)
        if not opt.no_cuda:
            # BUG FIX: `async` is a reserved keyword since Python 3.7; the
            # PyTorch argument was renamed to `non_blocking` in 0.4.
            labels = torch.LongTensor(labels).cuda(non_blocking=True)
        inputs = Variable(inputs)
        labels = Variable(labels)
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        acc = calculate_accuracy(outputs, labels)
        # BUG FIX: was losses.update(loss.data, ...), which stores a tensor
        # in the meter; .item() records a plain Python float.
        losses.update(loss.item(), inputs.size(0))
        accuracies.update(acc, inputs.size(0))
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        batch_time.update(time.time() - end_time)
        end_time = time.time()
        batch_logger.log({
            'epoch': epoch,
            'batch': i + 1,
            # NOTE(review): original used (i - 1) while 'batch' uses (i + 1);
            # kept verbatim to preserve logged values — confirm off-by-two.
            'iter': (epoch - 1) * len(data_loader) + (i - 1),
            'loss': losses.val,
            'acc': accuracies.val,
            'lr': optimizer.param_groups[0]['lr']
        })
        print('Epoch: [{0}][{1}/{2}]\t'
              'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
              'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
              'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
              'Acc {acc.val:.3f} ({acc.avg:.3f})'.format(
                  epoch, i + 1, len(data_loader),
                  batch_time=batch_time, data_time=data_time,
                  loss=losses, acc=accuracies))
    epoch_logger.log({
        'epoch': epoch,
        'loss': losses.avg,
        'acc': accuracies.avg,
        'lr': optimizer.param_groups[0]['lr']
    })
    if epoch % opt.checkpoint == 0:
        save_file_path = os.path.join(opt.result_path, 'save_{}.pth'.format(epoch))
        states = {
            'epoch': epoch + 1,
            'arch': opt.arch,
            'state_dict': model.state_dict(),
            'optimizer': optimizer.state_dict(),
        }
        torch.save(states, save_file_path)
def train(train_loader, model, criterion, optimizer, epoch):
    """Train `model` for one epoch with gradient accumulation.

    Steps the optimizer every opt.TRAIN.ACCUM_BATCHES_COUNT batches (and on
    the final step), and stops after opt.TRAIN.STEPS_PER_EPOCH batches.
    Appends epoch averages to the module-level train_losses/train_top1/
    train_top3 lists.

    Args:
        train_loader: iterable of (input_, target) batches.
        model: network to train.
        criterion: loss function.
        optimizer: optimizer shared with the accumulation logic.
        epoch: epoch index, used only for logging.
    """
    logger.info(f'Epoch {epoch}')
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top3 = AverageMeter()
    # switch to train mode
    model.train()
    print("total batches:", len(train_loader))
    num_steps = min(len(train_loader), opt.TRAIN.STEPS_PER_EPOCH)
    end = time.time()
    optimizer.zero_grad()
    for i, (input_, target) in enumerate(train_loader):
        if i >= opt.TRAIN.STEPS_PER_EPOCH:
            break
        # measure data loading time
        data_time.update(time.time() - end)
        # FIX: `.cuda(async=True)` is a SyntaxError on Python >= 3.7;
        # non_blocking=True is the supported replacement.
        target = target.cuda(non_blocking=True)
        # compute output
        output = model(input_)
        loss = criterion(output, target)
        # Gradients accumulate across batches until the step below.
        # NOTE(review): loss is not divided by ACCUM_BATCHES_COUNT, so the
        # effective LR scales with the accumulation factor — confirm intended.
        loss.backward()
        # measure accuracy and record loss
        prec1, prec3 = accuracy(output.data, target, (1, 3))
        losses.update(loss.data.item(), input_.size(0))
        top1.update(prec1.item(), input_.size(0))
        top3.update(prec3.item(), input_.size(0))
        if (i + 1) % opt.TRAIN.ACCUM_BATCHES_COUNT == 0 or i + 1 == opt.TRAIN.STEPS_PER_EPOCH:
            # compute gradient and do optimizer step
            optimizer.step()
            optimizer.zero_grad()
            lr_scheduler.batch_step()
        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()
        if i % opt.TRAIN.PRINT_FREQ == 0:
            logger.info(f'{epoch} [{i}/{num_steps}]\t'
                        f'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                        f'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                        f'Loss {losses.val:.4f} ({losses.avg:.4f})\t'
                        f'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                        f'Prec@3 {top3.val:.3f} ({top3.avg:.3f})')
    train_losses.append(losses.avg)
    train_top1.append(top1.avg)
    train_top3.append(top3.avg)
def test(model, queryloader, galleryloader, ranks=[1, 5, 10, 20]):
    """Evaluate a re-identification model: extract features for the query and
    gallery sets, compute the pairwise Euclidean distance matrix, and report
    CMC / mAP.

    Args:
        model: network returning (clf_outputs_dict, features); features are
            taken from clf_outputs["fc1"].
        queryloader / galleryloader: iterables of (imgs, pids, camids).
        ranks: CMC ranks to print.

    Returns:
        Rank-1 CMC score.
    """
    batch_time = AverageMeter()
    model.eval()
    # FIX: hoist output_fc out of the query loop — it was assigned inside
    # that loop, so an empty queryloader raised NameError in the gallery loop.
    output_fc = "fc1"
    with torch.no_grad():
        qf, q_pids, q_camids = [], [], []
        for batch, (imgs, pids, camids) in enumerate(queryloader):
            imgs = imgs.cuda()
            end = time.time()
            clf_outputs, f = model(imgs)
            batch_time.update(time.time() - end)
            f = clf_outputs[output_fc].data.cpu()
            qf.append(f)
            q_pids.extend(pids)
            q_camids.extend(camids)
        qf = torch.cat(qf, 0)
        q_pids = np.asarray(q_pids)
        q_camids = np.asarray(q_camids)
        print("Extracted features for query set, obtained {}-by-{} matrix".
              format(qf.size(0), qf.size(1)))

        gf, g_pids, g_camids = [], [], []
        for batch, (imgs, pids, camids) in enumerate(galleryloader):
            imgs = imgs.cuda()
            end = time.time()
            clf_outputs, f = model(imgs)
            batch_time.update(time.time() - end)
            f = clf_outputs[output_fc].data.cpu()
            gf.append(f)
            g_pids.extend(pids)
            g_camids.extend(camids)
        gf = torch.cat(gf, 0)
        g_pids = np.asarray(g_pids)
        g_camids = np.asarray(g_camids)
        print("Extracted features for gallery set, obtained {}-by-{} matrix".
              format(gf.size(0), gf.size(1)))

    # FIX: this line was printed twice.
    print("==> BatchTime(s)/BatchSize(img): {:.3f}/{}".format(
        batch_time.avg, test_batch))

    # Squared Euclidean distances: ||q||^2 + ||g||^2 - 2 q.g
    m, n = qf.size(0), gf.size(0)
    distmat = torch.pow(qf, 2).sum(dim=1, keepdim=True).expand(m, n) + \
        torch.pow(gf, 2).sum(dim=1, keepdim=True).expand(n, m).t()
    # FIX: the positional addmm_(beta, alpha, mat1, mat2) overload was
    # removed in modern PyTorch; use keyword beta/alpha.
    distmat.addmm_(qf, gf.t(), beta=1, alpha=-2)
    distmat = distmat.numpy()

    print("Computing CMC and mAP")
    cmc, mAP = evaluate(distmat, q_pids, g_pids, q_camids, g_camids,
                        use_metric_cuhk03=use_metric_cuhk03)

    print("Results ----------")
    print("mAP: {:.1%}".format(mAP))
    print("CMC curve")
    for r in ranks:
        print("Rank-{:<3}: {:.1%}".format(r, cmc[r - 1]))
    print("------------------")
    return cmc[0]
def test(loader, save_flag, epoch):
    """Evaluation loop with several mutually exclusive modes selected by `args`:

    - default: supervised eval (plus optional self-supervised losses);
    - args.get_features: dump per-clip features to disk and return;
    - args.save_preds: dump self-supervised predictions to disk and return;
    - args.save_attn: save Grad-CAM attention overlays as images.

    Returns (metric_avg, count): CTC mode returns negated selfsup loss;
    otherwise supervised accuracy if any labeled samples were seen, else
    self-supervised accuracy. Relies on module-level model, selfsup_model,
    args, device, n_gpu, sort_y_vocab, ctc_loss, etc.
    """
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    sup_losses = AverageMeter()
    selfsup_acc = AverageMeter()
    selfsup_losses = AverageMeter()
    accs = AverageMeter()
    if not args.save_attn:
        model.eval()
    else:
        # NOTE(review): model.eval() is NOT called on this branch — confirm
        # whether BN/dropout in train mode is intended for attention maps.
        model.module.fc = selfsup_model[0]
        gc = LayerGradCam(model, model.module.layer4)
    # ignore_index=-1: samples with label -1 carry no supervised signal.
    loss_fn = nn.CrossEntropyLoss(ignore_index=-1)
    if args.selfsup_loss == 'pred_middle':
        selfsup_loss_fn = nn.MSELoss()
    elif args.selfsup_loss == 'sort' or args.selfsup_loss == 'fps':
        selfsup_loss_fn = loss_fn
    elif args.selfsup_loss == 'ctc':
        selfsup_loss_fn = ctc_loss
    end = time.time()
    if save_flag:
        # CSV header; flow_histogram mode adds the raw input column.
        results = [['y', 'y_hat_vec', 'y_hat', 'viz_fn', 'fn', 't_start', 't_end']]
        if args.flow_histogram:
            results = [['x', 'y', 'y_hat_vec', 'y_hat', 'viz_fn', 'fn', 't_start', 't_end']]
    featsarr = []
    predsarr = []
    with tqdm(loader, desc="Test batch iteration", disable=args.local_rank > 0) as t:
        for batch_idx, (xs, ys, (fns, t_starts, t_ends, selfsup_info, *_)) in enumerate(t):
            data_time.update(time.time() - end)
            xs = xs.to(device)
            ys = ys.to(device)
            if args.get_features:
                # Feature-dump mode: collect (feature, filename, times), skip metrics.
                _, feats = model(xs)
                for feat, fn, t_start, t_end in zip(feats.detach().cpu(), fns, t_starts, t_ends):
                    featsarr.append((feat, fn, t_start, t_end))
                continue
            if args.save_preds:
                # Prediction-dump mode for the self-supervised head.
                _, feats = model(xs)
                pred_fps = selfsup_model(feats).argmax(1)
                for pred, fn, t_start, t_end in zip(pred_fps.detach().cpu(), fns, t_starts, t_ends):
                    predsarr.append((pred.item(), fn, t_start.item(), t_end.item()))
                continue
            if args.save_attn:
                # Grad-CAM: attribute the predicted class, upsample the
                # attribution to the clip size, and modulate the input with it.
                with torch.no_grad():
                    y_hats = model(xs)
                if args.local_rank <= 0: ipdb.set_trace()  # NOTE: interactive debugging left in
                yh_argmax = y_hats.argmax(dim=1)
                xs.requires_grad = True
                fps_ys = torch.LongTensor([args.fps_list.index(_) for _ in selfsup_info]).to(device)
                attr = gc.attribute(xs, yh_argmax)
                # assumes clips are (C, 16, 112, 112) — TODO confirm against loader
                up_attr = LayerAttribution.interpolate(attr, (16, 112, 112), interpolate_mode='trilinear').to(torch.float)
                xs_ = torch.stack([unnormalize(x.cpu()) for x in xs])
                acts = xs_.cpu() * up_attr.cpu()
                acts = acts.cpu().detach().clamp(min=0)
                for act, fn, t_s, t_e, yh, y in zip(acts, fns, t_starts, t_ends, yh_argmax.tolist(), fps_ys.tolist()):
                    save_image(act.permute(1, 0, 2, 3), os.path.join(args.save_path, 'input', f'{os.path.splitext(os.path.basename(fn))[0]}_{int(1000*t_s)}_{int(1000*t_e)}_pred{yh}_gt{y}.png'), normalize=True)
                accs.update(accuracy(y_hats, fps_ys)[0].item(), len(fps_ys))
                t.set_postfix(Acc=accs.avg)
                continue
            if args.selfsup_loss:
                if args.selfsup_loss == 'pred_middle' or args.selfsup_loss == 'ctc':
                    # xs is (batch, 3 clips, ...): predict the middle clip's
                    # features from the outer two.
                    _, prev_feats = model(xs[:, 0])
                    y_hats, mid_feats = model(xs[:, 1])
                    _, next_feats = model(xs[:, 2])
                    feats = torch.cat((prev_feats, next_feats), dim=1)
                    pred_mid_feats = selfsup_model(feats)
                    # Mean > -0.999 filters out all-padding clips — presumably
                    # padding is -1 after normalization; verify against loader.
                    valid_pred_locs = (xs[:, 0].mean(dim=(1, 2, 3, 4)) > -0.999) & (
                        xs[:, 2].mean(dim=(1, 2, 3, 4)) > -0.999)
                    if args.selfsup_loss == 'pred_middle':
                        selfsup_loss = selfsup_loss_fn(pred_mid_feats[valid_pred_locs], mid_feats[valid_pred_locs])
                    elif args.selfsup_loss == 'ctc':
                        selfsup_loss = selfsup_loss_fn(pred_mid_feats[valid_pred_locs], mid_feats[valid_pred_locs], feats[valid_pred_locs])
                    selfsup_len = valid_pred_locs.sum().item()
                elif args.selfsup_loss == 'sort':
                    # Shuffle the 3 clips per sample and classify the permutation
                    # (a permutation and its reverse share one class).
                    sort_ys = torch.zeros_like(ys)
                    valid_pred_locs = (xs[:, 0].mean(dim=(1, 2, 3, 4)) > -0.999) & (
                        xs[:, 2].mean(dim=(1, 2, 3, 4)) > -0.999)
                    for i in range(len(xs)):
                        p = torch.randperm(3)
                        xs[i] = xs[i][p]
                        s = ''.join(map(str, p.tolist()))
                        try:
                            sort_ys[i] = sort_y_vocab.index(s)
                        except:
                            sort_ys[i] = sort_y_vocab.index(s[::-1])
                    _, prev_feats = model(xs[:, 0])
                    y_hats, mid_feats = model(xs[:, 1])  # NOTE: y_hats from a shuffled clip — can't co-train with sort
                    _, next_feats = model(xs[:, 2])
                    feats = torch.stack((prev_feats, mid_feats, next_feats), dim=1)
                    pred_perms = selfsup_model(feats)
                    sort_ys[~valid_pred_locs] = -1  # masked by ignore_index
                    selfsup_loss = selfsup_loss_fn(pred_perms, sort_ys)
                    selfsup_len = valid_pred_locs.sum().item()
                    selfsup_acc.update(accuracy(pred_perms[valid_pred_locs], sort_ys[valid_pred_locs])[0].item(), selfsup_len)
                elif args.selfsup_loss == 'fps':
                    # Classify the clip's frame rate.
                    fps_ys = torch.LongTensor([args.fps_list.index(_) for _ in selfsup_info]).to(device)
                    y_hats, feats = model(xs)
                    pred_fps = selfsup_model(feats)
                    selfsup_loss = selfsup_loss_fn(pred_fps, fps_ys)
                    selfsup_len = len(ys)
                    selfsup_acc.update(accuracy(pred_fps, fps_ys)[0].item(), selfsup_len)
                suploss = loss_fn(y_hats, ys)
                loss = suploss + args.selfsup_lambda * selfsup_loss
            else:
                y_hats = model(xs)
                suploss = loss_fn(y_hats, ys)
                loss = suploss
            if n_gpu > 1:
                loss = loss.mean()  # mean() to average on multi-gpu.
            losses.update(loss.item(), len(ys))
            if args.selfsup_loss:
                # Supervised metrics only over labeled (ys != -1) samples.
                if (ys != -1).sum() > 0:
                    sup_losses.update(suploss.item(), (ys != -1).sum().item())
                    accs.update(accuracy(y_hats[ys != -1], ys[ys != -1])[0].item(), len(ys))
                selfsup_losses.update(selfsup_loss.item(), selfsup_len)
            else:
                accs.update(accuracy(y_hats[ys != -1], ys[ys != -1])[0].item(), len(ys))
            batch_time.update(time.time() - end)
            end = time.time()
            d = 0
            if save_flag:
                # TODO for self-supervised losses
                for x, y, y_hat, fn, t_start, t_end in zip(xs, ys, F.softmax(y_hats, dim=1), fns, t_starts, t_ends):
                    fn_ = fn
                    # Synthetic per-rank, per-sample video filename.
                    fn = '{0:02}_{1:010}.mp4'.format(
                        args.local_rank, batch_idx * args.batch_size + d)
                    other = ()
                    if args.flow_histogram:
                        other = (x.tolist(),)
                    results.append(
                        (*other, y.item(), y_hat.tolist(), y_hat.argmax().item(),
                         fn, fn_, t_start.item(), t_end.item()))
                    if args.save_test_vids:
                        x = unnormalize(x.cpu()).permute(1, 2, 3, 0).numpy()
                        tt = ImageSequenceClip(list(x), fps=args.fps).fl_image(make_uint8)
                        tt.write_videofile(os.path.join(args.save_path, 'input', fn), logger=None)
                        tt.close()
                    d += 1
            postfix_kwargs = {}
            if args.selfsup_loss:
                postfix_kwargs = {'SelfsupLoss': selfsup_losses.avg, 'SupLoss': sup_losses.avg}
                if args.selfsup_loss == 'sort' or args.selfsup_loss == 'fps':
                    postfix_kwargs['SelfsupAcc'] = selfsup_acc.avg
            t.set_postfix(
                DataTime=data_time.avg,
                BatchTime=batch_time.avg,
                Loss=losses.avg,
                Acc=accs.avg,
                **postfix_kwargs
            )
    if args.get_features:
        torch.save(featsarr, os.path.join(args.save_path, 'input', 'features_and_fns.pt'))
    if args.save_preds:
        torch.save(predsarr, os.path.join(args.save_path, 'input', 'preds_and_fns.pt'))
    if save_flag == True:
        with open(os.path.join(args.save_path, 'results_{0:06}_{1:03}.csv'.format(args.local_rank, epoch)), 'w') as f:
            wr = csv.writer(f)
            wr.writerows(results)
    if args.selfsup_loss == 'ctc':
        # Negated so that "higher is better" for checkpoint selection.
        return selfsup_losses.avg * -1, selfsup_losses.count
    if accs.count > 0:
        return accs.avg, accs.count
    else:
        return selfsup_acc.avg, selfsup_acc.count
def main(batch_size, continue_training, exp_name, learning_rate, num_epochs, print_freq, run_colab):
    """Train a discriminator (plus image encoder) adversarially against a
    frozen, pre-trained SSD300 detector: real samples are ground-truth
    (box, label) pairs, fake samples are detector outputs.

    Args:
        batch_size: mini-batch size for the PascalVOC loader.
        continue_training: resume the adversarial nets from a checkpoint.
        exp_name: pathlib.Path of the experiment directory.
        learning_rate: Adam learning rate for discriminator + encoder.
        num_epochs: total epochs to train to.
        print_freq: batches between progress prints.
        run_colab: forwarded to create_data_lists.
    """
    # Data
    data_folder = create_data_lists(run_colab)
    # NOTE(review): split='test' used to build the *training* loader — confirm intended.
    train_dataset = PascalVOCDataset(data_folder, split='test', keep_difficult=keep_difficult)
    train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True,
                                               collate_fn=train_dataset.collate_fn, num_workers=workers,
                                               pin_memory=True)  # note that we're passing the collate function here
    # Networks: the detector is loaded from its own checkpoint and kept frozen
    # (only queried under no_grad below).
    checkpoint = torch.load(exp_name / "checkpoint_ssd300.pth.tar", map_location=device)
    print(f"Number of training epochs for detection network: {checkpoint['epoch']}")
    detection_network = checkpoint['model']
    if continue_training:
        # NOTE(review): `exp_name / checkpoint` joins the path with the loaded
        # checkpoint *dict* — this almost certainly should be a filename.
        adversarial_checkpoint = torch.load(exp_name / checkpoint, map_location=device)
        discriminator = adversarial_checkpoint['adversarial_model']
        optimizer = adversarial_checkpoint['optimizer']
        start_epoch = adversarial_checkpoint['epoch']
        # NOTE(review): `image_encoder` is never restored on this branch, so the
        # `.to(device)` line below raises NameError — verify resume path.
        print(f"Continue training of adversarial network from epoch {start_epoch}")
    else:
        start_epoch = 0
        image_encoder = VGGBase()
        discriminator = Discriminator(num_classes)
        optimizer = torch.optim.Adam(list(discriminator.parameters()) + list(image_encoder.parameters()),
                                     lr=learning_rate, weight_decay=1e-5)
    discriminator, image_encoder = discriminator.to(device), image_encoder.to(device)
    loss_function = GANLoss('vanilla').to(device)
    losses = AverageMeter()  # running discriminator loss
    for epoch in range(start_epoch, num_epochs):
        for j, (images, boxes, labels, _) in enumerate(train_loader):
            images = images.to(device)
            _, image_embedding = image_encoder(images)
            # "Real" sample: one random ground-truth box+label per image.
            random_box_indices = [np.random.randint(len(box)) for box in boxes]
            random_boxes = torch.stack([box[random_box_indices[i]] for i, box in enumerate(boxes)]).to(device)
            random_labels = torch.stack([one_hot_embedding(label[random_box_indices[i]], num_classes)
                                         for i, label in enumerate(labels)]).to(device)
            pred_real = discriminator(random_boxes, random_labels, image_embedding)
            loss_real = loss_function(pred_real, 1)
            # "Fake" sample: one random detection from the frozen detector.
            with torch.no_grad():
                predicted_locs, predicted_scores = detection_network.forward(images)
                pred_boxes, pred_labels, _ = detection_network.detect_objects(predicted_locs, predicted_scores,
                                                                              min_score=0.2, max_overlap=0.45,
                                                                              top_k=200)
            random_box_indices = [np.random.randint(len(box)) for box in pred_boxes]
            random_fake_boxes = torch.stack([box[random_box_indices[i]] for i, box in enumerate(pred_boxes)]).to(device)
            random_fake_labels = torch.stack([one_hot_embedding(label[random_box_indices[i]], num_classes)
                                              for i, label in enumerate(pred_labels)]).to(device)
            pred_fake = discriminator(random_fake_boxes, random_fake_labels, image_embedding)
            loss_fake = loss_function(pred_fake, 0)
            total_loss = loss_fake + loss_real
            optimizer.zero_grad()
            total_loss.backward()
            optimizer.step()
            losses.update(total_loss.item(), images.size(0))
            if j % print_freq == 0:
                print('Epoch: [{0}][{1}/{2}]\t'
                      'Loss {loss.val:.4f} ({loss.avg:.4f})\t'.format(epoch, j, len(train_loader), loss=losses))
        save_adversarial_checkpoint(epoch, discriminator, image_encoder, optimizer, exp_name)
def train(loader):
    """One training epoch with optional self-supervised objectives
    (pred_middle / ctc / sort / fps) selected by args.selfsup_loss.

    Returns the epoch metric: CTC mode returns the average selfsup loss;
    otherwise supervised accuracy if any labeled samples were seen, else
    self-supervised accuracy. Relies on module-level model, selfsup_model,
    optimizer, args, device, n_gpu, sort_y_vocab, ctc_loss, amp.
    """
    model.train()
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    sup_losses = AverageMeter()
    selfsup_acc = AverageMeter()
    selfsup_losses = AverageMeter()
    accs = AverageMeter()
    end = time.time()
    # ignore_index=-1: unlabeled samples contribute no supervised loss.
    loss_fn = nn.CrossEntropyLoss(ignore_index=-1)
    if args.selfsup_loss == 'pred_middle':
        selfsup_loss_fn = nn.MSELoss()
    elif args.selfsup_loss == 'sort' or args.selfsup_loss == 'fps':
        selfsup_loss_fn = loss_fn
    elif args.selfsup_loss == 'ctc':
        selfsup_loss_fn = ctc_loss
    with tqdm(loader, desc='Train batch iteration', disable=args.local_rank > 0) as t:
        for batch_idx, (xs, ys, (fns, t_starts, t_ends, selfsup_info, *_)) in enumerate(t):
            data_time.update(time.time() - end)
            xs = xs.to(device)
            ys = ys.to(device)
            if args.selfsup_loss:
                if args.selfsup_loss == 'pred_middle' or args.selfsup_loss == 'ctc':
                    # xs is (batch, 3 clips, ...): predict the middle clip's
                    # features from the two outer clips.
                    _, prev_feats = model(xs[:, 0])
                    y_hats, mid_feats = model(xs[:, 1])
                    _, next_feats = model(xs[:, 2])
                    feats = torch.cat((prev_feats, next_feats), dim=1)
                    pred_mid_feats = selfsup_model(feats)
                    # Mean > -0.999 filters out all-padding clips — presumably
                    # padding is -1 after normalization; verify against loader.
                    valid_pred_locs = (xs[:, 0].mean(dim=(1, 2, 3, 4)) > -0.999) & (
                        xs[:, 2].mean(dim=(1, 2, 3, 4)) > -0.999)
                    if args.selfsup_loss == 'pred_middle':
                        selfsup_loss = selfsup_loss_fn(pred_mid_feats[valid_pred_locs], mid_feats[valid_pred_locs])
                    elif args.selfsup_loss == 'ctc':
                        selfsup_loss = selfsup_loss_fn(pred_mid_feats[valid_pred_locs], mid_feats[valid_pred_locs], feats[valid_pred_locs])
                    selfsup_len = valid_pred_locs.sum().item()
                elif args.selfsup_loss == 'sort':
                    # Shuffle the 3 clips per sample in place and classify the
                    # permutation (a permutation and its reverse share a class).
                    sort_ys = torch.zeros_like(ys)
                    valid_pred_locs = (xs[:, 0].mean(dim=(1, 2, 3, 4)) > -0.999) & (
                        xs[:, 2].mean(dim=(1, 2, 3, 4)) > -0.999)
                    for i in range(len(xs)):
                        p = torch.randperm(3)
                        xs[i] = xs[i][p]
                        s = ''.join(map(str, p.tolist()))
                        try:
                            sort_ys[i] = sort_y_vocab.index(s)
                        except:
                            sort_ys[i] = sort_y_vocab.index(s[::-1])
                    _, prev_feats = model(xs[:, 0])
                    y_hats, mid_feats = model(xs[:, 1])  # NOTE: y_hats from a shuffled clip — can't co-train with sort
                    _, next_feats = model(xs[:, 2])
                    feats = torch.stack((prev_feats, mid_feats, next_feats), dim=1)
                    pred_perms = selfsup_model(feats)
                    sort_ys[~valid_pred_locs] = -1  # masked by ignore_index
                    selfsup_loss = selfsup_loss_fn(pred_perms, sort_ys)
                    selfsup_len = valid_pred_locs.sum().item()
                    selfsup_acc.update(accuracy(pred_perms[valid_pred_locs],
                                                sort_ys[valid_pred_locs])[0].item(), selfsup_len)
                elif args.selfsup_loss == 'fps':
                    # Classify each clip's frame rate.
                    fps_ys = torch.LongTensor([args.fps_list.index(_) for _ in selfsup_info]).to(device)
                    y_hats, feats = model(xs)
                    pred_fps = selfsup_model(feats)
                    selfsup_loss = selfsup_loss_fn(pred_fps, fps_ys)
                    selfsup_len = len(ys)
                    selfsup_acc.update(accuracy(pred_fps, fps_ys)[0].item(), selfsup_len)
                suploss = loss_fn(y_hats, ys)
                loss = suploss + args.selfsup_lambda * selfsup_loss
            else:
                y_hats = model(xs)
                suploss = loss_fn(y_hats, ys)
                loss = suploss
            if n_gpu > 1:
                loss = loss.mean()  # mean() to average on multi-gpu.
            losses.update(loss.item(), len(ys))
            if args.selfsup_loss:
                # Supervised metrics only over labeled (ys != -1) samples.
                if (ys != -1).sum() > 0:
                    sup_losses.update(suploss.item(), (ys != -1).sum().item())
                    accs.update(accuracy(y_hats[ys != -1], ys[ys != -1])[0].item(), len(ys))
                selfsup_losses.update(selfsup_loss.item(), selfsup_len)
            else:
                accs.update(accuracy(y_hats[ys != -1], ys[ys != -1])[0].item(), len(ys))
            # fp16 path uses NVIDIA apex loss scaling.
            if args.fp16:
                with amp.scale_loss(loss, optimizer) as scaled_loss:
                    scaled_loss.backward()
            else:
                loss.backward()
            optimizer.step()
            optimizer.zero_grad()
            batch_time.update(time.time() - end)
            end = time.time()
            postfix_kwargs = {}
            if args.selfsup_loss:
                postfix_kwargs = {'SelfsupLoss': selfsup_losses.avg, 'SupLoss': sup_losses.avg}
                if args.selfsup_loss == 'sort' or args.selfsup_loss == 'fps':
                    postfix_kwargs['SelfsupAcc'] = selfsup_acc.avg
            t.set_postfix(
                DataTime=data_time.avg,
                BatchTime=batch_time.avg,
                Loss=losses.avg,
                Acc=accs.avg,
                **postfix_kwargs
            )
    if args.selfsup_loss == 'ctc':
        return selfsup_losses.avg
    if accs.count > 0:
        return accs.avg
    else:
        return selfsup_acc.avg
def test(epoch, model, criterion, test_loader, run_config, writer, adv=False):
    """Evaluate `model` on `test_loader`, optionally under a PGD attack.

    Args:
        epoch: epoch index used for logging/tensorboard steps.
        model: network to evaluate (set to eval mode).
        criterion: loss on (outputs, targets).
        test_loader: iterable of (data, targets) batches.
        run_config: dict of flags (tensorboard*, use_gpu).
        writer: tensorboardX-style summary writer.
        adv: if True, replace each batch with PGD adversarial examples
            before the forward pass.

    Returns:
        Overall accuracy over the whole dataset.
    """
    logger.info('Test {}'.format(epoch))
    model.eval()
    loss_meter = AverageMeter()
    correct_meter = AverageMeter()
    start = time.time()
    for step, (data, targets) in enumerate(test_loader):
        if run_config['tensorboard_test_images']:
            # Log a single image grid once, at the very first batch/epoch.
            if epoch == 0 and step == 0:
                image = torchvision.utils.make_grid(data, normalize=True, scale_each=True)
                writer.add_image('Test/Image', image, epoch)
        if run_config['use_gpu']:
            data = data.cuda()
            targets = targets.cuda()
        if adv:
            # The attack operates in pixel space, so un-normalize, attack,
            # then re-normalize. Constants are CIFAR mean/std.
            # NOTE(review): mul_/add_/sub_/div_ mutate `data` in place.
            mean = torch.FloatTensor(
                np.array([0.4914, 0.4822, 0.4465])[None, :, None, None]).cuda()
            std = torch.FloatTensor(
                np.array([0.2470, 0.2435, 0.2616])[None, :, None, None]).cuda()
            data = data.mul_(std).add_(mean)
            atk = torchattacks.PGD(
                model, eps=5 / 255, alpha=0.5 / 255, steps=10)
            # for Cifar 10 with SD= 0.0 vs 0.01 --> 30 vs 67
            # for Cifar 100 with SD= 0.0 vs 0.05 --> 14 vs 25
            data = atk(data, targets)
            data = data.sub_(mean).div_(std)
            # end of attack
        with torch.no_grad():
            outputs = model(data)
        loss = criterion(outputs, targets)
        _, preds = torch.max(outputs, dim=1)
        loss_ = loss.item()
        correct_ = preds.eq(targets).sum().item()
        num = data.size(0)
        loss_meter.update(loss_, num)
        # Stores raw correct counts; final accuracy divides the sum by the
        # dataset size below.
        correct_meter.update(correct_, 1)
    accuracy = correct_meter.sum / len(test_loader.dataset)
    logger.info('Epoch {} Loss {:.4f} Accuracy {:.4f}'.format(
        epoch, loss_meter.avg, accuracy))
    elapsed = time.time() - start
    logger.info('Elapsed {:.2f}'.format(elapsed))
    if run_config['tensorboard']:
        if epoch > 0:
            writer.add_scalar('Test/Loss', loss_meter.avg, epoch)
            writer.add_scalar('Test/Accuracy', accuracy, epoch)
            writer.add_scalar('Test/Time', elapsed, epoch)
        if run_config['tensorboard_model_params']:
            for name, param in model.named_parameters():
                # NOTE(review): `global_step` is a module-level global here.
                writer.add_histogram(name, param, global_step)
    return accuracy
def train_epoch(epoch, data_loader, model, criterion, optimizer, logger=None):
    """Run one training epoch, printing per-batch stats and optionally
    logging averages to a tensorboard-style `logger`.

    Args:
        epoch: epoch index used for logging.
        data_loader: iterable of (inputs, targets) batches.
        model: network to train (set to train mode).
        criterion: loss on (outputs, targets).
        optimizer: optimizer stepped once per batch.
        logger: optional object with add_scalar(tag, value, step).
    """
    print('train at epoch {}'.format(epoch))
    model.train()
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    accuracies = AverageMeter()
    end_time = time.time()
    for i, (inputs, targets) in enumerate(data_loader):
        data_time.update(time.time() - end_time)
        if torch.cuda.is_available():
            # FIX: `.cuda(async=True)` is a SyntaxError on Python >= 3.7;
            # also dropped the redundant second `targets.cuda()` call.
            targets = targets.cuda(non_blocking=True)
            inputs = inputs.cuda()
        # synchronize so batch_time measures actual GPU work
        torch.cuda.synchronize()
        outputs = model(inputs)
        loss = criterion(outputs, targets)
        acc = calculate_accuracy(outputs, targets)
        # FIX: `loss.data[0]` raises on PyTorch >= 0.5; use .item().
        losses.update(loss.item(), inputs.size(0))
        accuracies.update(acc, inputs.size(0))
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        torch.cuda.synchronize()
        batch_time.update(time.time() - end_time)
        end_time = time.time()
        print('Train: Epoch: [{0}][{1}/{2}]\t'
              'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
              'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
              'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
              'Acc {acc.val:.3f} ({acc.avg:.3f})'.format(
                  epoch, i + 1, len(data_loader),
                  batch_time=batch_time, data_time=data_time,
                  loss=losses, acc=accuracies))
    if logger is not None:
        current_step = epoch * len(data_loader) + i
        logger.add_scalar('Train/Loss', losses.avg, current_step)
        logger.add_scalar('Train/Acc', accuracies.avg, current_step)
def valid_model(valid_loader, model, vgg, criterion, optimizer, epoch, tb_writer):
    """Validate an inpainting model: L1 hole/valid losses plus VGG-based
    perceptual (content/style) and total-variation losses, logged to
    tensorboard. Returns {'epoch_loss': avg_total_loss}.

    Args:
        valid_loader: yields dicts with 'hole_img', 'ori_img', 'mask'.
        model: inpainting generator (eval mode; no gradients here).
        vgg: feature extractor returning a list of 5 feature maps.
        criterion: per-pixel loss (presumably L1 — see comment below).
        optimizer: unused here; kept for signature parity with the train loop.
        epoch: epoch index for tensorboard steps.
        tb_writer: tensorboard summary writer.
    """
    losses = AverageMeter()
    hole_losses = AverageMeter()
    valid_losses = AverageMeter()
    style_losses = AverageMeter()
    content_losses = AverageMeter()
    tv_losses = AverageMeter()
    # Per-VGG-layer style loss meters (layers 1..5).
    s1 = AverageMeter()
    s2 = AverageMeter()
    s3 = AverageMeter()
    s4 = AverageMeter()
    s5 = AverageMeter()
    # evaluation mode: no dropout/BN updates, and no gradients below
    model.eval()
    vgg.eval()
    pbar = tqdm(valid_loader)
    for i, data in enumerate(pbar):
        inputs = data['hole_img'].float()
        labels = data['ori_img'].float()
        ori_img = labels.clone()
        # mask: 1 for the hole and 0 for others
        masks = data['mask'].float()
        inputs = inputs.to(config.device)
        labels = labels.to(config.device)
        masks = masks.to(config.device)
        ori_img = ori_img.to(config.device)
        with torch.no_grad():
            # pass this batch through our model and get predictions + VGG features
            outputs = model(inputs)
            targets = vgg(ori_img)
            features = vgg(outputs)
        # get content and style loss, accumulated per sample
        content_loss = 0
        style_loss = 0
        # Per-layer style terms; after the loop this holds the LAST sample's
        # values, which is what write_avgs below records — presumably
        # intentional sampling, but verify.
        now_style_loss = [0.0, 0.0, 0.0, 0.0, 0.0]
        for k in range(inputs.size(0)):
            # Content loss on VGG layer 4 (index 3).
            content_loss += torch.sum((features[3][k] - targets[3][k]) ** 2) / 2
            targets_gram = [gram_matrix(f[k]) for f in targets]
            features_gram = [gram_matrix(f[k]) for f in features]
            # Style loss: squared Gram-matrix differences over all layers.
            for j in range(len(targets_gram)):
                now_style_loss[j] = torch.sum((features_gram[j] - targets_gram[j]) ** 2)
                style_loss = style_loss + now_style_loss[j]
        style_loss /= inputs.size(0)
        content_loss /= inputs.size(0)
        style_losses.update(style_loss.item(), inputs.size(0))
        content_losses.update(content_loss.item(), inputs.size(0))
        # update loss metric; criterion is presumably L1 — confirm at call site
        hole_loss = criterion(outputs * masks, labels * masks)
        valid_loss = criterion(outputs * (1 - masks), labels * (1 - masks))
        hole_losses.update(hole_loss.item(), inputs.size(0))
        valid_losses.update(valid_loss.item(), inputs.size(0))
        # get total variation loss over the hole region
        outputs_hole = outputs * masks
        targets_hole = labels * masks
        # NOTE(review): TV terms mix outputs_hole against targets_hole shifted
        # by one pixel — classic TV uses the output against itself; confirm.
        tv_loss = torch.sum(torch.abs(outputs_hole[:, :, :, 1:] - targets_hole[:, :, :, :-1])) \
            + torch.sum(torch.abs(outputs_hole[:, :, 1:, :] - targets_hole[:, :, :-1, :]))
        tv_loss /= inputs.size(0)
        tv_losses.update(tv_loss.item(), inputs.size(0))
        # total loss: weighted sum using the module-level r*_weight globals
        loss = hole_loss * rHole_Loss_weight + valid_loss * rValid_Loss_weight + \
            style_loss * rStyle_Loss_weight + content_loss * rContent_Loss_weight + \
            tv_loss * rTv_Loss_weight
        losses.update(loss.item(), inputs.size(0))
        write_avgs([s1, s2, s3, s4, s5], now_style_loss)
        if i == 0:
            # Log up to 3 example triplets (original, holed, output) per epoch.
            for j in range(min(inputs.size(0), 3)):
                hole_img = data['hole_img'][j]
                ori_img = data['ori_img'][j]
                out_img = outputs[j].detach()
                # min-max-range rescale for display
                out_img = out_img / (torch.max(out_img) - torch.min(out_img))
                tb_writer.add_image('valid/ori_img{}'.format(j), ori_img, epoch)
                tb_writer.add_image('valid/hole_img{}'.format(j), hole_img, epoch)
                tb_writer.add_image('valid/out_img{}'.format(j), out_img, epoch)
        pbar.set_description("EPOCH[{}][{}/{}]".format(epoch, i, len(valid_loader)))
        pbar.set_postfix(
            loss="LOSS:{:.4f}".format(losses.avg))
    # Epoch-level scalars use the (non-r-prefixed) *_Loss_weight globals.
    tb_writer.add_scalar('valid/epoch_loss', losses.avg, epoch)
    tb_writer.add_scalar('valid/hole_loss', hole_losses.avg * Hole_Loss_weight, epoch)
    tb_writer.add_scalar('valid/valid_loss', valid_losses.avg * Valid_Loss_weight, epoch)
    tb_writer.add_scalar('valid/style_loss', style_losses.avg * Style_Loss_weight, epoch)
    tb_writer.add_scalar('valid/content_loss', content_losses.avg * Content_Loss_weight, epoch)
    tb_writer.add_scalar('valid/tv_loss', tv_losses.avg * Tv_Loss_weight, epoch)
    write_tensor(t_perceptual_style_name, [s1, s2, s3, s4, s5], epoch, tb_writer)
    torch.cuda.empty_cache()
    outspects = {
        'epoch_loss': losses.avg,
    }
    return outspects
def train(train_source_loader, train_target_loader, val_loader, student_model, criterion,
          student_optimizer, lr_scheduler, start_iter, tb_logger, teacher_model=None,
          teacher_optimizer=None):
    """Mean-teacher style domain-adaptation training: supervised loss on the
    source batch plus consistency (augmentation) and class-balance losses
    between student and teacher predictions on the target batch.

    Args:
        train_source_loader / train_target_loader: iterated in lockstep.
        val_loader: used every args.val_freq steps to validate the teacher.
        student_model: trained by backprop; teacher_model is advanced by
            teacher_optimizer.step() (presumably an EMA update — confirm).
        criterion: supervised classification loss.
        student_optimizer: optimizer for the student.
        lr_scheduler: stepped per iteration with the global step.
        start_iter: global step offset for logging/scheduling.
        tb_logger: tensorboard logger (may be None for validation scalars).
    """
    global best_prec1

    batch_time = AverageMeter(10)
    data_time = AverageMeter(10)
    losses = AverageMeter(10)
    top1 = AverageMeter(10)
    top5 = AverageMeter(10)
    losses_bal = AverageMeter(10)
    losses_aug = AverageMeter(10)
    confs_mask_count = AverageMeter(10)  # NOTE(review): never updated below

    student_model.train()  # switch to train mode

    logger = logging.getLogger('global_logger')
    criterion_bce = nn.BCELoss()          # NOTE(review): unused in this loop
    criterion_uk = nn.BCEWithLogitsLoss() # NOTE(review): unused in this loop
    end = time.time()

    eval_output = []
    eval_target = []
    eval_uk = []
    for i, (batch_source, batch_target) in enumerate(
            zip(train_source_loader, train_target_loader)):
        input_source, label_source = batch_source
        # target batch carries two augmented views: one for student, one for teacher
        input_target, input_target1, label_target = batch_target
        curr_step = start_iter + i
        lr_scheduler.step(curr_step)
        current_lr = lr_scheduler.get_lr()[0]
        # measure data loading time
        data_time.update(time.time() - end)

        # FIX: `.cuda(async=True)` is a SyntaxError on Python >= 3.7
        # ('async' became a keyword); non_blocking=True is the replacement.
        label_source = Variable(label_source).cuda(non_blocking=True)
        input_source = Variable(input_source).cuda()
        input_target = Variable(input_target).cuda(non_blocking=True)
        input_target1 = Variable(input_target1).cuda(non_blocking=True)

        # compute output for source data
        source_output, source_output2 = student_model(input_source)
        softmax_source_output = F.softmax(source_output, dim=1)

        # loss for known class
        if args.double_softmax:
            loss_cls = criterion(softmax_source_output, label_source)
        else:
            loss_cls = criterion(source_output, label_source)
        loss = loss_cls

        # consistency between student (view 0) and teacher (view 1) on target data
        stu_out, stu_out2 = student_model(input_target)
        tea_out, tea_out2 = teacher_model(input_target1)
        loss_aug, conf_mask, loss_cls_bal = \
            utils.compute_aug_loss(stu_out, tea_out, args.aug_thresh,
                                   args.cls_balance, args)
        conf_mask_count = torch.sum(conf_mask) / args.batch_size
        loss_aug = torch.mean(loss_aug)
        loss += args.lambda_aug * loss_aug
        loss += args.cls_balance * args.lambda_aug * loss_cls_bal

        student_optimizer.zero_grad()
        loss.backward()
        student_optimizer.step()
        # teacher is updated via its own optimizer (no gradient step here)
        teacher_optimizer.step()

        losses_aug.update(loss_aug.item())
        losses_bal.update(loss_cls_bal.item())

        # measure accuracy and record loss
        prec1, prec5 = accuracy(softmax_source_output.data, label_source, topk=(1, 5))
        losses.update(loss_cls.item())
        top1.update(prec1.item())
        top5.update(prec5.item())

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if curr_step % args.print_freq == 0:
            tb_logger.add_scalar('loss_train', losses.avg, curr_step)
            tb_logger.add_scalar('acc1_train', top1.avg, curr_step)
            tb_logger.add_scalar('acc5_train', top5.avg, curr_step)
            tb_logger.add_scalar('lr', current_lr, curr_step)
            print(args.exp_name)
            logger.info('Iter: [{0}/{1}]\t'
                        'Time: {batch_time.val:.3f}\t'
                        'Data: {data_time.val:.3f}\t'
                        'loss: {loss.val:.4f}\t'
                        'loss_aug: {loss_aug.val:.4f}\t'
                        'loss_bal: {loss_bal.val:.4f}\t'
                        'Prec@1: {top1.val:.3f}\t'
                        'Prec@5: {top5.val:.3f}\t'
                        'lr: {lr:.6f}'.format(curr_step, len(train_source_loader),
                                              batch_time=batch_time,
                                              data_time=data_time, loss=losses,
                                              loss_aug=losses_aug,
                                              loss_bal=losses_bal, top1=top1,
                                              top5=top5, lr=current_lr))

        if (curr_step + 1) % args.val_freq == 0:
            # validate the TEACHER model; its weights are also what we checkpoint
            val_loss, prec1, prec5 = validate(val_loader, teacher_model, criterion)
            if not tb_logger is None:
                tb_logger.add_scalar('loss_val', val_loss, curr_step)
                tb_logger.add_scalar('acc1_val', prec1, curr_step)
                tb_logger.add_scalar('acc5_val', prec5, curr_step)
            # remember best prec@1 and save checkpoint
            is_best = prec1 > best_prec1
            best_prec1 = max(prec1, best_prec1)
            logger.info("best val prec1 {}".format(best_prec1))
            save_checkpoint(
                {
                    'step': curr_step,
                    'arch': args.arch,
                    'state_dict': teacher_model.state_dict(),
                    'best_prec1': best_prec1,
                    'student_optimizer': student_optimizer.state_dict(),
                }, is_best, args.save_path + '/ckpt')
def val_emotion(encoder, decoder, vocab, criterion, data_loaders, tags):
    """Validate a captioning encoder/decoder on one loader per emotion tag.

    For each tag: accumulates loss and top-5 accuracy over packed targets,
    computes a corpus BLEU-4 over de-tokenized references/hypotheses, and
    prints one greedy sample caption.

    Args:
        encoder / decoder: captioning model halves (set to eval mode).
        vocab: has word2idx/idx2word with '<start>'/'<end>' tokens.
        criterion: loss over packed (outputs, targets).
        data_loaders: one loader per tag, aligned with `tags`.
        tags: sequence of tag names (only its length is used).

    Returns:
        (last_batch_time, top5_avgs, loss_avgs, bleu4s) — one metric list
        entry per tag.
    """
    decoder.eval()
    encoder.eval()
    batch_time = AverageMeter()
    losses = [AverageMeter() for _ in range(len(tags))]
    top5accs = [AverageMeter() for _ in range(len(tags))]
    bleu4s = []
    # FIX: the timer previously lived in a variable named `start`, which was
    # clobbered by `start = vocab.word2idx['<start>']` inside the loop, so
    # batch_time recorded (time.time() - token_id) from the second batch on.
    # The timer now has its own name and is reset every iteration.
    start_time = time.time()
    start_token = vocab.word2idx['<start>']
    end_token = vocab.word2idx['<end>']
    for j in range(len(tags)):
        # references (true captions) for calculating BLEU-4 score
        references = list()
        # hypotheses (predictions)
        hypotheses = list()
        for i, (images, captions, lengths, all_captions) in enumerate(data_loaders[j]):
            # Set mini-batch dataset
            images = images.to(device)
            captions = captions.to(device)
            packed_targets = pack_padded_sequence(input=captions, lengths=lengths, batch_first=True)
            targets = packed_targets.data
            # Forward only — no optimization during validation
            with torch.no_grad():
                features = encoder(images)
                outputs = decoder(captions, lengths, features, teacher_forcing_ratio=0)
                loss = criterion(outputs, targets)
            # Keep track of metrics
            losses[j].update(loss.item(), sum(lengths))
            top5 = accuracy(outputs, targets, 5)
            top5accs[j].update(top5, sum(lengths))
            batch_time.update(time.time() - start_time)
            start_time = time.time()
            # Un-pack the scores back to padded (batch, seq, vocab) form
            scores = outputs.clone()
            scores = PackedSequence(scores, packed_targets.batch_sizes)
            scores = pad_packed_sequence(scores, batch_first=True)
            # Strip <start>/<end> tokens from every reference caption
            all_caps = deepcopy(all_captions)
            for caps in all_caps:
                caps = [c.long().tolist() for c in caps]
                caps = [[w for w in c if w != start_token and w != end_token] for c in caps]
                references.append(caps)
            # Greedy per-step argmax hypotheses, truncated to true lengths
            preds = list()
            for s, l in zip(scores[0], scores[1]):
                _, pred = torch.max(s, dim=1)
                pred = pred.tolist()[:l]
                pred = [w for w in pred if w != start_token and w != end_token]
                preds.append(pred)
            hypotheses.extend(preds)
            assert len(references) == len(hypotheses)
            # free GPU memory between batches
            del images
            del captions
            del lengths
            del all_captions
            del packed_targets
            del outputs
            torch.cuda.empty_cache()
        # Calculate BLEU-4 scores
        bleu4 = corpus_bleu(references, hypotheses)
        bleu4s.append(bleu4)
        # Print one greedy sample from the last batch's first image
        feature = features[0].unsqueeze(0)
        sampled_ids = decoder.sample(feature, start_token=start_token, end_token=end_token)
        sampled_ids = sampled_ids[0].cpu().numpy()
        # Convert word_ids to words
        sampled_caption = []
        for word_id in sampled_ids:
            word = vocab.idx2word[word_id]
            sampled_caption.append(word)
            if word == '<end>':
                break
        print(sampled_caption)
    top5accs = [top5acc.avg for top5acc in top5accs]
    losses = [loss.avg for loss in losses]
    return batch_time.val, top5accs, losses, bleu4s
def _train_loop_epoch(model, data_loader, step_op, optimizer, global_iter, writer, tb_suffix, args):
    """ The inner training loop of an epoch.
    :param model: The PyTorch nn.Module to train
    :param data_loader: A PyTorch data loader that is used to provide minibatches
    :param step_op: This should either by 'validation_loss' to compute validation losses or
        'update_op' to update weights in the network (as a side effect of the 'update_op' function)
    :param optimizer: A PyTorch optimizer (just passed to the step_op)
    :param global_iter: The current global step (n.b. this is a local variable)
    :param writer: A tensorboardX summary writer
    :param tb_suffix: A suffix to append to any tensorboard logging (to differentiate between
        train and val plots in tensorboard).
    :param args: Argparser arguments, used to provide model specific parameters.
    :return: A dictionary of losses, keyed by strings, the 'name' for each loss.
    """
    # Average meters for the losses and times, for the progress bar
    batch_total_time = AverageMeter()
    data_load_time = AverageMeter()
    avg_losses_dict = defaultdict(AverageMeter)
    bar = Bar('Processing', max=len(data_loader))
    iter_end_time = time.time()
    batch_idx = 0  # renamed from `iter`, which shadowed the builtin
    for minibatch_data in data_loader:
        # Compute the time needed to load the minibatch
        data_load_time.update(time.time() - iter_end_time)
        # Make a step
        losses = step_op(model, optimizer, minibatch_data, global_iter, args)
        global_iter += 1
        batch_idx += 1
        batch_total_time.update(time.time() - iter_end_time)
        # BUGFIX: accumulate the per-minibatch losses; the original never
        # updated avg_losses_dict, so the function always returned {}.
        for key in losses:
            avg_losses_dict[key].update(losses[key])
        # Tensorboard plotting, logging per minibatch
        if global_iter % args.tb_log_freq == 0:
            for key in losses:
                # BUGFIX: the Python 2 `string.join` helper was removed in
                # Python 3; use str.join instead.
                scalar_name = ''.join([key, tb_suffix])
                writer.add_scalar(scalar_name, losses[key], global_iter)
        # Update averages and progress bar
        prog_str_list = []
        prog_str_list.append(
            '({batch}/{size}) Data: {data:.6f}s | Batch: {bt:.3f}s | Total: {total:} | ETA: {eta:}'
            .format(batch=batch_idx + 1, size=len(data_loader), data=data_load_time.val,
                    bt=batch_total_time.val, total=bar.elapsed_td, eta=bar.eta_td))
        bar.suffix = ''.join(prog_str_list)
        bar.next()
        # update the time for the next iteration
        iter_end_time = time.time()
    bar.finish()
    # return the average losses (as floats)
    return {key: avg_losses_dict[key].avg for key in avg_losses_dict}
def train_denoiser(train_loader, val_loader, model, criterion, optimizer, epoch, args):
    """Run one training epoch of the denoiser, then validate and checkpoint.

    The learning rate follows a per-step cosine decay over the whole run.
    `criterion[1]` is applied between the model output and the clean image
    (index 1 along dim 1 of each sample pair). Uses the module-level `writer`,
    `validate_denoiser` and `save_model`.
    """
    steps_per_epoch = len(train_loader)
    num_iters = steps_per_epoch
    lr = 0.0
    # meters for timing and loss
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    model.train()
    tick = time.time()
    for step, (images, target, image_id) in enumerate(train_loader):
        # cosine learning rate decay across all epochs
        progress = (epoch * num_iters + step) / float(args.epochs * num_iters)
        lr = 0.5 * args.lr * (1 + math.cos(progress * math.pi))
        for group in optimizer.param_groups:
            group['lr'] = lr
            group['weight_decay'] = args.weight_decay
        # time spent waiting on the data loader
        data_time.update(time.time() - tick)
        if args.gpu >= 0:
            images = images.cuda(args.gpu, non_blocking=True)
            target = target.cuda(args.gpu, non_blocking=True)
            image_id = image_id.cuda(args.gpu, non_blocking=True)
        # dim-1 index 0 feeds the network; index 1 is the clean reconstruction
        # target (named input_clean upstream)
        noisy = images[:, 0, :, :, :]
        clean = images[:, 1, :, :, :]
        output = model(noisy)
        loss = criterion[1](output, clean)
        losses.update(loss.item(), noisy.size(0))
        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        # measure elapsed time (synchronize so GPU work is included)
        if args.gpu >= 0:
            torch.cuda.synchronize()
        batch_time.update(time.time() - tick)
        tick = time.time()
        # periodic console + tensorboard logging
        if step % args.print_freq == 0:
            print('Epoch: [{0}][{1}/{2}]\t'
                  'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                  'Loss {loss.val:.2f} ({loss.avg:.2f}))'.format(
                      epoch + 1, step, len(train_loader),
                      batch_time=batch_time, data_time=data_time, loss=losses))
            writer.add_scalar('data/training_loss', losses.val, epoch * num_iters + step)
            writer.add_scalar('data/learning_rate', lr, epoch * num_iters + step)
    # validate, report PSNR, and checkpoint
    avg_psnr = validate_denoiser(val_loader, model, criterion, args)
    print("===> Avg. PSNR: {:.4f} dB".format(avg_psnr))
    model.train()
    state_dic = {'state_dict': model.state_dict()}
    save_model(state_dic, args.experiment + "/models")
    print("Epoch {:d} finished with lr={:f}".format(epoch + 1, lr))
    return
def train(model, device, train_loader, val_loader, comment):
    """Train the outfit-compatibility model and checkpoint the best AUC.

    Trains with BCE on the compatibility score plus weighted auxiliary losses
    (vse / tmasks / features), then validates each epoch by AUC, accuracy,
    and positive-class statistics.

    :param model: network returning (score, vse_loss, tmasks_loss, features_loss)
    :param device: torch device for the model and batch tensors
    :param train_loader: loader yielding
        (lengths, images, names, offsets, set_ids, labels, is_compat) batches
    :param val_loader: validation loader with the same batch layout
    :param comment: tag forwarded to BestSaver for checkpoint naming
    """
    model = model.to(device)
    criterion = nn.BCELoss()
    optimizer = torch.optim.SGD(model.parameters(), lr=1e-2, momentum=0.9)
    scheduler = lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.5)
    saver = BestSaver(comment)
    epochs = 50
    for epoch in range(1, epochs + 1):
        logging.info("Train Phase, Epoch: {}".format(epoch))
        total_losses = AverageMeter()
        clf_losses = AverageMeter()
        vse_losses = AverageMeter()
        # Train phase
        model.train()
        for batch_num, batch in enumerate(train_loader, 1):
            lengths, images, names, offsets, set_ids, labels, is_compat = batch
            images = images.to(device)
            # Forward
            output, vse_loss, tmasks_loss, features_loss = model(images, names)
            # BCE Loss
            target = is_compat.float().to(device)
            output = output.squeeze(dim=1)
            clf_loss = criterion(output, target)
            # Sum all losses up (auxiliary losses are down-weighted)
            features_loss = 5e-3 * features_loss
            tmasks_loss = 5e-4 * tmasks_loss
            total_loss = clf_loss + vse_loss + features_loss + tmasks_loss
            # Update Recoder
            total_losses.update(total_loss.item(), images.shape[0])
            clf_losses.update(clf_loss.item(), images.shape[0])
            vse_losses.update(vse_loss.item(), images.shape[0])
            # Backpropagation
            model.zero_grad()
            total_loss.backward()
            optimizer.step()
            if batch_num % 10 == 0:
                logging.info(
                    "[{}/{}] #{} clf_loss: {:.4f}, vse_loss: {:.4f}, features_loss: {:.4f}, tmasks_loss: {:.4f}, total_loss:{:.4f}"
                    .format(epoch, epochs, batch_num, clf_losses.val, vse_losses.val,
                            features_loss, tmasks_loss, total_losses.val))
        logging.info("Train Loss (clf_loss): {:.4f}".format(clf_losses.avg))
        # BUGFIX: step the LR scheduler AFTER this epoch's optimizer steps.
        # Calling it at the top of the epoch (as before) violates the
        # PyTorch >= 1.1 ordering and skips the schedule's first stage.
        scheduler.step()
        # Valid Phase
        logging.info("Valid Phase, Epoch: {}".format(epoch))
        model.eval()
        clf_losses = AverageMeter()
        outputs = []
        targets = []
        for batch_num, batch in enumerate(val_loader, 1):
            lengths, images, names, offsets, set_ids, labels, is_compat = batch
            images = images.to(device)
            target = is_compat.float().to(device)
            with torch.no_grad():
                output, _, _, _ = model._compute_score(images)
                output = output.squeeze(dim=1)
                clf_loss = criterion(output, target)
            clf_losses.update(clf_loss.item(), images.shape[0])
            outputs.append(output)
            targets.append(target)
        logging.info("Valid Loss (clf_loss): {:.4f}".format(clf_losses.avg))
        outputs = torch.cat(outputs).cpu().data.numpy()
        targets = torch.cat(targets).cpu().data.numpy()
        auc = metrics.roc_auc_score(targets, outputs)
        logging.info("AUC: {:.4f}".format(auc))
        predicts = np.where(outputs > 0.5, 1, 0)
        # BUGFIX: sklearn's signature is (y_true, y_pred); accuracy happens to
        # be symmetric so the value was unchanged, but keep the documented order.
        accuracy = metrics.accuracy_score(targets, predicts)
        logging.info("[email protected]: {:.4f}".format(accuracy))
        positive_loss = -np.log(outputs[targets == 1]).mean()
        logging.info("Positive loss: {:.4f}".format(positive_loss))
        # BUGFIX: normalize by the number of POSITIVE samples; the original
        # divided by len(outputs) (all samples), understating the metric.
        positive_outputs = outputs[targets == 1]
        positive_acc = sum(positive_outputs > 0.5) / len(positive_outputs)
        logging.info("Positive accuracy: {:.4f}".format(positive_acc))
        # Save best model
        saver.save(auc, model.state_dict())
def train(train_loader, model, criterion, optimizer, epoch, log, train_dataset):
    """Run one training epoch on the audio classifier.

    Logs metrics (and raw audio) to tensorboard on the first batch of the
    epoch, prints a progress line every ``args.print_freq`` batches, and
    returns ``(top1.avg, losses.avg)``. Relies on the module-level ``args``,
    ``writer``, ``print_log``, ``accuracy``, ``softmax``, ``get_entropy``,
    ``get_perplexity`` and ``time_string``.
    """
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()
    # switch to train mode
    model.train()
    end = time.time()
    for i, (input, target) in enumerate(train_loader):
        # TODO Do this in dataset specific file, this is just to get it to run.
        #target = np.array([0, 1, 1, 1, 1, 1, 1, 0, 0, 0]) # Just for debugging yesno
        input = train_dataset.preprocess_input(input)
        target = train_dataset.preprocess_input(target)
        target = target.type(torch.LongTensor)
        # NOTE(review): from_numpy on a tensor raises TypeError, not the
        # RuntimeError caught here -- presumably preprocess_input can also
        # return a numpy array; verify against the dataset implementation.
        try:
            target = torch.from_numpy(target)
        except RuntimeError:
            pass
        input = input.view(input.size(0), 1, input.size(1))  # Flip channels and length
        # measure data loading time
        data_time.update(time.time() - end)
        if args.use_cuda:
            # BUGFIX: `async=True` is a SyntaxError on Python 3.7+ (`async`
            # became a keyword); PyTorch's replacement is `non_blocking=True`.
            target = target.cuda(non_blocking=True)
            input = input.cuda()
        input_var = torch.autograd.Variable(input)
        target_var = torch.autograd.Variable(target)
        # compute output
        output = model(input_var)
        output = train_dataset.postprocess_target(output)
        loss = criterion(output, target_var)
        entropy_val = 0
        perplexity_val = 0
        for batch_output in output:
            entropy_val += get_entropy(softmax(batch_output.data.cpu().numpy()))
            perplexity_val += get_perplexity(entropy_val)
        # measure accuracy and record loss
        #prec1, prec5 = accuracy(output.data, target, topk=(1, 5))
        prec1, prec5 = accuracy(output.data, target, topk=(1, 2))  # we don't have 5 classes yet lol
        # BUGFIX: `loss.data[0]` raises on 0-dim tensors since PyTorch 0.4;
        # `.item()` is the supported way to extract the Python scalar.
        losses.update(loss.item(), input.size(0))
        top1.update(prec1[0], input.size(0))
        top5.update(prec5[0], input.size(0))
        if i == 0:
            writer.add_scalar('Training Loss', loss.item(), epoch)
            writer.add_scalar('Accuracy Top 1', prec1[0], epoch)
            writer.add_scalar('Accuracy Top 5', prec5[0], epoch)
            writer.add_scalar('Entropy', entropy_val, epoch)
            writer.add_scalar('Perplexity', perplexity_val, epoch)
            for input_index in range(len(input)):
                audio = input[input_index][0]
                writer.add_audio("Training: Epoch" + str(epoch) + " batch number " + str(input_index),
                                 audio, sample_rate=16000)
        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()
        if i % args.print_freq == 0:
            print_log(' Epoch: [{:03d}][{:03d}/{:03d}] '
                      'Time {batch_time.val:.3f} ({batch_time.avg:.3f}) '
                      'Data {data_time.val:.3f} ({data_time.avg:.3f}) '
                      'Loss {loss.val:.4f} ({loss.avg:.4f}) '
                      'Prec@1 {top1.val:.3f} ({top1.avg:.3f}) '
                      'Prec@5 {top5.val:.3f} ({top5.avg:.3f}) '.format(
                          epoch, i, len(train_loader), batch_time=batch_time,
                          data_time=data_time, loss=losses, top1=top1,
                          top5=top5) + time_string(), log)
    print_log(' **Train** Prec@1 {top1.avg:.3f} Prec@5 {top5.avg:.3f} Error@1 {error1:.3f}'.format(top1=top1, top5=top5, error1=100-top1.avg), log)
    return top1.avg, losses.avg
def train(train_loader, model, criterion, optimizer, epoch):
    """Run one training epoch of the image classifier.

    Prints a progress line every ``args.print_freq`` batches, an epoch summary
    at the end, appends the epoch's averages to the module-level
    ``train_logger``, and returns ``losses.avg``. Relies on the module-level
    ``args``, ``accuracy``, ``AverageMeter`` and ``train_logger``.
    """
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()
    # switch to train mode
    model.train()
    end = time.time()
    for i, (input, target) in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - end)
        # BUGFIX: `async=True` is a SyntaxError on Python 3.7+ (`async` became
        # a keyword); PyTorch's replacement is `non_blocking=True`.
        target = target.cuda(non_blocking=True)
        input_var = torch.autograd.Variable(input).cuda()
        target_var = torch.autograd.Variable(target).cuda()
        # compute output
        output = model(input_var)
        loss = criterion(output, target_var)
        # measure accuracy and record loss
        prec1, prec5 = accuracy(output.data, target, topk=(1, 5))
        losses.update(loss.item(), input.size(0))
        top1.update(prec1.item(), input.size(0))
        top5.update(prec5.item(), input.size(0))
        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()
        if i % args.print_freq == 0:
            print('Epoch: [{0}][{1}/{2}]\t'
                  'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                  'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                  'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                  'Prec@5 {top5.val:.3f} ({top5.avg:.3f})\t'.format(
                      epoch, i, len(train_loader), batch_time=batch_time,
                      data_time=data_time, loss=losses, top1=top1, top5=top5))
    print('Epoch: [{0}]\tLoss {loss.avg:.4f}\tPrec@1 {top1.avg:.3f}\t'
          'Time {batch_time.avg:.3f}\tData {data_time.avg:.3f}\t'.format(
              epoch, loss=losses, top1=top1, batch_time=batch_time, data_time=data_time))
    train_logger.write('{0}\t{1}\t{2}\t{3}\t{4}\n'.format(
        top1.avg, top5.avg, losses.avg, batch_time.avg, data_time.avg))
    return losses.avg