def simclr_fine_tune_train(train_loader, model, criterion, optimizer, epoch):
    """Fine-tune a (SimCLR-pretrained) model with a supervised loss for one epoch.

    Args:
        train_loader: yields dict batches with keys 'image' and 'target'.
        model: network; may return a tuple, in which case element 0 is the logits.
        criterion: supervised loss (e.g. cross-entropy) over (logits, targets).
        optimizer: optimizer updating the model parameters.
        epoch: current epoch index (used for logging only).
    """
    losses = AverageMeter('Loss', ':.4e')
    acc = AverageMeter('Acc@1', ':6.2f')
    progress = ProgressMeter(len(train_loader), [losses, acc],
                             prefix="Epoch: [{}]".format(epoch))

    model.train()

    for i, batch in enumerate(train_loader):
        input_ = batch['image'].cuda(non_blocking=True)
        targets = np_to_tensor_safe(batch['target']).cuda(non_blocking=True)

        output = model(input_)
        if isinstance(output, tuple):  # some backbones also return features
            output = output[0]
        loss = criterion(output, targets)
        losses.update(loss.item())

        # Fix: convert the accuracy tensor to a Python float before logging;
        # handing a CUDA tensor to the meter keeps device memory alive and can
        # break numeric formatting in ProgressMeter.
        acc1 = 100 * torch.mean(
            torch.eq(torch.argmax(output, dim=1), targets).float())
        acc.update(acc1.item())

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if i % 25 == 0:
            progress.display(i)
def selflabel_train(train_loader, model, criterion, optimizer, epoch, ema=None):
    """ Self-labeling based on confident samples """
    losses = AverageMeter('Loss', ':.4e')
    progress = ProgressMeter(len(train_loader), [losses],
                             prefix="Epoch: [{}]".format(epoch))

    model.train()

    for i, batch in enumerate(train_loader):
        images = batch['image'].cuda(non_blocking=True)
        images_augmented = batch['image_augmented'].cuda(non_blocking=True)

        # The prediction on the clean image serves as the (pseudo-)target, so
        # it must not receive gradients; only the augmented view's forward
        # pass is part of the backward graph. (If both forwards were inside
        # no_grad, loss.backward() would fail with "does not require grad".)
        with torch.no_grad():
            output = model(images)[0]
        output_augmented = model(images_augmented)[0]

        loss = criterion(output, output_augmented)
        losses.update(loss.item())

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if ema is not None:  # Apply EMA to update the weights of the network
            ema.update_params(model)
            ema.apply_shadow(model)

        if i % 25 == 0:
            progress.display(i)
def simclr_train(train_loader, model, criterion, optimizer, epoch):
    """ Train according to the scheme from SimCLR
        https://arxiv.org/abs/2002.05709 """
    losses = AverageMeter('Loss', ':.4e')
    progress = ProgressMeter(len(train_loader), [losses],
                             prefix="Epoch: [{}]".format(epoch))

    model.train()

    for i, batch in enumerate(train_loader):
        images = batch['image']
        images_augmented = batch['image_augmented']
        b, c, h, w = images.size()
        # Interleave the two views: (b, 2, c, h, w) -> (2b, c, h, w), so rows
        # 2k and 2k+1 are the two augmentations of sample k.
        input_ = torch.cat(
            [images.unsqueeze(1), images_augmented.unsqueeze(1)], dim=1)
        input_ = input_.view(-1, c, h, w)
        # NOTE(review): `device` is not defined in this function — presumably a
        # module-level global; confirm it is set before this is called.
        input_ = input_.to(device, non_blocking=True)
        # NOTE(review): `targets` is loaded but never used — the contrastive
        # criterion below only consumes `output`.
        targets = batch['target'].to(device, non_blocking=True)

        # Features for the 2b views, regrouped per-sample as (b, 2, d) for the
        # contrastive (NT-Xent-style) loss.
        output = model(input_).view(b, 2, -1)
        loss = criterion(output)
        losses.update(loss.item())

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if i % 25 == 0:
            progress.display(i)
def train(args, train_loader, model, auxiliarynet, criterion, optimizer, epoch):
    """Train the gaze model plus the auxiliary face-feature network for one epoch.

    Returns:
        (average loss, average angular error) over the epoch.
    """
    batch_time = AverageMeter('Time', ':6.3f')
    data_time = AverageMeter('Data', ':6.3f')
    losses = AverageMeter('Loss', ':.4e')
    error = AverageMeter('error', ':6.2f')
    progress = ProgressMeter(len(train_loader), batch_time, data_time, losses,
                             error, prefix="Train Epoch: [{}]".format(epoch))

    # switch to train mode
    model.train()
    auxiliarynet.train()
    end = time.time()
    for batch_idx, (eyes, face, gaze_norm_g, head_norm,
                    rot_vec_norm) in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - end)

        eyes.requires_grad = False
        eyes = eyes.to(args.device)

        face.requires_grad = False
        face = face.to(args.device)

        gaze_norm_g.requires_grad = False
        gaze_norm_g = gaze_norm_g.to(args.device)

        head_norm.requires_grad = False
        head_norm = head_norm.to(args.device)

        rot_vec_norm.requires_grad = False
        rot_vec_norm = rot_vec_norm.to(args.device)

        face_feature = auxiliarynet(face)
        # gaze_pred, head_pred = model(eyes, face_feature)
        gaze_pred = model(eyes, face_feature)
        # print(features.size())

        # Targets are scaled by 100 for the loss; the angular-error call below
        # divides predictions and targets by 100 to undo this.
        head_norm = 100 * head_norm
        gaze_norm_g = 100 * gaze_norm_g
        # gaze_pred columns: [:, 0:2] gaze, [:, 2:4] head — per criterion usage.
        loss = criterion(gaze_norm_g, head_norm, gaze_pred[:, 0:2],
                         gaze_pred[:, 2:4])
        # loss = criterion(gaze_norm_g, head_norm, gaze_pred, None)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        angle_error = mean_angle_error(
            gaze_pred[:, 0:2].cpu().detach().numpy() / 100,
            gaze_norm_g.cpu().detach().numpy() / 100,
            rot_vec_norm.cpu().detach().numpy())
        losses.update(loss.item(), eyes.size(0))
        error.update(angle_error, eyes.size(0))

        if (batch_idx + 1) % args.print_freq == 0:
            progress.print(batch_idx + 1)
    return losses.get_avg(), error.get_avg()
def validate(args, val_dataloader, model, auxiliarynet, epoch):
    """Evaluate the gaze-regression model on the validation set.

    Returns:
        (average loss, average angular error).

    NOTE(review): the loss computation below is commented out, so `losses` is
    never updated and the returned loss average reflects no data — confirm
    this is intentional.
    """
    batch_time = AverageMeter('Time', ':6.3f')
    data_time = AverageMeter('Data', ':6.3f')
    losses = AverageMeter('Loss', ':.4e')
    error = AverageMeter('error', ':6.2f')
    progress = ProgressMeter(len(val_dataloader), batch_time, data_time,
                             losses, error,
                             prefix="Val Epoch: [{}]".format(epoch))

    model.eval()
    # auxiliarynet.eval()
    end = time.time()
    with torch.no_grad():
        end = time.time()
        for i, (patch, gaze_norm_g, head_norm,
                rot_vec_norm) in enumerate(val_dataloader):
            # measure data loading time
            data_time.update(time.time() - end)

            patch = patch.to(args.device)
            gaze_norm_g = gaze_norm_g.to(args.device)
            head_norm = head_norm.to(args.device)
            rot_vec_norm = rot_vec_norm.to(args.device)
            # model = model.to(args.device)

            gaze_pred, _ = model(patch)
            # hp_pred = auxiliarynet(features)

            # Scale factor 10 here (train() uses 100); the error metric
            # divides it back out below.
            head_norm = 10 * head_norm
            gaze_norm_g = 10 * gaze_norm_g
            # loss = criterion(gaze_norm_g, head_norm, gaze_pred[:,0:2], gaze_pred[:,2:4])

            angle_error = mean_angle_error(
                gaze_pred.cpu().detach().numpy() / 10,
                gaze_norm_g.cpu().detach().numpy() / 10,
                rot_vec_norm.cpu().detach().numpy())
            # losses.update(loss.item())
            error.update(angle_error)

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            if (i + 1) % args.print_freq == 0:
                progress.print(i + 1)

            # img = patch.cpu().detach().numpy()[0].deepcopy()
            # to_visualize = draw_gaze(img[0], (0.25 * img.shape[1], 0.25 * img.shape[1]), gaze_pred,
            #                          gaze_norm_g, length=80.0, thickness=1)
            # cv2.imshow('vis', to_visualize)
            # cv2.waitKey(1)
    return losses.get_avg(), error.get_avg()
def scan_train(train_loader, model, criterion, optimizer, epoch,
               update_cluster_head_only=False):
    """ Train w/ SCAN-Loss """
    total_losses = AverageMeter('Total Loss', ':.4e')
    consistency_losses = AverageMeter('Consistency Loss', ':.4e')
    entropy_losses = AverageMeter('Entropy', ':.4e')
    progress = ProgressMeter(
        len(train_loader),
        [total_losses, consistency_losses, entropy_losses],
        prefix="Epoch: [{}]".format(epoch))

    if update_cluster_head_only:
        model.eval()  # No need to update BN
    else:
        model.train()  # Update BN

    for i, batch in enumerate(train_loader):
        # Forward pass
        anchors = batch['anchor'].cuda(non_blocking=True)
        neighbors = batch['neighbor'].cuda(non_blocking=True)

        if update_cluster_head_only:
            # Only calculate gradient for backprop of linear layer.
            # Fix: the forward_pass selectors were placeholder strings
            # ('******'); restored to 'backbone' (frozen feature extractor,
            # run under no_grad) and 'head' (trainable clustering head),
            # matching the SCAN reference implementation.
            with torch.no_grad():
                anchors_features = model(anchors, forward_pass='backbone')
                neighbors_features = model(neighbors, forward_pass='backbone')
            anchors_output = model(anchors_features, forward_pass='head')
            neighbors_output = model(neighbors_features, forward_pass='head')
        else:
            # Calculate gradient for backprop of complete network
            anchors_output = model(anchors)
            neighbors_output = model(neighbors)

        # Loss for every head
        total_loss, consistency_loss, entropy_loss = [], [], []
        for anchors_output_subhead, neighbors_output_subhead in zip(
                anchors_output, neighbors_output):
            total_loss_, consistency_loss_, entropy_loss_ = criterion(
                anchors_output_subhead, neighbors_output_subhead)
            total_loss.append(total_loss_)
            consistency_loss.append(consistency_loss_)
            entropy_loss.append(entropy_loss_)

        # Register the mean loss and backprop the total loss to cover all subheads
        total_losses.update(np.mean([v.item() for v in total_loss]))
        consistency_losses.update(np.mean([v.item() for v in consistency_loss]))
        entropy_losses.update(np.mean([v.item() for v in entropy_loss]))

        total_loss = torch.sum(torch.stack(total_loss, dim=0))

        optimizer.zero_grad()
        total_loss.backward()
        optimizer.step()

        if i % 25 == 0:
            progress.display(i)
def train_segmentation_vanilla(p, train_loader, model, criterion, optimizer,
                               epoch, freeze_batchnorm='none'):
    """ Train a segmentation model in a fully-supervised manner """
    losses = AverageMeter('Loss', ':.4e')
    semseg_meter = SemsegMeter(p['num_classes'],
                               train_loader.dataset.get_class_names(),
                               p['has_bg'], ignore_index=255)
    progress = ProgressMeter(len(train_loader), [losses],
                             prefix="Epoch: [{}]".format(epoch))

    model.train()

    # Optionally freeze BatchNorm statistics after entering train mode.
    if freeze_batchnorm == 'none':
        print('BatchNorm tracks running stats - model put to train mode.')
    elif freeze_batchnorm == 'backbone':
        print('Freeze BatchNorm in the backbone - backbone put to eval mode.')
        model.backbone.eval()  # Put encoder to eval
    elif freeze_batchnorm == 'all':
        # Put complete model to eval
        print('Freeze BatchNorm - model put to eval mode.')
        model.eval()
    else:
        raise ValueError(
            'Invalid value freeze batchnorm {}'.format(freeze_batchnorm))

    for step, sample in enumerate(train_loader):
        images = sample['image'].cuda(non_blocking=True)
        targets = sample['semseg'].cuda(non_blocking=True)

        predictions = model(images)
        loss = criterion(predictions, targets)
        losses.update(loss.item())
        semseg_meter.update(torch.argmax(predictions, dim=1), targets)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if step % 100 == 0:
            progress.display(step)

    return semseg_meter.return_score(verbose=True)
def train(train_loader, model, criterion, optimizer, epoch):
    """One supervised training epoch with loss and top-1/top-5 accuracy tracking."""
    batch_time = AverageMeter('Time', ':6.3f')
    data_time = AverageMeter('Data', ':6.3f')
    losses = AverageMeter('Loss', ':.4e')
    top1 = AverageMeter('Acc@1', ':6.2f')
    top5 = AverageMeter('Acc@5', ':6.2f')
    progress = ProgressMeter(len(train_loader),
                             [batch_time, data_time, losses, top1, top5],
                             prefix="Epoch: [{}]".format(epoch))

    model.train()

    tick = time.time()
    for step, (inputs, target) in enumerate(train_loader):
        data_time.update(time.time() - tick)

        if torch.cuda.is_available():
            inputs = inputs.cuda()
            target = target.cuda()  # non_blocking

        # forward pass
        output = model(inputs)
        # reduction='mean' --> mean loss for per sample
        loss = criterion(output, target)

        # gradient step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # bookkeeping: accuracy + loss meters
        acc1, acc5 = accuracy(output, target, topk=(1, 5))
        n = inputs.size(0)
        losses.update(loss.item(), n)
        top1.update(acc1[0], n)
        top5.update(acc5[0], n)

        # timing
        batch_time.update(time.time() - tick)
        tick = time.time()

        if step % PRINT_FREQ == 0:
            progress.display(step)
def valid(test_loader, model, criterion, epoch):
    """Evaluate the model on the test set; returns the top-1 accuracy average."""
    batch_time = AverageMeter('Time', ':6.3f')
    data_time = AverageMeter('Data', ':6.3f')
    losses = AverageMeter('Loss', ':.4e')
    top1 = AverageMeter('Acc@1', ':6.2f')
    top5 = AverageMeter('Acc@5', ':6.2f')
    progress = ProgressMeter(len(test_loader),
                             [batch_time, data_time, losses, top1, top5],
                             prefix="Epoch: [{}]".format(epoch))

    model.eval()

    end = time.time()
    with torch.no_grad():  # reduce memory consumption for computations
        for i, (inputs, target) in enumerate(test_loader):
            data_time.update(time.time() - end)

            if torch.cuda.is_available():
                inputs = inputs.cuda()
                target = target.cuda()

            output = model(inputs)
            loss = criterion(output, target)
            losses.update(loss.item(), inputs.size(0))

            acc1, acc5 = accuracy(output, target, topk=(1, 5))
            top1.update(acc1[0], inputs.size(0))
            top5.update(acc5[0], inputs.size(0))

            # measure batch time
            batch_time.update(time.time() - end)
            end = time.time()

            if i % PRINT_FREQ == 0:
                progress.display(i)

    # TODO: this should also be done with the ProgressMeter
    print(' * Acc@1 {top1.avg:.3f} Acc@5 {top5.avg:.3f}'.format(top1=top1,
                                                                top5=top5))
    return top1.avg
def train_vanilla(p, train_loader, model, criterion, optimizer, epoch):
    """ Vanilla training with fixed loss weights """
    losses = get_loss_meters(p)  # one AverageMeter per task (dict)
    performance_meter = PerformanceMeter(p)
    progress = ProgressMeter(len(train_loader), [v for v in losses.values()],
                             prefix="Epoch: [{}]".format(epoch))

    model.train()

    for i, batch in enumerate(train_loader):
        # Forward pass
        images = batch['image'].cuda(non_blocking=True)
        targets = {
            task: batch[task].cuda(non_blocking=True)
            for task in p.ALL_TASKS.NAMES
        }
        output = model(images)

        # Measure loss and performance
        loss_dict = criterion(output, targets)
        for k, v in loss_dict.items():
            losses[k].update(v.item())
        performance_meter.update(
            {t: get_output(output[t], t) for t in p.TASKS.NAMES},
            {t: targets[t] for t in p.TASKS.NAMES})

        # Backward: the criterion aggregates the per-task losses under 'total'.
        optimizer.zero_grad()
        loss_dict['total'].backward()
        optimizer.step()

        if i % 25 == 0:
            progress.display(i)

    eval_results = performance_meter.get_score(verbose=True)
    return eval_results
def train(p, train_loader, model, optimizer, epoch, amp):
    """One epoch of pixel-level contrastive training with an auxiliary
    saliency loss (the model itself returns the saliency loss term)."""
    losses = AverageMeter('Loss', ':.4e')
    contrastive_losses = AverageMeter('Contrastive', ':.4e')
    saliency_losses = AverageMeter('CE', ':.4e')
    top1 = AverageMeter('Acc@1', ':6.2f')
    top5 = AverageMeter('Acc@5', ':6.2f')
    progress = ProgressMeter(
        len(train_loader),
        [losses, contrastive_losses, saliency_losses, top1, top5],
        prefix="Epoch: [{}]".format(epoch))
    model.train()

    if p['freeze_layers']:
        model = freeze_layers(model)

    for i, batch in enumerate(train_loader):
        # Forward pass: query/key images plus their saliency masks.
        im_q = batch['query']['image'].cuda(p['gpu'], non_blocking=True)
        im_k = batch['key']['image'].cuda(p['gpu'], non_blocking=True)
        sal_q = batch['query']['sal'].cuda(p['gpu'], non_blocking=True)
        sal_k = batch['key']['sal'].cuda(p['gpu'], non_blocking=True)
        logits, labels, saliency_loss = model(im_q=im_q, im_k=im_k,
                                              sal_q=sal_q, sal_k=sal_k)

        # Use E-Net weighting for calculating the pixel-wise loss:
        # w_c = 1 / ln(1.02 + p_c), where p_c is the empirical class frequency
        # in this batch (classes absent from the batch get p_c = 0).
        uniq, freq = torch.unique(labels, return_counts=True)
        p_class = torch.zeros(logits.shape[1],
                              dtype=torch.float32).cuda(p['gpu'],
                                                        non_blocking=True)
        p_class_non_zero_classes = freq.float() / labels.numel()
        p_class[uniq] = p_class_non_zero_classes
        w_class = 1 / torch.log(1.02 + p_class)
        contrastive_loss = cross_entropy(logits, labels, weight=w_class,
                                         reduction='mean')

        # Calculate total loss and update meters
        loss = contrastive_loss + saliency_loss
        contrastive_losses.update(contrastive_loss.item())
        saliency_losses.update(saliency_loss.item())
        losses.update(loss.item())

        acc1, acc5 = accuracy(logits, labels, topk=(1, 5))
        top1.update(acc1[0], im_q.size(0))
        top5.update(acc5[0], im_q.size(0))

        # Update model
        optimizer.zero_grad()
        if amp is not None:  # Mixed precision (apex-style amp interface)
            with amp.scale_loss(loss, optimizer) as scaled_loss:
                scaled_loss.backward()
        else:
            loss.backward()
        optimizer.step()

        # Display progress
        if i % 25 == 0:
            progress.display(i)
def train_one_epoch(data_loader, model, criterion, optimizer, lr_scheduler,
                    transforms_cuda, epoch, args):
    """One self-supervised training epoch (InfoNCE / UberNCE).

    Args:
        data_loader: yields raw clip tensors.
        model: returns (output, target); target semantics depend on args.model.
        criterion: used for the 'infonce' branch only.
        optimizer: optimizer updating model parameters.
        lr_scheduler: unused here (kept for interface compatibility).
        transforms_cuda: GPU-side transform applied inside `tr`.
        epoch, args: logging / configuration.

    Returns:
        (average loss, average top-1 accuracy).
    """
    batch_time = AverageMeter('Time', ':.2f')
    data_time = AverageMeter('Data', ':.2f')
    losses = AverageMeter('Loss', ':.4f')
    top1_meter = AverageMeter('acc@1', ':.4f')
    top5_meter = AverageMeter('acc@5', ':.4f')
    progress = ProgressMeter(
        len(data_loader),
        [batch_time, data_time, losses, top1_meter, top5_meter],
        prefix='Epoch:[{}]'.format(epoch))
    model.train()

    def tr(x):
        # Reshape a raw clip batch into (B, num_seq, 3, seq_len, H, W).
        B = x.size(0)
        return transforms_cuda(x).view(B, 3, args.num_seq, args.seq_len,
                                       args.img_dim, args.img_dim)\
            .transpose(1, 2).contiguous()

    # Fix: `tic` was referenced in the per-print epoch-time log below but was
    # never defined, causing a NameError on the first print.
    tic = time.time()
    end = time.time()
    for idx, input_seq in tqdm(enumerate(data_loader), total=len(data_loader)):
        data_time.update(time.time() - end)
        B = input_seq.size(0)
        input_seq = tr(input_seq.cuda(non_blocking=True))

        output, target = model(input_seq)

        if args.model == 'infonce':  # 'target' is the index of self
            loss = criterion(output, target)
            top1, top5 = calc_topk_accuracy(output, target, (1, 5))
        elif args.model == 'ubernce':  # 'target' is the binary mask
            # optimize all positive pairs, compute the mean for num_pos and
            # for batch_size
            loss = -(F.log_softmax(output, dim=1) * target).sum(1) / target.sum(1)
            loss = loss.mean()
            top1, top5 = calc_mask_accuracy(output, target, (1, 5))
        else:
            # Fix: previously an unknown model silently fell through and
            # raised a confusing NameError on `loss`.
            raise ValueError('Unknown args.model: {}'.format(args.model))

        top1_meter.update(top1.item(), B)
        top5_meter.update(top5.item(), B)
        losses.update(loss.item(), B)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        batch_time.update(time.time() - end)
        end = time.time()

        progress.display(idx)

        if idx % args.print_freq == 0:
            if args.print:
                args.train_plotter.add_data('local/loss', losses.local_avg,
                                            args.iteration)
                args.train_plotter.add_data('local/top1', top1_meter.local_avg,
                                            args.iteration)
            args.iteration += 1
            print('({gpu:1d})Epoch: [{0}][{1}/{2}]\t'
                  'T-epoch:{t:.2f}\t'.format(epoch, idx, len(data_loader),
                                             gpu=args.rank,
                                             t=time.time() - tic))

    if args.print:
        args.train_plotter.add_data('global/loss', losses.avg, epoch)
        args.train_plotter.add_data('global/top1', top1_meter.avg, epoch)
        args.train_plotter.add_data('global/top5', top5_meter.avg, epoch)
    return losses.avg, top1_meter.avg
def train(device, train_loader, model, criterion, optimizer, epoch, args):
    """Train the gaze bin-classification model (separate pitch/yaw heads).

    Returns:
        (average loss, average angular error).
    """
    batch_time = AverageMeter('Time', ':6.3f')
    data_time = AverageMeter('Data', ':6.3f')
    losses = AverageMeter('Loss', ':.4e')
    error = AverageMeter('error', ':6.2f')
    progress = ProgressMeter(len(train_loader), batch_time, data_time, losses,
                             error, prefix="Train Epoch: [{}]".format(epoch))

    # switch to train mode
    model.train()
    end = time.time()
    for batch_idx, (patch, gaze_norm_g, head_norm,
                    rot_vec_norm) in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - end)

        patch.requires_grad = False
        patch = patch.to(device)

        gaze_norm_g.requires_grad = False
        gaze_norm_g = gaze_norm_g.to(device)

        head_norm.requires_grad = False
        head_norm = head_norm.to(device)

        rot_vec_norm.requires_grad = False
        rot_vec_norm = rot_vec_norm.to(device)

        model = model.to(device)

        # Discretize continuous gaze angles into integer class bins via
        # 100*(angle + 0.75), clamped — presumably angles lie in
        # [-0.75, 0.75] rad, giving ~150 bins. TODO(review): confirm range.
        gaze_norm_gt = torch.clamp((100 * (gaze_norm_g + 0.75)).long(),
                                   -74, 75, out=None)
        gaze_p, gaze_y = model(patch)
        # Sum of the two classification losses (pitch bin + yaw bin).
        loss = criterion(gaze_p, gaze_norm_gt[:, 0]) + criterion(
            gaze_y, gaze_norm_gt[:, 1])

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        # Convert predicted bins back to angles for the angular-error metric
        # (inverse of the mapping above: bin/100 - 0.75).
        _, preds_p = torch.max(gaze_p, 1)
        _, preds_y = torch.max(gaze_y, 1)
        gaze_pred = torch.cat(
            [preds_p.view(patch.size(0), 1),
             preds_y.view(patch.size(0), 1)], 1)
        angle_error = mean_angle_error(
            gaze_pred.cpu().detach().numpy().astype(float) / 100 - 0.75,
            gaze_norm_g.cpu().detach().numpy(),
            rot_vec_norm.cpu().detach().numpy())
        losses.update(loss.item(), patch.size(0))
        error.update(angle_error, patch.size(0))

        if (batch_idx + 1) % args.print_freq == 0:
            progress.print(batch_idx + 1)
    return losses.get_avg(), error.get_avg()
def validate(device, val_dataloader, model, criterion, epoch, args):
    """Evaluate the gaze bin-classification model on the validation set.

    Returns:
        (average loss, average angular error).
    """
    batch_time = AverageMeter('Time', ':6.3f')
    data_time = AverageMeter('Data', ':6.3f')
    losses = AverageMeter('Loss', ':.4e')
    error = AverageMeter('error', ':6.2f')
    progress = ProgressMeter(len(val_dataloader), batch_time, data_time,
                             losses, error,
                             prefix="Val Epoch: [{}]".format(epoch))

    model.eval()
    with torch.no_grad():
        end = time.time()
        for i, (patch, gaze_norm_g, head_norm,
                rot_vec_norm) in enumerate(val_dataloader):
            # measure data loading time
            data_time.update(time.time() - end)

            patch = patch.to(device)
            gaze_norm_g = gaze_norm_g.to(device)
            head_norm = head_norm.to(device)
            rot_vec_norm = rot_vec_norm.to(device)
            model = model.to(device)

            # Same angle-to-bin discretization as in train():
            # 100*(angle + 0.75), clamped.
            gaze_norm_gt = torch.clamp((100 * (gaze_norm_g + 0.75)).long(),
                                       -74, 75, out=None)
            gaze_p, gaze_y = model(patch)
            loss = criterion(gaze_p, gaze_norm_gt[:, 0]) + criterion(
                gaze_y, gaze_norm_gt[:, 1])

            # argmax over bins -> predicted pitch/yaw bin indices,
            # mapped back to angles (bin/100 - 0.75) for the error metric.
            _, preds_p = torch.max(gaze_p, 1)
            _, preds_y = torch.max(gaze_y, 1)
            gaze_pred = torch.cat([
                preds_p.view(patch.size(0), 1),
                preds_y.view(patch.size(0), 1)
            ], 1)
            angle_error = mean_angle_error(
                gaze_pred.cpu().detach().numpy().astype(float) / 100 - 0.75,
                gaze_norm_g.cpu().detach().numpy(),
                rot_vec_norm.cpu().detach().numpy())
            losses.update(loss.item(), patch.size(0))
            error.update(angle_error, patch.size(0))

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            if (i + 1) % args.print_freq == 0:
                progress.print(i + 1)
    return losses.get_avg(), error.get_avg()
def simclr_train(train_loader, model, criterion, optimizer, epoch,
                 augs_criterion=None):
    """ Train according to the scheme from SimCLR
        https://arxiv.org/abs/2002.05709

    When `augs_criterion` is given, the model additionally predicts the
    augmentation parameters applied to each view, and that auxiliary loss is
    added to the contrastive loss.
    """
    losses = AverageMeter('Loss', ':.4e')
    if augs_criterion is None:
        progress = ProgressMeter(len(train_loader), [losses],
                                 prefix="Epoch: [{}]".format(epoch))
    else:
        # Extra meters for the auxiliary augmentation-prediction task.
        # augs_output columns (per the indexing below): 0 crop-IoU, 1 flip,
        # 2-5 brightness/contrast/saturation/hue, 6 grayscale.
        augs_losses = AverageMeter('Augs Loss', ':.4e')
        augs_iou_abs_diffs = AverageMeter('Augs IoU abs-diff', ':.4f')
        augs_flip_accs = AverageMeter('Augs flip acc', ':.2f')
        augs_brightness_abs_diffs = AverageMeter('Augs brightness abs-diff', ':.4f')
        augs_contrast_abs_diffs = AverageMeter('Augs contrast abs-diff', ':.4f')
        augs_saturation_abs_diffs = AverageMeter('Augs saturation abs-diff', ':.4f')
        augs_hue_abs_diffs = AverageMeter('Augs hue abs-diff', ':.4f')
        # augs_color_jitter_accs = AverageMeter('Augs jitter acc', ':.2f')
        augs_grayscale_accs = AverageMeter('Augs gray acc', ':.2f')
        progress = ProgressMeter(len(train_loader), [
            losses, augs_losses, augs_iou_abs_diffs, augs_flip_accs,
            augs_brightness_abs_diffs, augs_contrast_abs_diffs,
            augs_saturation_abs_diffs, augs_hue_abs_diffs, augs_grayscale_accs
        ], prefix="Epoch: [{}]".format(epoch))
        # progress = ProgressMeter(len(train_loader),
        #                          [losses, augs_losses, augs_iou_abs_diffs, augs_flip_accs, augs_color_jitter_accs, augs_grayscale_accs],
        #                          prefix="Epoch: [{}]".format(epoch))

    model.train()

    for i, batch in enumerate(train_loader):
        images = batch['image']
        images_augmented = batch['image_augmented']
        b, c, h, w = images.size()
        # Interleave the two views: (b, 2, c, h, w) -> (2b, c, h, w).
        input_ = torch.cat(
            [images.unsqueeze(1), images_augmented.unsqueeze(1)], dim=1)
        input_ = input_.view(-1, c, h, w)
        input_ = input_.cuda(non_blocking=True)
        targets = batch['target'].cuda(non_blocking=True)
        # aug_targets1 = torch.cat([a.unsqueeze(1) for a in batch['image_aug_labels'][4:7]], dim=1)
        # aug_targets2 = torch.cat([a.unsqueeze(1) for a in batch['image_augmented_aug_labels'][4:7]], dim=1)
        # aug_targets = torch.cat([aug_targets1, aug_targets2])
        # aug_targets = aug_targets.cuda(non_blocking=True)

        if augs_criterion is None:
            # Plain SimCLR: (2b, d) features regrouped as (b, 2, d).
            output = model(input_).view(b, 2, -1)
            loss = criterion(output)
            losses.update(loss.item())
        else:
            aug_targets = torch.cat(
                [a.unsqueeze(1) for a in batch['aug_labels']], dim=1)
            # aug_targets = torch.cat([aug_targets, aug_targets])
            aug_targets = aug_targets.cuda(non_blocking=True)

            output, augs_output = model(input_)
            output = output.view(b, 2, -1)
            loss = criterion(output)
            losses.update(loss.item())

            augs_loss = augs_criterion(augs_output, aug_targets)
            augs_losses.update(augs_loss.item())

            # Total loss = contrastive + augmentation-prediction.
            loss = sum([loss, augs_loss])
            # loss = augs_loss  # TEMP!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

            # Per-attribute diagnostics from the absolute prediction error.
            abs_diff = torch.abs(augs_output - aug_targets)
            augs_iou_abs_diff = abs_diff[:, 0].mean().item()
            # assert ((aug_targets[:, 1:] == 0) | (aug_targets[:, 1:] == 1)).all().item()
            flip_acc = (abs_diff[:, 1] < 0.5).float().mean()
            gray_acc = (abs_diff[:, 6] < 0.5).float().mean()
            mean_abs_diff = abs_diff[:, 2:6].mean(axis=0)
            augs_brightness_abs_diffs.update(mean_abs_diff[0].item())
            augs_contrast_abs_diffs.update(mean_abs_diff[1].item())
            augs_saturation_abs_diffs.update(mean_abs_diff[2].item())
            augs_hue_abs_diffs.update(mean_abs_diff[3].item())
            augs_iou_abs_diffs.update(augs_iou_abs_diff)
            augs_flip_accs.update(flip_acc.item() * 100)
            augs_grayscale_accs.update(gray_acc.item() * 100)
            # augs_res = (abs_diff[:, 1:] < 0.5).float().mean(axis=0)
            # augs_iou_abs_diffs.update(augs_iou_abs_diff)
            # augs_flip_accs.update(augs_res[0].item() * 100)
            # augs_color_jitter_accs.update(augs_res[1].item() * 100)
            # augs_grayscale_accs.update(augs_res[2].item() * 100)
            # # augs_grayscale_accs.update((abs_diff < 0.5).float().mean() * 100)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if i % 25 == 0:
            progress.display(i)
def train_one_epoch(data_loader, model, criterion, optimizer, transforms_cuda,
                    epoch, args):
    """One co-training epoch with a fixed sampler and multi-positive NCE loss.

    Args:
        data_loader: yields (input_seq list, vname, _).
        model: DataParallel/DDP-wrapped; `model.module.sampler` stays in eval.
        criterion: unused here (loss comes from `multi_nce_loss`).
        optimizer: optimizer updating model parameters.
        transforms_cuda: GPU-side transform applied inside `tr`.
        epoch, args: logging / configuration.

    Returns:
        (average loss, average top-1 accuracy).
    """
    batch_time = AverageMeter('Time', ':.2f')
    data_time = AverageMeter('Data', ':.2f')
    losses = AverageMeter('Loss', ':.4f')
    top1_meter = AverageMeter('acc@1', ':.4f')
    top5_meter = AverageMeter('acc@5', ':.4f')
    sacc_meter = AverageMeter('Sampling-Acc@%d' % args.topk, ':.2f')
    progress = ProgressMeter(
        len(data_loader),
        [batch_time, data_time, losses, top1_meter, top5_meter, sacc_meter],
        prefix='Epoch:[{}]'.format(epoch))
    model.train()
    model.module.sampler.eval()  # the sampler is always fixed

    def tr(x):
        # Reshape a raw clip batch into (B, num_seq, 3, seq_len, H, W).
        B = x.size(0)
        return transforms_cuda(x).view(B, 3, args.num_seq, args.seq_len,
                                       args.img_dim,
                                       args.img_dim).transpose(1, 2).contiguous()

    # Fix: `tic` was referenced in the per-print epoch-time log below but was
    # never defined, causing a NameError on the first print.
    tic = time.time()
    end = time.time()
    for idx, (input_seq, vname, _) in enumerate(data_loader):
        data_time.update(time.time() - end)
        B = input_seq[0].size(0)
        input_seq = [tr(i.cuda(non_blocking=True)) for i in input_seq]
        vname = vname.cuda(non_blocking=True)

        output, mask = model(*input_seq, vname)
        mask_sum = mask.sum(1)

        if random.random() < 0.9:
            # because model has been pretrained with infoNCE,
            # in this stage, self-similarity is already very high,
            # randomly mask out the self-similarity for optimization efficiency,
            mask_clone = mask.clone()
            mask_clone[mask_sum != 1, 0] = 0  # mask out self-similarity
            loss = multi_nce_loss(output, mask_clone)
        else:
            loss = multi_nce_loss(output, mask)

        top1, top5 = calc_mask_accuracy(output, mask, (1, 5))

        losses.update(loss.item(), B)
        top1_meter.update(top1.item(), B)
        top5_meter.update(top5.item(), B)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        batch_time.update(time.time() - end)
        end = time.time()

        if idx % args.print_freq == 0:
            progress.display(idx)
            if args.print:
                args.train_plotter.add_data('local/loss', losses.local_avg,
                                            args.iteration)
                args.train_plotter.add_data('local/top1', top1_meter.local_avg,
                                            args.iteration)
                args.train_plotter.add_data('local/top5', top5_meter.local_avg,
                                            args.iteration)
            args.iteration += 1
            print('({gpu:1d})Epoch: [{0}][{1}/{2}]\t'
                  'T-epoch:{t:.2f}\t'.format(epoch, idx, len(data_loader),
                                             gpu=args.rank,
                                             t=time.time() - tic))

    if args.print:
        args.train_plotter.add_data('global/loss', losses.avg, epoch)
        args.train_plotter.add_data('global/top1', top1_meter.avg, epoch)
        args.train_plotter.add_data('global/top5', top5_meter.avg, epoch)
    return losses.avg, top1_meter.avg
def train_one_epoch(data_loader, model, criterion, optimizer, transforms_cuda,
                    epoch, args):
    """One co-training epoch with a memory queue; optimization only starts
    once the queue is full. Returns (average loss, average top-1 accuracy)."""
    batch_time = AverageMeter('Time', ':.2f')
    data_time = AverageMeter('Data', ':.2f')
    losses = AverageMeter('Loss', ':.4f')
    top1_meter = AverageMeter('acc@1', ':.4f')
    top5_meter = AverageMeter('acc@5', ':.4f')
    top1_self_meter = AverageMeter('Self-acc@1', ':.4f')
    top5_self_meter = AverageMeter('Self-acc@5', ':.4f')
    sacc_meter = AverageMeter('Sampling-Acc@%d' % args.topk, ':.2f')
    progress = ProgressMeter(
        len(data_loader),
        [batch_time, data_time, losses, top1_meter, top5_meter,
         top1_self_meter, top5_self_meter, sacc_meter],
        prefix='Epoch:[{}]'.format(epoch))
    model.train()
    model.module.sampler.eval()  # the sampler is always fixed

    def tr(x):
        # Reshape a raw clip batch into (B, num_seq, 3, seq_len, H, W).
        B = x.size(0)
        return transforms_cuda(x).view(B, 3, args.num_seq, args.seq_len,
                                       args.img_dim,
                                       args.img_dim).transpose(1, 2).contiguous()

    tic = time.time()
    end = time.time()
    for idx, (input_seq, vname, _) in enumerate(data_loader):
        data_time.update(time.time() - end)
        B = input_seq[0].size(0)
        input_seq = [tr(i.cuda(non_blocking=True)) for i in input_seq]
        vname = vname.cuda(non_blocking=True)

        output, mask = model(*input_seq, vname)
        mask_sum = mask.sum(1)

        loss = multi_nce_loss(output, mask)

        top1, top5 = calc_mask_accuracy(output, mask, (1, 5))
        # "Self" accuracy: position 0 is the sample's own positive.
        top1_self, top5_self = calc_topk_accuracy(
            output, torch.zeros(B, dtype=torch.long).cuda(), (1, 5))

        del output

        losses.update(loss.item(), B)
        top1_meter.update(top1.item(), B)
        top5_meter.update(top5.item(), B)
        top1_self_meter.update(top1_self.item(), B)
        top5_self_meter.update(top5_self.item(), B)

        # Only optimize once the memory queue has been filled.
        # NOTE(review): the collapsed original makes the exact extent of this
        # block ambiguous; the gradient step is gated here, while the cleanup
        # below runs every iteration — confirm against the reference code.
        if model.module.queue_is_full:
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        del loss
        torch.cuda.empty_cache()

        batch_time.update(time.time() - end)
        end = time.time()

        if idx % args.print_freq == 0:
            progress.display(idx)
            if args.print:
                args.train_plotter.add_data('local/loss', losses.local_avg,
                                            args.iteration)
                args.train_plotter.add_data('local/top1', top1_meter.local_avg,
                                            args.iteration)
                args.train_plotter.add_data('local/top5', top5_meter.local_avg,
                                            args.iteration)
                args.train_plotter.add_data('local/self-top1',
                                            top1_self_meter.local_avg,
                                            args.iteration)
                args.train_plotter.add_data('local/self-top5',
                                            top5_self_meter.local_avg,
                                            args.iteration)
            args.iteration += 1
            print('({gpu:1d})Epoch: [{0}][{1}/{2}]\t'
                  'T-epoch:{t:.2f}\t'.format(epoch, idx, len(data_loader),
                                             gpu=args.rank,
                                             t=time.time() - tic))

    if args.print:
        args.train_plotter.add_data('global/loss', losses.avg, epoch)
        args.train_plotter.add_data('global/top1', top1_meter.avg, epoch)
        args.train_plotter.add_data('global/top5', top5_meter.avg, epoch)
        args.train_plotter.add_data('global/self-top1', top1_self_meter.avg,
                                    epoch)
        args.train_plotter.add_data('global/self-top5', top5_self_meter.avg,
                                    epoch)
    return losses.avg, top1_meter.avg
def train_one_epoch(data_loader, model, criterion, optimizer, transforms_cuda,
                    device, epoch, args):
    """Supervised fine-tuning epoch for the video model.

    Returns (average loss, average top-1 accuracy)."""
    batch_time = AverageMeter('Time', ':.2f')
    data_time = AverageMeter('Data', ':.2f')
    losses = AverageMeter('Loss', ':.4f')
    top1_meter = AverageMeter('acc@1', ':.4f')
    top5_meter = AverageMeter('acc@5', ':.4f')
    progress = ProgressMeter(
        len(data_loader),
        [batch_time, data_time, losses, top1_meter, top5_meter],
        prefix='Epoch:[{}]'.format(epoch))

    if args.train_what == 'last':
        model.eval()  # totally freeze BN in backbone
    else:
        model.train()

    if args.final_bn:
        model.module.final_bn.train()

    end = time.time()
    tic = time.time()

    def tr(x):  # transformation on tensor
        # Reshape a raw clip batch into (B, num_seq, 3, seq_len, H, W).
        B = x.size(0)
        return transforms_cuda(x).view(B, 3, args.num_seq, args.seq_len,
                                       args.img_dim, args.img_dim)\
            .transpose(1, 2).contiguous()

    for idx, (input_seq, target) in enumerate(data_loader):
        data_time.update(time.time() - end)
        B = input_seq.size(0)
        input_seq = tr(input_seq.to(device, non_blocking=True))
        target = target.to(device, non_blocking=True)

        logit, _ = model(input_seq)
        loss = criterion(logit, target)
        top1, top5 = calc_topk_accuracy(logit, target, (1, 5))

        losses.update(loss.item(), B)
        top1_meter.update(top1.item(), B)
        top5_meter.update(top5.item(), B)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        batch_time.update(time.time() - end)
        end = time.time()

        if idx % args.print_freq == 0:
            progress.display(idx)
            args.train_plotter.add_data('local/loss', losses.local_avg,
                                        args.iteration)
            args.train_plotter.add_data('local/top1', top1_meter.local_avg,
                                        args.iteration)
            args.train_plotter.add_data('local/top5', top5_meter.local_avg,
                                        args.iteration)
            args.iteration += 1
            print('Epoch: [{0}][{1}/{2}]\t'
                  'T-epoch:{t:.2f}\t'.format(epoch, idx, len(data_loader),
                                             t=time.time() - tic))

    args.train_plotter.add_data('global/loss', losses.avg, epoch)
    args.train_plotter.add_data('global/top1', top1_meter.avg, epoch)
    args.train_plotter.add_data('global/top5', top5_meter.avg, epoch)
    args.logger.log('train Epoch: [{0}][{1}/{2}]\t'
                    'T-epoch:{t:.2f}\t'.format(epoch, idx, len(data_loader),
                                               t=time.time() - tic))
    return losses.avg, top1_meter.avg