import time
import random

import torch
import torch.nn.functional as F
from tqdm import tqdm

# AverageMeter, ProgressMeter, calc_topk_accuracy, calc_mask_accuracy and
# args.train_plotter are project utilities defined/imported elsewhere in the repo.


# Variant 1: InfoNCE / UberNCE pretraining loop.
def train_one_epoch(data_loader, model, criterion, optimizer, lr_scheduler, transforms_cuda, epoch, args):
    batch_time = AverageMeter('Time', ':.2f')
    data_time = AverageMeter('Data', ':.2f')
    losses = AverageMeter('Loss', ':.4f')
    top1_meter = AverageMeter('acc@1', ':.4f')
    top5_meter = AverageMeter('acc@5', ':.4f')
    progress = ProgressMeter(
        len(data_loader),
        [batch_time, data_time, losses, top1_meter, top5_meter],
        prefix='Epoch:[{}]'.format(epoch))
    model.train()

    def tr(x):
        # reshape [B, 3, num_seq*seq_len, H, W] -> [B, num_seq, 3, seq_len, H, W]
        B = x.size(0)
        return transforms_cuda(x)\
            .view(B, 3, args.num_seq, args.seq_len, args.img_dim, args.img_dim)\
            .transpose(1, 2).contiguous()

    tic = time.time()
    end = time.time()

    for idx, input_seq in tqdm(enumerate(data_loader), total=len(data_loader)):
        data_time.update(time.time() - end)
        B = input_seq.size(0)
        input_seq = tr(input_seq.cuda(non_blocking=True))

        output, target = model(input_seq)

        if args.model == 'infonce':  # 'target' is the index of self
            loss = criterion(output, target)
            top1, top5 = calc_topk_accuracy(output, target, (1, 5))
        elif args.model == 'ubernce':  # 'target' is the binary mask
            # optimize all positive pairs: average over num_pos, then over the batch
            loss = - (F.log_softmax(output, dim=1) * target).sum(1) / target.sum(1)
            loss = loss.mean()
            top1, top5 = calc_mask_accuracy(output, target, (1, 5))

        top1_meter.update(top1.item(), B)
        top5_meter.update(top5.item(), B)
        losses.update(loss.item(), B)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        batch_time.update(time.time() - end)
        end = time.time()

        if idx % args.print_freq == 0:
            progress.display(idx)
            if args.print:
                args.train_plotter.add_data('local/loss', losses.local_avg, args.iteration)
                args.train_plotter.add_data('local/top1', top1_meter.local_avg, args.iteration)

        args.iteration += 1

    print('({gpu:1d})Epoch: [{0}][{1}/{2}]\t'
          'T-epoch:{t:.2f}\t'.format(epoch, idx, len(data_loader), gpu=args.rank, t=time.time() - tic))

    if args.print:
        args.train_plotter.add_data('global/loss', losses.avg, epoch)
        args.train_plotter.add_data('global/top1', top1_meter.avg, epoch)
        args.train_plotter.add_data('global/top5', top5_meter.avg, epoch)

    return losses.avg, top1_meter.avg
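# The two co-training loops below call multi_nce_loss(output, mask), which is not
# defined in this section. A minimal sketch, assuming it generalizes the inlined
# 'ubernce' branch above (softmax cross-entropy averaged over every positive
# marked in the binary mask); the repo's own implementation may differ:
def multi_nce_loss(logits, mask):
    # logits: [B, K] similarity scores; mask: [B, K] binary, 1 marks a positive pair
    loss = - (F.log_softmax(logits, dim=1) * mask).sum(1) / mask.sum(1)
    return loss.mean()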
# Variant 2: co-training loop with a fixed sampler and a memory queue.
def train_one_epoch(data_loader, model, criterion, optimizer, transforms_cuda, epoch, args):
    batch_time = AverageMeter('Time', ':.2f')
    data_time = AverageMeter('Data', ':.2f')
    losses = AverageMeter('Loss', ':.4f')
    top1_meter = AverageMeter('acc@1', ':.4f')
    top5_meter = AverageMeter('acc@5', ':.4f')
    top1_self_meter = AverageMeter('Self-acc@1', ':.4f')
    top5_self_meter = AverageMeter('Self-acc@5', ':.4f')
    sacc_meter = AverageMeter('Sampling-Acc@%d' % args.topk, ':.2f')
    progress = ProgressMeter(
        len(data_loader),
        [batch_time, data_time, losses, top1_meter, top5_meter,
         top1_self_meter, top5_self_meter, sacc_meter],
        prefix='Epoch:[{}]'.format(epoch))
    model.train()
    model.module.sampler.eval()  # the sampler is always fixed

    def tr(x):
        # reshape [B, 3, num_seq*seq_len, H, W] -> [B, num_seq, 3, seq_len, H, W]
        B = x.size(0)
        return transforms_cuda(x)\
            .view(B, 3, args.num_seq, args.seq_len, args.img_dim, args.img_dim)\
            .transpose(1, 2).contiguous()

    tic = time.time()
    end = time.time()

    for idx, (input_seq, vname, _) in enumerate(data_loader):
        data_time.update(time.time() - end)
        B = input_seq[0].size(0)
        input_seq = [tr(i.cuda(non_blocking=True)) for i in input_seq]
        vname = vname.cuda(non_blocking=True)

        output, mask = model(*input_seq, vname)
        mask_sum = mask.sum(1)

        loss = multi_nce_loss(output, mask)

        top1, top5 = calc_mask_accuracy(output, mask, (1, 5))
        # self-accuracy: index 0 holds the self (same-instance) positive
        top1_self, top5_self = calc_topk_accuracy(output, torch.zeros(B, dtype=torch.long).cuda(), (1, 5))
        del output

        losses.update(loss.item(), B)
        top1_meter.update(top1.item(), B)
        top5_meter.update(top5.item(), B)
        top1_self_meter.update(top1_self.item(), B)
        top5_self_meter.update(top5_self.item(), B)

        if model.module.queue_is_full:
            # only start optimizing once the memory queue has been filled
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        del loss
        torch.cuda.empty_cache()

        batch_time.update(time.time() - end)
        end = time.time()

        if idx % args.print_freq == 0:
            progress.display(idx)
            if args.print:
                args.train_plotter.add_data('local/loss', losses.local_avg, args.iteration)
                args.train_plotter.add_data('local/top1', top1_meter.local_avg, args.iteration)
                args.train_plotter.add_data('local/top5', top5_meter.local_avg, args.iteration)
                args.train_plotter.add_data('local/self-top1', top1_self_meter.local_avg, args.iteration)
                args.train_plotter.add_data('local/self-top5', top5_self_meter.local_avg, args.iteration)

        args.iteration += 1

    print('({gpu:1d})Epoch: [{0}][{1}/{2}]\t'
          'T-epoch:{t:.2f}\t'.format(epoch, idx, len(data_loader), gpu=args.rank, t=time.time() - tic))

    if args.print:
        args.train_plotter.add_data('global/loss', losses.avg, epoch)
        args.train_plotter.add_data('global/top1', top1_meter.avg, epoch)
        args.train_plotter.add_data('global/top5', top5_meter.avg, epoch)
        args.train_plotter.add_data('global/self-top1', top1_self_meter.avg, epoch)
        args.train_plotter.add_data('global/self-top5', top5_self_meter.avg, epoch)

    return losses.avg, top1_meter.avg
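# calc_topk_accuracy and calc_mask_accuracy are project utilities that this file
# only calls. Minimal sketches consistent with how they are used here (these
# exact implementations are assumptions, not the repo's code):
def calc_topk_accuracy(output, target, ks=(1,)):
    # top-k accuracy against a single target index per row
    maxk = max(ks)
    _, pred = output.topk(maxk, dim=1, largest=True, sorted=True)  # [B, maxk]
    correct = pred.eq(target.view(-1, 1))                          # [B, maxk]
    return [correct[:, :k].any(1).float().mean() for k in ks]


def calc_mask_accuracy(output, mask, ks=(1,)):
    # top-k accuracy where hitting any positive (mask == 1) counts as correct
    maxk = max(ks)
    _, pred = output.topk(maxk, dim=1, largest=True, sorted=True)  # [B, maxk]
    hits = torch.gather(mask, 1, pred).bool()                      # [B, maxk]
    return [hits[:, :k].any(1).float().mean() for k in ks]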
# Variant 3: co-training loop that randomly masks out the self-similarity term.
def train_one_epoch(data_loader, model, criterion, optimizer, transforms_cuda, epoch, args):
    batch_time = AverageMeter('Time', ':.2f')
    data_time = AverageMeter('Data', ':.2f')
    losses = AverageMeter('Loss', ':.4f')
    top1_meter = AverageMeter('acc@1', ':.4f')
    top5_meter = AverageMeter('acc@5', ':.4f')
    sacc_meter = AverageMeter('Sampling-Acc@%d' % args.topk, ':.2f')
    progress = ProgressMeter(
        len(data_loader),
        [batch_time, data_time, losses, top1_meter, top5_meter, sacc_meter],
        prefix='Epoch:[{}]'.format(epoch))
    model.train()
    model.module.sampler.eval()  # the sampler is always fixed

    def tr(x):
        # reshape [B, 3, num_seq*seq_len, H, W] -> [B, num_seq, 3, seq_len, H, W]
        B = x.size(0)
        return transforms_cuda(x)\
            .view(B, 3, args.num_seq, args.seq_len, args.img_dim, args.img_dim)\
            .transpose(1, 2).contiguous()

    tic = time.time()
    end = time.time()

    for idx, (input_seq, vname, _) in enumerate(data_loader):
        data_time.update(time.time() - end)
        B = input_seq[0].size(0)
        input_seq = [tr(i.cuda(non_blocking=True)) for i in input_seq]
        vname = vname.cuda(non_blocking=True)

        output, mask = model(*input_seq, vname)
        mask_sum = mask.sum(1)

        if random.random() < 0.9:
            # because the model has been pretrained with InfoNCE, self-similarity
            # is already very high at this stage; randomly mask out the
            # self-similarity term for optimization efficiency
            mask_clone = mask.clone()
            mask_clone[mask_sum != 1, 0] = 0  # mask out self-similarity (kept when it is the only positive)
            loss = multi_nce_loss(output, mask_clone)
        else:
            loss = multi_nce_loss(output, mask)

        top1, top5 = calc_mask_accuracy(output, mask, (1, 5))

        losses.update(loss.item(), B)
        top1_meter.update(top1.item(), B)
        top5_meter.update(top5.item(), B)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        batch_time.update(time.time() - end)
        end = time.time()

        if idx % args.print_freq == 0:
            progress.display(idx)
            if args.print:
                args.train_plotter.add_data('local/loss', losses.local_avg, args.iteration)
                args.train_plotter.add_data('local/top1', top1_meter.local_avg, args.iteration)
                args.train_plotter.add_data('local/top5', top5_meter.local_avg, args.iteration)

        args.iteration += 1

    print('({gpu:1d})Epoch: [{0}][{1}/{2}]\t'
          'T-epoch:{t:.2f}\t'.format(epoch, idx, len(data_loader), gpu=args.rank, t=time.time() - tic))

    if args.print:
        args.train_plotter.add_data('global/loss', losses.avg, epoch)
        args.train_plotter.add_data('global/top1', top1_meter.avg, epoch)
        args.train_plotter.add_data('global/top5', top5_meter.avg, epoch)

    return losses.avg, top1_meter.avg
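# A toy check of the self-similarity masking above, with hypothetical mask values:
# row 0 has positives besides itself, so its self column (index 0) is zeroed;
# row 1 has only the self positive, so it is left untouched to avoid an
# all-zero mask row.
if __name__ == '__main__':
    mask = torch.tensor([[1., 0., 1., 1.],
                         [1., 0., 0., 0.]])
    mask_sum = mask.sum(1)            # tensor([3., 1.])
    mask_clone = mask.clone()
    mask_clone[mask_sum != 1, 0] = 0  # zero the self column only where other positives exist
    print(mask_clone)
    # tensor([[0., 0., 1., 1.],
    #         [1., 0., 0., 0.]])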