def train(train_loader, augmentation_gpu, criterion, G_criterion, netG, netD,
          optimizer_g, optimizer_d, epoch, args, writer):
    """Run one epoch of adversarial MoCo pre-training.

    The generator ``netG`` produces a perturbed view of the query clip; the
    discriminator ``netD`` (the MoCo encoder) is optimized with the contrastive
    loss ``criterion`` while the generator is pushed (via ``G_criterion``) to
    separate the fake and real query embeddings.

    Args:
        train_loader: yields ``(video, audio)`` pairs where ``video`` is a
            two-element list of clip tensors (two augmented views).
        augmentation_gpu: GPU-side augmentation applied to both views.
        criterion: contrastive (cross-entropy) loss for the discriminator.
        G_criterion: similarity loss between fake and real query embeddings.
        netG / netD: generator and discriminator networks.
        optimizer_g / optimizer_d: their respective optimizers.
        epoch: current epoch index (used for logging offsets).
        args: namespace with ``gpu``, ``temporal_decay``, ``print_freq``.
        writer: optional TensorBoard ``SummaryWriter``.

    Returns nothing; metrics are accumulated in AverageMeters and logged.
    """
    batch_time = AverageMeter('Time', ':6.3f')
    data_time = AverageMeter('Data', ':6.3f')
    losses_g = AverageMeter('Loss_G', ':.4f')
    losses_d = AverageMeter('Loss_D', ':.4f')
    top1 = AverageMeter('Acc@1', ':6.2f')
    top5 = AverageMeter('Acc@5', ':6.2f')
    progress = ProgressMeter(
        len(train_loader),
        [batch_time, data_time, losses_g, losses_d, top1, top5],
        prefix="Epoch: [{}]".format(epoch))

    # switch to train mode
    netG.train()
    netD.train()

    end = time.time()
    for i, (video, audio) in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - end)

        if args.gpu is not None:
            video[0] = video[0].cuda(args.gpu, non_blocking=True)
            video[1] = video[1].cuda(args.gpu, non_blocking=True)

        video[0] = augmentation_gpu(video[0])
        video[1] = augmentation_gpu(video[1])

        im_q_fake = netG(video[0])
        q_fake, q_real, output, target = netD(
            im_q_fake, im_q=video[0], im_k=video[1], t=args.temporal_decay)

        # Ds require no gradients when optimizing Gs
        set_requires_grad([netD], False)
        optimizer_g.zero_grad()  # set generator's gradients to zero
        loss_g = -100 * G_criterion(q_fake, q_real)
        # retain_graph=True: the same forward graph is reused for loss_d below
        loss_g.backward(retain_graph=True)

        set_requires_grad([netD], True)
        optimizer_d.zero_grad()  # set discriminator's gradients to zero
        loss_d = criterion(output, target)
        loss_d.backward()

        optimizer_g.step()  # update generator's weights
        optimizer_d.step()  # update discriminator's weights

        # acc1/acc5 are (K+1)-way contrast classifier accuracy
        # measure accuracy and record loss
        acc1, acc5 = accuracy(output, target, topk=(1, 5))
        losses_g.update(loss_g.item(), video[0].size(0))
        losses_d.update(loss_d.item(), video[0].size(0))
        top1.update(acc1[0], video[0].size(0))
        top5.update(acc5[0], video[0].size(0))

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % args.print_freq == 0:
            progress.display(i)
            if writer is not None:
                total_iter = i + epoch * len(train_loader)
                # FIX: both losses were previously logged under the single tag
                # 'moco_train/loss', so loss_g overwrote loss_d at every step.
                # Use distinct tags, matching the *_avg tags below.
                writer.add_scalar('moco_train/loss_d', loss_d, total_iter)
                writer.add_scalar('moco_train/loss_g', loss_g, total_iter)
                writer.add_scalar('moco_train/acc1', acc1, total_iter)
                writer.add_scalar('moco_train/acc5', acc5, total_iter)
                writer.add_scalar('moco_train_avg/loss_g', losses_g.avg, total_iter)
                writer.add_scalar('moco_train_avg/loss_d', losses_d.avg, total_iter)
                writer.add_scalar('moco_train_avg/acc1', top1.avg, total_iter)
                writer.add_scalar('moco_train_avg/acc5', top5.avg, total_iter)
def validate(val_loader, model, criterion, log_every=1):
    """Evaluate a segmentation model on ``val_loader``.

    Computes loss, pixel accuracy, F1, precision, and recall per batch under
    ``torch.no_grad()`` and returns the average pixel accuracy.

    Args:
        val_loader: yields ``(samples, targets)`` where ``samples`` is a list
            of tensors and ``targets`` has a singleton channel dim to squeeze.
        model: network under evaluation.
        criterion: loss taking ``(output, targets)``.
        log_every: display progress every N batches.

    Returns:
        float: average pixel accuracy over the validation set.
    """
    batch_time = AverageMeter('Time', ':6.3f')
    losses = AverageMeter('Loss', ':.4e')
    acc = AverageMeter('Acc', ':6.4f')
    f1 = AverageMeter('F1', ':6.4f')
    prec = AverageMeter('Prec', ':6.4f')
    rec = AverageMeter('Recall', ':6.4f')
    progress = ProgressMeter(
        len(val_loader),
        [batch_time, losses, acc, f1, prec, rec],
        prefix='Test: ')

    # model.eval() evaluate mode highly decreases performance
    model.train()

    # NOTE: removed dead locals (correct/error/precision/recall) that were
    # initialized but never read anywhere in this function.
    with torch.no_grad():
        end = time.time()
        for batch_no, (samples, targets) in enumerate(val_loader):
            # move data to gpu (or cpu if device is unavailable)
            samples = [t.to(device) for t in samples]
            targets = targets.squeeze(1).long().to(device)

            # compute output
            output = model(samples)

            # compute loss
            loss = criterion(output, targets)
            losses.update(loss.item(), targets.size(0))

            # compute f1 score
            f, (p, r) = f1_score(output, targets.float())
            f1.update(f)
            prec.update(p)
            rec.update(r)

            # compute accuracy
            acc.update(pixel_accuracy(output, targets), targets.size(0))

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            if batch_no % log_every == 0:
                progress.display(batch_no)

    return acc.avg
def validate(val_loader, model, criterion, args):
    """Evaluate a video classifier, averaging predictions over the
    ``args.clip_per_video`` clips sampled from each video.

    Returns ``(avg_loss, avg_top1, avg_top5)`` over the validation set.
    """
    batch_time = AverageMeter('Time', ':6.3f')
    losses = AverageMeter('Loss', ':.4e')
    top1 = AverageMeter('Acc@1', ':6.2f')
    top5 = AverageMeter('Acc@5', ':6.2f')
    progress = ProgressMeter(len(val_loader),
                             [batch_time, losses, top1, top5],
                             prefix='Test: ')

    # switch to evaluate mode
    model.eval()

    with torch.no_grad():
        tic = time.time()
        for step, (video, audio, target) in enumerate(val_loader):
            if args.gpu is not None:
                video = video.cuda(args.gpu, non_blocking=True)
                target = target.cuda(args.gpu, non_blocking=True)

            # forward pass, then fold the clip dimension back out
            clip_logits = model(video)
            clip_logits = clip_logits.view(-1, args.clip_per_video, args.num_class)
            target = target.view(-1, args.clip_per_video)

            # per-video score = mean of its clip scores
            output = torch.mean(clip_logits, dim=1)

            # sanity check: every clip of a video must share one label
            for clip_idx in range(1, args.clip_per_video):
                assert all(target[:, 0] == target[:, clip_idx])
            target = target[:, 0]

            loss = criterion(output, target)

            # measure accuracy and record loss
            acc1, acc5 = accuracy(output, target, topk=(1, 5))
            losses.update(loss.item(), video.size(0))
            top1.update(acc1[0], video.size(0))
            top5.update(acc5[0], video.size(0))

            # measure elapsed time
            batch_time.update(time.time() - tic)
            tic = time.time()

            if step % args.print_freq == 0:
                progress.display(step)

        # TODO: this should also be done with the ProgressMeter
        print(' * Acc@1 {top1.avg:.3f} Acc@5 {top5.avg:.3f}'.format(top1=top1, top5=top5))

    return losses.avg, top1.avg, top5.avg
def train(train_loader:DataLoader, model:SegnetConvLSTM, criterion, optimizer, epoch, log_every=1):
    """
    Do a training step, iterating over all batched samples
    as returned by the DataLoader passed as argument.
    Various measurements are taken and returned, such as
    accuracy, loss, precision, recall, f1 and batch time.
    """
    batch_time = AverageMeter('BatchTime', ':6.3f')
    data_time = AverageMeter('Data', ':6.3f')
    losses = AverageMeter('Loss', ':.4e')
    acc = AverageMeter('Acc', ':6.4f')
    f1 = AverageMeter('F1', ':6.4f')
    prec = AverageMeter('Prec', ':6.4f')
    rec = AverageMeter('Recall', ':6.4f')
    progress = ProgressMeter(
        len(train_loader),
        [batch_time, data_time, losses, acc, f1, prec, rec],
        prefix="Epoch: [{}]".format(epoch))

    # switch to train mode
    model.train()

    tic = time.time()
    for step, (frame_sequence, targets) in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - tic)

        # move data to gpu (or cpu if device is unavailable)
        frame_sequence = [t.to(device) for t in frame_sequence]
        # squeeze target channels to compute loss
        targets = targets.long().to(device).squeeze(1)

        # compute output; loss executes Sigmoid inside (efficiently)
        output = model(frame_sequence)
        loss = criterion(output, targets)

        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # detach output to compute metrics without storing computational graph
        output = output.detach()

        # record loss, dividing by sample size
        losses.update(loss.item(), targets.size(0))

        targets = targets.float()
        acc.update(pixel_accuracy(output, targets), targets.size(0))
        f, (p, r) = f1_score(output, targets)
        f1.update(f)
        prec.update(p)
        rec.update(r)

        # measure elapsed time
        batch_time.update(time.time() - tic)
        tic = time.time()

        if step % log_every == 0:
            print("Output min", output.min().item(),
                  "Output (softmax-ed) sum:", (output > 0.).float().sum().item(),
                  "Output max:", torch.max(output).item())
            print("Targets sum:", targets.sum())
            print("Base acc:{} - base prec: {}- base recall: {}- base f1: {}".
                  format(pixel_accuracy(output, targets), p, r, f))
            progress.display(step)

    return losses.avg, acc.avg, f1.avg
def train(train_loader, model, criterion, optimizer, epoch, args, writer):
    """Train a linear classifier on top of a frozen pre-trained backbone for
    one epoch, logging loss/accuracy to ``writer`` every ``args.print_freq``
    iterations.
    """
    batch_time = AverageMeter('Time', ':6.3f')
    data_time = AverageMeter('Data', ':6.3f')
    losses = AverageMeter('Loss', ':.4e')
    top1 = AverageMeter('Acc@1', ':6.2f')
    top5 = AverageMeter('Acc@5', ':6.2f')
    progress = ProgressMeter(len(train_loader),
                             [batch_time, data_time, losses, top1, top5],
                             prefix="Epoch: [{}]".format(epoch))

    """
    Switch to eval mode:
    Under the protocol of linear classification on frozen features/models,
    it is not legitimate to change any part of the pre-trained model.
    BatchNorm in train mode may revise running mean/std (even if it receives
    no gradient), which are part of the model parameters too.
    """
    model.eval()

    tic = time.time()
    for step, (video, audio, target) in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - tic)

        if args.gpu is not None:
            video = video.cuda(args.gpu, non_blocking=True)
            target = target.cuda(args.gpu, non_blocking=True)

        # compute output
        output = model(video)
        loss = criterion(output, target)

        # measure accuracy and record loss
        acc1, acc5 = accuracy(output, target, topk=(1, 5))
        batch = video.size(0)
        losses.update(loss.item(), batch)
        top1.update(acc1[0], batch)
        top5.update(acc5[0], batch)

        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - tic)
        tic = time.time()

        if step % args.print_freq == 0:
            progress.display(step)
            if writer is not None:
                total_iter = step + epoch * len(train_loader)
                writer.add_scalar('lincls_train/loss', loss, total_iter)
                writer.add_scalar('lincls_train/acc1', acc1, total_iter)
                writer.add_scalar('lincls_train/acc5', acc5, total_iter)
                writer.add_scalar('lincls_train_avg/lr', optimizer.param_groups[0]['lr'], total_iter)
                writer.add_scalar('lincls_train_avg/loss', losses.avg, total_iter)
                writer.add_scalar('lincls_train_avg/acc1', top1.avg, total_iter)
                writer.add_scalar('lincls_train_avg/acc5', top5.avg, total_iter)