def validate(model, loss_func, val_loader):
    losses = AverageMeter('Loss', ':4.4f')
    top1 = AverageMeter('Acc@1', ':4.2f')
    top5 = AverageMeter('Acc@5', ':4.2f')
    progress = ProgressMeter(len(val_loader), [losses, top1, top5],
                             prefix='Validation: ')

    # switch to evaluate mode
    model.eval()
    with torch.no_grad():
        for i, data in enumerate(val_loader, 1):
            imgs = data['image'].to(device)
            label = data['label'].to(device)

            out = model(imgs)
            loss = loss_func(out, label)

            # measure accuracy and record loss, weighted by batch size
            acc1, acc5 = accuracy(out, label, topk=(1, 5))
            losses.update(loss.item(), imgs.size(0))
            top1.update(acc1[0], imgs.size(0))
            top5.update(acc5[0], imgs.size(0))

            progress.display(i)
    return losses.avg
def train(model, optimizer, loss_func, train_loader, epoch):
    losses = AverageMeter('Loss', ':4.4f')
    top1 = AverageMeter('Acc@1', ':4.2f')
    top5 = AverageMeter('Acc@5', ':4.2f')
    progress = ProgressMeter(len(train_loader), [losses, top1, top5],
                             prefix="Epoch: [{}]".format(epoch + 1))

    # switch to train mode
    model.train()
    for i, data in enumerate(train_loader, 1):
        imgs = data['image'].to(device)
        label = data['label'].to(device)

        out = model(imgs)
        loss = loss_func(out, label)

        # measure accuracy and record loss, weighted by batch size
        acc1, acc5 = accuracy(out, label, topk=(1, 5))
        losses.update(loss.item(), imgs.size(0))
        top1.update(acc1[0], imgs.size(0))
        top5.update(acc5[0], imgs.size(0))

        # compute gradients and take an optimizer step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if i % PRINT_FREQ == 0:
            progress.display(i)

    # TIME and PRINT_FREQ are module-level globals: TIME is the timestamp
    # taken when training started, PRINT_FREQ is the logging interval
    timer = time() - TIME
    print("Total time elapsed (H:m:s):", timedelta(seconds=timer))
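# All of the loops in this file rely on AverageMeter, ProgressMeter and an
# accuracy helper that are not defined here. Below is a minimal sketch,
# modeled on the utilities in the official PyTorch ImageNet example
# (pytorch/examples/imagenet/main.py); the versions actually used may differ
# in detail. Since some loops log the string returned by display(), this
# sketch both prints the message and returns it.

import torch

class AverageMeter:
    """Tracks the latest value and a running (optionally batch-weighted) average."""
    def __init__(self, name, fmt=':f'):
        self.name = name
        self.fmt = fmt
        self.reset()

    def reset(self):
        self.val = self.avg = self.sum = self.count = 0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count

    def __str__(self):
        return ('{name} {val' + self.fmt + '} ({avg' + self.fmt + '})'
                ).format(**self.__dict__)

class ProgressMeter:
    """Formats a one-line summary of several meters for a given batch index."""
    def __init__(self, num_batches, meters, prefix=""):
        fmt = '{:' + str(len(str(num_batches))) + 'd}'
        self.batch_fmtstr = '[' + fmt + '/' + fmt.format(num_batches) + ']'
        self.meters = meters
        self.prefix = prefix

    def display(self, batch):
        entries = [self.prefix + self.batch_fmtstr.format(batch)]
        entries += [str(meter) for meter in self.meters]
        message = '\t'.join(entries)
        print(message)
        return message

def accuracy(output, target, topk=(1,)):
    """Computes top-k accuracy (in percent) for each k in `topk`."""
    with torch.no_grad():
        maxk = max(topk)
        batch_size = target.size(0)
        _, pred = output.topk(maxk, 1, True, True)
        pred = pred.t()
        correct = pred.eq(target.view(1, -1).expand_as(pred))
        res = []
        for k in topk:
            correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True)
            res.append(correct_k.mul_(100.0 / batch_size))
        return res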
def validate(val_loader, model, criterion, rank, args, logger, cfg):
    batch_time = AverageMeter('Time', ':6.3f')
    losses = AverageMeter('Loss', ':.4e')
    top1 = AverageMeter('Acc@1', ':6.2f')
    top5 = AverageMeter('Acc@5', ':6.2f')
    progress = ProgressMeter(len(val_loader), [batch_time, losses, top1, top5],
                             prefix='Test: ')

    # switch to evaluate mode
    model.eval()
    with torch.no_grad():
        end = time.time()
        for it, valdata in enumerate(val_loader):
            data, label, _, meta = valdata
            # inputs may be a list of tensors (e.g. one per pathway)
            if isinstance(data, (list,)):
                for i in range(len(data)):
                    data[i] = data[i].cuda(rank)
            else:
                data = data.cuda(rank)
            label = label.cuda(rank)
            for key, val in meta.items():
                if isinstance(val, (list,)):
                    for i in range(len(val)):
                        val[i] = val[i].cuda(rank)
                else:
                    meta[key] = val.cuda(rank)

            # compute output
            if cfg.DETECTION.ENABLE:
                output = model(data, meta["boxes"])
            else:
                output = model(data)
            loss = criterion(output, label)

            # measure accuracy and record loss, averaged across processes
            acc1, acc5 = topks_correct(output, label, (1, 5))
            # torch.distributed.barrier()
            reduced_loss = reduce_mean(loss, args.nprocs)
            reduced_acc1 = reduce_mean(acc1, args.nprocs)
            reduced_acc5 = reduce_mean(acc5, args.nprocs)
            batch_size = data[0].size(0) if isinstance(data, (list,)) else data.size(0)
            losses.update(reduced_loss.item(), batch_size)
            top1.update(reduced_acc1.item(), batch_size)
            top5.update(reduced_acc5.item(), batch_size)

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            if it % args.print_freq == 0:
                val_message = progress.display(it)
                logger.info('Val Phase: ' + val_message)

    # TODO: this should also be done with the ProgressMeter
    logger.info(' * Val Acc@1 {top1.avg:.3f} Val Acc@5 {top5.avg:.3f}'.format(
        top1=top1, top5=top5))
    return top1.avg
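# reduce_mean and topks_correct are assumed helpers. The sketches below match
# how they are called in this file: reduce_mean averages a tensor across all
# distributed processes, and topks_correct is used interchangeably with the
# usual accuracy helper (returning top-k accuracies in percent, not raw
# counts of correct predictions).

import torch
import torch.distributed as dist

def reduce_mean(tensor, nprocs):
    # sum the tensor over all processes, then divide by the process count
    rt = tensor.clone()
    dist.all_reduce(rt, op=dist.ReduceOp.SUM)
    rt /= nprocs
    return rt

def topks_correct(output, target, ks=(1, 5)):
    # top-k accuracies in percent, one 0-dim tensor per k; equivalent to
    # accuracy(output, target, topk=ks) with each entry unwrapped
    return [a[0] for a in accuracy(output, target, topk=ks)]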
def train(train_loader, model, criterion, optimizer, epoch, cfgs):
    logger = logging.getLogger('{}.train'.format(cfgs['log_name']))
    batch_time = AverageMeter('Time', ':6.3f')
    data_time = AverageMeter('Data', ':6.3f')
    losses = AverageMeter('Loss', ':.4e')
    top1 = AverageMeter('Acc@1', ':6.2f')
    top5 = AverageMeter('Acc@5', ':6.2f')
    progress = ProgressMeter(len(train_loader),
                             [batch_time, data_time, losses, top1, top5],
                             prefix="Epoch: [{}]".format(epoch))

    # switch to train mode
    model.train()

    end = time.time()
    for i, (images, target) in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - end)

        images = images.cuda(non_blocking=True)
        target = target.cuda(non_blocking=True)

        # compute output
        output = model(images)
        loss = criterion(output, target)

        # measure accuracy and record loss
        acc1, acc5 = accuracy(output, target, topk=(1, 5))
        losses.update(loss.item(), images.size(0))
        top1.update(acc1[0], images.size(0))
        top5.update(acc5[0], images.size(0))

        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % cfgs['print_freq'] == 0:
            logger.info(progress.display(i))
def train(train_source_iter, train_target_iter, classifier, domain_adv, optimizer,
          lr_schedule, epoch, device, args):
    batch_time = AverageMeter('Time', ':5.2f')
    data_time = AverageMeter('Data', ':5.2f')
    losses = AverageMeter('Loss', ':6.2f')
    cls_accs = AverageMeter('Cls Acc', ':3.1f')
    domain_accs = AverageMeter('Domain Acc', ':3.1f')
    progress = ProgressMeter(
        args.iter_per_epoch,
        [batch_time, data_time, losses, cls_accs, domain_accs],
        prefix='Epoch: [{}]'.format(epoch))

    # switch to train mode
    classifier.train()

    end = time.time()
    for i in range(args.iter_per_epoch):
        x_s, label_s = next(train_source_iter)
        x_t, _ = next(train_target_iter)
        x_s, x_t = x_s.to(device), x_t.to(device)
        label_s = label_s.to(device)

        data_time.update(time.time() - end)

        # forward pass; computing the losses is the key step
        x = torch.cat((x_s, x_t), dim=0)
        y, f = classifier(x)
        y_s, y_t = y.chunk(2, dim=0)
        f_s, f_t = f.chunk(2, dim=0)

        cls_loss = F.cross_entropy(y_s, label_s)
        # the adversarial loss is the core of the method
        adv_loss = domain_adv(f_s, f_t)
        loss_total = cls_loss + args.trade_off * adv_loss

        # update the metrics
        cls_acc = accuracy(y_s, label_s)[0]
        domain_acc = domain_adv.domain_discriminator_accuracy
        losses.update(loss_total.item(), x_s.size(0))
        cls_accs.update(cls_acc.item(), x_s.size(0))
        domain_accs.update(domain_acc.item(), x_s.size(0))

        # backward pass
        optimizer.zero_grad()
        loss_total.backward()
        optimizer.step()

        # update the learning rate
        lr_schedule.step()

        batch_time.update(time.time() - end)
        end = time.time()

        if i % args.print_freq == 0:
            progress.display(i)
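# domain_adv above is an adversarial domain-confusion loss that exposes a
# domain_discriminator_accuracy attribute. Its implementation is not shown
# in this file; the sketch below is one common DANN-style form (a
# gradient-reversal layer feeding a binary domain discriminator), offered as
# an assumption consistent with how it is called, not as the original module.

import torch
import torch.nn as nn
import torch.nn.functional as F

class GradientReverse(torch.autograd.Function):
    @staticmethod
    def forward(ctx, x, coeff):
        ctx.coeff = coeff
        return x.view_as(x)

    @staticmethod
    def backward(ctx, grad_output):
        # reverse (and scale) the gradient on the way back
        return -ctx.coeff * grad_output, None

class DomainAdversarialLoss(nn.Module):
    def __init__(self, domain_discriminator):
        super().__init__()
        # assumed: maps features to P(domain = source), ending in a sigmoid
        self.discriminator = domain_discriminator
        self.domain_discriminator_accuracy = None

    def forward(self, f_s, f_t):
        f = GradientReverse.apply(torch.cat((f_s, f_t), dim=0), 1.0)
        d = self.discriminator(f)
        d_s, d_t = d.chunk(2, dim=0)
        # source features are labeled 1, target features 0
        correct = torch.cat(((d_s >= 0.5).float(), (d_t < 0.5).float()))
        self.domain_discriminator_accuracy = 100.0 * correct.mean()
        return 0.5 * (F.binary_cross_entropy(d_s, torch.ones_like(d_s)) +
                      F.binary_cross_entropy(d_t, torch.zeros_like(d_t)))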
def validate(val_loader, model, criterion, cfgs):
    logger = logging.getLogger('{}.validate'.format(cfgs['log_name']))
    batch_time = AverageMeter('Time', ':6.3f')
    losses = AverageMeter('Loss', ':.4e')
    top1 = AverageMeter('Acc@1', ':6.2f')
    top5 = AverageMeter('Acc@5', ':6.2f')
    progress = ProgressMeter(len(val_loader), [batch_time, losses, top1, top5],
                             prefix='Test: ')

    # switch to evaluate mode
    model.eval()
    with torch.no_grad():
        end = time.time()
        for i, (images, target) in enumerate(val_loader):
            images = images.cuda(non_blocking=True)
            target = target.cuda(non_blocking=True)

            # compute output
            output = model(images)
            loss = criterion(output, target)

            # measure accuracy and record loss
            acc1, acc5 = accuracy(output, target, topk=(1, 5))
            losses.update(loss.item(), images.size(0))
            top1.update(acc1[0], images.size(0))
            top5.update(acc5[0], images.size(0))

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            if i % cfgs['print_freq'] == 0:
                logger.info(progress.display(i))

        # TODO: this should also be done with the ProgressMeter
        logger.info(' * Acc@1 {top1.avg:.3f} Acc@5 {top5.avg:.3f}'.format(
            top1=top1, top5=top5))
    return top1.avg
def validate(data_loader, G, F1, F2, args):
    batch_time = AverageMeter('Time', ':6.3f')
    top1_1 = AverageMeter('Acc_1', ':6.2f')
    top1_2 = AverageMeter('Acc_2', ':6.2f')
    progress = ProgressMeter(len(data_loader), [batch_time, top1_1, top1_2],
                             prefix='Test: ')

    # switch to evaluate mode
    G.eval()
    F1.eval()
    F2.eval()

    if args.per_class_eval:
        classes = data_loader.dataset.classes
        confmat = ConfusionMatrix(len(classes))
    else:
        confmat = None

    with torch.no_grad():
        end = time.time()
        for i, (images, target) in enumerate(data_loader):
            images = images.to(device)
            target = target.to(device)

            g = G(images)
            y1, y2 = F1(g), F2(g)

            acc1, = accuracy(y1, target)
            acc2, = accuracy(y2, target)
            if confmat:
                confmat.update(target, y1.argmax(1))
            top1_1.update(acc1.item(), images.size(0))
            top1_2.update(acc2.item(), images.size(0))

            batch_time.update(time.time() - end)
            end = time.time()

            if i % args.print_freq == 0:
                progress.display(i)

        print(' * Acc1 {top1_1.avg:.3f} Acc2 {top1_2.avg:.3f}'.format(
            top1_1=top1_1, top1_2=top1_2))
        if confmat:
            print(confmat.format(classes))

    return top1_1.avg, top1_2.avg
def validate(dataloader, target_iter, classifier, device, args):
    batch_time = AverageMeter('Time', ':6.3f')
    losses = AverageMeter('Loss', ':.4e')
    top1 = AverageMeter('Acc@1', ':6.2f')
    top5 = AverageMeter('Acc@5', ':6.2f')
    progress = ProgressMeter(len(dataloader), [batch_time, losses, top1, top5],
                             prefix='Test: ')

    # switch to evaluate mode
    classifier.eval()

    if args.per_class_eval:
        classes = dataloader.dataset.classes
        confmat = ConfusionMatrix(len(classes))
    else:
        confmat = None

    with torch.no_grad():
        end = time.time()
        for i, (images, target) in enumerate(dataloader):
            images, target = images.to(device), target.to(device)

            # compute output
            output, _ = classifier(images)
            loss = F.cross_entropy(output, target)

            # measure accuracy and record loss
            acc1, acc5 = accuracy(output, target, topk=(1, 5))
            if confmat:
                confmat.update(target, output.argmax(1))
            losses.update(loss.item(), images.size(0))
            top1.update(acc1.item(), images.size(0))
            top5.update(acc5.item(), images.size(0))

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            if i % args.print_freq == 0:
                progress.display(i)

        print(' * Acc@1 {top1.avg:.3f} Acc@5 {top5.avg:.3f}'.format(
            top1=top1, top5=top5))
        if confmat:
            print(confmat.format(classes))

    return top1.avg
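# ConfusionMatrix is assumed; below is a minimal sketch consistent with the
# two call sites above (update(target, pred) and format(classes)), modeled on
# the utility in torchvision's reference scripts.

import torch

class ConfusionMatrix:
    def __init__(self, num_classes):
        self.num_classes = num_classes
        self.mat = torch.zeros((num_classes, num_classes), dtype=torch.int64)

    def update(self, target, pred):
        # rows index the ground-truth class, columns the predicted class
        with torch.no_grad():
            idx = target * self.num_classes + pred
            counts = torch.bincount(idx, minlength=self.num_classes ** 2)
            self.mat += counts.reshape(self.num_classes, self.num_classes).cpu()

    def format(self, classes):
        # report per-class accuracy (recall), one line per class
        acc = self.mat.diag().float() / self.mat.sum(dim=1).clamp(min=1).float()
        return '\n'.join('{}: {:.1f}%'.format(name, a * 100)
                         for name, a in zip(classes, acc))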
def train(train_loader, model, criterion, optimizer, epoch, rank, args, logger, cfg):
    batch_time = AverageMeter('Time', ':6.3f')
    data_time = AverageMeter('Data', ':6.3f')
    losses = AverageMeter('Loss', ':.4e')
    top1 = AverageMeter('Acc@1', ':6.2f')
    top5 = AverageMeter('Acc@5', ':6.2f')
    progress = ProgressMeter(len(train_loader),
                             [batch_time, data_time, losses, top1, top5],
                             prefix="Epoch: [{}]".format(epoch))

    # switch to train mode
    model.train()

    end = time.time()
    for it, traindata in enumerate(train_loader):
        data, label, _, meta = traindata
        if isinstance(data, (list,)):
            for i in range(len(data)):
                data[i] = data[i].cuda(rank)
        else:
            data = data.cuda(rank)
        label = label.cuda(rank)
        for key, val in meta.items():
            if isinstance(val, (list,)):
                for i in range(len(val)):
                    val[i] = val[i].cuda(non_blocking=True)
            else:
                meta[key] = val.cuda(non_blocking=True)

        # measure data loading time
        data_time.update(time.time() - end)

        # compute output
        if cfg.DETECTION.ENABLE:
            output = model(data, meta["boxes"])
        else:
            output = model(data)
        loss = criterion(output, label)
        adjust_lr(optimizer, epoch, cfg.SOLVER.BASE_LR)

        # measure accuracy and record loss, averaged across processes
        acc1, acc5 = topks_correct(output, label, (1, 5))
        torch.distributed.barrier()
        reduced_loss = reduce_mean(loss, args.nprocs)
        reduced_acc1 = reduce_mean(acc1, args.nprocs)
        reduced_acc5 = reduce_mean(acc5, args.nprocs)
        losses.update(reduced_loss.item(), data[0].size(0))
        top1.update(reduced_acc1.item(), data[0].size(0))
        top5.update(reduced_acc5.item(), data[0].size(0))

        # compute gradient and do SGD step
        optimizer.zero_grad()
        # with amp.scale_loss(loss, optimizer) as scaled_loss:
        #     scaled_loss.backward()
        loss.backward()
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if it % args.print_freq == 0:
            train_message = progress.display(it)
            logger.info('Train Phase: ' + train_message)

    return losses.avg
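# adjust_lr is assumed to implement an epoch-keyed learning-rate schedule
# driven by cfg.SOLVER.BASE_LR; its implementation is not shown. The step
# decay below is a hypothetical placeholder (the decay interval and factor
# are assumptions), with an optional cfg argument so that both call
# signatures used in this file are accepted.

def adjust_lr(optimizer, epoch, base_lr, cfg=None, decay_epochs=30, gamma=0.1):
    # decay the learning rate by `gamma` every `decay_epochs` epochs
    lr = base_lr * (gamma ** (epoch // decay_epochs))
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr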
def train(train_source_iter, train_target_iter, G, F1, F2, optimizer_g, optimizer_f,
          epoch, args):
    batch_time = AverageMeter('Time', ':3.1f')
    data_time = AverageMeter('Data', ':3.1f')
    losses = AverageMeter('Loss', ':3.2f')
    trans_losses = AverageMeter('Trans Loss', ':3.2f')
    cls_accs = AverageMeter('Cls Acc', ':3.1f')
    tgt_accs = AverageMeter('Tgt Acc', ':3.1f')
    progress = ProgressMeter(args.iters_per_epoch,
                             [batch_time, data_time, losses, trans_losses,
                              cls_accs, tgt_accs],
                             prefix="Epoch: [{}]".format(epoch))

    # switch to train mode
    G.train()
    F1.train()
    F2.train()

    end = time.time()
    for i in range(args.iters_per_epoch):
        x_s, labels_s = next(train_source_iter)
        x_t, labels_t = next(train_target_iter)
        x_s = x_s.to(device)
        x_t = x_t.to(device)
        labels_s = labels_s.to(device)
        labels_t = labels_t.to(device)
        x = torch.cat((x_s, x_t), dim=0)
        assert x.requires_grad is False

        data_time.update(time.time() - end)

        # Step A: train all networks to minimize the loss on the source domain
        # (both classifiers and the feature extractor are trained on source data)
        optimizer_g.zero_grad()
        optimizer_f.zero_grad()

        g = G(x)
        y_1 = F1(g)
        y_2 = F2(g)
        y1_s, y1_t = y_1.chunk(2, dim=0)
        y2_s, y2_t = y_2.chunk(2, dim=0)
        y1_t, y2_t = F.softmax(y1_t, dim=1), F.softmax(y2_t, dim=1)

        # the key part: source cross-entropy plus target entropy regularization
        loss = F.cross_entropy(y1_s, labels_s) + F.cross_entropy(y2_s, labels_s) + \
            0.01 * (entropy(y1_t) + entropy(y2_t))
        loss.backward()
        optimizer_g.step()
        optimizer_f.step()

        # Step B: train the classifiers to maximize their discrepancy
        optimizer_g.zero_grad()
        optimizer_f.zero_grad()

        g = G(x)
        y_1 = F1(g)
        y_2 = F2(g)
        y1_s, y1_t = y_1.chunk(2, dim=0)
        y2_s, y2_t = y_2.chunk(2, dim=0)
        y1_t, y2_t = F.softmax(y1_t, dim=1), F.softmax(y2_t, dim=1)

        # the key part: subtract the discrepancy so the classifiers diverge on target data
        loss = F.cross_entropy(y1_s, labels_s) + F.cross_entropy(y2_s, labels_s) + \
            0.01 * (entropy(y1_t) + entropy(y2_t)) - \
            classifier_discrepancy(y1_t, y2_t) * args.trade_off
        loss.backward()
        optimizer_f.step()

        # Step C: train the generator to minimize the discrepancy
        for k in range(args.num_k):  # when training G, iterate several times
            optimizer_g.zero_grad()
            g = G(x)
            y_1 = F1(g)
            y_2 = F2(g)
            y1_s, y1_t = y_1.chunk(2, dim=0)
            y2_s, y2_t = y_2.chunk(2, dim=0)
            y1_t, y2_t = F.softmax(y1_t, dim=1), F.softmax(y2_t, dim=1)
            mcd_loss = classifier_discrepancy(y1_t, y2_t) * args.trade_off
            mcd_loss.backward()
            optimizer_g.step()

        # update the metrics
        cls_acc = accuracy(y1_s, labels_s)[0]
        tgt_acc = accuracy(y1_t, labels_t)[0]
        losses.update(loss.item(), x_s.size(0))
        cls_accs.update(cls_acc.item(), x_s.size(0))
        tgt_accs.update(tgt_acc.item(), x_t.size(0))
        trans_losses.update(mcd_loss.item(), x_s.size(0))

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % args.print_freq == 0:
            progress.display(i)
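# entropy and classifier_discrepancy are assumed helpers. The sketches below
# follow the Maximum Classifier Discrepancy (MCD) formulation that this
# training loop implements: entropy regularizes the target predictions, and
# the discrepancy is the mean L1 distance between the two classifiers'
# softmax outputs.

import torch

def entropy(predictions, eps=1e-5):
    # mean Shannon entropy over a batch of softmax predictions
    h = -predictions * torch.log(predictions + eps)
    return h.sum(dim=1).mean()

def classifier_discrepancy(p1, p2):
    # mean absolute difference between the two classifiers' softmax outputs
    return torch.mean(torch.abs(p1 - p2))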
def train(train_loader, model, criterion, optimizer, epoch, rank, args, logger, cfg):
    batch_time = AverageMeter('Time', ':6.3f')
    data_time = AverageMeter('Data', ':6.3f')
    losses = AverageMeter('Loss', ':.4e')
    top1 = AverageMeter('Acc@1', ':6.2f')
    top5 = AverageMeter('Acc@5', ':6.2f')
    progress = ProgressMeter(len(train_loader),
                             [batch_time, data_time, losses, top1, top5],
                             prefix="Epoch: [{}]".format(epoch))

    # switch to train mode
    model.train()

    end = time.time()
    for it, traindata in enumerate(train_loader):
        data, label = traindata
        if isinstance(data, (list,)):
            for i in range(len(data)):
                data[i] = data[i].cuda(rank)
        else:
            data = data.cuda(rank)
        # label = torch.nn.functional.one_hot(label, cfg.MODEL.NUM_CLASSES).float()
        label = label.cuda(rank)

        # measure data loading time
        data_time.update(time.time() - end)

        # compute output
        output = model(data)
        loss = criterion(output, label)
        adjust_lr(optimizer, epoch, cfg.SOLVER.BASE_LR, cfg)

        # measure accuracy and record loss (single-process variant;
        # the distributed reduction is left commented out)
        acc1 = accuracytop1(output, label, (1,))
        acc5 = accuracytop5(output, label, (5,))
        # torch.distributed.barrier()
        # reduced_loss = reduce_mean(loss, args.nprocs)
        # reduced_acc1 = reduce_mean(acc1, args.nprocs)
        # reduced_acc5 = reduce_mean(acc5, args.nprocs)
        losses.update(loss.item(), data[0].size(0))
        top1.update(acc1.item(), data[0].size(0))
        top5.update(acc5.item(), data[0].size(0))

        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if it % args.print_freq == 0:
            train_message = progress.display(it)
            logger.info('Train Phase: ' + train_message)

    return losses.avg
def train_model(model, dataloaders, device, criterion, optimizer, logger, print_freq,
                num_epochs, is_inception=False):
    """A simple train-and-evaluate script modified from
    https://pytorch.org/tutorials/beginner/finetuning_torchvision_models_tutorial.html.

    Args:
        model (nn.Module): model to be trained.
        dataloaders (dict): a dict in the format {'train': DataLoader, 'val': DataLoader}.
        device (Any): device.
        criterion (Any): loss function.
        optimizer (Any): optimizer.
        logger (Any): a logging.Logger used to print and record training information.
        print_freq (int): logging frequency, e.g. 10 means the logger prints
            after every 10 batches are trained or evaluated.
        num_epochs (int): number of training epochs.
        is_inception (bool): see
            https://discuss.pytorch.org/t/how-to-optimize-inception-model-with-auxiliary-classifiers/7958
    """
    # Send the model to GPU
    model = model.to(device)

    since = time.time()
    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    for epoch in range(num_epochs):
        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            # statistics
            batch_time = AverageMeter('Time', ':6.3f')
            data_time = AverageMeter('Data', ':6.3f')
            losses = AverageMeter('Loss', ':.4e')
            top1 = AverageMeter('Acc@1', ':6.5f')
            top5 = AverageMeter('Acc@5', ':6.5f')
            progress = ProgressMeter(
                len(dataloaders[phase]),
                [batch_time, data_time, losses, top1, top5],
                prefix="Epoch: [{}]".format(epoch))

            if phase == 'train':
                model.train()  # Set model to training mode
            else:
                model.eval()   # Set model to evaluate mode

            # Iterate over data.
            end = time.time()
            for i, (inputs, labels) in enumerate(dataloaders[phase]):
                # measure data loading time
                data_time.update(time.time() - end)

                inputs = inputs.to(device)
                labels = labels.to(device)

                # zero the parameter gradients
                optimizer.zero_grad()

                # forward; track history only in train
                with torch.set_grad_enabled(phase == 'train'):
                    # Get model outputs and calculate loss. Special case for
                    # Inception: in training it has an auxiliary output, and the
                    # loss sums the final output's loss and 0.4x the auxiliary
                    # output's loss; in testing only the final output is used.
                    if is_inception and phase == 'train':
                        # From https://discuss.pytorch.org/t/how-to-optimize-inception-model-with-auxiliary-classifiers/7958
                        outputs, aux_outputs = model(inputs)
                        loss1 = criterion(outputs, labels)
                        loss2 = criterion(aux_outputs, labels)
                        loss = loss1 + 0.4 * loss2
                    else:
                        outputs = model(inputs)
                        loss = criterion(outputs, labels)

                    # backward + optimize only if in training phase
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()

                # measure accuracy and record loss
                acc1 = accuracy(outputs, labels, top_k=1)
                acc5 = accuracy(outputs, labels, top_k=5)
                losses.update(loss.item(), inputs.size(0))
                top1.update(acc1, inputs.size(0))
                top5.update(acc5, inputs.size(0))

                # measure elapsed time
                batch_time.update(time.time() - end)
                end = time.time()

                if i % print_freq == 0:
                    logger.info(progress.display(i))

            # keep a copy of the weights with the best validation accuracy
            if phase == 'val' and top1.avg > best_acc:
                best_acc = top1.avg
                best_model_wts = copy.deepcopy(model.state_dict())

    time_elapsed = time.time() - since
    logger.info('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))

    # load best model weights
    model.load_state_dict(best_model_wts)
    return model
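# A hypothetical usage sketch for train_model. The dataset paths, transforms,
# and hyperparameters below are illustrative assumptions, not part of the
# original script.

if __name__ == '__main__':
    import logging
    import torch
    import torch.nn as nn
    from torchvision import datasets, models, transforms

    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger('train_model')

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    tfm = transforms.Compose([transforms.Resize(256),
                              transforms.CenterCrop(224),
                              transforms.ToTensor()])
    dataloaders = {
        phase: torch.utils.data.DataLoader(
            datasets.ImageFolder('data/' + phase, tfm),  # assumed layout: data/train, data/val
            batch_size=32, shuffle=(phase == 'train'))
        for phase in ('train', 'val')
    }

    model = models.resnet18(pretrained=True)
    num_classes = len(dataloaders['train'].dataset.classes)
    model.fc = nn.Linear(model.fc.in_features, num_classes)
    optimizer = torch.optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

    model = train_model(model, dataloaders, device, nn.CrossEntropyLoss(),
                        optimizer, logger, print_freq=10, num_epochs=5)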