def eval_epoch(val_loader, model, epoch, cfg):
    '''Evaluate the model on the val set.

    For every batch the cross-entropy loss and the first entry returned by
    `topks_correct(..., (1, ))` are accumulated; the master process logs the
    running mean loss and running accuracy after each batch.

    Args:
        val_loader (loader): data loader to provide validation data.
        model (model): model to evaluate the performance.
        epoch (int): number of the current epoch of training. Not used in
            the body; kept for interface compatibility.
        cfg (CfgNode): configs. Details can be found in config/defaults.py
    '''
    # Local import: the rest of this function only relies on `F`, so do not
    # assume a module-level `import torch` exists.
    import torch

    if is_master_proc():
        log.info('Testing..')

    model.eval()
    test_loss = 0.0
    correct = total = 0.0

    # Evaluation never calls backward(); disabling autograd avoids building
    # (and keeping alive) a graph for every forward pass.
    with torch.no_grad():
        for batch_idx, (inputs, labels) in enumerate(val_loader):
            inputs, labels = inputs.cuda(non_blocking=True), labels.cuda()
            outputs = model(inputs)
            loss = F.cross_entropy(outputs, labels, reduction='mean')

            # Gather all predictions across all devices.
            # NOTE(review): presumably `all_reduce` averages the loss over
            # processes -- confirm against its definition.
            if cfg.NUM_GPUS > 1:
                loss = all_reduce([loss])[0]
                outputs, labels = all_gather([outputs, labels])

            # Accuracy.
            batch_correct = topks_correct(outputs, labels, (1, ))[0]
            correct += batch_correct.item()
            total += labels.size(0)

            if is_master_proc():
                test_loss += loss.item()
                test_acc = correct / total
                log.info('Loss: %.3f | Acc: %.3f' %
                         (test_loss / (batch_idx + 1), test_acc))
def test(model, device, test_loader):
    """Run `model` over `test_loader` and print accuracy from rank 0.

    Args:
        model: network to evaluate; it is switched to eval mode.
        device: unused in the body (tensors are moved with `.cuda()`); kept
            for interface compatibility.
        test_loader: iterable of `(data, target)` batches exposing
            `__len__` and a `.dataset` attribute.

    Note:
        Reads the module-level `args.gpus` and calls `dist.get_rank()`, so
        both must be available when this function runs.
    """
    model.eval()
    correct = 0
    # Running number of samples scored so far. The previous progress line
    # derived this from `batch_id * len(pred)`, which reported zero samples
    # after the first batch and was wrong whenever the last batch was short.
    seen = 0
    num_batches = len(test_loader)
    with torch.no_grad():
        for batch_id, (data, target) in enumerate(test_loader):
            data, target = data.cuda(), target.cuda()
            output = model(data)
            # get the index of the max log-probability
            pred = output.argmax(dim=1, keepdim=True)
            if args.gpus > 1:
                pred, target = du.all_gather([pred, target])
            pred = pred.cpu()
            target = target.cpu()
            if dist.get_rank() == 0:
                correct += pred.eq(target.view_as(pred)).sum().item()
                seen += len(pred)
                print("Test results: {}/{} {:.0f}% correct/all : {}/{}".format(
                    seen, len(test_loader.dataset),
                    100.0 * (batch_id + 1) / num_batches, correct, seen))
    if dist.get_rank() == 0:
        print('\nTest set: Accuracy: {}/{} ({:.0f}%)\n'.format(
            correct, len(test_loader.dataset),
            100. * correct / len(test_loader.dataset)))
def perform_test(test_loader, model, test_meter, cfg):
    """Run inference over `test_loader` and feed the results to `test_meter`.

    Args:
        test_loader: iterable yielding `(inputs, labels, video_idx)`;
            `inputs` is either a tensor or a list of tensors.
        model: model to evaluate; it is switched to eval mode.
        test_meter: meter object providing `iter_tic`, `iter_toc`,
            `update_stats`, `log_iter_stats`, `finalize_metrics` and `reset`.
        cfg: config object; only `cfg.NUM_GPUS` is read here.
    """
    # Local import: the original body never referenced `torch` directly, so
    # do not rely on a module-level import being present.
    import torch

    model.eval()
    test_meter.iter_tic()

    # Inference only: disable autograd so no graph is built per forward pass.
    with torch.no_grad():
        for cur_step, (inputs, labels, video_idx) in enumerate(test_loader):
            # Transfer the data to the current GPU device.
            if isinstance(inputs, (list, )):
                # The list is updated in place, as before.
                for i, item in enumerate(inputs):
                    inputs[i] = item.cuda(non_blocking=True)
            else:
                inputs = inputs.cuda(non_blocking=True)
            labels = labels.cuda()
            video_idx = video_idx.cuda()

            preds = model(inputs)

            # Collect the results from every process before scoring.
            if cfg.NUM_GPUS > 1:
                preds, labels, video_idx = du.all_gather(
                    [preds, labels, video_idx])
            preds = preds.cpu()
            labels = labels.cpu()
            video_idx = video_idx.cpu()

            test_meter.iter_toc()
            test_meter.update_stats(
                preds.detach(), labels.detach(), video_idx.detach())
            test_meter.log_iter_stats(cur_step)
            test_meter.iter_tic()

    test_meter.finalize_metrics()
    test_meter.reset()
def train_epoch(train_loader, model, optimizer, epoch, cfg):
    '''Train the model for a single epoch.

    Each step sets the learning rate from `get_epoch_lr`, runs a forward and
    backward pass with cross-entropy loss, applies the optimizer, and then
    accumulates loss/accuracy statistics that the master process logs.

    Args:
        train_loader (DataLoader): training data loader.
        model (model): the video model to train.
        optimizer (optim): the optimizer to perform optimization on the
            model's parameters.
        epoch (int): current epoch of training.
        cfg (CfgNode): configs. Details can be found in config/defaults.py
    '''
    if is_master_proc():
        log.info('Epoch: %d' % epoch)

    model.train()
    steps_per_epoch = len(train_loader)
    running_loss = 0.0
    num_correct = 0.0
    num_seen = 0.0

    for step, (inputs, labels) in enumerate(train_loader):
        inputs = inputs.cuda(non_blocking=True)
        labels = labels.cuda()

        # The schedule is queried with a fractional epoch so the rate can
        # change between steps, not only between epochs.
        progress = epoch + float(step) / steps_per_epoch
        lr = get_epoch_lr(cfg, progress)
        set_lr(optimizer, lr)

        # Forward pass and loss.
        logits = model(inputs)
        loss = F.cross_entropy(logits, labels, reduction='mean')

        # Backward pass and parameter update.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # With several devices, combine the loss and collect every
        # process's predictions before computing statistics.
        if cfg.NUM_GPUS > 1:
            loss = all_reduce([loss])[0]
            logits, labels = all_gather([logits, labels])

        # Running accuracy bookkeeping.
        top1_hits = topks_correct(logits, labels, (1, ))[0]
        num_correct += top1_hits.item()
        num_seen += labels.size(0)

        # Only the master process keeps the running loss and logs.
        if not is_master_proc():
            continue
        running_loss += loss.item()
        accuracy = num_correct / num_seen
        mean_loss = running_loss / (step + 1)
        log.info('Loss: %.3f | Acc: %.3f | LR: %.3f' %
                 (mean_loss, accuracy, lr))
def eval_epoch(val_loader, model, val_meter, cur_epoch, cfg):
    """Evaluate `model` on `val_loader` and return the epoch statistics.

    Args:
        val_loader: iterable yielding `(inputs, labels, _)`; `inputs` is
            either a tensor or a list of tensors.
        model: model to evaluate; it is switched to eval mode.
        val_meter: meter object providing `iter_tic`, `iter_toc`,
            `update_predictions`, `update_stats`, `log_iter_stats`,
            `log_epoch_stats` and `reset`.
        cur_epoch (int): epoch number passed through to the meter's logging.
        cfg: config object; `cfg.DATA.MULTI_LABEL` and `cfg.NUM_GPUS` are
            read here.

    Returns:
        Whatever `val_meter.log_epoch_stats(cur_epoch)` returns.
    """
    # Local import: the original body never referenced `torch` directly, so
    # do not rely on a module-level import being present.
    import torch

    model.eval()
    val_meter.iter_tic()

    # Evaluation never calls backward(); skip autograd bookkeeping.
    with torch.no_grad():
        for cur_step, (inputs, labels, _) in enumerate(val_loader):
            # Transfer the data to the current GPU device.
            if isinstance(inputs, (list,)):
                # The list is updated in place, as before.
                for i, item in enumerate(inputs):
                    inputs[i] = item.cuda(non_blocking=True)
            else:
                inputs = inputs.cuda(non_blocking=True)
            labels = labels.cuda()

            preds = model(inputs)

            if cfg.DATA.MULTI_LABEL:
                # Raw predictions are handed to the meter, gathered from
                # every process first when running on several devices.
                if cfg.NUM_GPUS > 1:
                    preds, labels = du.all_gather([preds, labels])
                val_meter.iter_toc()
                val_meter.update_predictions(preds, labels)
            else:
                top1_err, top5_err = metrics.topk_errors(
                    preds, labels, (1, 5))
                # NOTE(review): presumably `du.all_reduce` averages across
                # processes -- confirm against its definition.
                if cfg.NUM_GPUS > 1:
                    top1_err, top5_err = du.all_reduce([top1_err, top5_err])
                # `.item()` copies the values to the host.
                top1_err, top5_err = top1_err.item(), top5_err.item()
                val_meter.iter_toc()
                # Third argument: local batch size scaled by device count.
                val_meter.update_stats(
                    top1_err, top5_err, labels.size(0) * cfg.NUM_GPUS
                )

            val_meter.log_iter_stats(cur_epoch, cur_step)
            val_meter.iter_tic()

    stats = val_meter.log_epoch_stats(cur_epoch)
    val_meter.reset()
    return stats
def perform_test(test_loader, model, test_meter, cfg, writer=None):
    """
    For classification:
    Perform multi-view testing that uniformly samples N clips from a video
    along its temporal axis. For each clip, it takes 3 crops to cover the
    spatial dimension, followed by averaging the softmax scores across all
    Nx3 views to form a video-level prediction. All video predictions are
    compared to ground-truth labels and the final testing performance is
    logged.
    For detection:
    Perform fully-convolutional testing on the full frames without crop.

    This function itself only moves each batch to the GPU, runs the forward
    pass and hands the results to `test_meter`.

    Args:
        test_loader (loader): video testing loader.
        model (model): the pretrained video model to test.
        test_meter (TestMeter): testing meters to log and ensemble the
            testing results.
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
        writer (TensorboardWriter object, optional): TensorboardWriter object
            to writer Tensorboard log.
    """
    # Enable eval mode.
    model.eval()
    test_meter.iter_tic()

    # Testing never calls backward(); disable autograd so no graph is built
    # for the forward passes.
    with torch.no_grad():
        for cur_iter, (inputs, labels, video_idx, meta) in enumerate(
                test_loader):
            # Transfer the data to the current GPU device.
            if isinstance(inputs, (list, )):
                for i, item in enumerate(inputs):
                    inputs[i] = item.cuda(non_blocking=True)
            else:
                inputs = inputs.cuda(non_blocking=True)

            labels = labels.cuda()
            video_idx = video_idx.cuda()
            # `meta` values may be tensors or lists of tensors; both are
            # updated in place.
            for key, val in meta.items():
                if isinstance(val, (list, )):
                    for i, item in enumerate(val):
                        val[i] = item.cuda(non_blocking=True)
                else:
                    meta[key] = val.cuda(non_blocking=True)

            if cfg.DETECTION.ENABLE:
                # Compute the predictions.
                preds = model(inputs, meta["boxes"])

                preds = preds.cpu()
                ori_boxes = meta["ori_boxes"].cpu()
                metadata = meta["metadata"].cpu()

                # Each gather result is concatenated along dim 0.
                if cfg.NUM_GPUS > 1:
                    preds = torch.cat(
                        du.all_gather_unaligned(preds), dim=0)
                    ori_boxes = torch.cat(
                        du.all_gather_unaligned(ori_boxes), dim=0)
                    metadata = torch.cat(
                        du.all_gather_unaligned(metadata), dim=0)

                test_meter.iter_toc()
                # Update and log stats.
                test_meter.update_stats(
                    preds.detach().cpu(),
                    ori_boxes.detach().cpu(),
                    metadata.detach().cpu(),
                )
                test_meter.log_iter_stats(None, cur_iter)
            else:
                # Perform the forward pass.
                preds = model(inputs)

                # Gather all the predictions across all the devices to
                # perform ensemble.
                if cfg.NUM_GPUS > 1:
                    preds, labels, video_idx = du.all_gather(
                        [preds, labels, video_idx])

                test_meter.iter_toc()
                # Update and log stats.
                test_meter.update_stats(
                    preds.detach().cpu(),
                    labels.detach().cpu(),
                    video_idx.detach().cpu(),
                )
                test_meter.log_iter_stats(cur_iter)

            test_meter.iter_tic()

    # Log epoch stats and print the final testing results.
    if writer is not None:
        all_preds_cpu = [
            pred.clone().detach().cpu() for pred in test_meter.video_preds
        ]
        all_labels_cpu = [
            label.clone().detach().cpu() for label in test_meter.video_labels
        ]
        writer.plot_eval(preds=all_preds_cpu, labels=all_labels_cpu)

    test_meter.finalize_metrics()
    test_meter.reset()
def eval_epoch(val_loader, model, val_meter, cur_epoch, cfg, writer=None):
    """
    Evaluate the model on the val set.

    Args:
        val_loader (loader): data loader to provide validation data.
        model (model): model to evaluate the performance.
        val_meter (ValMeter): meter instance to record and calculate the
            metrics.
        cur_epoch (int): number of the current epoch of training.
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
        writer (TensorboardWriter, optional): TensorboardWriter object
            to writer Tensorboard log.
    """
    # Evaluation mode enabled. The running stats would not be updated.
    model.eval()
    val_meter.iter_tic()

    # Evaluation never calls backward(); disable autograd so no graph is
    # built for the forward passes.
    with torch.no_grad():
        for cur_iter, (inputs, labels, _, meta) in enumerate(val_loader):
            # Transfer the data to the current GPU device.
            if isinstance(inputs, (list,)):
                for i, item in enumerate(inputs):
                    inputs[i] = item.cuda(non_blocking=True)
            else:
                inputs = inputs.cuda(non_blocking=True)
            labels = labels.cuda()
            # `meta` values may be tensors or lists of tensors; both are
            # updated in place.
            for key, val in meta.items():
                if isinstance(val, (list,)):
                    for i, item in enumerate(val):
                        val[i] = item.cuda(non_blocking=True)
                else:
                    meta[key] = val.cuda(non_blocking=True)

            if cfg.DETECTION.ENABLE:
                # Compute the predictions.
                preds = model(inputs, meta["boxes"])

                preds = preds.cpu()
                ori_boxes = meta["ori_boxes"].cpu()
                metadata = meta["metadata"].cpu()

                # Each gather result is concatenated along dim 0.
                if cfg.NUM_GPUS > 1:
                    preds = torch.cat(
                        du.all_gather_unaligned(preds), dim=0)
                    ori_boxes = torch.cat(
                        du.all_gather_unaligned(ori_boxes), dim=0)
                    metadata = torch.cat(
                        du.all_gather_unaligned(metadata), dim=0)

                val_meter.iter_toc()
                # Update and log stats.
                val_meter.update_stats(
                    preds.cpu(), ori_boxes.cpu(), metadata.cpu())
            else:
                preds = model(inputs)

                if cfg.DATA.MULTI_LABEL:
                    if cfg.NUM_GPUS > 1:
                        preds, labels = du.all_gather([preds, labels])
                else:
                    # Compute the errors.
                    num_topks_correct = metrics.topks_correct(
                        preds, labels, (1, 5))

                    # Convert correct counts to error percentages for the
                    # local batch.
                    batch_size = preds.size(0)
                    top1_err, top5_err = [
                        (1.0 - x / batch_size) * 100.0
                        for x in num_topks_correct
                    ]
                    # Combine the errors across the GPUs.
                    if cfg.NUM_GPUS > 1:
                        top1_err, top5_err = du.all_reduce(
                            [top1_err, top5_err])

                    # Copy the errors from GPU to CPU (sync point).
                    top1_err, top5_err = top1_err.item(), top5_err.item()

                    val_meter.iter_toc()
                    # Update and log stats. The sample count uses the same
                    # batch size as the error computation above;
                    # `inputs[0].size(0)` was only the batch size when
                    # `inputs` is a list, not when it is a single tensor.
                    val_meter.update_stats(
                        top1_err, top5_err, batch_size * cfg.NUM_GPUS
                    )
                    # write to tensorboard format if available.
                    if writer is not None:
                        writer.add_scalars(
                            {"Val/Top1_err": top1_err,
                             "Val/Top5_err": top5_err},
                            global_step=len(val_loader) * cur_epoch + cur_iter,
                        )

                val_meter.update_predictions(preds, labels)

            val_meter.log_iter_stats(cur_epoch, cur_iter)
            val_meter.iter_tic()

    # Log epoch stats.
    val_meter.log_epoch_stats(cur_epoch)
    # write to tensorboard format if available.
    if writer is not None:
        if cfg.DETECTION.ENABLE:
            writer.add_scalars(
                {"Val/mAP": val_meter.full_map}, global_step=cur_epoch
            )
        all_preds_cpu = [
            pred.clone().detach().cpu() for pred in val_meter.all_preds
        ]
        all_labels_cpu = [
            label.clone().detach().cpu() for label in val_meter.all_labels
        ]
        writer.plot_eval(
            preds=all_preds_cpu,
            labels=all_labels_cpu,
            global_step=cur_epoch,
        )

    val_meter.reset()