# Imports needed by the evaluation routines below. `utils` refers to the helper
# module that ships alongside these reference scripts (MetricLogger, ConfusionMatrix, etc.).
import warnings

import torch
from torch.utils.data import DistributedSampler

import utils


def evaluate(model, data_loader, device, num_classes):
    model.eval()
    confmat = utils.ConfusionMatrix(num_classes)
    metric_logger = utils.MetricLogger(delimiter=" ")
    header = "Test:"
    num_processed_samples = 0
    with torch.inference_mode():
        for image, target in metric_logger.log_every(data_loader, 100, header):
            image, target = image.to(device), target.to(device)
            output = model(image)
            output = output["out"]

            confmat.update(target.flatten(), output.argmax(1).flatten())
            # FIXME need to take into account that the datasets
            # could have been padded in distributed setup
            num_processed_samples += image.shape[0]

        confmat.reduce_from_all_processes()

    num_processed_samples = utils.reduce_across_processes(num_processed_samples)
    if (
        hasattr(data_loader.dataset, "__len__")
        and len(data_loader.dataset) != num_processed_samples
        and torch.distributed.get_rank() == 0
    ):
        # See FIXME above
        warnings.warn(
            f"It looks like the dataset has {len(data_loader.dataset)} samples, but {num_processed_samples} "
            "samples were used for the validation, which might bias the results. "
            "Try adjusting the batch size and / or the world size. "
            "Setting the world size to 1 is always a safe bet."
        )

    return confmat
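# Hedged usage sketch (an assumption, not part of the reference script): one way the
# segmentation `evaluate` above might be driven. The choice of fcn_resnet50 and
# num_classes=21 (Pascal VOC) is illustrative, and `data_loader_test` is supplied by
# the caller; `_example_segmentation_eval` is a hypothetical helper name.
import torch
import torchvision


def _example_segmentation_eval(data_loader_test, device=torch.device("cuda"), num_classes=21):
    # Build an untrained segmentation model whose forward returns a dict with an "out" key,
    # matching the `output["out"]` access in `evaluate`.
    model = torchvision.models.segmentation.fcn_resnet50(num_classes=num_classes).to(device)
    confmat = evaluate(model, data_loader_test, device=device, num_classes=num_classes)
    print(confmat)  # the reference ConfusionMatrix formats global acc, per-class acc and IoU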
def evaluate(model, criterion, data_loader, device):
    model.eval()
    metric_logger = utils.MetricLogger(delimiter=" ")
    header = "Test:"
    num_processed_samples = 0
    with torch.inference_mode():
        for video, target in metric_logger.log_every(data_loader, 100, header):
            video = video.to(device, non_blocking=True)
            target = target.to(device, non_blocking=True)
            output = model(video)
            loss = criterion(output, target)

            acc1, acc5 = utils.accuracy(output, target, topk=(1, 5))
            # FIXME need to take into account that the datasets
            # could have been padded in distributed setup
            batch_size = video.shape[0]
            metric_logger.update(loss=loss.item())
            metric_logger.meters["acc1"].update(acc1.item(), n=batch_size)
            metric_logger.meters["acc5"].update(acc5.item(), n=batch_size)
            num_processed_samples += batch_size
    # gather the stats from all processes
    num_processed_samples = utils.reduce_across_processes(num_processed_samples)
    if isinstance(data_loader.sampler, DistributedSampler):
        # Get the len of UniformClipSampler inside DistributedSampler
        num_data_from_sampler = len(data_loader.sampler.dataset)
    else:
        num_data_from_sampler = len(data_loader.sampler)

    if (
        hasattr(data_loader.dataset, "__len__")
        and num_data_from_sampler != num_processed_samples
        and torch.distributed.get_rank() == 0
    ):
        # See FIXME above
        warnings.warn(
            f"It looks like the sampler has {num_data_from_sampler} samples, but {num_processed_samples} "
            "samples were used for the validation, which might bias the results. "
            "Try adjusting the batch size and / or the world size. "
            "Setting the world size to 1 is always a safe bet."
        )

    metric_logger.synchronize_between_processes()

    print(
        " * Clip Acc@1 {top1.global_avg:.3f} Clip Acc@5 {top5.global_avg:.3f}".format(
            top1=metric_logger.acc1, top5=metric_logger.acc5
        )
    )
    return metric_logger.acc1.global_avg
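# Hedged usage sketch (an assumption): calling the clip-level video `evaluate` above.
# r3d_18 is one torchvision video model; the loader is expected to yield (video, target)
# batches of clips, as in the reference training script. `_example_clip_eval` is a
# hypothetical helper, not part of the original code.
import torch
import torchvision


def _example_clip_eval(data_loader_test, device=torch.device("cuda")):
    model = torchvision.models.video.r3d_18(num_classes=400).to(device)
    criterion = torch.nn.CrossEntropyLoss()
    clip_acc1 = evaluate(model, criterion, data_loader_test, device=device)
    return clip_acc1  # top-1 accuracy averaged over clips, not over whole videos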
def evaluate(model, criterion, data_loader, device, print_freq=100, log_suffix=""):
    model.eval()
    metric_logger = utils.MetricLogger(delimiter=" ")
    header = f"Test: {log_suffix}"
    num_processed_samples = 0
    with torch.inference_mode():
        for image, target in metric_logger.log_every(data_loader, print_freq, header):
            image = image.to(device, non_blocking=True)
            target = target.to(device, non_blocking=True)
            output = model(image)
            loss = criterion(output, target)

            acc1, acc5 = utils.accuracy(output, target, topk=(1, 5))
            # FIXME need to take into account that the datasets
            # could have been padded in distributed setup
            batch_size = image.shape[0]
            metric_logger.update(loss=loss.item())
            metric_logger.meters["acc1"].update(acc1.item(), n=batch_size)
            metric_logger.meters["acc5"].update(acc5.item(), n=batch_size)
            num_processed_samples += batch_size
    # gather the stats from all processes
    num_processed_samples = utils.reduce_across_processes(num_processed_samples)
    if (
        hasattr(data_loader.dataset, "__len__")
        and len(data_loader.dataset) != num_processed_samples
        and torch.distributed.get_rank() == 0
    ):
        # See FIXME above
        warnings.warn(
            f"It looks like the dataset has {len(data_loader.dataset)} samples, but {num_processed_samples} "
            "samples were used for the validation, which might bias the results. "
            "Try adjusting the batch size and / or the world size. "
            "Setting the world size to 1 is always a safe bet."
        )

    metric_logger.synchronize_between_processes()

    print(f"{header} Acc@1 {metric_logger.acc1.global_avg:.3f} Acc@5 {metric_logger.acc5.global_avg:.3f}")
    return metric_logger.acc1.global_avg
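# Hedged usage sketch (an assumption, not part of the reference script): `log_suffix`
# lets the same routine label a second evaluation pass, for example on an EMA copy of
# the model. All argument names below are placeholders supplied by the caller.
import torch


def _example_classification_eval(model, model_ema, data_loader_test, device):
    criterion = torch.nn.CrossEntropyLoss(label_smoothing=0.1)
    evaluate(model, criterion, data_loader_test, device=device)
    if model_ema is not None:
        # Same loader and criterion, but the printed header reads "Test: EMA"
        evaluate(model_ema, criterion, data_loader_test, device=device, log_suffix="EMA")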
def _evaluate(model, args, val_dataset, *, padder_mode, num_flow_updates=None, batch_size=None, header=None):
    """Helper function to compute various metrics (epe, etc.) for a model on a given dataset.

    We process as many samples as possible with ddp, and process the rest on a single worker.
    """
    batch_size = batch_size or args.batch_size
    device = torch.device(args.device)

    model.eval()

    if args.distributed:
        sampler = torch.utils.data.distributed.DistributedSampler(val_dataset, shuffle=False, drop_last=True)
    else:
        sampler = torch.utils.data.SequentialSampler(val_dataset)

    val_loader = torch.utils.data.DataLoader(
        val_dataset,
        sampler=sampler,
        batch_size=batch_size,
        pin_memory=True,
        num_workers=args.workers,
    )

    num_flow_updates = num_flow_updates or args.num_flow_updates

    def inner_loop(blob):
        if blob[0].dim() == 3:
            # input is not batched so we add an extra dim for consistency
            blob = [x[None, :, :, :] if x is not None else None for x in blob]

        image1, image2, flow_gt = blob[:3]
        valid_flow_mask = None if len(blob) == 3 else blob[-1]

        image1, image2 = image1.to(device), image2.to(device)

        padder = utils.InputPadder(image1.shape, mode=padder_mode)
        image1, image2 = padder.pad(image1, image2)

        flow_predictions = model(image1, image2, num_flow_updates=num_flow_updates)
        flow_pred = flow_predictions[-1]
        flow_pred = padder.unpad(flow_pred).cpu()

        metrics, num_pixels_tot = utils.compute_metrics(flow_pred, flow_gt, valid_flow_mask)

        # We compute per-pixel epe (epe) and per-image epe (called f1-epe in RAFT paper).
        # per-pixel epe: average epe of all pixels of all images
        # per-image epe: average epe on each image independently, then average over images
        for name in ("epe", "1px", "3px", "5px", "f1"):  # f1 is called f1-all in paper
            logger.meters[name].update(metrics[name], n=num_pixels_tot)

        logger.meters["per_image_epe"].update(metrics["epe"], n=batch_size)

    logger = utils.MetricLogger()
    for meter_name in ("epe", "1px", "3px", "5px", "per_image_epe", "f1"):
        logger.add_meter(meter_name, fmt="{global_avg:.4f}")

    num_processed_samples = 0
    for blob in logger.log_every(val_loader, header=header, print_freq=None):
        inner_loop(blob)
        num_processed_samples += blob[0].shape[0]  # batch size

    if args.distributed:
        num_processed_samples = utils.reduce_across_processes(num_processed_samples)
        print(
            f"Batch-processed {num_processed_samples} / {len(val_dataset)} samples. "
            "Going to process the remaining samples individually, if any."
        )
        if args.rank == 0:  # we only need to process the rest on a single worker
            for i in range(num_processed_samples, len(val_dataset)):
                inner_loop(val_dataset[i])

    logger.synchronize_between_processes()

    print(header, logger)
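# Hedged usage sketch (an assumption): how `_evaluate` might be driven for a RAFT model
# on a Sintel-style validation split. The `args` namespace below only fills in the fields
# that `_evaluate` reads, with illustrative values; the real script builds it with argparse.
import argparse

import torch
import torchvision


def _example_flow_eval(val_dataset, distributed=False, rank=0):
    args = argparse.Namespace(
        batch_size=2,
        device="cuda" if torch.cuda.is_available() else "cpu",
        distributed=distributed,
        rank=rank,
        workers=4,
        num_flow_updates=32,
    )
    model = torchvision.models.optical_flow.raft_small().to(args.device)
    # padder_mode="sintel" matches the InputPadder modes used by the reference utils
    _evaluate(model, args, val_dataset, padder_mode="sintel", header="Sintel val")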
def evaluate(model, criterion, data_loader, device):
    model.eval()
    metric_logger = utils.MetricLogger(delimiter=" ")
    header = "Test:"
    num_processed_samples = 0
    # Group and aggregate output of a video
    num_videos = len(data_loader.dataset.samples)
    num_classes = len(data_loader.dataset.classes)
    agg_preds = torch.zeros((num_videos, num_classes), dtype=torch.float32, device=device)
    agg_targets = torch.zeros((num_videos), dtype=torch.int32, device=device)
    with torch.inference_mode():
        for video, target, video_idx in metric_logger.log_every(data_loader, 100, header):
            video = video.to(device, non_blocking=True)
            target = target.to(device, non_blocking=True)
            output = model(video)
            loss = criterion(output, target)

            # Use softmax to convert output into prediction probability
            preds = torch.softmax(output, dim=1)
            for b in range(video.size(0)):
                idx = video_idx[b].item()
                agg_preds[idx] += preds[b].detach()
                agg_targets[idx] = target[b].detach().item()

            acc1, acc5 = utils.accuracy(output, target, topk=(1, 5))
            # FIXME need to take into account that the datasets
            # could have been padded in distributed setup
            batch_size = video.shape[0]
            metric_logger.update(loss=loss.item())
            metric_logger.meters["acc1"].update(acc1.item(), n=batch_size)
            metric_logger.meters["acc5"].update(acc5.item(), n=batch_size)
            num_processed_samples += batch_size
    # gather the stats from all processes
    num_processed_samples = utils.reduce_across_processes(num_processed_samples)
    if isinstance(data_loader.sampler, DistributedSampler):
        # Get the len of UniformClipSampler inside DistributedSampler
        num_data_from_sampler = len(data_loader.sampler.dataset)
    else:
        num_data_from_sampler = len(data_loader.sampler)

    if (
        hasattr(data_loader.dataset, "__len__")
        and num_data_from_sampler != num_processed_samples
        and torch.distributed.get_rank() == 0
    ):
        # See FIXME above
        warnings.warn(
            f"It looks like the sampler has {num_data_from_sampler} samples, but {num_processed_samples} "
            "samples were used for the validation, which might bias the results. "
            "Try adjusting the batch size and / or the world size. "
            "Setting the world size to 1 is always a safe bet."
        )

    metric_logger.synchronize_between_processes()

    print(
        " * Clip Acc@1 {top1.global_avg:.3f} Clip Acc@5 {top5.global_avg:.3f}".format(
            top1=metric_logger.acc1, top5=metric_logger.acc5
        )
    )
    # Reduce the agg_preds and agg_targets from all gpu and show result
    agg_preds = utils.reduce_across_processes(agg_preds)
    agg_targets = utils.reduce_across_processes(agg_targets, op=torch.distributed.ReduceOp.MAX)
    agg_acc1, agg_acc5 = utils.accuracy(agg_preds, agg_targets, topk=(1, 5))
    print(" * Video Acc@1 {acc1:.3f} Video Acc@5 {acc5:.3f}".format(acc1=agg_acc1, acc5=agg_acc5))
    return metric_logger.acc1.global_avg
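# Hedged usage sketch (an assumption): the aggregated variant above expects the loader to
# yield (video, target, video_idx) per clip and a dataset exposing `.samples` and `.classes`
# (a Kinetics-style folder dataset). The model choice and helper name are illustrative only.
import torch
import torchvision


def _example_video_level_eval(data_loader_test, device=torch.device("cuda")):
    num_classes = len(data_loader_test.dataset.classes)
    model = torchvision.models.video.r2plus1d_18(num_classes=num_classes).to(device)
    criterion = torch.nn.CrossEntropyLoss()
    # Returns clip-level top-1; the video-level Acc@1 / Acc@5 (softmax scores summed per
    # video index across clips) are printed inside `evaluate`.
    return evaluate(model, criterion, data_loader_test, device=device)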