def evaluate_language_model(
    model,
    test_loader,
    model_output_transform,
    send_data_to_device,
    device="cuda",
):
    n_steps, eval_loss = 0, 0
    iterator = tqdm.tqdm(test_loader, desc="Evaluation")

    with torch.no_grad():
        for i, labels in enumerate(iterator):
            labels, _ = send_data_to_device(labels, None, device=device)
            output = model(labels)

            if model_output_transform is not None:
                output = model_output_transform(output, None, model=model)

            # shift so that tokens < n predict token n
            shift_logits = output[..., :-1, :].contiguous()
            shift_labels = labels[..., 1:].contiguous()

            objective = CrossEntropyLoss(ignore_index=-1)
            loss = objective(
                shift_logits.view(-1, shift_logits.size(-1)),
                shift_labels.view(-1),
            )

            eval_loss += loss.item()
            n_steps += 1
            iterator.desc = (
                f"Eval loss: {eval_loss / n_steps} "
                f"ppl: {np.exp(eval_loss / n_steps)}"
            )

            if i == 0:  # for sotabench.com caching of evaluation
                run_hash = calculate_run_hash([eval_loss], output)
                # if we are in check mode we don't need to go beyond the
                # first batch
                if in_check_mode():
                    iterator.close()
                    break

                # get the cached values from sotabench.com if available
                client = Client.public()
                cached_res = client.get_results_by_run_hash(run_hash)
                if cached_res:
                    iterator.close()
                    print(
                        "No model change detected (using the first batch "
                        "run_hash). Returning cached results."
                    )
                    return cached_res, run_hash

    return {"Perplexity": np.exp(eval_loss / n_steps)}, run_hash
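# Usage sketch (an assumption, not part of the function above): one way to
# wire evaluate_language_model to a Hugging Face GPT-2 checkpoint. The helper
# functions and the random-token loader below are illustrative stand-ins for
# a real tokenised test corpus.
def example_evaluate_gpt2_perplexity():
    from torch.utils.data import DataLoader, TensorDataset
    from transformers import GPT2LMHeadModel

    def send_tokens_to_device(labels, _target, device="cuda"):
        # the loader yields token-id tensors only, so there is no target
        return labels.to(device), None

    def extract_logits(output, _target, model=None):
        # reduce the Hugging Face output object to its logits tensor
        return output.logits

    # tiny random-token stand-in for a real tokenised test set
    token_ids = torch.randint(0, 50257, (8, 128))
    loader = DataLoader(
        TensorDataset(token_ids),
        batch_size=2,
        collate_fn=lambda batch: torch.stack([item[0] for item in batch]),
    )

    model = GPT2LMHeadModel.from_pretrained("gpt2").to("cuda").eval()
    return evaluate_language_model(
        model=model,
        test_loader=loader,
        model_output_transform=extract_logits,
        send_data_to_device=send_tokens_to_device,
    )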
def evaluate_classification(model, test_loader, model_output_transform,
                            send_data_to_device, device='cuda'):
    batch_time = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()

    end = time.time()
    iterator = tqdm.tqdm(test_loader, file=sys.stdout)

    with torch.no_grad():
        for i, (input, target) in enumerate(iterator):
            input, target = send_data_to_device(input, target, device=device)
            output = model(input)

            if model_output_transform is not None:
                output = model_output_transform(output, target)

            check_metric_inputs(output, target, test_loader.dataset, i)
            prec1, prec5 = accuracy(output, target, topk=(1, 5))
            top1.update(prec1.item(), input.size(0))
            top5.update(prec5.item(), input.size(0))

            batch_time.update(time.time() - end)
            end = time.time()

            if i == 0:  # for sotabench.com caching of evaluation
                run_hash = calculate_run_hash([prec1, prec5], output)
                # if we are in check mode we don't need to go beyond the
                # first batch
                if in_check_mode():
                    iterator.close()
                    break

                # get the cached values from sotabench.com if available
                client = Client.public()
                cached_res = client.get_results_by_run_hash(run_hash)
                if cached_res:
                    iterator.close()
                    print(
                        "No model change detected (using the first batch "
                        "run_hash). Returning cached results."
                    )
                    return cached_res, run_hash

    return {
        'Top 1 Accuracy': top1.avg / 100,
        'Top 5 Accuracy': top5.avg / 100
    }, run_hash
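# The classification evaluators in this section rely on AverageMeter and
# accuracy helpers that are not shown here. Below is a minimal sketch of what
# those helpers conventionally look like (following the PyTorch ImageNet
# reference script); the actual torchbench implementations may differ in
# detail.
import torch


class AverageMeter:
    """Tracks a running average of a scalar metric (e.g. top-1 accuracy)."""

    def __init__(self):
        self.val = 0.0
        self.sum = 0.0
        self.count = 0
        self.avg = 0.0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count


def accuracy(output, target, topk=(1,)):
    """Computes precision@k for the specified values of k, in percent."""
    with torch.no_grad():
        maxk = max(topk)
        batch_size = target.size(0)

        # indices of the top-k predictions per example, shape [maxk, batch]
        _, pred = output.topk(maxk, dim=1, largest=True, sorted=True)
        pred = pred.t()
        correct = pred.eq(target.view(1, -1).expand_as(pred))

        res = []
        for k in topk:
            correct_k = correct[:k].reshape(-1).float().sum(0)
            res.append(correct_k.mul_(100.0 / batch_size))
        return res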
def evaluate_segmentation(
    model,
    test_loader,
    model_output_transform,
    send_data_to_device,
    num_classes,
    device="cuda",
):
    confmat = ConfusionMatrix(num_classes)

    iterator = tqdm.tqdm(test_loader, desc="Evaluation", mininterval=5)
    init_time = time.time()

    with torch.no_grad():
        for i, (input, target) in enumerate(iterator):
            input, target = send_data_to_device(input, target, device=device)
            output = model(input)
            output, target = model_output_transform(output, target)
            confmat.update(target, output)

            if i == 0:  # for sotabench.com caching of evaluation
                run_hash = calculate_run_hash([], output)
                # if we are in check mode we don't need to go beyond the
                # first batch
                if in_check_mode():
                    iterator.close()
                    break

                # get the cached values from sotabench.com if available
                client = Client.public()
                cached_res = client.get_results_by_run_hash(run_hash)
                if cached_res:
                    iterator.close()
                    print(
                        "No model change detected (using the first batch run "
                        "hash). Returning cached results.")
                    speed_mem_metrics = {
                        "Tasks / Evaluation Time": None,
                        "Evaluation Time": None,
                        "Tasks": None,
                        "Max Memory Allocated (Total)": None,
                    }
                    return cached_res, speed_mem_metrics, run_hash

    exec_time = time.time() - init_time

    acc_global, acc, iu = confmat.compute()

    memory_allocated = torch.cuda.max_memory_allocated(device=device)
    torch.cuda.reset_max_memory_allocated(device=device)

    speed_mem_metrics = {
        "Tasks / Evaluation Time": len(test_loader.dataset) / exec_time,
        "Tasks": len(test_loader.dataset),
        "Evaluation Time": (time.time() - init_time),
        "Max Memory Allocated (Total)": memory_allocated,
    }

    return (
        {
            "Accuracy": acc_global.item(),
            "Mean IOU": iu.mean().item()
        },
        speed_mem_metrics,
        run_hash,
    )
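# Sketch of a model_output_transform compatible with evaluate_segmentation
# when the model is a torchvision segmentation network whose forward pass
# returns a dict with an "out" logits tensor. This mirrors the usual
# torchvision convention but is written here as an assumption, not as the
# transform the benchmark actually ships.
def torchvision_seg_output_transform(output, target):
    # [batch, classes, H, W] logits -> flat per-pixel class predictions,
    # which is the shape ConfusionMatrix.update expects
    predictions = output["out"].argmax(1).flatten()
    return predictions, target.flatten()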
def evaluate_detection_coco(model, test_loader, model_output_transform,
                            send_data_to_device, device="cuda", force=False):
    coco = get_coco_api_from_dataset(test_loader.dataset)
    iou_types = ['bbox']
    coco_evaluator = CocoEvaluator(coco, iou_types)

    iterator = tqdm.tqdm(test_loader, desc="Evaluation", mininterval=5)
    init_time = time.time()

    with torch.no_grad():
        for i, (input, target) in enumerate(iterator):
            input, target = send_data_to_device(input, target, device=device)
            original_output = model(input)
            output, target = model_output_transform(original_output, target)
            result = {
                tar["image_id"].item(): out
                for tar, out in zip(target, output)
            }
            coco_evaluator.update(result)

            if i == 0:  # for sotabench.com caching of evaluation
                run_hash = calculate_run_hash([], original_output)
                # if we are in check mode we don't need to go beyond the
                # first batch
                if in_check_mode():
                    iterator.close()
                    break

                if not force:
                    # get the cached values from sotabench.com if available
                    client = Client.public()
                    cached_res = client.get_results_by_run_hash(run_hash)
                    if cached_res:
                        iterator.close()
                        print(
                            "No model change detected (using the first batch "
                            "run hash). Returning cached results.")
                        speed_mem_metrics = {
                            'Tasks / Evaluation Time': None,
                            'Evaluation Time': None,
                            'Tasks': None,
                            'Max Memory Allocated (Total)': None,
                        }
                        return cached_res, speed_mem_metrics, run_hash

    exec_time = (time.time() - init_time)

    coco_evaluator.synchronize_between_processes()
    coco_evaluator.accumulate()
    coco_evaluator.summarize()

    memory_allocated = torch.cuda.max_memory_allocated(device=device)
    torch.cuda.reset_max_memory_allocated(device=device)

    speed_mem_metrics = {
        'Tasks / Evaluation Time': len(test_loader.dataset) / exec_time,
        'Tasks': len(test_loader.dataset),
        'Evaluation Time': (time.time() - init_time),
        'Max Memory Allocated (Total)': memory_allocated,
    }

    return (get_coco_metrics(coco_evaluator), speed_mem_metrics, run_hash)
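# Sketch of the send_data_to_device / model_output_transform pair that
# evaluate_detection_coco assumes for a torchvision-style detection model
# (a list of image tensors in, a list of per-image prediction dicts out).
# These follow the torchvision detection reference-script conventions and
# are illustrative assumptions, not part of the function above.
def detection_send_data_to_device(input, target, device="cuda"):
    # detection batches are lists of variable-sized images and target dicts
    images = [img.to(device) for img in input]
    targets = [{k: v.to(device) for k, v in t.items()} for t in target]
    return images, targets


def detection_output_transform(output, target):
    # move predictions back to the CPU so CocoEvaluator can consume them;
    # each target dict is expected to carry an "image_id" entry
    output = [{k: v.cpu() for k, v in o.items()} for o in output]
    return output, target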
def evaluate_classification(
    model,
    test_loader,
    model_output_transform,
    send_data_to_device,
    device="cuda",
    force=False
):
    top1 = AverageMeter()
    top5 = AverageMeter()

    iterator = tqdm.tqdm(test_loader, desc="Evaluation", mininterval=5)
    init_time = time.time()

    with torch.no_grad():
        for i, (input, target) in enumerate(iterator):
            input, target = send_data_to_device(input, target, device=device)
            output = model(input)

            if model_output_transform is not None:
                output = model_output_transform(output, target, model=model)

            check_metric_inputs(output, target, test_loader.dataset, i)
            prec1, prec5 = accuracy(output, target, topk=(1, 5))
            top1.update(prec1.item(), input.size(0))
            top5.update(prec5.item(), input.size(0))

            if i == 0:  # for sotabench.com caching of evaluation
                run_hash = calculate_run_hash([prec1, prec5], output)
                # if we are in check mode we don't need to go beyond the
                # first batch
                if in_check_mode():
                    iterator.close()
                    break

                if not force:
                    # get the cached values from sotabench.com if available
                    client = Client.public()
                    cached_res = client.get_results_by_run_hash(run_hash)
                    if cached_res:
                        iterator.close()
                        print(
                            "No model change detected (using the first batch "
                            "run hash). Returning cached results."
                        )
                        speed_mem_metrics = {
                            'Tasks / Evaluation Time': None,
                            'Evaluation Time': None,
                            'Tasks': None,
                            'Max Memory Allocated (Total)': None,
                        }
                        return cached_res, speed_mem_metrics, run_hash

    exec_time = (time.time() - init_time)

    memory_allocated = torch.cuda.max_memory_allocated(device=device)
    torch.cuda.reset_max_memory_allocated(device=device)

    speed_mem_metrics = {
        'Tasks / Evaluation Time': len(test_loader.dataset) / exec_time,
        'Tasks': len(test_loader.dataset),
        'Evaluation Time': (time.time() - init_time),
        'Max Memory Allocated (Total)': memory_allocated,
    }

    return (
        {"Top 1 Accuracy": top1.avg / 100, "Top 5 Accuracy": top5.avg / 100},
        speed_mem_metrics,
        run_hash,
    )
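# Usage sketch (an assumption, not part of the API above): evaluating a
# torchvision ResNet-50 with evaluate_classification. The validation
# directory path is a hypothetical placeholder, and no model_output_transform
# is needed because the model already returns plain logits.
def example_evaluate_resnet50(imagenet_val_dir="/path/to/imagenet/val"):
    import torchvision.models as models
    import torchvision.transforms as transforms
    from torch.utils.data import DataLoader
    from torchvision.datasets import ImageFolder

    transform = transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225]),
    ])
    loader = DataLoader(
        ImageFolder(imagenet_val_dir, transform=transform),
        batch_size=128,
        shuffle=False,
        num_workers=4,
        pin_memory=True,
    )

    def send_to_device(input, target, device="cuda"):
        return (input.to(device, non_blocking=True),
                target.to(device, non_blocking=True))

    model = models.resnet50(pretrained=True).to("cuda").eval()
    return evaluate_classification(
        model=model,
        test_loader=loader,
        model_output_transform=None,
        send_data_to_device=send_to_device,
    )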