Example #1
import numpy as np
import torch
import tqdm
from torch.nn import CrossEntropyLoss

# Helper imports; the module paths are assumed from the surrounding
# sotabench/torchbench packages.
from sotabenchapi.check import in_check_mode
from sotabenchapi.client import Client
from torchbench.utils import calculate_run_hash


def evaluate_language_model(
    model,
    test_loader,
    model_output_transform,
    send_data_to_device,
    device="cuda",
):
    n_steps, eval_loss = 0, 0

    iterator = tqdm.tqdm(test_loader, desc="Evaluation")

    with torch.no_grad():
        for i, labels in enumerate(iterator):

            labels, _ = send_data_to_device(labels, None, device=device)
            output = model(labels)

            if model_output_transform is not None:
                output = model_output_transform(output, None, model=model)

            shift_logits = output[..., :-1, :].contiguous()
            shift_labels = labels[..., 1:].contiguous()
            objective = CrossEntropyLoss(ignore_index=-1)
            loss = objective(
                shift_logits.view(-1, shift_logits.size(-1)),
                shift_labels.view(-1),
            )
            eval_loss += loss.item()
            n_steps += 1
            iterator.set_description(
                f"Eval loss: {eval_loss / n_steps:.4f} "
                f"ppl: {np.exp(eval_loss / n_steps):.2f}"
            )

            if i == 0:  # for sotabench.com caching of evaluation
                run_hash = calculate_run_hash([eval_loss], output)
                # if we are in check mode we don't need to go beyond the
                # first batch
                if in_check_mode():
                    iterator.close()
                    break

                # get the cached values from sotabench.com if available
                client = Client.public()
                cached_res = client.get_results_by_run_hash(run_hash)
                if cached_res:
                    iterator.close()
                    print(
                        "No model change detected (using the first batch "
                        "run_hash). Returning cached results."
                    )
                    return cached_res, run_hash

    return {"Perplexity": np.exp(eval_loss / n_steps)}, run_hash
Example #2
import sys
import time

import torch
import tqdm

# Helper imports; the module paths are assumed from the surrounding
# sotabench/torchbench packages.
from sotabenchapi.check import in_check_mode
from sotabenchapi.client import Client
from torchbench.utils import (AverageMeter, accuracy, calculate_run_hash,
                              check_metric_inputs)


def evaluate_classification(model,
                            test_loader,
                            model_output_transform,
                            send_data_to_device,
                            device='cuda'):
    batch_time = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()

    end = time.time()

    iterator = tqdm.tqdm(test_loader, file=sys.stdout)

    with torch.no_grad():
        for i, (input, target) in enumerate(iterator):

            input, target = send_data_to_device(input, target, device=device)
            output = model(input)

            if model_output_transform is not None:
                output = model_output_transform(output, target)

            check_metric_inputs(output, target, test_loader.dataset, i)
            prec1, prec5 = accuracy(output, target, topk=(1, 5))
            top1.update(prec1.item(), input.size(0))
            top5.update(prec5.item(), input.size(0))
            batch_time.update(time.time() - end)
            end = time.time()

            if i == 0:  # for sotabench.com caching of evaluation
                run_hash = calculate_run_hash([prec1, prec5], output)
                # if we are in check mode we don't need to go beyond the first batch
                if in_check_mode():
                    iterator.close()
                    break

                # get the cached values from sotabench.com if available
                client = Client.public()
                cached_res = client.get_results_by_run_hash(run_hash)
                if cached_res:
                    iterator.close()
                    print(
                        "No model change detected (using the first batch run_hash). Returning cached results."
                    )
                    return cached_res, run_hash

    return {
        'Top 1 Accuracy': top1.avg / 100,
        'Top 5 Accuracy': top5.avg / 100
    }, run_hash
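AverageMeter and accuracy are not shown in this listing. Both appear to follow the classic PyTorch ImageNet example, so the calls above (meter.update(val, n), and accuracy(output, target, topk=(1, 5)) returning percentages) can be read against this minimal sketch, which is an assumption rather than the package's exact code:

class AverageMeter:
    """Keeps a running sum so .avg is the mean over all samples seen."""
    def __init__(self):
        self.sum, self.count, self.avg = 0.0, 0, 0.0

    def update(self, val, n=1):
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count


def accuracy(output, target, topk=(1,)):
    """Top-k precision, in percent, for each requested k."""
    maxk = max(topk)
    _, pred = output.topk(maxk, dim=1)   # (batch, maxk) class indices
    pred = pred.t()                      # (maxk, batch)
    correct = pred.eq(target.view(1, -1).expand_as(pred))
    res = []
    for k in topk:
        correct_k = correct[:k].reshape(-1).float().sum(0)
        res.append(correct_k.mul_(100.0 / target.size(0)))
    return res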
Example #3
import time

import torch
import tqdm

# Helper imports; the module paths are assumed from the surrounding
# sotabench/torchbench packages.
from sotabenchapi.check import in_check_mode
from sotabenchapi.client import Client
from torchbench.utils import calculate_run_hash
from torchbench.semantic_segmentation.utils import ConfusionMatrix


def evaluate_segmentation(
    model,
    test_loader,
    model_output_transform,
    send_data_to_device,
    num_classes,
    device="cuda",
):
    confmat = ConfusionMatrix(num_classes)

    iterator = tqdm.tqdm(test_loader, desc="Evaluation", mininterval=5)

    init_time = time.time()

    with torch.no_grad():
        for i, (input, target) in enumerate(iterator):
            input, target = send_data_to_device(input, target, device=device)
            output = model(input)
            output, target = model_output_transform(output, target)
            confmat.update(target, output)

            if i == 0:  # for sotabench.com caching of evaluation
                run_hash = calculate_run_hash([], output)
                # if we are in check mode we don't need to go beyond the first
                # batch
                if in_check_mode():
                    iterator.close()
                    break

                # get the cached values from sotabench.com if available
                client = Client.public()
                cached_res = client.get_results_by_run_hash(run_hash)
                if cached_res:
                    iterator.close()
                    print(
                        "No model change detected (using the first batch run "
                        "hash). Returning cached results.")

                    speed_mem_metrics = {
                        "Tasks / Evaluation Time": None,
                        "Evaluation Time": None,
                        "Tasks": None,
                        "Max Memory Allocated (Total)": None,
                    }

                    return cached_res, speed_mem_metrics, run_hash

    exec_time = time.time() - init_time

    acc_global, acc, iu = confmat.compute()

    memory_allocated = torch.cuda.max_memory_allocated(device=device)
    torch.cuda.reset_max_memory_allocated(device=device)

    speed_mem_metrics = {
        "Tasks / Evaluation Time": len(test_loader.dataset) / exec_time,
        "Tasks": len(test_loader.dataset),
        "Evaluation Time": (time.time() - init_time),
        "Max Memory Allocated (Total)": memory_allocated,
    }

    return (
        {
            "Accuracy": acc_global.item(),
            "Mean IOU": iu.mean().item()
        },
        speed_mem_metrics,
        run_hash,
    )
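ConfusionMatrix appears to match the helper from torchvision's semantic segmentation reference scripts: update flattens integer label/prediction tensors into a bincount over an N x N count matrix, and compute derives global accuracy, per-class accuracy and per-class IoU from it. A hedged sketch of that version, to show what the calls above assume:

import torch

class ConfusionMatrix:
    def __init__(self, num_classes):
        self.num_classes = num_classes
        self.mat = None  # lazily allocated N x N count matrix

    def update(self, target, output):
        n = self.num_classes
        if self.mat is None:
            self.mat = torch.zeros((n, n), dtype=torch.int64,
                                   device=target.device)
        with torch.no_grad():
            k = (target >= 0) & (target < n)  # ignore out-of-range label ids
            inds = n * target[k].to(torch.int64) + output[k]
            self.mat += torch.bincount(inds, minlength=n ** 2).reshape(n, n)

    def compute(self):
        h = self.mat.float()
        acc_global = torch.diag(h).sum() / h.sum()   # overall pixel accuracy
        acc = torch.diag(h) / h.sum(1)               # per-class accuracy
        iu = torch.diag(h) / (h.sum(1) + h.sum(0) - torch.diag(h))  # per-class IoU
        return acc_global, acc, iu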
Example #4
import time

import torch
import tqdm

# Helper imports; the module paths are assumed from the surrounding
# sotabench/torchbench packages.
from sotabenchapi.check import in_check_mode
from sotabenchapi.client import Client
from torchbench.utils import calculate_run_hash
from torchbench.object_detection.coco_eval import CocoEvaluator
from torchbench.object_detection.utils import (get_coco_api_from_dataset,
                                               get_coco_metrics)


def evaluate_detection_coco(model,
                            test_loader,
                            model_output_transform,
                            send_data_to_device,
                            device="cuda",
                            force=False):

    coco = get_coco_api_from_dataset(test_loader.dataset)
    iou_types = ['bbox']
    coco_evaluator = CocoEvaluator(coco, iou_types)

    iterator = tqdm.tqdm(test_loader, desc="Evaluation", mininterval=5)

    init_time = time.time()

    with torch.no_grad():
        for i, (input, target) in enumerate(iterator):
            input, target = send_data_to_device(input, target, device=device)
            original_output = model(input)
            output, target = model_output_transform(original_output, target)

            result = {
                tar["image_id"].item(): out
                for tar, out in zip(target, output)
            }
            coco_evaluator.update(result)

            if i == 0:  # for sotabench.com caching of evaluation
                run_hash = calculate_run_hash([], original_output)
                # if we are in check mode we don't need to go beyond the first
                # batch
                if in_check_mode():
                    iterator.close()
                    break

                if not force:
                    # get the cached values from sotabench.com if available
                    client = Client.public()
                    cached_res = client.get_results_by_run_hash(run_hash)
                    if cached_res:
                        iterator.close()
                        print(
                            "No model change detected (using the first batch run "
                            "hash). Returning cached results.")

                        speed_mem_metrics = {
                            'Tasks / Evaluation Time': None,
                            'Evaluation Time': None,
                            'Tasks': None,
                            'Max Memory Allocated (Total)': None,
                        }

                        return cached_res, speed_mem_metrics, run_hash

    exec_time = (time.time() - init_time)

    coco_evaluator.synchronize_between_processes()
    coco_evaluator.accumulate()
    coco_evaluator.summarize()

    memory_allocated = torch.cuda.max_memory_allocated(device=device)
    torch.cuda.reset_max_memory_allocated(device=device)

    speed_mem_metrics = {
        'Tasks / Evaluation Time': len(test_loader.dataset) / exec_time,
        'Tasks': len(test_loader.dataset),
        'Evaluation Time': exec_time,
        'Max Memory Allocated (Total)': memory_allocated,
    }

    return (get_coco_metrics(coco_evaluator), speed_mem_metrics, run_hash)
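CocoEvaluator.update consumes a dict mapping image_id to a per-image prediction dict with "boxes", "scores" and "labels", which is the format torchvision's detection models already emit. For those models model_output_transform can therefore stay close to the identity; a hedged sketch (the function name is illustrative):

def coco_output_transform(original_output, target):
    # Move each per-image prediction dict to the CPU so the evaluator
    # can convert its tensors to numpy for pycocotools.
    output = [{k: v.cpu() for k, v in o.items()} for o in original_output]
    return output, target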
Example #5
import time

import torch
import tqdm

# Helper imports; the module paths are assumed from the surrounding
# sotabench/torchbench packages.
from sotabenchapi.check import in_check_mode
from sotabenchapi.client import Client
from torchbench.utils import (AverageMeter, accuracy, calculate_run_hash,
                              check_metric_inputs)


def evaluate_classification(
    model,
    test_loader,
    model_output_transform,
    send_data_to_device,
    device="cuda",
    force=False
):
    top1 = AverageMeter()
    top5 = AverageMeter()
    iterator = tqdm.tqdm(test_loader, desc="Evaluation", mininterval=5)

    init_time = time.time()

    with torch.no_grad():
        for i, (input, target) in enumerate(iterator):

            input, target = send_data_to_device(input, target, device=device)
            output = model(input)

            if model_output_transform is not None:
                output = model_output_transform(output, target, model=model)

            check_metric_inputs(output, target, test_loader.dataset, i)
            prec1, prec5 = accuracy(output, target, topk=(1, 5))
            top1.update(prec1.item(), input.size(0))
            top5.update(prec5.item(), input.size(0))

            if i == 0:  # for sotabench.com caching of evaluation
                run_hash = calculate_run_hash([prec1, prec5], output)
                # if we are in check mode we don't need to go beyond the first
                # batch
                if in_check_mode():
                    iterator.close()
                    break

                if not force:
                    # get the cached values from sotabench.com if available
                    client = Client.public()
                    cached_res = client.get_results_by_run_hash(run_hash)
                    if cached_res:
                        iterator.close()
                        print(
                            "No model change detected (using the first batch run "
                            "hash). Returning cached results."
                        )

                        speed_mem_metrics = {
                            'Tasks / Evaluation Time': None,
                            'Evaluation Time': None,
                            'Tasks': None,
                            'Max Memory Allocated (Total)': None,
                        }

                        return cached_res, speed_mem_metrics, run_hash

    exec_time = (time.time() - init_time)

    memory_allocated = torch.cuda.max_memory_allocated(device=device)
    torch.cuda.reset_max_memory_allocated(device=device)

    speed_mem_metrics = {
        'Tasks / Evaluation Time': len(test_loader.dataset) / exec_time,
        'Tasks': len(test_loader.dataset),
        'Evaluation Time': exec_time,
        'Max Memory Allocated (Total)': memory_allocated,
    }

    return (
        {
            "Top 1 Accuracy": top1.avg / 100,
            "Top 5 Accuracy": top5.avg / 100,
        },
        speed_mem_metrics,
        run_hash,
    )
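An end-to-end usage sketch with a stock torchvision model; the dataset path is illustrative, the normalization constants are the standard ImageNet statistics, and the to_device helper is a stand-in for the send_data_to_device argument:

import torch
from torch.utils.data import DataLoader
from torchvision import transforms
from torchvision.datasets import ImageFolder
from torchvision.models import resnet50

val_transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),
])
dataset = ImageFolder("/path/to/imagenet/val", val_transform)
loader = DataLoader(dataset, batch_size=64, num_workers=4)

def to_device(data, target, device="cuda"):
    return data.to(device), target.to(device)

model = resnet50(pretrained=True).to("cuda").eval()
metrics, speed_mem_metrics, run_hash = evaluate_classification(
    model, loader, model_output_transform=None, send_data_to_device=to_device,
)
print(metrics)  # {"Top 1 Accuracy": ..., "Top 5 Accuracy": ...}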