Exemple #1
0
def test_aggregate_results() -> None:
    """
    Verify that averaging the results of several models produces the mean of
    their posteriors, together with the segmentation derived from that mean.
    """
    torch.manual_seed(1)
    num_models = 3

    def _random_model_result():
        # One result built from softmax-normalised random values.
        random_values = torch.rand(3, 3, 3, 3)
        posteriors = torch.nn.functional.softmax(random_values, dim=0).numpy()
        return InferencePipeline.Result(
            epoch=0,
            patient_id=0,
            posteriors=posteriors,
            segmentation=posteriors_to_segmentation(posteriors),
            voxel_spacing_mm=(1, 1, 1))

    # One result per model in the ensemble.
    model_results = [_random_model_result() for _ in range(num_models)]

    # The expected mean has to be computed up front, because aggregation
    # modifies model_results.
    expected_posteriors = np.mean(
        [result.posteriors for result in model_results], axis=0)
    expected_segmentation = posteriors_to_segmentation(expected_posteriors)

    ensemble_result = EnsemblePipeline.aggregate_results(
        model_results, aggregation_type=EnsembleAggregationType.Average)

    # Epoch and patient ID are compared against the first model's result.
    first_result = model_results[0]
    assert ensemble_result.epoch == first_result.epoch
    assert ensemble_result.patient_id == first_result.patient_id

    assert np.array_equal(ensemble_result.posteriors, expected_posteriors)
    assert np.array_equal(ensemble_result.segmentation, expected_segmentation)
Exemple #2
0
def test_store_inference_results(
        test_output_dirs: TestOutputDirectories) -> None:
    """
    Store a randomly generated two-class inference result and check each of the
    NIfTI files that the test expects in the "012" sub-folder of the output
    directory.
    """
    np.random.seed(0)
    num_classes = 2
    volume_shape = (dim_z, dim_y, dim_x)
    random_values = np.random.random_sample((num_classes, ) + volume_shape)
    posterior = torch.nn.functional.softmax(torch.from_numpy(random_values),
                                            dim=0).numpy()
    segmentation = np.argmax(posterior, axis=0)
    assert segmentation.shape == volume_shape

    # Expected file contents for the per-class posteriors.
    posterior0 = to_unique_bytes(posterior[0], (0, 1))
    posterior1 = to_unique_bytes(posterior[1], (0, 1))

    spacing = (2.0, 2.0, 2.0)
    header = ImageHeader(origin=(0, 0, 0),
                         direction=(1, 0, 0, 0, 1, 0, 0, 0, 1),
                         spacing=spacing)
    inference_result = InferencePipeline.Result(epoch=1,
                                                patient_id=12,
                                                posteriors=posterior,
                                                segmentation=segmentation,
                                                voxel_spacing_mm=(1, 1, 1))

    test_config = _create_config_with_folders(test_output_dirs)
    expected_class_indices = {"background": 0, "region": 1}
    assert test_config.class_and_index_with_background() == expected_class_indices

    results_folder = test_output_dirs.root_dir
    store_inference_results(inference_result, test_config,
                            Path(results_folder), header)

    def _check_stored_file(file_name, expected_shape, expected_values):
        # Every stored file is expected to share the header and to be of type ubyte.
        stored_file = os.path.join(results_folder, "012", file_name)
        assert_nifti_content(stored_file, expected_shape, header,
                             expected_values, np.ubyte)

    # Posteriors for each class.
    _check_stored_file("posterior_background.nii.gz", segmentation.shape,
                       list(posterior0))
    _check_stored_file("posterior_region.nii.gz", segmentation.shape,
                       list(posterior1))
    # Binary masks for each class.
    _check_stored_file("background.nii.gz", segmentation.shape, [0, 1])
    _check_stored_file("region.nii.gz", segmentation.shape, [0, 1])
    # Multi-label segmentation.
    _check_stored_file(DEFAULT_RESULT_IMAGE_NAME, segmentation.shape,
                       list(np.unique(segmentation)))
    # Uncertainty map.
    _check_stored_file("uncertainty.nii.gz", inference_result.uncertainty.shape,
                       [248, 249, 253, 254])
def test_check_inference_result(segmentation: Any, posteriors: Any, voxel_spacing_mm: Any) -> None:
    """
    Creating an InferencePipeline.Result from the given arguments must raise an exception.

    NOTE(review): the three arguments are presumably supplied by a pytest parametrization or
    by fixtures that are not visible in this part of the file - confirm.

    :param segmentation: Value passed as the segmentation of the result.
    :param posteriors: Value passed as the posteriors of the result.
    :param voxel_spacing_mm: Value passed as the voxel spacing of the result.
    """
    result_arguments = dict(
        epoch=0,
        patient_id=0,
        segmentation=segmentation,
        posteriors=posteriors,
        voxel_spacing_mm=voxel_spacing_mm,
    )
    with pytest.raises(Exception):
        InferencePipeline.Result(**result_arguments)
Exemple #4
0
def test_evaluate_model_predictions() -> None:
    """
    Builds an 'InferencePipeline.Result' with fixed posteriors for every test patient, stores it,
    evaluates the metrics, and checks how missing ground truth channels show up in the metrics.
    """
    def _patient_rows(patient_id, file_id, missing=()):
        # Dataset rows for one patient. "channel2" re-uses the "channel1" file and "region_1"
        # re-uses the "region" file. Channels listed in `missing` are left out on purpose.
        file_stem_and_channel = [
            ("channel1", "channel1"),
            ("channel1", "channel2"),
            ("mask", "mask"),
            ("region", "region"),
            ("region", "region_1"),
        ]
        return [[patient_id, f"train_and_test_data/id{file_id}_{file_stem}.nii.gz", channel]
                for file_stem, channel in file_stem_and_channel
                if channel not in missing]

    # Patients 3, 4, and 5 are in test dataset such that:
    # Patient 3 has one missing ground truth channel: "region"
    # Patient 4 has all missing ground truth channels: "region", "region_1"
    # Patient 5 has no missing ground truth channels.
    input_list = (
        _patient_rows("1", 1)
        + _patient_rows("2", 2)
        + _patient_rows("3", 2, missing=("region", ))
        + _patient_rows("4", 2, missing=("region", "region_1"))
        + _patient_rows("5", 2)
    )

    config = create_config_from_dataset(input_list,
                                        train=['1'],
                                        val=['2'],
                                        test=['3', '4', '5'])
    config.allow_incomplete_labels = True
    ds = config.get_torch_dataset_for_inference(ModelExecutionMode.TEST)
    results_folder = config.outputs_folder
    results_folder.mkdir(exist_ok=True)

    expected_metric_names = ('Dice',
                             'HausdorffDistance_millimeters',
                             'MeanSurfaceDistance_millimeters')

    def _assert_all_nan(metrics, hue_names):
        # Every metric stored under each of the given hues must consist of NaN only.
        for hue_name in hue_names:
            for metric_type in metrics.values(hue_name).keys():
                assert np.isnan(metrics.values(hue_name)[metric_type]).all()

    def _assert_metrics_present(metrics, hue_name):
        # All expected metric names must be available for the given hue.
        for metric_name in expected_metric_names:
            assert metric_name in metrics.values(hue_name).keys()

    model_prediction_evaluations: List[Tuple[PatientMetadata,
                                             MetricsDict]] = []

    for dataset_index, raw_sample in enumerate(ds):
        sample = Sample.from_dict(sample=raw_sample)
        # Constant posteriors: class 1 gets the highest value everywhere.
        posteriors = np.zeros((3, ) + sample.mask.shape, 'float32')
        for class_index, class_value in enumerate((0.2, 0.6, 0.2)):
            posteriors[class_index][:] = class_value

        assert config.dataset_expected_spacing_xyz is not None

        inference_result = InferencePipeline.Result(
            patient_id=sample.patient_id,
            posteriors=posteriors,
            segmentation=np.argmax(posteriors, 0),
            voxel_spacing_mm=config.dataset_expected_spacing_xyz)
        store_inference_results(inference_result=inference_result,
                                config=config,
                                results_folder=results_folder,
                                image_header=sample.metadata.image_header)

        metadata, metrics_per_class = evaluate_model_predictions(
            dataset_index,
            config=config,
            dataset=ds,
            results_folder=results_folder)

        model_prediction_evaluations.append((metadata, metrics_per_class))

        patient_id = sample.metadata.patient_id
        if patient_id == '3':
            # Patient 3 has one missing ground truth channel: "region"
            _assert_metrics_present(metrics_per_class, 'region_1')
            _assert_all_nan(metrics_per_class, ['region', 'Default'])
        elif patient_id == '4':
            # Patient 4 has all missing ground truth channels: "region", "region_1"
            _assert_all_nan(metrics_per_class, ['region_1', 'region', 'Default'])
        elif patient_id == '5':
            # Patient 5 has no missing ground truth channels
            _assert_all_nan(metrics_per_class, ['Default'])
            for hue_name in ['region_1', 'region']:
                _assert_metrics_present(metrics_per_class, hue_name)

    metrics_writer, average_dice = populate_metrics_writer(
        model_prediction_evaluations, config)

    writer_columns = ("Dice", "HausdorffDistance_mm", "MeanDistance_mm")

    def _written_value(column_name, row_index):
        # Entry of the metrics writer, converted to float.
        return float(metrics_writer.columns[column_name][row_index])

    # NOTE(review): the row indices below suggest two consecutive rows per test patient - confirm.
    # Patient 3 has only one missing ground truth channel
    assert not np.isnan(average_dice[0])
    for column_name in writer_columns:
        assert np.isnan(_written_value(column_name, 0))
        assert not np.isnan(_written_value(column_name, 1))
    # Patient 4 has all missing ground truth channels
    assert np.isnan(average_dice[1])
    for column_name in writer_columns:
        assert np.isnan(_written_value(column_name, 2))
        assert np.isnan(_written_value(column_name, 3))
    # Patient 5 has no missing ground truth channels.
    assert average_dice[2] > 0
    for column_name in writer_columns:
        assert _written_value(column_name, 4) >= 0
        assert _written_value(column_name, 5) >= 0