Example #1
def test_run_scoring(test_output_dirs: OutputFolderForTests,
                     is_ensemble: bool) -> None:
    """
    Run the scoring script on an image file.
    This test lives outside the normal Tests folder because it imports "score.py" from the repository root folder.
    If we switched to using InnerEye as a package, we would have to handle this import specially.
    The inference run here is on a 1-channel model, whereas test_register_and_score_model works with a 2-channel
    model.
    """
    seed_everything(42)
    checkpoint = test_output_dirs.root_dir / "checkpoint.ckpt"
    image_size = (40, 40, 40)
    test_crop_size = image_size
    dummy_config = DummyModel()
    dummy_config.test_crop_size = test_crop_size
    dummy_config.inference_stride_size = (10, 10, 10)
    dummy_config.inference_batch_size = 10
    create_model_and_store_checkpoint(dummy_config, checkpoint)
    all_paths = [checkpoint] * 2 if is_ensemble else [checkpoint]
    inference_pipeline, dummy_config = create_inference_pipeline(dummy_config,
                                                                 all_paths,
                                                                 use_gpu=False)
    image_with_header = io_util.load_nifti_image(test_image)
    image_with_header.image = \
        image_with_header.image[:image_size[0], :image_size[1], :image_size[2]]
    result = run_inference([image_with_header, image_with_header],
                           inference_pipeline, dummy_config)
    assert image_with_header.image.shape == result.shape  # type: ignore
    print(f"Unique result values: {np.unique(result)}")
    assert np.all(result == 1)
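
The is_ensemble argument implies that this test is driven by a pytest parametrization which the excerpt does not show. A minimal, hedged sketch of such a wrapper, assuming the standard pytest.mark.parametrize decorator (the sketch's function name is illustrative only):

import pytest


@pytest.mark.parametrize("is_ensemble", [True, False])
def test_run_scoring_sketch(is_ensemble: bool) -> None:
    # Mirrors the checkpoint handling above: an ensemble duplicates the single
    # checkpoint path, a plain run uses it once.
    checkpoint = "checkpoint.ckpt"
    all_paths = [checkpoint] * 2 if is_ensemble else [checkpoint]
    assert len(all_paths) == (2 if is_ensemble else 1)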
Example #2
def test_create_from_checkpoint_ensemble(
        test_output_dirs: OutputFolderForTests) -> None:
    config = ClassificationModelForTesting()

    path_to_checkpoint_non_exist = test_output_dirs.root_dir / "does_not_exist.ckpt"
    path_to_checkpoint_exist = test_output_dirs.root_dir / "foo.ckpt"
    create_model_and_store_checkpoint(config, path_to_checkpoint_exist)

    # When none of the checkpoints exist, creating the ensemble raises an error.
    with pytest.raises(ValueError):
        paths_to_checkpoint = [path_to_checkpoint_non_exist] * 5
        ScalarEnsemblePipeline.create_from_checkpoint(paths_to_checkpoint,
                                                      config)

    # When only some checkpoints exist, the ensemble is built from those that do.
    paths_to_checkpoint = [path_to_checkpoint_non_exist
                           ] * 3 + [path_to_checkpoint_exist] * 2
    inference_pipeline = ScalarEnsemblePipeline.create_from_checkpoint(
        paths_to_checkpoint, config)
    assert isinstance(inference_pipeline, ScalarEnsemblePipeline)
    assert len(inference_pipeline.pipelines) == 2

    # When all checkpoints exist, every one is included in the ensemble.
    paths_to_checkpoint = [path_to_checkpoint_exist] * 5
    inference_pipeline = ScalarEnsemblePipeline.create_from_checkpoint(
        paths_to_checkpoint, config)
    assert isinstance(inference_pipeline, ScalarEnsemblePipeline)
    assert len(inference_pipeline.pipelines) == 5
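
The assertions above pin down a specific contract for ScalarEnsemblePipeline.create_from_checkpoint: checkpoint paths that do not exist are dropped, and an error is raised only when no checkpoint remains. A minimal sketch of that filtering behaviour, written against the test's assertions rather than the actual InnerEye implementation:

from pathlib import Path
from typing import List


def filter_existing_checkpoints(paths_to_checkpoint: List[Path]) -> List[Path]:
    # Keep only checkpoints that exist on disk; fail if none of them do.
    existing = [path for path in paths_to_checkpoint if path.is_file()]
    if not existing:
        raise ValueError("None of the provided checkpoint paths exist.")
    return existing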
Example #3
def test_score_image_dicom_mock_none(
        test_output_dirs: OutputFolderForTests) -> None:
    """
    Test that DICOM in and DICOM-RT out works.

    In contrast to the mocked variants of this test, nothing is mocked here and full image scoring
    is run using the PassThroughModel.

    :param test_output_dirs: Test output directories.
    """
    model_config = PassThroughModel()
    model_config.set_output_to(test_output_dirs.root_dir)
    checkpoint_path = model_config.checkpoint_folder / "checkpoint.ckpt"
    create_model_and_store_checkpoint(model_config, checkpoint_path)

    azure_config = AzureConfig()
    project_root = Path(__file__).parent.parent
    ml_runner = MLRunner(model_config=model_config,
                         azure_config=azure_config,
                         project_root=project_root)
    model_folder = test_output_dirs.root_dir / "final"
    ml_runner.copy_child_paths_to_folder(model_folder=model_folder,
                                         checkpoint_paths=[checkpoint_path])

    zipped_dicom_series_path = zip_dicom_series(model_folder)

    score_pipeline_config = ScorePipelineConfig(
        data_folder=zipped_dicom_series_path.parent,
        model_folder=str(model_folder),
        image_files=[str(zipped_dicom_series_path)],
        result_image_name=HNSEGMENTATION_FILE.name,
        use_gpu=False,
        use_dicom=True)

    segmentation = score_image(score_pipeline_config)
    assert_zip_file_contents(segmentation, HN_DICOM_RT_ZIPPED, model_folder)
Example #4
def test_create_inference_pipeline(
        config: ModelConfigBase, expected_inference_type: type,
        expected_ensemble_type: type,
        test_output_dirs: OutputFolderForTests) -> None:
    config.set_output_to(test_output_dirs.root_dir)
    checkpoint_path = test_output_dirs.root_dir / "checkpoint.ckpt"
    create_model_and_store_checkpoint(config, checkpoint_path)
    inference = create_inference_pipeline(config, [checkpoint_path])
    assert isinstance(inference, expected_inference_type)
    ensemble = create_inference_pipeline(config, [checkpoint_path] * 2)
    assert isinstance(ensemble, expected_ensemble_type)
Example #5
def test_create_from_checkpoint_non_ensemble(
        test_output_dirs: OutputFolderForTests) -> None:
    config = ClassificationModelForTesting()

    # when checkpoint does not exist, return None
    path_to_checkpoint = test_output_dirs.root_dir / "foo.ckpt"
    inference_pipeline = ScalarInferencePipeline.create_from_checkpoint(
        path_to_checkpoint, config)
    assert inference_pipeline is None

    create_model_and_store_checkpoint(config, path_to_checkpoint)
    inference_pipeline = ScalarInferencePipeline.create_from_checkpoint(
        path_to_checkpoint, config)
    assert isinstance(inference_pipeline, ScalarInferencePipeline)
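
As a counterpart to the ensemble test above, this test fixes the non-ensemble contract: a missing checkpoint yields None rather than an exception, and an existing checkpoint yields a pipeline. A hedged sketch of that behaviour, again derived from the assertions only (the stand-in class is purely illustrative):

from pathlib import Path
from typing import Optional


class _SketchPipeline:
    """Stand-in for the constructed pipeline, used only to keep the sketch self-contained."""


def create_from_checkpoint_sketch(path_to_checkpoint: Path) -> Optional[_SketchPipeline]:
    # A missing checkpoint is reported by returning None instead of raising.
    if not path_to_checkpoint.is_file():
        return None
    return _SketchPipeline()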
Example #6
def test_score_image_dicom_mock_run_store(
        test_output_dirs: OutputFolderForTests) -> None:
    """
    Test that DICOM in and DICOM-RT out works, by mocking out the run and store functions.

    This mocks out run_inference and store_as_ubyte_nifti so that init_from_model_inference_json
    is tested in addition to the tests in test_score_image_dicom_mock_all.

    :param test_output_dirs: Test output directories.
    """
    mock_segmentation = {'mock_segmentation': True}
    model_config = DummyModel()
    model_config.set_output_to(test_output_dirs.root_dir)
    checkpoint_path = model_config.checkpoint_folder / "checkpoint.ckpt"
    create_model_and_store_checkpoint(model_config, checkpoint_path)

    azure_config = AzureConfig()
    project_root = Path(__file__).parent.parent
    ml_runner = MLRunner(model_config=model_config,
                         azure_config=azure_config,
                         project_root=project_root)
    model_folder = test_output_dirs.root_dir / "final"
    ml_runner.copy_child_paths_to_folder(model_folder=model_folder,
                                         checkpoint_paths=[checkpoint_path])

    zipped_dicom_series_path = test_output_dirs.root_dir / "temp_pack_dicom_series" / "dicom_series.zip"
    zip_known_dicom_series(zipped_dicom_series_path)

    score_pipeline_config = ScorePipelineConfig(
        data_folder=zipped_dicom_series_path.parent,
        model_folder=str(model_folder),
        image_files=[str(zipped_dicom_series_path)],
        result_image_name=HNSEGMENTATION_FILE.name,
        use_gpu=False,
        use_dicom=True,
        model_id="Dummy:1")

    with mock.patch('score.run_inference',
                    return_value=mock_segmentation) as mock_run_inference:
        with mock.patch(
                'score.store_as_ubyte_nifti',
                return_value=HNSEGMENTATION_FILE) as mock_store_as_ubyte_nifti:
            segmentation = score_image(score_pipeline_config)
            assert_zip_file_contents(segmentation, HN_DICOM_RT_ZIPPED,
                                     model_folder)

    mock_run_inference.assert_called()
    mock_store_as_ubyte_nifti.assert_called()
Example #7
def test_score_image_dicom_mock_run(
        test_output_dirs: OutputFolderForTests) -> None:
    """
    Test that DICOM in and DICOM-RT out works, by mocking out only the run_inference function.

    This mocks out run_inference so that store_as_ubyte_nifti
    is tested in addition to the tests in test_score_image_dicom_mock_run_store.

    :param test_output_dirs: Test output directories.
    """
    model_config = DummyModel()
    model_config.set_output_to(test_output_dirs.root_dir)
    checkpoint_path = model_config.checkpoint_folder / "checkpoint.ckpt"
    create_model_and_store_checkpoint(model_config, checkpoint_path)

    azure_config = AzureConfig()
    project_root = Path(__file__).parent.parent
    ml_runner = MLRunner(model_config=model_config,
                         azure_config=azure_config,
                         project_root=project_root)
    model_folder = test_output_dirs.root_dir / "final"
    ml_runner.copy_child_paths_to_folder(model_folder=model_folder,
                                         checkpoint_paths=[checkpoint_path])

    zipped_dicom_series_path = zip_dicom_series(model_folder)

    score_pipeline_config = ScorePipelineConfig(
        data_folder=zipped_dicom_series_path.parent,
        model_folder=str(model_folder),
        image_files=[str(zipped_dicom_series_path)],
        result_image_name=HNSEGMENTATION_FILE.name,
        use_gpu=False,
        use_dicom=True)

    image_with_header = io_util.load_nifti_image(HNSEGMENTATION_FILE)

    with mock.patch(
            'score.run_inference',
            return_value=image_with_header.image) as mock_run_inference:
        segmentation = score_image(score_pipeline_config)
        assert_zip_file_contents(segmentation, HN_DICOM_RT_ZIPPED,
                                 model_folder)

    mock_run_inference.assert_called()
Example #8
def test_runner_restart(test_output_dirs: OutputFolderForTests) -> None:
    """
    Test that starting training in a folder whose checkpoints subfolder already contains recovery checkpoints
    is recognized as a recovery run. Also check that the start epoch in the config is updated at loading time.
    """
    model_config = DummyClassification()
    model_config.set_output_to(test_output_dirs.root_dir)
    model_config.num_epochs = FIXED_EPOCH + 2
    # Save all checkpoints: if recovery works as expected, new checkpoints should be written
    # for epochs FIXED_EPOCH and FIXED_EPOCH + 1.
    model_config.recovery_checkpoint_save_interval = 1
    model_config.recovery_checkpoints_save_last_k = -1
    runner = MLRunner(model_config=model_config)
    runner.setup(use_mount_or_download_dataset=False)
    # Epochs are 0-based for saving
    create_model_and_store_checkpoint(model_config,
                                      runner.container.checkpoint_folder /
                                      f"{RECOVERY_CHECKPOINT_FILE_NAME}_epoch="
                                      f"{FIXED_EPOCH - 1}{CHECKPOINT_SUFFIX}",
                                      weights_only=False)
    azure_config = get_default_azure_config()
    checkpoint_handler = CheckpointHandler(
        azure_config=azure_config,
        container=runner.container,
        project_root=test_output_dirs.root_dir)
    _, storing_logger = model_train(checkpoint_handler=checkpoint_handler,
                                    container=runner.container)
    # We expect 4 checkpoints: the pre-existing recovery checkpoint for epoch FIXED_EPOCH - 1,
    # new recovery checkpoints for FIXED_EPOCH and FIXED_EPOCH + 1, and the best checkpoint.
    assert len(os.listdir(runner.container.checkpoint_folder)) == 4
    assert (runner.container.checkpoint_folder /
            f"{RECOVERY_CHECKPOINT_FILE_NAME}_epoch="
            f"{FIXED_EPOCH - 1}{CHECKPOINT_SUFFIX}").exists()
    assert (runner.container.checkpoint_folder /
            f"{RECOVERY_CHECKPOINT_FILE_NAME}_epoch="
            f"{FIXED_EPOCH}{CHECKPOINT_SUFFIX}").exists()
    assert (runner.container.checkpoint_folder /
            f"{RECOVERY_CHECKPOINT_FILE_NAME}_epoch="
            f"{FIXED_EPOCH + 1}{CHECKPOINT_SUFFIX}").exists()
    assert (runner.container.checkpoint_folder /
            BEST_CHECKPOINT_FILE_NAME_WITH_SUFFIX).exists()
    # Check that training really restarted from epoch FIXED_EPOCH.
    assert list(storing_logger.epochs) == [FIXED_EPOCH,
                                           FIXED_EPOCH + 1]  # type: ignore
Example #9
def test_invalid_stride_size(test_output_dirs: OutputFolderForTests) -> None:
    config = SegmentationModelBase(
        architecture="UNet3D",
        feature_channels=[1],
        crop_size=(64, 64, 64),
        test_crop_size=(80, 80, 80),
        image_channels=["mr"],
        ground_truth_ids=["tumour_mass", "subtract"],
        train_batch_size=8,
        inference_batch_size=1,
        inference_stride_size=(120, 120, 120),
        should_validate=False
    )
    config.set_output_to(test_output_dirs.root_dir)
    checkpoint_path = test_output_dirs.root_dir / "checkpoint.ckpt"
    create_model_and_store_checkpoint(config, checkpoint_path)

    with pytest.raises(ValueError) as ex:
        load_from_checkpoint_and_adjust_for_inference(config=config, checkpoint_path=checkpoint_path)

    assert "The inference stride size (120, 120, 120) must be smaller" in ex.value.args[0]
    assert str(config.inference_stride_size) in ex.value.args[0]
    assert str(config.test_crop_size) in ex.value.args[0]
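
The error message asserted above implies a simple shape check inside load_from_checkpoint_and_adjust_for_inference: the inference stride must not exceed the test crop size in any dimension. A hedged sketch of that check, reconstructed from the assertions rather than taken from the InnerEye code:

from typing import Tuple


def check_inference_stride_size(inference_stride_size: Tuple[int, int, int],
                                test_crop_size: Tuple[int, int, int]) -> None:
    # Every stride component must be no larger than the matching crop component.
    if any(stride > crop for stride, crop in zip(inference_stride_size, test_crop_size)):
        raise ValueError(f"The inference stride size {inference_stride_size} must be smaller "
                         f"than or equal to the test crop size {test_crop_size}.")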
Example #10
def test_model_inference_train_and_test(
        test_output_dirs: OutputFolderForTests, perform_cross_validation: bool,
        perform_training_set_inference: bool) -> None:
    config = DummyModel()
    config.number_of_cross_validation_splits = 2 if perform_cross_validation else 0
    config.perform_training_set_inference = perform_training_set_inference
    # Plotting crashes with random TCL errors on Windows; disable it for Windows PR builds.
    config.is_plotting_enabled = common_util.is_linux()

    config.set_output_to(test_output_dirs.root_dir)
    config.local_dataset = full_ml_test_data_path()

    checkpoint_path = config.checkpoint_folder / BEST_CHECKPOINT_FILE_NAME_WITH_SUFFIX
    create_model_and_store_checkpoint(config, checkpoint_path)
    checkpoint_handler = get_default_checkpoint_handler(
        model_config=config, project_root=test_output_dirs.root_dir)
    checkpoint_handler.additional_training_done()
    result, _, _ = MLRunner(config).model_inference_train_and_test(
        checkpoint_handler=checkpoint_handler)
    if result is None:
        raise ValueError("Inference result must not be None")
    assert isinstance(result, InferenceMetricsForSegmentation)
    epoch_folder_name = common_util.BEST_EPOCH_FOLDER_NAME
    for folder in [
            ModelExecutionMode.TRAIN.value, ModelExecutionMode.VAL.value,
            ModelExecutionMode.TEST.value
    ]:
        results_folder = config.outputs_folder / epoch_folder_name / folder
        folder_exists = results_folder.is_dir()
        if folder in [
                ModelExecutionMode.TRAIN.value, ModelExecutionMode.VAL.value
        ]:
            if perform_training_set_inference:
                assert folder_exists
        else:
            assert folder_exists
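
The folder checks at the end of the test reduce to a small rule: the TEST results folder must always exist, while the TRAIN and VAL folders are only required when perform_training_set_inference is set. A compact restatement of that rule as a standalone helper (a sketch for clarity only; the mode strings are assumed to match ModelExecutionMode's values):

def results_folder_required(mode_value: str, perform_training_set_inference: bool) -> bool:
    # Train/Val results are only required when training-set inference is enabled;
    # the Test results folder is always expected.
    if mode_value in ("Train", "Val"):
        return perform_training_set_inference
    return True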
Example #11
def run_model_inference_train_and_test(
        test_output_dirs: OutputFolderForTests,
        perform_cross_validation: bool,
        inference_on_train_set: Optional[bool] = None,
        inference_on_val_set: Optional[bool] = None,
        inference_on_test_set: Optional[bool] = None,
        ensemble_inference_on_train_set: Optional[bool] = None,
        ensemble_inference_on_val_set: Optional[bool] = None,
        ensemble_inference_on_test_set: Optional[bool] = None,
        model_proc: ModelProcessing = ModelProcessing.DEFAULT) -> None:
    """
    Test that running inference produces the expected output metrics, files, folders, and calls to upload_folder.

    :param test_output_dirs: Test output directories.
    :param perform_cross_validation: Whether to test with cross validation.
    :param inference_on_train_set: Override for inference on train data sets.
    :param inference_on_val_set: Override for inference on validation data sets.
    :param inference_on_test_set: Override for inference on test data sets.
    :param ensemble_inference_on_train_set: Override for ensemble inference on train data sets.
    :param ensemble_inference_on_val_set: Override for ensemble inference on validation data sets.
    :param ensemble_inference_on_test_set: Override for ensemble inference on test data sets.
    :param model_proc: Model processing to test.
    :return: None.
    """
    dummy_model = DummyModel()

    config = PassThroughModel()
    # Copy settings from DummyModel
    config.image_channels = dummy_model.image_channels
    config.ground_truth_ids = dummy_model.ground_truth_ids
    config.ground_truth_ids_display_names = dummy_model.ground_truth_ids_display_names
    config.colours = dummy_model.colours
    config.fill_holes = dummy_model.fill_holes
    config.roi_interpreted_types = dummy_model.roi_interpreted_types

    config.test_crop_size = (16, 16, 16)
    config.number_of_cross_validation_splits = 2 if perform_cross_validation else 0
    config.inference_on_train_set = inference_on_train_set
    config.inference_on_val_set = inference_on_val_set
    config.inference_on_test_set = inference_on_test_set
    config.ensemble_inference_on_train_set = ensemble_inference_on_train_set
    config.ensemble_inference_on_val_set = ensemble_inference_on_val_set
    config.ensemble_inference_on_test_set = ensemble_inference_on_test_set
    # Plotting crashes with random TCL errors on Windows; disable it for Windows PR builds.
    config.is_plotting_enabled = common_util.is_linux()

    config.set_output_to(test_output_dirs.root_dir)
    train_and_test_data_small_dir = test_output_dirs.root_dir / "train_and_test_data_small"
    config.local_dataset = create_train_and_test_data_small_dataset(
        config.test_crop_size, full_ml_test_data_path(), "train_and_test_data",
        train_and_test_data_small_dir, "data")

    checkpoint_path = config.checkpoint_folder / LAST_CHECKPOINT_FILE_NAME_WITH_SUFFIX
    create_model_and_store_checkpoint(config, checkpoint_path)
    checkpoint_handler = get_default_checkpoint_handler(
        model_config=config, project_root=test_output_dirs.root_dir)
    checkpoint_handler.additional_training_done()

    mock_upload_path = test_output_dirs.root_dir / "mock_upload"
    mock_upload_path.mkdir()

    run = create_mock_run(mock_upload_path, config)

    azure_config = Mock(name='mock_azure_config')
    azure_config.fetch_run.return_value = run

    runner = MLRunner(model_config=config, azure_config=azure_config)

    with mock.patch("InnerEye.ML.model_testing.PARENT_RUN_CONTEXT", run):
        metrics = runner.model_inference_train_and_test(
            checkpoint_paths=checkpoint_handler.get_checkpoints_to_test(),
            model_proc=model_proc)

    if model_proc == ModelProcessing.ENSEMBLE_CREATION:
        # Create a fake ensemble dataset.csv
        dataset_df = create_dataset_df()
        dataset_df.to_csv(config.outputs_folder / DATASET_CSV_FILE_NAME)

        with mock.patch.object(PlotCrossValidationConfig,
                               'azure_config',
                               return_value=azure_config):
            with mock.patch("InnerEye.Azure.azure_util.PARENT_RUN_CONTEXT",
                            run):
                with mock.patch("InnerEye.ML.run_ml.PARENT_RUN_CONTEXT", run):
                    runner.plot_cross_validation_and_upload_results()
                    runner.generate_report(ModelProcessing.ENSEMBLE_CREATION)

    if model_proc == ModelProcessing.DEFAULT:
        named_metrics = {
            ModelExecutionMode.TRAIN: inference_on_train_set,
            ModelExecutionMode.TEST: inference_on_test_set,
            ModelExecutionMode.VAL: inference_on_val_set
        }
    else:
        named_metrics = {
            ModelExecutionMode.TRAIN: ensemble_inference_on_train_set,
            ModelExecutionMode.TEST: ensemble_inference_on_test_set,
            ModelExecutionMode.VAL: ensemble_inference_on_val_set
        }

    error = ''
    expected_upload_folder_count = 0
    for mode, flag in named_metrics.items():
        if mode in metrics:
            metric = metrics[mode]
            assert isinstance(metric, InferenceMetricsForSegmentation)

        if flag is None:
            # No override supplied, calculate the expected default:
            if model_proc == ModelProcessing.DEFAULT:
                if not perform_cross_validation:
                    # For a single (non-cross-validation) run, default to inference on the test set only.
                    flag = mode == ModelExecutionMode.TEST
                else:
                    # For a cross-validation child run, default to no inference.
                    flag = False
            else:
                # For ensemble creation, default to inference on the test set only.
                flag = mode == ModelExecutionMode.TEST

        if mode in metrics and not flag:
            error = error + f"Error: inference for {mode.value} was run but was not expected. "
        elif mode not in metrics and flag:
            error = error + f"Error: inference for {mode.value} was expected but was not run. "
        results_folder = config.outputs_folder / get_best_epoch_results_path(
            mode, model_proc)
        folder_exists = results_folder.is_dir()
        assert folder_exists == flag
        if flag and model_proc == ModelProcessing.ENSEMBLE_CREATION:
            expected_upload_folder_count = expected_upload_folder_count + 1
            expected_name = get_best_epoch_results_path(
                mode, ModelProcessing.DEFAULT)
            run.upload_folder.assert_any_call(name=str(expected_name),
                                              path=str(results_folder))
    if len(error):
        raise ValueError(error)

    if model_proc == ModelProcessing.ENSEMBLE_CREATION:
        # The report should also have been uploaded via the mocked run.
        expected_upload_folder_count = expected_upload_folder_count + 1

    assert run.upload_folder.call_count == expected_upload_folder_count
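
The default-flag logic inside the loop above can be stated more compactly. The helper below is a sketch for clarity only, not part of the InnerEye API: with no override set, both a single run and ensemble creation default to test-set inference only, while a cross-validation child run defaults to no inference at all.

from typing import Optional


def resolve_inference_flag(override: Optional[bool],
                           is_test_mode: bool,
                           is_default_processing: bool,
                           perform_cross_validation: bool) -> bool:
    # An explicit override always wins.
    if override is not None:
        return override
    # A cross-validation child run (default processing with cross validation) runs no inference.
    if is_default_processing and perform_cross_validation:
        return False
    # Otherwise (single run or ensemble creation), only the test set is scored by default.
    return is_test_mode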
Example #12
def test_model_test(test_output_dirs: OutputFolderForTests) -> None:
    train_and_test_data_dir = full_ml_test_data_path("train_and_test_data")
    seed_everything(42)
    config = DummyModel()
    config.set_output_to(test_output_dirs.root_dir)
    placeholder_dataset_id = "place_holder_dataset_id"
    config.azure_dataset_id = placeholder_dataset_id
    transform = config.get_full_image_sample_transforms().test
    df = pd.read_csv(full_ml_test_data_path(DATASET_CSV_FILE_NAME))
    df = df[df.subject.isin([1, 2])]
    # noinspection PyTypeHints
    config._datasets_for_inference = \
        {ModelExecutionMode.TEST: FullImageDataset(config, df, full_image_sample_transforms=transform)}  # type: ignore
    execution_mode = ModelExecutionMode.TEST
    checkpoint_handler = get_default_checkpoint_handler(
        model_config=config, project_root=test_output_dirs.root_dir)
    # Mimic the behaviour that checkpoints are downloaded from blob storage into the checkpoints folder.
    create_model_and_store_checkpoint(
        config,
        config.checkpoint_folder / BEST_CHECKPOINT_FILE_NAME_WITH_SUFFIX)
    checkpoint_handler.additional_training_done()
    inference_results = model_testing.segmentation_model_test(
        config,
        data_split=execution_mode,
        checkpoint_handler=checkpoint_handler)
    epoch_dir = config.outputs_folder / get_best_epoch_results_path(
        execution_mode)
    assert inference_results.metrics == pytest.approx(0.66606902, abs=1e-6)

    assert config.outputs_folder.is_dir()
    assert epoch_dir.is_dir()
    patient1 = io_util.load_nifti_image(train_and_test_data_dir /
                                        "id1_channel1.nii.gz")
    patient2 = io_util.load_nifti_image(train_and_test_data_dir /
                                        "id2_channel1.nii.gz")

    assert_file_contains_string(epoch_dir / DATASET_ID_FILE,
                                placeholder_dataset_id)
    assert_file_contains_string(epoch_dir / GROUND_TRUTH_IDS_FILE, "region")
    assert_text_files_match(
        epoch_dir / model_testing.SUBJECT_METRICS_FILE_NAME,
        train_and_test_data_dir / model_testing.SUBJECT_METRICS_FILE_NAME)
    assert_text_files_match(
        epoch_dir / model_testing.METRICS_AGGREGATES_FILE,
        train_and_test_data_dir / model_testing.METRICS_AGGREGATES_FILE)
    # Plotting results vary between platforms. Can only check if the file is generated, but not its contents.
    assert (epoch_dir / model_testing.BOXPLOT_FILE).exists()

    assert_nifti_content(epoch_dir / "001" / "posterior_region.nii.gz",
                         get_image_shape(patient1), patient1.header, [137],
                         np.ubyte)
    assert_nifti_content(epoch_dir / "002" / "posterior_region.nii.gz",
                         get_image_shape(patient2), patient2.header, [137],
                         np.ubyte)
    assert_nifti_content(epoch_dir / "001" / DEFAULT_RESULT_IMAGE_NAME,
                         get_image_shape(patient1), patient1.header, [1],
                         np.ubyte)
    assert_nifti_content(epoch_dir / "002" / DEFAULT_RESULT_IMAGE_NAME,
                         get_image_shape(patient2), patient2.header, [1],
                         np.ubyte)
    assert_nifti_content(epoch_dir / "001" / "posterior_background.nii.gz",
                         get_image_shape(patient1), patient1.header, [117],
                         np.ubyte)
    assert_nifti_content(epoch_dir / "002" / "posterior_background.nii.gz",
                         get_image_shape(patient2), patient2.header, [117],
                         np.ubyte)
    thumbnails_folder = epoch_dir / model_testing.THUMBNAILS_FOLDER
    assert thumbnails_folder.is_dir()
    png_files = list(thumbnails_folder.glob("*.png"))
    overlays = [f for f in png_files if "_region_slice_" in str(f)]
    assert len(overlays) == len(df.subject.unique()), \
        "There should be one overlay/contour file per subject"

    # Writing dataset.csv normally happens at the beginning of training,
    # but this test reads off a saved checkpoint file.
    # Dataset.csv must be present for plot_cross_validation.
    config.write_dataset_files()
    # Test if the metrics files can be picked up correctly by the cross validation code
    config_and_files = get_config_and_results_for_offline_runs(config)
    result_files = config_and_files.files
    assert len(result_files) == 1
    for file in result_files:
        assert file.execution_mode == execution_mode
        assert file.dataset_csv_file is not None
        assert file.dataset_csv_file.exists()
        assert file.metrics_file is not None
        assert file.metrics_file.exists()
Example #13
def inference_identity(
        test_output_dirs: OutputFolderForTests,
        image_size: Any = (4, 5, 8),
        crop_size: Any = (5, 5, 5),
        shrink_by: Any = (0, 0, 0),
        num_classes: int = 5,
        create_mask: bool = True,
        extract_largest_foreground_connected_component: bool = False,
        is_ensemble: bool = False,
        posterior_smoothing_mm: Any = None) -> None:
    """
    Test to make sure the inference pipeline is identity preserving, i.e. we can recreate deterministic
    model output, ensuring that the patching and stitching are robust.
    """
    # fix random seed
    np.random.seed(0)

    ground_truth_ids = list(map(str, range(num_classes)))
    # image to run inference on: The mock model passes the input image through, hence the input
    # image must have as many channels as we have classes (plus background), such that the output is
    # also a valid posterior.
    num_channels = num_classes + 1
    image_channels = np.random.randn(num_channels, *list(image_size))
    # create a random mask if required
    mask = np.round(np.random.uniform(size=image_size)).astype(int) \
        if create_mask else None
    config = InferenceIdentityModel(shrink_by=shrink_by)
    config.crop_size = crop_size
    config.test_crop_size = crop_size
    config.image_channels = list(map(str, range(num_channels)))
    config.ground_truth_ids = ground_truth_ids
    config.posterior_smoothing_mm = posterior_smoothing_mm

    # We have to set largest_connected_component_foreground_classes after creating the model config,
    # because this parameter is not overridable and hence will not be set by GenericConfig's constructor.
    if extract_largest_foreground_connected_component:
        config.largest_connected_component_foreground_classes = [
            (c, None) for c in ground_truth_ids
        ]
    # set expected posteriors
    expected_posteriors = torch.nn.functional.softmax(
        torch.tensor(image_channels), dim=0).numpy()
    # apply the mask if required
    if mask is not None:
        expected_posteriors = image_util.apply_mask_to_posteriors(
            expected_posteriors, mask)
    if posterior_smoothing_mm is not None:
        expected_posteriors = image_util.gaussian_smooth_posteriors(
            posteriors=expected_posteriors,
            kernel_size_mm=posterior_smoothing_mm,
            voxel_spacing_mm=(1, 1, 1))
    # compute expected segmentation
    expected_segmentation = image_util.posteriors_to_segmentation(
        expected_posteriors)
    if extract_largest_foreground_connected_component:
        largest_component = image_util.extract_largest_foreground_connected_component(
            multi_label_array=expected_segmentation)
        # make sure the test data is accurate by checking if more than one component exists
        assert not np.array_equal(largest_component, expected_segmentation)
        expected_segmentation = largest_component

    # instantiate the model
    checkpoint = test_output_dirs.root_dir / "checkpoint.ckpt"
    create_model_and_store_checkpoint(config, checkpoint_path=checkpoint)

    # create single or ensemble inference pipeline
    inference_pipeline = InferencePipeline.create_from_checkpoint(
        path_to_checkpoint=checkpoint, model_config=config)
    assert inference_pipeline is not None
    full_image_inference_pipeline = EnsemblePipeline([inference_pipeline], config) \
        if is_ensemble else inference_pipeline

    # compute full image inference results
    inference_result = full_image_inference_pipeline \
        .predict_and_post_process_whole_image(image_channels=image_channels, mask=mask, voxel_spacing_mm=(1, 1, 1))

    # Segmentation must have same size as input image
    assert inference_result.segmentation.shape == image_size
    assert inference_result.posteriors.shape == (num_classes + 1,) + image_size
    # check that the posteriors and segmentations are as expected. Flatten to a list so that the error
    # messages are more informative.
    assert np.allclose(inference_result.posteriors, expected_posteriors)
    assert np.array_equal(inference_result.segmentation, expected_segmentation)
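
Since inference_identity has no test_ prefix, it is presumably a helper invoked by several small parametrized tests rather than collected directly by pytest. A hedged usage sketch (the wrapping test name and argument choice are illustrative only):

def test_inference_identity_ensemble(test_output_dirs: OutputFolderForTests) -> None:
    # Exercise the ensemble code path of the identity-preserving inference helper.
    inference_identity(test_output_dirs, is_ensemble=True)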
Example #14
def test_model_test(test_output_dirs: OutputFolderForTests,
                    use_partial_ground_truth: bool,
                    allow_partial_ground_truth: bool) -> None:
    """
    Check the CSVs (and image files) output by InnerEye.ML.model_testing.segmentation_model_test.

    :param test_output_dirs: The fixture in conftest.py.
    :param use_partial_ground_truth: Whether to remove some ground truth labels from some test subjects.
    :param allow_partial_ground_truth: What to set the allow_incomplete_labels flag to.
    """
    train_and_test_data_dir = full_ml_test_data_path("train_and_test_data")
    seed_everything(42)
    config = DummyModel()
    config.allow_incomplete_labels = allow_partial_ground_truth
    config.set_output_to(test_output_dirs.root_dir)
    placeholder_dataset_id = "place_holder_dataset_id"
    config.azure_dataset_id = placeholder_dataset_id
    transform = config.get_full_image_sample_transforms().test
    df = pd.read_csv(full_ml_test_data_path(DATASET_CSV_FILE_NAME))

    if use_partial_ground_truth:
        config.check_exclusive = False
        config.ground_truth_ids = ["region", "region_1"]

        # As in Tests.ML.pipelines.test.inference.test_evaluate_model_predictions, patients 3, 4,
        # and 5 are in the test dataset:
        # Patient 3 has one missing ground truth channel: "region"
        df = df[df["subject"].ne(3) | df["channel"].ne("region")]
        # Patient 4 has all missing ground truth channels: "region", "region_1"
        df = df[df["subject"].ne(4) | df["channel"].ne("region")]
        df = df[df["subject"].ne(4) | df["channel"].ne("region_1")]
        # Patient 5 has no missing ground truth channels.

        config.dataset_data_frame = df

        df = df[df.subject.isin([3, 4, 5])]

        config.train_subject_ids = ['1', '2']
        config.test_subject_ids = ['3', '4', '5']
        config.val_subject_ids = ['6', '7']
    else:
        df = df[df.subject.isin([1, 2])]

    if use_partial_ground_truth and not allow_partial_ground_truth:
        with pytest.raises(ValueError) as value_error:
            # noinspection PyTypeHints
            config._datasets_for_inference = {
                ModelExecutionMode.TEST:
                FullImageDataset(config,
                                 df,
                                 full_image_sample_transforms=transform)
            }  # type: ignore
        assert "Patient 3 does not have channel 'region'" in str(
            value_error.value)
        return
    else:
        # noinspection PyTypeHints
        config._datasets_for_inference = {
            ModelExecutionMode.TEST:
            FullImageDataset(config,
                             df,
                             full_image_sample_transforms=transform)
        }  # type: ignore
    execution_mode = ModelExecutionMode.TEST
    checkpoint_handler = get_default_checkpoint_handler(
        model_config=config, project_root=test_output_dirs.root_dir)
    # Mimic the behaviour that checkpoints are downloaded from blob storage into the checkpoints folder.
    create_model_and_store_checkpoint(
        config,
        config.checkpoint_folder / LAST_CHECKPOINT_FILE_NAME_WITH_SUFFIX)
    checkpoint_handler.additional_training_done()
    inference_results = model_testing.segmentation_model_test(
        config,
        execution_mode=execution_mode,
        checkpoint_paths=checkpoint_handler.get_checkpoints_to_test())
    epoch_dir = config.outputs_folder / get_best_epoch_results_path(
        execution_mode)
    total_num_patients_column_name = f"total_{MetricsFileColumns.Patient.value}".lower()
    if not total_num_patients_column_name.endswith("s"):
        total_num_patients_column_name += "s"

    if use_partial_ground_truth:
        num_subjects = len(pd.unique(df["subject"]))
        if allow_partial_ground_truth:
            assert csv_column_contains_value(
                csv_file_path=epoch_dir / METRICS_AGGREGATES_FILE,
                column_name=total_num_patients_column_name,
                value=num_subjects,
                contains_only_value=True)
            assert csv_column_contains_value(
                csv_file_path=epoch_dir / SUBJECT_METRICS_FILE_NAME,
                column_name=MetricsFileColumns.Dice.value,
                value='',
                contains_only_value=False)
    else:
        aggregates_df = pd.read_csv(epoch_dir / METRICS_AGGREGATES_FILE)
        assert total_num_patients_column_name not in aggregates_df.columns  # Only added if using partial ground truth

        assert not csv_column_contains_value(
            csv_file_path=epoch_dir / SUBJECT_METRICS_FILE_NAME,
            column_name=MetricsFileColumns.Dice.value,
            value='',
            contains_only_value=False)

        assert inference_results.metrics == pytest.approx(0.66606902, abs=1e-6)
        assert config.outputs_folder.is_dir()
        assert epoch_dir.is_dir()
        patient1 = io_util.load_nifti_image(train_and_test_data_dir /
                                            "id1_channel1.nii.gz")
        patient2 = io_util.load_nifti_image(train_and_test_data_dir /
                                            "id2_channel1.nii.gz")

        assert_file_contains_string(epoch_dir / DATASET_ID_FILE,
                                    placeholder_dataset_id)
        assert_file_contains_string(epoch_dir / GROUND_TRUTH_IDS_FILE,
                                    "region")
        assert_text_files_match(
            epoch_dir / model_testing.SUBJECT_METRICS_FILE_NAME,
            train_and_test_data_dir / model_testing.SUBJECT_METRICS_FILE_NAME)
        assert_text_files_match(
            epoch_dir / model_testing.METRICS_AGGREGATES_FILE,
            train_and_test_data_dir / model_testing.METRICS_AGGREGATES_FILE)
        # Plotting results vary between platforms. Can only check if the file is generated, but not its contents.
        assert (epoch_dir / model_testing.BOXPLOT_FILE).exists()

        assert_nifti_content(epoch_dir / "001" / "posterior_region.nii.gz",
                             get_image_shape(patient1), patient1.header, [137],
                             np.ubyte)
        assert_nifti_content(epoch_dir / "002" / "posterior_region.nii.gz",
                             get_image_shape(patient2), patient2.header, [137],
                             np.ubyte)
        assert_nifti_content(epoch_dir / "001" / DEFAULT_RESULT_IMAGE_NAME,
                             get_image_shape(patient1), patient1.header, [1],
                             np.ubyte)
        assert_nifti_content(epoch_dir / "002" / DEFAULT_RESULT_IMAGE_NAME,
                             get_image_shape(patient2), patient2.header, [1],
                             np.ubyte)
        assert_nifti_content(epoch_dir / "001" / "posterior_background.nii.gz",
                             get_image_shape(patient1), patient1.header, [117],
                             np.ubyte)
        assert_nifti_content(epoch_dir / "002" / "posterior_background.nii.gz",
                             get_image_shape(patient2), patient2.header, [117],
                             np.ubyte)
        thumbnails_folder = epoch_dir / model_testing.THUMBNAILS_FOLDER
        assert thumbnails_folder.is_dir()
        png_files = list(thumbnails_folder.glob("*.png"))
        overlays = [f for f in png_files if "_region_slice_" in str(f)]
        assert len(overlays) == len(df.subject.unique()), \
            "There should be one overlay/contour file per subject"

        # Writing dataset.csv normally happens at the beginning of training,
        # but this test reads off a saved checkpoint file.
        # Dataset.csv must be present for plot_cross_validation.
        config.write_dataset_files()
        # Test if the metrics files can be picked up correctly by the cross validation code
        config_and_files = get_config_and_results_for_offline_runs(config)
        result_files = config_and_files.files
        assert len(result_files) == 1
        for file in result_files:
            assert file.execution_mode == execution_mode
            assert file.dataset_csv_file is not None
            assert file.dataset_csv_file.exists()
            assert file.metrics_file is not None
            assert file.metrics_file.exists()
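
The partial-ground-truth branch above relies on csv_column_contains_value, whose behaviour can be inferred from the call sites: check whether a CSV column contains a given value, optionally requiring that the column contains nothing but that value. A hedged sketch of that contract (illustrative only, not the actual test-utility implementation):

from pathlib import Path
from typing import Any

import pandas as pd


def csv_column_contains_value_sketch(csv_file_path: Path,
                                     column_name: str,
                                     value: Any,
                                     contains_only_value: bool) -> bool:
    # Read empty cells as empty strings so that value='' can be matched explicitly.
    column = pd.read_csv(csv_file_path, keep_default_na=False)[column_name]
    matches = column == value
    return bool(matches.all()) if contains_only_value else bool(matches.any())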