def test_store_image_as_short_nifti(test_output_dirs: TestOutputDirectories,
                                    norm_method: PhotometricNormalizationMethod,
                                    image_range: Any,
                                    window_level: Any) -> None:
    window, level = window_level if window_level else (400, 0)

    image = np.random.random_sample((1, 2, 3))
    image_shape = image.shape

    args = SegmentationModelBase(norm_method=norm_method, window=window, level=level, should_validate=False)

    # Map the random values into the model's output range and cast to integer (short) values
    image1 = LinearTransform.transform(data=image, input_range=(0, 1), output_range=args.output_range)
    image = image1.astype(np.short)  # type: ignore
    header = ImageHeader(origin=(1, 1, 1), direction=(1, 0, 0, 0, 1, 0, 0, 0, 1), spacing=(1, 1, 1))
    nifti_name = test_output_dirs.create_file_or_folder_path(default_image_name)
    io_util.store_image_as_short_nifti(image, header, nifti_name, args)

    if norm_method == PhotometricNormalizationMethod.CtWindow:
        output_range = get_range_for_window_level(args.level, args.window)
        image = LinearTransform.transform(data=image, input_range=args.output_range, output_range=output_range)
        image = image.astype(np.short)
    else:
        # For all other normalization methods, the stored image is the short-cast image
        # scaled by 1000, so mirror that scaling when computing the expected values.
        image = image * 1000

    expected_values = np.unique(image)
    assert_nifti_content(nifti_name, image_shape, header, list(expected_values), np.short)
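
# A hypothetical sketch of the pytest parametrization this test relies on; norm_method,
# image_range and window_level are supplied by pytest, and these values are illustrative
# only, not the original module's:
#
#     @pytest.mark.parametrize("norm_method", [PhotometricNormalizationMethod.CtWindow,
#                                              PhotometricNormalizationMethod.Unchanged])
#     @pytest.mark.parametrize("image_range", [None, (0, 1)])
#     @pytest.mark.parametrize("window_level", [None, (200, -100)])
#     def test_store_image_as_short_nifti(...): ...
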
def test_store_as_binary_nifti(test_output_dirs: TestOutputDirectories, image: Any) -> None:
    image = np.array(image)
    header = ImageHeader(origin=(1, 1, 1), direction=(1, 0, 0, 0, 1, 0, 0, 0, 1), spacing=(1, 2, 4))
    io_util.store_binary_mask_as_nifti(image, header,
                                       test_output_dirs.create_file_or_folder_path(default_image_name))
    expected_values = np.unique(image)
    assert_nifti_content(test_output_dirs.create_file_or_folder_path(default_image_name), image.shape, header,
                         list(expected_values), np.ubyte)
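
# A hedged sketch of the parametrization assumed for the binary-mask test above; the exact
# fixtures come from the original module, these values are illustrative only:
#
#     @pytest.mark.parametrize("image", [[[[0, 1], [1, 0]]],
#                                        [[[0, 0], [0, 0]]]])
#     def test_store_as_binary_nifti(...): ...
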
def test_register_and_score_model(test_output_dirs: OutputFolderForTests) -> None:
    """
    End-to-end test which ensures the scoring pipeline is functioning as expected when used on a recently created
    model. This test is run after training an ensemble run in AzureML. It starts the score script via Popen.
    The inference run here is on a 2-channel model, whereas test_submit_for_inference works with a 1-channel
    model.
    """
    azureml_model = get_most_recent_model(fallback_run_id_for_local_execution=FALLBACK_ENSEMBLE_RUN)
    assert azureml_model is not None
    assert PYTHON_ENVIRONMENT_NAME in azureml_model.tags, "Environment name not present in model tags"
    # download the registered model and test that we can run the score pipeline on it
    model_root = Path(azureml_model.download(str(test_output_dirs.root_dir)))
    # The model needs to contain score.py at the root, the (merged) environment definition,
    # and the inference config.
    expected_files = [
        *fixed_paths.SCRIPTS_AT_ROOT,
        fixed_paths.ENVIRONMENT_YAML_FILE_NAME,
        fixed_paths.MODEL_INFERENCE_JSON_FILE_NAME,
        "InnerEye/ML/runner.py",
    ]
    for expected_file in expected_files:
        assert (model_root / expected_file).is_file(), f"File {expected_file} missing"
    checkpoint_folder = model_root / CHECKPOINT_FOLDER
    assert checkpoint_folder.is_dir()
    checkpoints = list(checkpoint_folder.rglob("*"))
    assert len(checkpoints) >= 1, "There must be at least 1 checkpoint"

    # create a dummy datastore to store the image data
    test_datastore = test_output_dirs.root_dir / "test_datastore"
    # move test data into the data folder to simulate an actual run
    train_and_test_data_dir = full_ml_test_data_path("train_and_test_data")
    img_files = ["id1_channel1.nii.gz", "id1_channel2.nii.gz"]
    data_root = test_datastore / fixed_paths.DEFAULT_DATA_FOLDER
    data_root.mkdir(parents=True)
    for f in img_files:
        shutil.copy(str(train_and_test_data_dir / f), str(data_root))

    # run score pipeline as a separate process
    python_executable = sys.executable
    return_code1, stdout1 = spawn_and_monitor_subprocess(process=python_executable,
                                                         args=["--version"])
    assert return_code1 == 0
    print(f"Executing Python version {stdout1[0]}")
    return_code, stdout2 = spawn_and_monitor_subprocess(process=python_executable, args=[
        str(model_root / fixed_paths.SCORE_SCRIPT),
        f"--data_folder={str(data_root)}",
        f"--image_files={img_files[0]},{img_files[1]}",
        "--use_gpu=False"])

    # check that the process completed as expected
    assert return_code == 0, f"Subprocess failed with return code {return_code}. Stdout: {os.linesep.join(stdout2)}"
    expected_segmentation_path = Path(model_root) / DEFAULT_RESULT_IMAGE_NAME
    assert expected_segmentation_path.exists(), f"Result file not found: {expected_segmentation_path}"

    # sanity check the resulting segmentation
    expected_shape = get_nifti_shape(train_and_test_data_dir / img_files[0])
    image_header = get_unit_image_header()
    assert_nifti_content(str(expected_segmentation_path), expected_shape, image_header, [3], np.ubyte)
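
# spawn_and_monitor_subprocess is called above as "run an executable, collect its exit code
# and captured stdout lines". A minimal stdlib sketch of such a helper, assuming (without
# checking the project's actual implementation) that this is essentially all it does:
import subprocess
from typing import List, Tuple

def _spawn_and_monitor_subprocess_sketch(process: str, args: List[str]) -> Tuple[int, List[str]]:
    # Run the executable with the given arguments, capturing stdout/stderr as text.
    completed = subprocess.run([process] + args, capture_output=True, text=True)
    return completed.returncode, completed.stdout.splitlines()
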
def test_store_as_ubyte_nifti(test_output_dirs: TestOutputDirectories) -> None:
    image = np.random.random_sample((dim_z, dim_y, dim_x))
    # get integer values in the [0, 255] range
    image = (image * 255).astype(int)
    header = ImageHeader(origin=(1, 1, 1), direction=(1, 0, 0, 0, 1, 0, 0, 0, 1), spacing=(1, 2, 4))
    io_util.store_as_ubyte_nifti(image, header, test_output_dirs.create_file_or_folder_path(default_image_name))
    expected_values = np.unique(image).astype(np.ubyte)
    assert_nifti_content(test_output_dirs.create_file_or_folder_path(default_image_name),
                         image.shape, header, list(expected_values), np.ubyte)

def test_store_as_scaled_ubyte_nifti(test_output_dirs: TestOutputDirectories, input_range: Any) -> None:
    image = np.random.random_sample((dim_z, dim_y, dim_x))
    header = ImageHeader(origin=(1, 1, 1), direction=(1, 0, 0, 0, 1, 0, 0, 0, 1), spacing=(1, 2, 4))
    io_util.store_as_scaled_ubyte_nifti(image, header,
                                        test_output_dirs.create_file_or_folder_path(default_image_name),
                                        input_range)
    image = LinearTransform.transform(data=image, input_range=input_range, output_range=(0, 255))
    expected_values = np.unique(image.astype(np.ubyte))
    assert_nifti_content(test_output_dirs.create_file_or_folder_path(default_image_name), image.shape, header,
                         list(expected_values), np.ubyte)
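
# Hypothetical parametrization for the scaled-ubyte test above (illustrative values, not
# the original fixture values):
#
#     @pytest.mark.parametrize("input_range", [(0, 1), (-1, 1), (0, 255)])
#     def test_store_as_scaled_ubyte_nifti(...): ...
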
def test_store_posteriors_nifti(test_output_dirs: TestOutputDirectories,
                                image: Any, expected: Any) -> None:
    image = np.array(image)
    header = ImageHeader(origin=(1, 1, 1),
                         direction=(1, 0, 0, 0, 1, 0, 0, 0, 1),
                         spacing=(1, 1, 1))
    io_util.store_posteriors_as_nifti(
        image, header,
        test_output_dirs.create_file_or_folder_path(default_image_name))
    assert_nifti_content(
        test_output_dirs.create_file_or_folder_path(default_image_name),
        image.shape, header, list(expected), np.ubyte)
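
# A hedged sketch of the (image, expected) pairs this posterior test is assumed to be
# parametrized with: posteriors in [0, 1] are scaled to ubyte, so 0 maps to 0 and 1 maps
# to 255. Illustrative values only:
#
#     @pytest.mark.parametrize("image, expected", [([[[0, 1]]], [0, 255])])
#     def test_store_posteriors_nifti(...): ...
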
def test_scale_and_unscale_image(
        test_output_dirs: TestOutputDirectories) -> None:
    """
    Test if an image in the CT value range can be recovered when we save dataset examples
    (undoing the effects of CT Windowing)
    """
    image_size = (5, 5, 5)
    spacing = (1, 2, 3)
    header = ImageHeader(origin=(0, 1, 0),
                         direction=(-1, 0, 0, 0, -1, 0, 0, 0, -1),
                         spacing=spacing)
    np.random.seed(0)
    # Random image values with mean -100, std 100. This will cover a range
    # from -400 to +200 HU
    image = np.random.normal(-100, 100, size=image_size)
    window = 200
    level = -100
    # Lower and upper bounds of the interval of raw CT values that will be retained.
    lower = level - window / 2
    upper = level + window / 2
    # Create a copy of the image with all values outside of the (Window, Level) range set to the boundaries.
    # When saving and loading back in, we will not be able to recover any values that fell outside those boundaries.
    image_restricted = image.copy()
    image_restricted[image < lower] = lower
    image_restricted[image > upper] = upper
    # The image will be saved with voxel type short
    image_restricted = image_restricted.astype(int)
    # Apply window and level, mapping to the usual CNN input value range
    cnn_input_range = (-1, +1)
    image_windowed = LinearTransform.transform(data=image,
                                               input_range=(lower, upper),
                                               output_range=cnn_input_range)
    args = SegmentationModelBase(
        norm_method=PhotometricNormalizationMethod.CtWindow,
        output_range=cnn_input_range,
        window=window,
        level=level,
        should_validate=False)

    file_name = test_output_dirs.create_file_or_folder_path(
        "scale_and_unscale_image.nii.gz")
    io_util.store_image_as_short_nifti(image_windowed, header, file_name, args)
    image_from_disk = io_util.load_nifti_image(file_name)
    # noinspection PyTypeChecker
    assert_nifti_content(file_name, image_size, header,
                         np.unique(image_restricted).tolist(), np.short)
    assert np.array_equal(image_from_disk.image, image_restricted)
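
# The recovery above relies on LinearTransform being a plain affine rescaling between two
# ranges. A sketch of that mapping (an assumption consistent with how the tests use it):
def _linear_rescale_sketch(x, input_range, output_range):
    # Affine map taking input_range onto output_range.
    (in_min, in_max), (out_min, out_max) = input_range, output_range
    return (x - in_min) / (in_max - in_min) * (out_max - out_min) + out_min

# For window=200 and level=-100: lower=-200, upper=0, and
# _linear_rescale_sketch(-100, (-200, 0), (-1, 1)) == 0.0,
# i.e. the window centre maps to the centre of the CNN input range.
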
def test_store_as_nifti(test_output_dirs: TestOutputDirectories, image_type: Any, scale: Any,
                        input_range: Any, output_range: Any) -> None:
    image = np.random.random_sample((dim_z, dim_y, dim_x))
    spacingzyx = (1, 2, 3)
    path_image = test_output_dirs.create_file_or_folder_path(default_image_name)
    header = ImageHeader(origin=(1, 1, 1), direction=(1, 0, 0, 0, 1, 0, 0, 0, 1), spacing=spacingzyx)
    io_util.store_as_nifti(image, header, path_image,
                           image_type, scale, input_range, output_range)
    if scale:
        transformed = LinearTransform.transform(data=image, input_range=input_range, output_range=output_range)
        image = transformed.astype(image_type)  # type: ignore
    assert_nifti_content(path_image, image.shape, header,
                         list(np.unique(image.astype(image_type))), image_type)

    loaded_image = io_util.load_nifti_image(path_image, image_type)
    assert loaded_image.header.spacing == spacingzyx
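
# Hypothetical parametrization for test_store_as_nifti (illustrative, not the original
# fixture values):
#
#     @pytest.mark.parametrize("image_type", [np.ubyte, np.short, np.float32])
#     @pytest.mark.parametrize("scale, input_range, output_range",
#                              [(False, None, None), (True, (0, 1), (0, 255))])
#     def test_store_as_nifti(...): ...
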
def test_model_test(test_output_dirs: OutputFolderForTests) -> None:
    train_and_test_data_dir = full_ml_test_data_path("train_and_test_data")

    config = DummyModel()
    config.set_output_to(test_output_dirs.root_dir)
    epoch = 1
    config.num_epochs = epoch
    assert config.get_test_epochs() == [epoch]
    placeholder_dataset_id = "place_holder_dataset_id"
    config.azure_dataset_id = placeholder_dataset_id
    transform = config.get_full_image_sample_transforms().test
    df = pd.read_csv(full_ml_test_data_path(DATASET_CSV_FILE_NAME))
    df = df[df.subject.isin([1, 2])]
    # noinspection PyTypeHints
    config._datasets_for_inference = \
        {ModelExecutionMode.TEST: FullImageDataset(config, df, full_image_sample_transforms=transform)}  # type: ignore
    execution_mode = ModelExecutionMode.TEST
    checkpoint_handler = get_default_checkpoint_handler(model_config=config,
                                                        project_root=test_output_dirs.root_dir)
    # Mimic the behaviour that checkpoints are downloaded from blob storage into the checkpoints folder.
    stored_checkpoints = full_ml_test_data_path("checkpoints")
    shutil.copytree(str(stored_checkpoints), str(config.checkpoint_folder))
    checkpoint_handler.additional_training_done()
    inference_results = model_testing.segmentation_model_test(config,
                                                              data_split=execution_mode,
                                                              checkpoint_handler=checkpoint_handler)
    epoch_dir = config.outputs_folder / get_epoch_results_path(epoch, execution_mode)
    assert inference_results.epochs[epoch] == pytest.approx(0.66606902, abs=1e-6)

    assert config.outputs_folder.is_dir()
    assert epoch_dir.is_dir()
    patient1 = io_util.load_nifti_image(train_and_test_data_dir / "id1_channel1.nii.gz")
    patient2 = io_util.load_nifti_image(train_and_test_data_dir / "id2_channel1.nii.gz")

    assert_file_contains_string(epoch_dir / DATASET_ID_FILE, placeholder_dataset_id)
    assert_file_contains_string(epoch_dir / GROUND_TRUTH_IDS_FILE, "region")
    assert_text_files_match(epoch_dir / model_testing.METRICS_FILE_NAME,
                            train_and_test_data_dir / model_testing.METRICS_FILE_NAME)
    assert_text_files_match(epoch_dir / model_testing.METRICS_AGGREGATES_FILE,
                            train_and_test_data_dir / model_testing.METRICS_AGGREGATES_FILE)
    # Plotting results vary between platforms. Can only check if the file is generated, but not its contents.
    assert (epoch_dir / model_testing.BOXPLOT_FILE).exists()

    assert_nifti_content(epoch_dir / "001" / "posterior_region.nii.gz", get_image_shape(patient1),
                         patient1.header,
                         [136], np.ubyte)
    assert_nifti_content(epoch_dir / "002" / "posterior_region.nii.gz", get_image_shape(patient2),
                         patient2.header,
                         [136], np.ubyte)
    assert_nifti_content(epoch_dir / "001" / DEFAULT_RESULT_IMAGE_NAME, get_image_shape(patient1),
                         patient1.header,
                         [1], np.ubyte)
    assert_nifti_content(epoch_dir / "002" / DEFAULT_RESULT_IMAGE_NAME, get_image_shape(patient2),
                         patient2.header,
                         [1], np.ubyte)
    assert_nifti_content(epoch_dir / "001" / "posterior_background.nii.gz", get_image_shape(patient1),
                         patient1.header,
                         [118], np.ubyte)
    assert_nifti_content(epoch_dir / "002" / "posterior_background.nii.gz", get_image_shape(patient2),
                         patient2.header,
                         [118], np.ubyte)
    thumbnails_folder = epoch_dir / model_testing.THUMBNAILS_FOLDER
    assert thumbnails_folder.is_dir()
    png_files = list(thumbnails_folder.glob("*.png"))
    overlays = [f for f in png_files if "_region_slice_" in str(f)]
    assert len(overlays) == len(df.subject.unique()), "There should be one overlay/contour file per subject"

    # Writing dataset.csv normally happens at the beginning of training,
    # but this test reads off a saved checkpoint file.
    # Dataset.csv must be present for plot_cross_validation.
    config.write_dataset_files()
    # Test if the metrics files can be picked up correctly by the cross validation code
    config_and_files = get_config_and_results_for_offline_runs(config)
    result_files = config_and_files.files
    assert len(result_files) == 1
    for file in result_files:
        assert file.execution_mode == execution_mode
        assert file.dataset_csv_file is not None
        assert file.dataset_csv_file.exists()
        assert file.metrics_file is not None
        assert file.metrics_file.exists()
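
# assert_nifti_content is used throughout these tests as "load the NIfTI file and check its
# shape, header, unique voxel values and voxel type". A minimal sketch of such a checker,
# assuming (as the tests above do) that io_util.load_nifti_image returns an object with
# .image and .header attributes; this is an illustration, not the project's actual helper:
def _assert_nifti_content_sketch(path, expected_shape, expected_header, expected_values, expected_type):
    loaded = io_util.load_nifti_image(path, expected_type)
    assert loaded.image.shape == expected_shape
    assert loaded.header == expected_header
    assert list(np.unique(loaded.image)) == expected_values
    assert loaded.image.dtype == expected_type
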
def test_register_and_score_model(is_ensemble: bool,
                                  dataset_expected_spacing_xyz: Any,
                                  model_outside_package: bool,
                                  test_output_dirs: OutputFolderForTests) -> None:
    """
    End-to-end test which ensures the scoring pipeline is functioning as expected by performing the following:
    1) Registering a pre-trained model to AML
    2) Checking that a model zip from the registered model can be created successfully
    3) Calling the scoring pipeline to check inference can be run from the published model successfully
    """
    ws = get_default_workspace()
    # Get an existing config as template
    loader = get_model_loader("Tests.ML.configs" if model_outside_package else None)
    config: SegmentationModelBase = loader.create_model_config_from_name(
        model_name="BasicModel2EpochsOutsidePackage" if model_outside_package else "BasicModel2Epochs"
    )
    config.dataset_expected_spacing_xyz = dataset_expected_spacing_xyz
    config.set_output_to(test_output_dirs.root_dir)
    # copy checkpoints into the outputs (simulating a run)
    stored_checkpoints = full_ml_test_data_path(os.path.join("train_and_test_data", "checkpoints"))
    shutil.copytree(str(stored_checkpoints), str(config.checkpoint_folder))
    paths = [config.checkpoint_folder / "1_checkpoint.pth.tar"]
    checkpoints = paths * 2 if is_ensemble else paths
    model = None
    model_path = None
    # Mock the repository root so that the source is read from the current directory:
    # score.py and python_wrapper.py cannot be moved inside the InnerEye package, which would
    # be the only code available if these tests were run against the installed package.
    with mock.patch('InnerEye.Common.fixed_paths.repository_root_directory',
                    return_value=tests_root_directory().parent):
        try:
            tags = {"model_name": config.model_name}
            azure_config = get_default_azure_config()
            if model_outside_package:
                azure_config.extra_code_directory = "Tests"  # contains DummyModel
            deployment_hook = lambda cfg, azure_cfg, mdl, is_ens: (Path(cfg.model_name), azure_cfg.docker_shm_size)
            ml_runner = MLRunner(config, azure_config, model_deployment_hook=deployment_hook)
            model, deployment_path, deployment_details = ml_runner.register_segmentation_model(
                workspace=ws,
                tags=tags,
                best_epoch=0,
                best_epoch_dice=0,
                checkpoint_paths=checkpoints,
                model_proc=ModelProcessing.DEFAULT)
            assert model is not None
            model_path = Path(model.get_model_path(model.name, model.version, ws))
            assert (model_path / fixed_paths.ENVIRONMENT_YAML_FILE_NAME).exists()
            assert (model_path / Path("InnerEye/ML/runner.py")).exists()
            assert deployment_path == Path(config.model_name)
            assert deployment_details == azure_config.docker_shm_size

            # move test data into the data folder to simulate an actual run
            train_and_test_data_dir = full_ml_test_data_path("train_and_test_data")

            img_channel_1_name = "id1_channel1.nii.gz"
            img_channel_1_path = train_and_test_data_dir / img_channel_1_name
            img_channel_2_name = "id1_channel2.nii.gz"
            img_channel_2_path = train_and_test_data_dir / img_channel_2_name

            # download the registered model and test that we can run the score pipeline on it
            model_root = Path(model.download(str(test_output_dirs.root_dir)))
            # create a dummy datastore to store model checkpoints and image data;
            # this simulates the code snapshot being executed in a real run
            test_datastore = test_output_dirs.root_dir / "test_datastore"
            shutil.move(
                str(model_root / "test_outputs"),
                str(test_datastore / RELATIVE_TEST_OUTPUTS_PATH)
            )
            data_root = test_datastore / DEFAULT_DATA_FOLDER
            os.makedirs(data_root)
            shutil.copy(str(img_channel_1_path), data_root)
            shutil.copy(str(img_channel_2_path), data_root)

            # run score pipeline as a separate process using the python_wrapper.py code to simulate a real run
            return_code = SubprocessConfig(process="python", args=[
                str(model_root / "python_wrapper.py"),
                "--spawnprocess=python",
                str(model_root / "score.py"),
                f"--data-folder={str(test_datastore)}",
                f"--test_image_channels={img_channel_1_name},{img_channel_2_name}",
                "--use_gpu=False"
            ]).spawn_and_monitor_subprocess()

            # check that the process completed as expected
            assert return_code == 0
            expected_segmentation_path = Path(model_root) / DEFAULT_RESULT_IMAGE_NAME
            assert expected_segmentation_path.exists()

            # sanity check the resulting segmentation
            expected_shape = get_nifti_shape(img_channel_1_path)
            image_header = get_unit_image_header()
            assert_nifti_content(str(expected_segmentation_path), expected_shape, image_header, [0], np.ubyte)

        finally:
            # delete the registered model, and any downloaded artifacts
            shutil.rmtree(test_output_dirs.root_dir)
            if model and model_path:
                model.delete()
                shutil.rmtree(model_path)
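
# A hedged sketch of the pytest parametrization assumed for the test above (values are
# illustrative, not the original module's):
#
#     @pytest.mark.parametrize("is_ensemble", [True, False])
#     @pytest.mark.parametrize("dataset_expected_spacing_xyz", [None, (1.0, 1.0, 3.0)])
#     @pytest.mark.parametrize("model_outside_package", [True, False])
#     def test_register_and_score_model(...): ...
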
def test_store_inference_results(
        test_output_dirs: TestOutputDirectories) -> None:
    np.random.seed(0)
    num_classes = 2
    posterior = torch.nn.functional.softmax(
        torch.from_numpy(np.random.random_sample((num_classes, dim_z, dim_y, dim_x))),
        dim=0).numpy()
    segmentation = np.argmax(posterior, axis=0)
    assert segmentation.shape == (dim_z, dim_y, dim_x)

    posterior0 = to_unique_bytes(posterior[0], (0, 1))
    posterior1 = to_unique_bytes(posterior[1], (0, 1))
    spacing = (2.0, 2.0, 2.0)
    header = ImageHeader(origin=(0, 0, 0),
                         direction=(1, 0, 0, 0, 1, 0, 0, 0, 1),
                         spacing=spacing)
    inference_result = InferencePipeline.Result(epoch=1,
                                                patient_id=12,
                                                posteriors=posterior,
                                                segmentation=segmentation,
                                                voxel_spacing_mm=(1, 1, 1))

    test_config = _create_config_with_folders(test_output_dirs)

    assert test_config.class_and_index_with_background() == {
        "background": 0,
        "region": 1
    }

    results_folder = test_output_dirs.root_dir
    store_inference_results(inference_result, test_config,
                            Path(results_folder), header)

    assert_nifti_content(
        os.path.join(results_folder, "012", "posterior_background.nii.gz"),
        segmentation.shape, header, list(posterior0), np.ubyte)

    assert_nifti_content(
        os.path.join(results_folder, "012", "posterior_region.nii.gz"),
        segmentation.shape, header, list(posterior1), np.ubyte)

    assert_nifti_content(
        os.path.join(results_folder, "012", "background.nii.gz"),
        segmentation.shape, header, [0, 1], np.ubyte)

    assert_nifti_content(os.path.join(results_folder, "012", "region.nii.gz"),
                         segmentation.shape, header, [0, 1], np.ubyte)

    assert_nifti_content(
        os.path.join(results_folder, "012", DEFAULT_RESULT_IMAGE_NAME),
        segmentation.shape, header, list(np.unique(segmentation)), np.ubyte)

    assert_nifti_content(
        os.path.join(results_folder, "012", "uncertainty.nii.gz"),
        inference_result.uncertainty.shape, header, [248, 249, 253, 254], np.ubyte)
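
# to_unique_bytes, used above to derive the expected posterior voxel values, is assumed to
# rescale an array from the given input range to [0, 255] and return the unique ubyte
# values. A sketch of that assumption:
def _to_unique_bytes_sketch(array, input_range):
    scaled = LinearTransform.transform(data=array, input_range=input_range, output_range=(0, 255))
    return np.unique(scaled.astype(np.ubyte))
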
def test_register_and_score_model(
        is_ensemble: bool, dataset_expected_spacing_xyz: Any,
        model_outside_package: bool,
        test_output_dirs: OutputFolderForTests) -> None:
    """
    End-to-end test which ensures the scoring pipeline is functioning as expected by performing the following:
    1) Registering a pre-trained model to AML
    2) Checking that a model zip from the registered model can be created successfully
    3) Calling the scoring pipeline to check inference can be run from the published model successfully
    """
    # We are creating checkpoints on the fly in this test, writing a randomly initialized model.
    set_random_seed(0)
    # Get an existing config as template
    loader = get_model_loader(
        "Tests.ML.configs" if model_outside_package else None)
    config: SegmentationModelBase = loader.create_model_config_from_name(
        model_name="BasicModel2EpochsOutsidePackage"
        if model_outside_package else "BasicModel2Epochs")
    config.dataset_expected_spacing_xyz = dataset_expected_spacing_xyz
    config.set_output_to(test_output_dirs.root_dir)
    checkpoints_absolute = []
    model_and_info = ModelAndInfo(
        config=config, model_execution_mode=ModelExecutionMode.TRAIN)
    model_and_info.create_model()
    model_and_info.create_optimizer()
    checkpoints_absolute.append(model_and_info.save_checkpoint(epoch=10))
    if is_ensemble:
        checkpoints_absolute.append(model_and_info.save_checkpoint(epoch=20))
    checkpoints_relative = [
        f.relative_to(config.checkpoint_folder) for f in checkpoints_absolute
    ]
    azureml_model = None
    # Simulate a project root: we can't derive it from the repository root because that
    # might point into Python's package folder
    project_root = Path(__file__).parent.parent
    # Double-check that we are in the right place by testing for a file that is unlikely
    # to exist anywhere else
    assert (project_root / fixed_paths.SCORE_SCRIPT).is_file()
    try:
        azure_config = get_default_azure_config()
        if model_outside_package:
            azure_config.extra_code_directory = "Tests"  # contains BasicModel2EpochsOutsidePackage
        deployment_hook = lambda cfg, azure_cfg, mdl, is_ens: (Path(
            cfg.model_name), azure_cfg.docker_shm_size)
        ml_runner = MLRunner(config,
                             azure_config,
                             project_root=project_root,
                             model_deployment_hook=deployment_hook)
        registration_result = ml_runner.register_segmentation_model(
            model_description="",
            checkpoint_paths=checkpoints_absolute,
            model_proc=ModelProcessing.DEFAULT)
        assert registration_result is not None
        azureml_model, deployment_result = registration_result
        assert azureml_model is not None
        assert deployment_result == (Path(config.model_name),
                                     azure_config.docker_shm_size)

        # download the registered model and test that we can run the score pipeline on it
        model_root = Path(
            azureml_model.download(str(test_output_dirs.root_dir)))
        # The model needs to contain score.py at the root, the (merged) environment definition,
        # and the inference config.
        expected_files = [
            *fixed_paths.SCRIPTS_AT_ROOT,
            fixed_paths.ENVIRONMENT_YAML_FILE_NAME,
            fixed_paths.MODEL_INFERENCE_JSON_FILE_NAME,
            "InnerEye/ML/runner.py",
        ]
        # All checkpoints go into their own folder
        expected_files.extend(
            str(Path(CHECKPOINT_FOLDER) / c) for c in checkpoints_relative)
        for expected_file in expected_files:
            assert (model_root /
                    expected_file).is_file(), f"File {expected_file} missing"

        # create a dummy datastore to store the image data
        test_datastore = test_output_dirs.root_dir / "test_datastore"
        # move test data into the data folder to simulate an actual run
        train_and_test_data_dir = full_ml_test_data_path("train_and_test_data")
        img_files = ["id1_channel1.nii.gz", "id1_channel2.nii.gz"]
        data_root = test_datastore / fixed_paths.DEFAULT_DATA_FOLDER
        data_root.mkdir(parents=True)
        for f in img_files:
            shutil.copy(str(train_and_test_data_dir / f), str(data_root))

        # run score pipeline as a separate process
        python_executable = sys.executable
        return_code1, stdout1 = SubprocessConfig(process=python_executable,
                                                 args=["--version"]).spawn_and_monitor_subprocess()
        assert return_code1 == 0
        print(f"Executing Python version {stdout1[0]}")
        return_code, stdout2 = SubprocessConfig(
            process=python_executable,
            args=[
                str(model_root / fixed_paths.SCORE_SCRIPT),
                f"--data_folder={str(data_root)}",
                f"--image_files={img_files[0]},{img_files[1]}",
                "--use_gpu=False"
            ]).spawn_and_monitor_subprocess()

        # check that the process completed as expected
        assert return_code == 0, f"Subprocess failed with return code {return_code}. Stdout: {os.linesep.join(stdout2)}"
        expected_segmentation_path = Path(model_root) / DEFAULT_RESULT_IMAGE_NAME
        assert expected_segmentation_path.exists(), f"Result file not found: {expected_segmentation_path}"

        # sanity check the resulting segmentation
        expected_shape = get_nifti_shape(train_and_test_data_dir /
                                         img_files[0])
        image_header = get_unit_image_header()
        assert_nifti_content(str(expected_segmentation_path), expected_shape,
                             image_header, [3], np.ubyte)

    finally:
        # delete the registered model
        if azureml_model:
            azureml_model.delete()
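
# The checkpoints registered above are produced on the fly via ModelAndInfo.save_checkpoint.
# A minimal sketch of what saving such a checkpoint presumably amounts to (an assumption,
# not the project's actual helper, which may store additional metadata):
import torch

def _save_checkpoint_sketch(model: torch.nn.Module,
                            optimizer: torch.optim.Optimizer,
                            epoch: int,
                            path: str) -> None:
    # Persist model and optimizer state dicts so that inference or resumed training
    # can restore them later.
    torch.save({"epoch": epoch,
                "state_dict": model.state_dict(),
                "opt_dict": optimizer.state_dict()},
               path)
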
def test_model_test(test_output_dirs: OutputFolderForTests,
                    use_partial_ground_truth: bool,
                    allow_partial_ground_truth: bool) -> None:
    """
    Check the CSVs (and image files) output by InnerEye.ML.model_testing.segmentation_model_test.
    :param test_output_dirs: The fixture in conftest.py.
    :param use_partial_ground_truth: Whether to remove some ground truth labels for some test subjects.
    :param allow_partial_ground_truth: The value to set the allow_incomplete_labels config flag to.
    """
    train_and_test_data_dir = full_ml_test_data_path("train_and_test_data")
    seed_everything(42)
    config = DummyModel()
    config.allow_incomplete_labels = allow_partial_ground_truth
    config.set_output_to(test_output_dirs.root_dir)
    placeholder_dataset_id = "place_holder_dataset_id"
    config.azure_dataset_id = placeholder_dataset_id
    transform = config.get_full_image_sample_transforms().test
    df = pd.read_csv(full_ml_test_data_path(DATASET_CSV_FILE_NAME))

    if use_partial_ground_truth:
        config.check_exclusive = False
        config.ground_truth_ids = ["region", "region_1"]

        # As in Tests.ML.pipelines.test.inference.test_evaluate_model_predictions patients 3, 4,
        # and 5 are in the test dataset with:
        # Patient 3 has one missing ground truth channel: "region"
        df = df[df["subject"].ne(3) | df["channel"].ne("region")]
        # Patient 4 has all missing ground truth channels: "region", "region_1"
        df = df[df["subject"].ne(4) | df["channel"].ne("region")]
        df = df[df["subject"].ne(4) | df["channel"].ne("region_1")]
        # Patient 5 has no missing ground truth channels.

        config.dataset_data_frame = df

        df = df[df.subject.isin([3, 4, 5])]

        config.train_subject_ids = ['1', '2']
        config.test_subject_ids = ['3', '4', '5']
        config.val_subject_ids = ['6', '7']
    else:
        df = df[df.subject.isin([1, 2])]

    if use_partial_ground_truth and not allow_partial_ground_truth:
        with pytest.raises(ValueError) as value_error:
            # noinspection PyTypeHints
            config._datasets_for_inference = {
                ModelExecutionMode.TEST:
                FullImageDataset(config,
                                 df,
                                 full_image_sample_transforms=transform)
            }  # type: ignore
        assert "Patient 3 does not have channel 'region'" in str(
            value_error.value)
        return
    else:
        # noinspection PyTypeHints
        config._datasets_for_inference = {
            ModelExecutionMode.TEST:
            FullImageDataset(config,
                             df,
                             full_image_sample_transforms=transform)
        }  # type: ignore
    execution_mode = ModelExecutionMode.TEST
    checkpoint_handler = get_default_checkpoint_handler(
        model_config=config, project_root=test_output_dirs.root_dir)
    # Mimic the behaviour that checkpoints are downloaded from blob storage into the checkpoints folder.
    create_model_and_store_checkpoint(
        config,
        config.checkpoint_folder / LAST_CHECKPOINT_FILE_NAME_WITH_SUFFIX)
    checkpoint_handler.additional_training_done()
    inference_results = model_testing.segmentation_model_test(
        config,
        execution_mode=execution_mode,
        checkpoint_paths=checkpoint_handler.get_checkpoints_to_test())
    epoch_dir = config.outputs_folder / get_best_epoch_results_path(
        execution_mode)
    total_num_patients_column_name = f"total_{MetricsFileColumns.Patient.value}".lower()
    if not total_num_patients_column_name.endswith("s"):
        total_num_patients_column_name += "s"

    if use_partial_ground_truth:
        num_subjects = len(pd.unique(df["subject"]))
        if allow_partial_ground_truth:
            assert csv_column_contains_value(
                csv_file_path=epoch_dir / METRICS_AGGREGATES_FILE,
                column_name=total_num_patients_column_name,
                value=num_subjects,
                contains_only_value=True)
            assert csv_column_contains_value(
                csv_file_path=epoch_dir / SUBJECT_METRICS_FILE_NAME,
                column_name=MetricsFileColumns.Dice.value,
                value='',
                contains_only_value=False)
    else:
        aggregates_df = pd.read_csv(epoch_dir / METRICS_AGGREGATES_FILE)
        assert total_num_patients_column_name not in aggregates_df.columns  # Only added if using partial ground truth

        assert not csv_column_contains_value(
            csv_file_path=epoch_dir / SUBJECT_METRICS_FILE_NAME,
            column_name=MetricsFileColumns.Dice.value,
            value='',
            contains_only_value=False)

        assert inference_results.metrics == pytest.approx(0.66606902, abs=1e-6)
        assert config.outputs_folder.is_dir()
        assert epoch_dir.is_dir()
        patient1 = io_util.load_nifti_image(train_and_test_data_dir /
                                            "id1_channel1.nii.gz")
        patient2 = io_util.load_nifti_image(train_and_test_data_dir /
                                            "id2_channel1.nii.gz")

        assert_file_contains_string(epoch_dir / DATASET_ID_FILE,
                                    placeholder_dataset_id)
        assert_file_contains_string(epoch_dir / GROUND_TRUTH_IDS_FILE,
                                    "region")
        assert_text_files_match(
            epoch_dir / model_testing.SUBJECT_METRICS_FILE_NAME,
            train_and_test_data_dir / model_testing.SUBJECT_METRICS_FILE_NAME)
        assert_text_files_match(
            epoch_dir / model_testing.METRICS_AGGREGATES_FILE,
            train_and_test_data_dir / model_testing.METRICS_AGGREGATES_FILE)
        # Plotting results vary between platforms. Can only check if the file is generated, but not its contents.
        assert (epoch_dir / model_testing.BOXPLOT_FILE).exists()

        assert_nifti_content(epoch_dir / "001" / "posterior_region.nii.gz",
                             get_image_shape(patient1), patient1.header, [137],
                             np.ubyte)
        assert_nifti_content(epoch_dir / "002" / "posterior_region.nii.gz",
                             get_image_shape(patient2), patient2.header, [137],
                             np.ubyte)
        assert_nifti_content(epoch_dir / "001" / DEFAULT_RESULT_IMAGE_NAME,
                             get_image_shape(patient1), patient1.header, [1],
                             np.ubyte)
        assert_nifti_content(epoch_dir / "002" / DEFAULT_RESULT_IMAGE_NAME,
                             get_image_shape(patient2), patient2.header, [1],
                             np.ubyte)
        assert_nifti_content(epoch_dir / "001" / "posterior_background.nii.gz",
                             get_image_shape(patient1), patient1.header, [117],
                             np.ubyte)
        assert_nifti_content(epoch_dir / "002" / "posterior_background.nii.gz",
                             get_image_shape(patient2), patient2.header, [117],
                             np.ubyte)
        thumbnails_folder = epoch_dir / model_testing.THUMBNAILS_FOLDER
        assert thumbnails_folder.is_dir()
        png_files = list(thumbnails_folder.glob("*.png"))
        overlays = [f for f in png_files if "_region_slice_" in str(f)]
        assert len(overlays) == len(df.subject.unique()), \
            "There should be one overlay/contour file per subject"

        # Writing dataset.csv normally happens at the beginning of training,
        # but this test reads off a saved checkpoint file.
        # Dataset.csv must be present for plot_cross_validation.
        config.write_dataset_files()
        # Test if the metrics files can be picked up correctly by the cross validation code
        config_and_files = get_config_and_results_for_offline_runs(config)
        result_files = config_and_files.files
        assert len(result_files) == 1
        for file in result_files:
            assert file.execution_mode == execution_mode
            assert file.dataset_csv_file is not None
            assert file.dataset_csv_file.exists()
            assert file.metrics_file is not None
            assert file.metrics_file.exists()
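
# Hypothetical parametrization covering the combinations this test exercises:
#
#     @pytest.mark.parametrize("use_partial_ground_truth", [True, False])
#     @pytest.mark.parametrize("allow_partial_ground_truth", [True, False])
#     def test_model_test(...): ...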