def test_store_image_as_short_nifti(test_output_dirs: TestOutputDirectories,
                                    norm_method: PhotometricNormalizationMethod,
                                    image_range: Any,
                                    window_level: Any) -> None:
    """
    Write a small random image with io_util.store_image_as_short_nifti and check the resulting file
    with assert_nifti_content (shape, header, unique values, voxel type np.short).

    :param test_output_dirs: Provides the path of the Nifti file that is written.
    :param norm_method: The photometric normalization method that is set on the model config.
    :param image_range: Not read anywhere in the body of this test.
    :param window_level: A (window, level) pair. If it is falsy, (400, 0) is used instead.
    """
    if window_level:
        window, level = window_level
    else:
        window, level = 400, 0
    random_image = np.random.random_sample((1, 2, 3))
    expected_shape = random_image.shape
    config = SegmentationModelBase(norm_method=norm_method, window=window, level=level, should_validate=False)

    # Map the random values from the unit interval to the output range of the model config,
    # then convert them to integers.
    scaled_image = LinearTransform.transform(data=random_image,
                                             input_range=(0, 1),
                                             output_range=config.output_range)
    stored_image = scaled_image.astype(np.short)  # type: ignore
    header = ImageHeader(origin=(1, 1, 1), direction=(1, 0, 0, 0, 1, 0, 0, 0, 1), spacing=(1, 1, 1))
    nifti_path = test_output_dirs.create_file_or_folder_path(default_image_name)
    io_util.store_image_as_short_nifti(stored_image, header, nifti_path, config)

    # Compute the image that is compared against the file contents.
    if norm_method == PhotometricNormalizationMethod.CtWindow:
        window_level_range = get_range_for_window_level(config.level, config.window)
        expected_image = LinearTransform.transform(data=stored_image,
                                                   input_range=config.output_range,
                                                   output_range=window_level_range)
        expected_image = expected_image.astype(np.short)
    else:
        expected_image = stored_image * 1000

    unique_values = np.unique(expected_image)
    assert_nifti_content(nifti_path, expected_shape, header, list(unique_values), np.short)
def test_store_as_binary_nifti(test_output_dirs: TestOutputDirectories, image: Any) -> None:
    """
    Write the given image with io_util.store_binary_mask_as_nifti and check the resulting file with
    assert_nifti_content, using the unique values of the input and voxel type np.ubyte.

    :param test_output_dirs: Provides the path of the Nifti file that is written.
    :param image: The image to store. It is converted to a numpy array first.
    """
    mask = np.array(image)
    header = ImageHeader(origin=(1, 1, 1), direction=(1, 0, 0, 0, 1, 0, 0, 0, 1), spacing=(1, 2, 4))
    io_util.store_binary_mask_as_nifti(mask,
                                       header,
                                       test_output_dirs.create_file_or_folder_path(default_image_name))
    unique_values = list(np.unique(mask))
    assert_nifti_content(test_output_dirs.create_file_or_folder_path(default_image_name),
                         mask.shape,
                         header,
                         unique_values,
                         np.ubyte)
def test_register_and_score_model(test_output_dirs: OutputFolderForTests) -> None:
    """
    End-to-end test which ensures the scoring pipeline is functioning as expected when used on a recently
    created model. This test is run after training an ensemble run in AzureML. It starts "submit_for_inference"
    via Popen. The inference run here is on a 2-channel model, whereas test_submit_for_inference works with
    a 1-channel model.

    :param test_output_dirs: Provides the root folder into which the model is downloaded and in which the
        image data for scoring is placed.
    """
    azureml_model = get_most_recent_model(fallback_run_id_for_local_execution=FALLBACK_ENSEMBLE_RUN)
    assert azureml_model is not None
    assert PYTHON_ENVIRONMENT_NAME in azureml_model.tags, "Environment name not present in model properties"

    # Download the registered model, so that the score pipeline can be run on it.
    download_folder = str(test_output_dirs.root_dir)
    model_root = Path(azureml_model.download(download_folder))

    # The model needs to contain score.py at the root, the (merged) environment definition,
    # and the inference config.
    expected_files = [
        *fixed_paths.SCRIPTS_AT_ROOT,
        fixed_paths.ENVIRONMENT_YAML_FILE_NAME,
        fixed_paths.MODEL_INFERENCE_JSON_FILE_NAME,
        "InnerEye/ML/runner.py",
    ]
    for expected_file in expected_files:
        assert (model_root / expected_file).is_file(), f"File {expected_file} missing"
    checkpoint_folder = model_root / CHECKPOINT_FOLDER
    assert checkpoint_folder.is_dir()
    checkpoints = list(checkpoint_folder.rglob("*"))
    assert len(checkpoints) >= 1, "There must be at least 1 checkpoint"

    # Create a dummy datastore that holds the image data, and copy the test images into its data folder
    # to simulate an actual run.
    test_datastore = test_output_dirs.root_dir / "test_datastore"
    train_and_test_data_dir = full_ml_test_data_path("train_and_test_data")
    img_files = ["id1_channel1.nii.gz", "id1_channel2.nii.gz"]
    data_root = test_datastore / fixed_paths.DEFAULT_DATA_FOLDER
    data_root.mkdir(parents=True)
    for img_file in img_files:
        shutil.copy(str(train_and_test_data_dir / img_file), str(data_root))

    # Run the score pipeline as a separate process. First print the version of the interpreter that is used.
    python_executable = sys.executable
    [return_code1, stdout1] = spawn_and_monitor_subprocess(process=python_executable, args=["--version"])
    assert return_code1 == 0
    print(f"Executing Python version {stdout1[0]}")
    score_args = [
        str(model_root / fixed_paths.SCORE_SCRIPT),
        f"--data_folder={str(data_root)}",
        f"--image_files={img_files[0]},{img_files[1]}",
        "--use_gpu=False",
    ]
    return_code, stdout2 = spawn_and_monitor_subprocess(process=python_executable, args=score_args)

    # Check that the process completed as expected
    assert return_code == 0, f"Subprocess failed with return code {return_code}. Stdout: {os.linesep.join(stdout2)}"
    expected_segmentation_path = Path(model_root) / DEFAULT_RESULT_IMAGE_NAME
    assert expected_segmentation_path.exists(), f"Result file not found: {expected_segmentation_path}"

    # Sanity check the resulting segmentation: same shape as the first input channel, unit header.
    expected_shape = get_nifti_shape(train_and_test_data_dir / img_files[0])
    image_header = get_unit_image_header()
    assert_nifti_content(str(expected_segmentation_path), expected_shape, image_header, [3], np.ubyte)
def test_store_as_ubyte_nifti(test_output_dirs: TestOutputDirectories) -> None:
    """
    Write a random integer image with io_util.store_as_ubyte_nifti and check the resulting file with
    assert_nifti_content, using the unique values of the input converted to np.ubyte.

    :param test_output_dirs: Provides the path of the Nifti file that is written.
    """
    random_image = np.random.random_sample((dim_z, dim_y, dim_x))
    # (x + 1) * 255 for x in [0, 1) gives values in [255, 510) before the conversion to int.
    int_image = np.array((random_image + 1) * 255).astype(int)
    header = ImageHeader(origin=(1, 1, 1), direction=(1, 0, 0, 0, 1, 0, 0, 0, 1), spacing=(1, 2, 4))
    io_util.store_as_ubyte_nifti(int_image,
                                 header,
                                 test_output_dirs.create_file_or_folder_path(default_image_name))
    unique_values = np.unique(int_image).astype(np.ubyte)
    assert_nifti_content(test_output_dirs.create_file_or_folder_path(default_image_name),
                         int_image.shape,
                         header,
                         list(unique_values),
                         np.ubyte)
def test_store_as_scaled_ubyte_nifti(test_output_dirs: TestOutputDirectories, input_range: Any) -> None:
    """
    Write a random image with io_util.store_as_scaled_ubyte_nifti and check the resulting file against
    the same image after a linear transform from input_range to (0, 255) and conversion to np.ubyte.

    :param test_output_dirs: Provides the path of the Nifti file that is written.
    :param input_range: The input range that is passed both to the function under test and to the
        linear transform that computes the comparison values.
    """
    random_image = np.random.random_sample((dim_z, dim_y, dim_x))
    header = ImageHeader(origin=(1, 1, 1), direction=(1, 0, 0, 0, 1, 0, 0, 0, 1), spacing=(1, 2, 4))
    io_util.store_as_scaled_ubyte_nifti(random_image,
                                        header,
                                        test_output_dirs.create_file_or_folder_path(default_image_name),
                                        input_range)
    rescaled_image = LinearTransform.transform(data=random_image,
                                               input_range=input_range,
                                               output_range=(0, 255))
    unique_values = np.unique(rescaled_image.astype(np.ubyte))
    assert_nifti_content(test_output_dirs.create_file_or_folder_path(default_image_name),
                         rescaled_image.shape,
                         header,
                         list(unique_values),
                         np.ubyte)
def test_store_posteriors_nifti(test_output_dirs: TestOutputDirectories, image: Any, expected: Any) -> None:
    """
    Write the given image with io_util.store_posteriors_as_nifti and check the resulting file with
    assert_nifti_content, using the given expected values and voxel type np.ubyte.

    :param test_output_dirs: Provides the path of the Nifti file that is written.
    :param image: The image to store. It is converted to a numpy array first.
    :param expected: The values that are passed to assert_nifti_content, after conversion to a list.
    """
    posteriors = np.array(image)
    header = ImageHeader(origin=(1, 1, 1), direction=(1, 0, 0, 0, 1, 0, 0, 0, 1), spacing=(1, 1, 1))
    output_path = test_output_dirs.create_file_or_folder_path(default_image_name)
    io_util.store_posteriors_as_nifti(posteriors, header, output_path)
    assert_nifti_content(test_output_dirs.create_file_or_folder_path(default_image_name),
                         posteriors.shape,
                         header,
                         list(expected),
                         np.ubyte)
def test_scale_and_unscale_image(test_output_dirs: TestOutputDirectories) -> None:
    """
    Test if an image in the CT value range can be recovered when we save dataset examples
    (undoing the effects of CT Windowing).

    :param test_output_dirs: Provides the path of the Nifti file that is written.
    """
    image_size = (5, 5, 5)
    spacing = (1, 2, 3)
    header = ImageHeader(origin=(0, 1, 0), direction=(-1, 0, 0, 0, -1, 0, 0, 0, -1), spacing=spacing)
    np.random.seed(0)
    # Random image values drawn from a normal distribution with mean -100, std 100.
    image = np.random.normal(-100, 100, size=image_size)
    window = 200
    level = -100
    # Lower and upper bounds of the interval of raw CT values that will be retained.
    half_window = window / 2
    lower = level - half_window
    upper = level + half_window
    # Reference image: all values outside of [lower, upper] are set to the nearest boundary, because values
    # outside those boundaries can't be recovered after saving and loading.
    below_lower = image < lower
    above_upper = image > upper
    image_restricted = image.copy()
    image_restricted[below_lower] = lower
    image_restricted[above_upper] = upper
    # The image will be saved with voxel type short
    image_restricted = image_restricted.astype(int)
    # Apply window and level, mapping to the usual CNN input value range
    cnn_input_range = (-1, +1)
    image_windowed = LinearTransform.transform(data=image,
                                               input_range=(lower, upper),
                                               output_range=cnn_input_range)
    args = SegmentationModelBase(norm_method=PhotometricNormalizationMethod.CtWindow,
                                 output_range=cnn_input_range,
                                 window=window,
                                 level=level,
                                 should_validate=False)
    file_name = test_output_dirs.create_file_or_folder_path("scale_and_unscale_image.nii.gz")
    io_util.store_image_as_short_nifti(image_windowed, header, file_name, args)
    image_from_disk = io_util.load_nifti_image(file_name)
    expected_values = np.unique(image_restricted).tolist()
    # noinspection PyTypeChecker
    assert_nifti_content(file_name, image_size, header, expected_values, np.short)
    assert np.array_equal(image_from_disk.image, image_restricted)
def test_store_as_nifti(test_output_dirs: TestOutputDirectories,
                        image_type: Any,
                        scale: Any,
                        input_range: Any,
                        output_range: Any) -> None:
    """
    Write a random image with io_util.store_as_nifti, check the resulting file with assert_nifti_content,
    and check that the spacing of the header survives loading the file back in.

    :param test_output_dirs: Provides the path of the Nifti file that is written.
    :param image_type: The voxel type that is passed to the function under test. The comparison image is
        converted to the same type.
    :param scale: Passed to the function under test. If truthy, the comparison image is computed via a
        linear transform from input_range to output_range.
    :param input_range: Passed to the function under test and to the linear transform.
    :param output_range: Passed to the function under test and to the linear transform.
    """
    image = np.random.random_sample((dim_z, dim_y, dim_x))
    spacingzyx = (1, 2, 3)
    path_image = test_output_dirs.create_file_or_folder_path(default_image_name)
    header = ImageHeader(origin=(1, 1, 1), direction=(1, 0, 0, 0, 1, 0, 0, 0, 1), spacing=spacingzyx)
    io_util.store_as_nifti(image, header, path_image, image_type, scale, input_range, output_range)

    if scale:
        transformed = LinearTransform.transform(data=image, input_range=input_range, output_range=output_range)
        image = transformed.astype(image_type)  # type: ignore
    assert_nifti_content(test_output_dirs.create_file_or_folder_path(default_image_name),
                         image.shape,
                         header,
                         list(np.unique(image.astype(image_type))),
                         image_type)

    loaded_image = io_util.load_nifti_image(path_image, image_type)
    assert loaded_image.header.spacing == spacingzyx
def test_model_test(test_output_dirs: OutputFolderForTests) -> None:
    """
    Run model_testing.segmentation_model_test for a DummyModel on the test split, using checkpoints copied
    from the test data folder, and check the metric, the files and the Nifti images that are written to the
    epoch results folder.

    :param test_output_dirs: Provides the root folder for all outputs of this test.
    """
    train_and_test_data_dir = full_ml_test_data_path("train_and_test_data")

    config = DummyModel()
    config.set_output_to(test_output_dirs.root_dir)
    epoch = 1
    config.num_epochs = epoch
    assert config.get_test_epochs() == [epoch]
    placeholder_dataset_id = "place_holder_dataset_id"
    config.azure_dataset_id = placeholder_dataset_id
    transform = config.get_full_image_sample_transforms().test
    # Restrict the dataset to subjects 1 and 2.
    full_df = pd.read_csv(full_ml_test_data_path(DATASET_CSV_FILE_NAME))
    df = full_df[full_df.subject.isin([1, 2])]
    test_dataset = FullImageDataset(config, df, full_image_sample_transforms=transform)
    # noinspection PyTypeHints
    config._datasets_for_inference = {ModelExecutionMode.TEST: test_dataset}  # type: ignore
    execution_mode = ModelExecutionMode.TEST
    checkpoint_handler = get_default_checkpoint_handler(model_config=config,
                                                        project_root=test_output_dirs.root_dir)
    # Mimic the behaviour that checkpoints are downloaded from blob storage into the checkpoints folder.
    stored_checkpoints = full_ml_test_data_path("checkpoints")
    shutil.copytree(str(stored_checkpoints), str(config.checkpoint_folder))
    checkpoint_handler.additional_training_done()
    inference_results = model_testing.segmentation_model_test(config,
                                                              data_split=execution_mode,
                                                              checkpoint_handler=checkpoint_handler)
    epoch_dir = config.outputs_folder / get_epoch_results_path(epoch, execution_mode)
    assert inference_results.epochs[epoch] == pytest.approx(0.66606902, abs=1e-6)
    assert config.outputs_folder.is_dir()
    assert epoch_dir.is_dir()
    patient1 = io_util.load_nifti_image(train_and_test_data_dir / "id1_channel1.nii.gz")
    patient2 = io_util.load_nifti_image(train_and_test_data_dir / "id2_channel1.nii.gz")

    assert_file_contains_string(epoch_dir / DATASET_ID_FILE, placeholder_dataset_id)
    assert_file_contains_string(epoch_dir / GROUND_TRUTH_IDS_FILE, "region")
    # The metrics files must match the reference files that are stored with the test data.
    for metrics_file in (model_testing.METRICS_FILE_NAME, model_testing.METRICS_AGGREGATES_FILE):
        assert_text_files_match(epoch_dir / metrics_file, train_and_test_data_dir / metrics_file)
    # Plotting results vary between platforms. Can only check if the file is generated, but not its contents.
    assert (epoch_dir / model_testing.BOXPLOT_FILE).exists()

    # Each output image is checked for both subjects: shape and header of the input image, and a single
    # unique voxel value.
    expected_image_values = [
        ("posterior_region.nii.gz", 136),
        (DEFAULT_RESULT_IMAGE_NAME, 1),
        ("posterior_background.nii.gz", 118),
    ]
    for image_name, expected_value in expected_image_values:
        for subject_folder, patient in (("001", patient1), ("002", patient2)):
            assert_nifti_content(epoch_dir / subject_folder / image_name,
                                 get_image_shape(patient),
                                 patient.header,
                                 [expected_value],
                                 np.ubyte)

    thumbnails_folder = epoch_dir / model_testing.THUMBNAILS_FOLDER
    assert thumbnails_folder.is_dir()
    png_files = list(thumbnails_folder.glob("*.png"))
    overlays = [png for png in png_files if "_region_slice_" in str(png)]
    assert len(overlays) == len(df.subject.unique()), "There should be one overlay/contour file per subject"

    # Writing dataset.csv normally happens at the beginning of training,
    # but this test reads off a saved checkpoint file.
    # Dataset.csv must be present for plot_cross_validation.
    config.write_dataset_files()
    # Test if the metrics files can be picked up correctly by the cross validation code
    result_files = get_config_and_results_for_offline_runs(config).files
    assert len(result_files) == 1
    for result_file in result_files:
        assert result_file.execution_mode == execution_mode
        assert result_file.dataset_csv_file is not None
        assert result_file.dataset_csv_file.exists()
        assert result_file.metrics_file is not None
        assert result_file.metrics_file.exists()
def test_register_and_score_model(is_ensemble: bool,
                                  dataset_expected_spacing_xyz: Any,
                                  model_outside_package: bool,
                                  test_output_dirs: OutputFolderForTests) -> None:
    """
    End-to-end test which ensures the scoring pipeline is functioning as expected by performing the following:
    1) Registering a pre-trained model to AML
    2) Checking that a model zip from the registered model can be created successfully
    3) Calling the scoring pipeline to check inference can be run from the published model successfully

    The registered model, the output folder and the local model folder are cleaned up at the end of the test,
    also if the test fails.

    :param is_ensemble: If True, the checkpoint path is passed twice to the model registration.
    :param dataset_expected_spacing_xyz: The value to set for the field of the same name on the model config.
    :param model_outside_package: If True, the model config is loaded from "Tests.ML.configs" and "Tests" is
        set as the extra code directory.
    :param test_output_dirs: Provides the root folder for all outputs of this test.
    """
    ws = get_default_workspace()
    # Get an existing config as template
    loader = get_model_loader("Tests.ML.configs" if model_outside_package else None)
    config: SegmentationModelBase = loader.create_model_config_from_name(
        model_name="BasicModel2EpochsOutsidePackage" if model_outside_package else "BasicModel2Epochs"
    )
    config.dataset_expected_spacing_xyz = dataset_expected_spacing_xyz
    config.set_output_to(test_output_dirs.root_dir)
    # copy checkpoints into the outputs (simulating a run)
    stored_checkpoints = full_ml_test_data_path(os.path.join("train_and_test_data", "checkpoints"))
    shutil.copytree(str(stored_checkpoints), str(config.checkpoint_folder))
    paths = [config.checkpoint_folder / "1_checkpoint.pth.tar"]
    checkpoints = paths * 2 if is_ensemble else paths
    model = None
    model_path = None
    # Mocking to get the source from the current directory
    # the score.py and python_wrapper.py cannot be moved inside the InnerEye package, which will be the
    # only code running (if these tests are run on the package).
    with mock.patch('InnerEye.Common.fixed_paths.repository_root_directory',
                    return_value=tests_root_directory().parent):
        try:
            tags = {"model_name": config.model_name}
            azure_config = get_default_azure_config()
            if model_outside_package:
                azure_config.extra_code_directory = "Tests"  # contains DummyModel
            deployment_hook = lambda cfg, azure_cfg, mdl, is_ens: (Path(cfg.model_name), azure_cfg.docker_shm_size)
            ml_runner = MLRunner(config, azure_config, model_deployment_hook=deployment_hook)
            model, deployment_path, deployment_details = ml_runner.register_segmentation_model(
                workspace=ws,
                tags=tags,
                best_epoch=0,
                best_epoch_dice=0,
                checkpoint_paths=checkpoints,
                model_proc=ModelProcessing.DEFAULT)
            assert model is not None
            model_path = Path(model.get_model_path(model.name, model.version, ws))
            assert (model_path / fixed_paths.ENVIRONMENT_YAML_FILE_NAME).exists()
            assert (model_path / Path("InnerEye/ML/runner.py")).exists()
            # The deployment hook above returns the model name as a path, and the docker_shm_size.
            assert deployment_path == Path(config.model_name)
            assert deployment_details == azure_config.docker_shm_size

            # move test data into the data folder to simulate an actual run
            train_and_test_data_dir = full_ml_test_data_path("train_and_test_data")
            img_channel_1_name = "id1_channel1.nii.gz"
            img_channel_1_path = train_and_test_data_dir / img_channel_1_name
            img_channel_2_name = "id1_channel2.nii.gz"
            img_channel_2_path = train_and_test_data_dir / img_channel_2_name

            # download the registered model and test that we can run the score pipeline on it
            model_root = Path(model.download(str(test_output_dirs.root_dir)))
            # create a dummy datastore to store model checkpoints and image data
            # this simulates the code snapshot being executed in a real run
            test_datastore = test_output_dirs.root_dir / "test_datastore"
            shutil.move(
                str(model_root / "test_outputs"),
                str(test_datastore / RELATIVE_TEST_OUTPUTS_PATH)
            )
            data_root = test_datastore / DEFAULT_DATA_FOLDER
            os.makedirs(data_root)
            shutil.copy(str(img_channel_1_path), data_root)
            shutil.copy(str(img_channel_2_path), data_root)

            # run score pipeline as a separate process using the python_wrapper.py code to simulate a real run
            return_code = SubprocessConfig(process="python", args=[
                str(model_root / "python_wrapper.py"),
                "--spawnprocess=python",
                str(model_root / "score.py"),
                f"--data-folder={str(test_datastore)}",
                f"--test_image_channels={img_channel_1_name},{img_channel_2_name}",
                "--use_gpu=False"
            ]).spawn_and_monitor_subprocess()

            # check that the process completed as expected
            assert return_code == 0
            expected_segmentation_path = Path(model_root) / DEFAULT_RESULT_IMAGE_NAME
            assert expected_segmentation_path.exists()

            # sanity check the resulting segmentation
            expected_shape = get_nifti_shape(img_channel_1_path)
            image_header = get_unit_image_header()
            assert_nifti_content(str(expected_segmentation_path), expected_shape, image_header, [0], np.ubyte)
        finally:
            # Clean up. The steps are nested in try/finally so that a failure in one of them does not skip
            # the others. The registered model is deleted whenever the registration returned a model, even
            # if resolving its local path failed afterwards - otherwise the model would be left behind in
            # the workspace.
            try:
                if model is not None:
                    model.delete()
            finally:
                try:
                    shutil.rmtree(test_output_dirs.root_dir)
                finally:
                    if model_path is not None:
                        shutil.rmtree(model_path)
def test_store_inference_results(test_output_dirs: TestOutputDirectories) -> None:
    """
    Build an inference result from random posteriors for 2 classes, write it with store_inference_results,
    and check all Nifti files that are expected in the folder "012" for the patient.

    :param test_output_dirs: Provides the root folder into which the results are written.
    """
    np.random.seed(0)
    num_classes = 2
    # Random values, normalized via softmax across the class dimension.
    random_values = np.random.random_sample((num_classes, dim_z, dim_y, dim_x))
    posterior = torch.nn.functional.softmax(torch.from_numpy(random_values), dim=0).numpy()
    segmentation = np.argmax(posterior, axis=0)
    assert segmentation.shape == (dim_z, dim_y, dim_x)

    posterior0 = to_unique_bytes(posterior[0], (0, 1))
    posterior1 = to_unique_bytes(posterior[1], (0, 1))
    spacing = (2.0, 2.0, 2.0)
    header = ImageHeader(origin=(0, 0, 0), direction=(1, 0, 0, 0, 1, 0, 0, 0, 1), spacing=spacing)
    inference_result = InferencePipeline.Result(epoch=1,
                                                patient_id=12,
                                                posteriors=posterior,
                                                segmentation=segmentation,
                                                voxel_spacing_mm=(1, 1, 1))

    test_config = _create_config_with_folders(test_output_dirs)
    assert test_config.class_and_index_with_background() == {"background": 0, "region": 1}

    results_folder = test_output_dirs.root_dir
    store_inference_results(inference_result, test_config, Path(results_folder), header)

    def check_patient_file(file_name: str, shape: Any, unique_values: Any) -> None:
        """Check one ubyte Nifti file in the results folder of patient "012"."""
        assert_nifti_content(os.path.join(results_folder, "012", file_name),
                             shape,
                             header,
                             unique_values,
                             np.ubyte)

    check_patient_file("posterior_background.nii.gz", segmentation.shape, list(posterior0))
    check_patient_file("posterior_region.nii.gz", segmentation.shape, list(posterior1))
    check_patient_file("background.nii.gz", segmentation.shape, [0, 1])
    check_patient_file("region.nii.gz", segmentation.shape, [0, 1])
    check_patient_file(DEFAULT_RESULT_IMAGE_NAME, segmentation.shape, list(np.unique(segmentation)))
    check_patient_file("uncertainty.nii.gz", inference_result.uncertainty.shape, [248, 249, 253, 254])
def test_register_and_score_model(is_ensemble: bool,
                                  dataset_expected_spacing_xyz: Any,
                                  model_outside_package: bool,
                                  test_output_dirs: OutputFolderForTests) -> None:
    """
    End-to-end test which ensures the scoring pipeline is functioning as expected by performing the following:
    1) Registering a pre-trained model to AML
    2) Checking that a model zip from the registered model can be created successfully
    3) Calling the scoring pipeline to check inference can be run from the published model successfully

    :param is_ensemble: If True, two checkpoints are written and registered, otherwise one.
    :param dataset_expected_spacing_xyz: The value to set for the field of the same name on the model config.
    :param model_outside_package: If True, the model config is loaded from "Tests.ML.configs" and "Tests" is
        set as the extra code directory.
    :param test_output_dirs: Provides the root folder for all outputs of this test.
    """
    # We are creating checkpoints on the fly in this test, writing a randomly initialized model.
    set_random_seed(0)
    # Get an existing config as template
    if model_outside_package:
        loader = get_model_loader("Tests.ML.configs")
        model_name = "BasicModel2EpochsOutsidePackage"
    else:
        loader = get_model_loader(None)
        model_name = "BasicModel2Epochs"
    config: SegmentationModelBase = loader.create_model_config_from_name(model_name=model_name)
    config.dataset_expected_spacing_xyz = dataset_expected_spacing_xyz
    config.set_output_to(test_output_dirs.root_dir)

    model_and_info = ModelAndInfo(config=config, model_execution_mode=ModelExecutionMode.TRAIN)
    model_and_info.create_model()
    model_and_info.create_optimizer()
    # An ensemble gets a second checkpoint, written from the same model for a different epoch.
    checkpoint_epochs = [10, 20] if is_ensemble else [10]
    checkpoints_absolute = [model_and_info.save_checkpoint(epoch=e) for e in checkpoint_epochs]
    checkpoints_relative = [c.relative_to(config.checkpoint_folder) for c in checkpoints_absolute]
    azureml_model = None
    # Simulate a project root: We can't derive that from the repository root because that might point
    # into Python's package folder
    project_root = Path(__file__).parent.parent
    # Double-check that we are at the right place, by testing for a file that would quite certainly not be found
    # somewhere else
    assert (project_root / fixed_paths.SCORE_SCRIPT).is_file()
    try:
        azure_config = get_default_azure_config()
        if model_outside_package:
            azure_config.extra_code_directory = "Tests"  # contains BasicModel2EpochsOutsidePackage
        deployment_hook = lambda cfg, azure_cfg, mdl, is_ens: (Path(cfg.model_name), azure_cfg.docker_shm_size)
        ml_runner = MLRunner(config,
                             azure_config,
                             project_root=project_root,
                             model_deployment_hook=deployment_hook)
        registration_result = ml_runner.register_segmentation_model(model_description="",
                                                                    checkpoint_paths=checkpoints_absolute,
                                                                    model_proc=ModelProcessing.DEFAULT)
        assert registration_result is not None
        azureml_model, deployment_result = registration_result
        assert azureml_model is not None
        # The deployment result must be exactly what the deployment hook above returns.
        assert deployment_result == (Path(config.model_name), azure_config.docker_shm_size)

        # download the registered model and test that we can run the score pipeline on it
        model_root = Path(azureml_model.download(str(test_output_dirs.root_dir)))
        # The model needs to contain score.py at the root, the (merged) environment definition,
        # and the inference config.
        expected_files = [
            *fixed_paths.SCRIPTS_AT_ROOT,
            fixed_paths.ENVIRONMENT_YAML_FILE_NAME,
            fixed_paths.MODEL_INFERENCE_JSON_FILE_NAME,
            "InnerEye/ML/runner.py",
        ]
        # All checkpoints go into their own folder
        expected_files += [str(Path(CHECKPOINT_FOLDER) / c) for c in checkpoints_relative]
        for expected_file in expected_files:
            assert (model_root / expected_file).is_file(), f"File {expected_file} missing"

        # create a dummy datastore to store the image data
        test_datastore = test_output_dirs.root_dir / "test_datastore"
        # move test data into the data folder to simulate an actual run
        train_and_test_data_dir = full_ml_test_data_path("train_and_test_data")
        img_files = ["id1_channel1.nii.gz", "id1_channel2.nii.gz"]
        data_root = test_datastore / fixed_paths.DEFAULT_DATA_FOLDER
        data_root.mkdir(parents=True)
        for img_file in img_files:
            shutil.copy(str(train_and_test_data_dir / img_file), str(data_root))

        # run score pipeline as a separate process, after printing the version of the interpreter in use
        python_executable = sys.executable
        version_check = SubprocessConfig(process=python_executable, args=["--version"])
        [return_code1, stdout1] = version_check.spawn_and_monitor_subprocess()
        assert return_code1 == 0
        print(f"Executing Python version {stdout1[0]}")
        scoring = SubprocessConfig(process=python_executable,
                                   args=[
                                       str(model_root / fixed_paths.SCORE_SCRIPT),
                                       f"--data_folder={str(data_root)}",
                                       f"--image_files={img_files[0]},{img_files[1]}",
                                       "--use_gpu=False"
                                   ])
        return_code, stdout2 = scoring.spawn_and_monitor_subprocess()

        # check that the process completed as expected
        assert return_code == 0, f"Subprocess failed with return code {return_code}. Stdout: {os.linesep.join(stdout2)}"
        expected_segmentation_path = Path(model_root) / DEFAULT_RESULT_IMAGE_NAME
        assert expected_segmentation_path.exists(), f"Result file not found: {expected_segmentation_path}"

        # sanity check the resulting segmentation
        expected_shape = get_nifti_shape(train_and_test_data_dir / img_files[0])
        image_header = get_unit_image_header()
        assert_nifti_content(str(expected_segmentation_path), expected_shape, image_header, [3], np.ubyte)
    finally:
        # delete the registered model
        if azureml_model:
            azureml_model.delete()
def test_model_test(test_output_dirs: OutputFolderForTests,
                    use_partial_ground_truth: bool,
                    allow_partial_ground_truth: bool) -> None:
    """
    Check the CSVs (and image files) output by InnerEye.ML.model_testing.segmentation_model_test

    :param test_output_dirs: The fixture in conftest.py
    :param use_partial_ground_truth: Whether to remove some ground truth labels from some test users
    :param allow_partial_ground_truth: What to set the allow_incomplete_labels flag to
    """
    train_and_test_data_dir = full_ml_test_data_path("train_and_test_data")
    seed_everything(42)

    config = DummyModel()
    config.allow_incomplete_labels = allow_partial_ground_truth
    config.set_output_to(test_output_dirs.root_dir)
    placeholder_dataset_id = "place_holder_dataset_id"
    config.azure_dataset_id = placeholder_dataset_id
    transform = config.get_full_image_sample_transforms().test
    df = pd.read_csv(full_ml_test_data_path(DATASET_CSV_FILE_NAME))

    if use_partial_ground_truth:
        config.check_exclusive = False
        config.ground_truth_ids = ["region", "region_1"]

        # As in Tests.ML.pipelines.test.inference.test_evaluate_model_predictions patients 3, 4,
        # and 5 are in the test dataset with:
        # Patient 3 has one missing ground truth channel: "region"
        df = df[df["subject"].ne(3) | df["channel"].ne("region")]
        # Patient 4 has all missing ground truth channels: "region", "region_1"
        df = df[df["subject"].ne(4) | df["channel"].ne("region")]
        df = df[df["subject"].ne(4) | df["channel"].ne("region_1")]
        # Patient 5 has no missing ground truth channels.

        config.dataset_data_frame = df

        df = df[df.subject.isin([3, 4, 5])]

        config.train_subject_ids = ['1', '2']
        config.test_subject_ids = ['3', '4', '5']
        config.val_subject_ids = ['6', '7']
    else:
        df = df[df.subject.isin([1, 2])]

    if use_partial_ground_truth and not allow_partial_ground_truth:
        # Creating the dataset must fail if labels are missing and incomplete labels are not allowed.
        with pytest.raises(ValueError) as value_error:
            # noinspection PyTypeHints
            config._datasets_for_inference = {
                ModelExecutionMode.TEST: FullImageDataset(config, df, full_image_sample_transforms=transform)
            }  # type: ignore
        # The exception info can only be inspected after the `with` block has exited. A statement inside the
        # block, after the line that raises, would never be executed.
        assert "Patient 3 does not have channel 'region'" in str(value_error.value)
        return

    # noinspection PyTypeHints
    config._datasets_for_inference = {
        ModelExecutionMode.TEST: FullImageDataset(config, df, full_image_sample_transforms=transform)
    }  # type: ignore
    execution_mode = ModelExecutionMode.TEST
    checkpoint_handler = get_default_checkpoint_handler(model_config=config,
                                                        project_root=test_output_dirs.root_dir)
    # Mimic the behaviour that checkpoints are downloaded from blob storage into the checkpoints folder.
    create_model_and_store_checkpoint(config, config.checkpoint_folder / LAST_CHECKPOINT_FILE_NAME_WITH_SUFFIX)
    checkpoint_handler.additional_training_done()
    inference_results = model_testing.segmentation_model_test(
        config,
        execution_mode=execution_mode,
        checkpoint_paths=checkpoint_handler.get_checkpoints_to_test())
    epoch_dir = config.outputs_folder / get_best_epoch_results_path(execution_mode)
    # Name of the column in the aggregates file: "total_" plus the lower-cased patient column name,
    # with an "s" appended unless it already ends in one.
    total_num_patients_column_name = f"total_{MetricsFileColumns.Patient.value}".lower()
    if not total_num_patients_column_name.endswith("s"):
        total_num_patients_column_name += "s"

    if use_partial_ground_truth:
        num_subjects = len(pd.unique(df["subject"]))
        if allow_partial_ground_truth:
            assert csv_column_contains_value(
                csv_file_path=epoch_dir / METRICS_AGGREGATES_FILE,
                column_name=total_num_patients_column_name,
                value=num_subjects,
                contains_only_value=True)
            assert csv_column_contains_value(
                csv_file_path=epoch_dir / SUBJECT_METRICS_FILE_NAME,
                column_name=MetricsFileColumns.Dice.value,
                value='',
                contains_only_value=False)
    else:
        aggregates_df = pd.read_csv(epoch_dir / METRICS_AGGREGATES_FILE)
        assert total_num_patients_column_name not in aggregates_df.columns  # Only added if using partial ground truth

        assert not csv_column_contains_value(
            csv_file_path=epoch_dir / SUBJECT_METRICS_FILE_NAME,
            column_name=MetricsFileColumns.Dice.value,
            value='',
            contains_only_value=False)

        # All checks below compare against reference data for subjects 1 and 2 (files id1_*, id2_* and result
        # folders "001", "002"). Those are the test subjects only in this branch; with partial ground truth,
        # the test dataset is made up of subjects 3, 4 and 5.
        assert inference_results.metrics == pytest.approx(0.66606902, abs=1e-6)
        assert config.outputs_folder.is_dir()
        assert epoch_dir.is_dir()
        patient1 = io_util.load_nifti_image(train_and_test_data_dir / "id1_channel1.nii.gz")
        patient2 = io_util.load_nifti_image(train_and_test_data_dir / "id2_channel1.nii.gz")

        assert_file_contains_string(epoch_dir / DATASET_ID_FILE, placeholder_dataset_id)
        assert_file_contains_string(epoch_dir / GROUND_TRUTH_IDS_FILE, "region")
        assert_text_files_match(epoch_dir / model_testing.SUBJECT_METRICS_FILE_NAME,
                                train_and_test_data_dir / model_testing.SUBJECT_METRICS_FILE_NAME)
        assert_text_files_match(epoch_dir / model_testing.METRICS_AGGREGATES_FILE,
                                train_and_test_data_dir / model_testing.METRICS_AGGREGATES_FILE)
        # Plotting results vary between platforms. Can only check if the file is generated, but not its contents.
        assert (epoch_dir / model_testing.BOXPLOT_FILE).exists()

        assert_nifti_content(epoch_dir / "001" / "posterior_region.nii.gz", get_image_shape(patient1),
                             patient1.header, [137], np.ubyte)
        assert_nifti_content(epoch_dir / "002" / "posterior_region.nii.gz", get_image_shape(patient2),
                             patient2.header, [137], np.ubyte)
        assert_nifti_content(epoch_dir / "001" / DEFAULT_RESULT_IMAGE_NAME, get_image_shape(patient1),
                             patient1.header, [1], np.ubyte)
        assert_nifti_content(epoch_dir / "002" / DEFAULT_RESULT_IMAGE_NAME, get_image_shape(patient2),
                             patient2.header, [1], np.ubyte)
        assert_nifti_content(epoch_dir / "001" / "posterior_background.nii.gz", get_image_shape(patient1),
                             patient1.header, [117], np.ubyte)
        assert_nifti_content(epoch_dir / "002" / "posterior_background.nii.gz", get_image_shape(patient2),
                             patient2.header, [117], np.ubyte)
        thumbnails_folder = epoch_dir / model_testing.THUMBNAILS_FOLDER
        assert thumbnails_folder.is_dir()
        png_files = list(thumbnails_folder.glob("*.png"))
        overlays = [f for f in png_files if "_region_slice_" in str(f)]
        assert len(overlays) == len(df.subject.unique()), "There should be one overlay/contour file per subject"

    # Writing dataset.csv normally happens at the beginning of training,
    # but this test reads off a saved checkpoint file.
    # Dataset.csv must be present for plot_cross_validation.
    config.write_dataset_files()
    # Test if the metrics files can be picked up correctly by the cross validation code
    config_and_files = get_config_and_results_for_offline_runs(config)
    result_files = config_and_files.files
    assert len(result_files) == 1
    for file in result_files:
        assert file.execution_mode == execution_mode
        assert file.dataset_csv_file is not None
        assert file.dataset_csv_file.exists()
        assert file.metrics_file is not None
        assert file.metrics_file.exists()