def test_run_scoring(test_output_dirs: OutputFolderForTests, is_ensemble: bool) -> None:
    """
    Run the scoring script on an image file.
    This test lives outside the normal Tests folder because it imports "score.py" from the repository root folder.
    If we switched to InnerEye as a package, we would have to treat this import as special.
    The inference run here is on a 1-channel model, whereas test_register_and_score_model works with a
    2-channel model.
    """
    seed_everything(42)
    checkpoint = test_output_dirs.root_dir / "checkpoint.ckpt"
    image_size = (40, 40, 40)
    test_crop_size = image_size
    dummy_config = DummyModel()
    dummy_config.test_crop_size = test_crop_size
    dummy_config.inference_stride_size = (10, 10, 10)
    dummy_config.inference_batch_size = 10
    create_model_and_store_checkpoint(dummy_config, checkpoint)
    all_paths = [checkpoint] * 2 if is_ensemble else [checkpoint]
    inference_pipeline, dummy_config = create_inference_pipeline(dummy_config, all_paths, use_gpu=False)
    image_with_header = io_util.load_nifti_image(test_image)
    image_with_header.image = image_with_header.image[:image_size[0], :image_size[1], :image_size[2]]
    result = run_inference([image_with_header, image_with_header], inference_pipeline, dummy_config)
    assert image_with_header.image.shape == result.shape  # type: ignore
    print(f"Unique result values: {np.unique(result)}")
    assert np.all(result == 1)

def test_create_from_checkpoint_ensemble(test_output_dirs: OutputFolderForTests) -> None:
    config = ClassificationModelForTesting()
    path_to_checkpoint_non_exist = test_output_dirs.root_dir / "does_not_exist.ckpt"
    path_to_checkpoint_exist = test_output_dirs.root_dir / "foo.ckpt"
    create_model_and_store_checkpoint(config, path_to_checkpoint_exist)
    # When none of the checkpoints exist, raise an error
    with pytest.raises(ValueError):
        paths_to_checkpoint = [path_to_checkpoint_non_exist] * 5
        ScalarEnsemblePipeline.create_from_checkpoint(paths_to_checkpoint, config)
    # When only some of the checkpoints exist, build the ensemble from those
    paths_to_checkpoint = [path_to_checkpoint_non_exist] * 3 + [path_to_checkpoint_exist] * 2
    inference_pipeline = ScalarEnsemblePipeline.create_from_checkpoint(paths_to_checkpoint, config)
    assert isinstance(inference_pipeline, ScalarEnsemblePipeline)
    assert len(inference_pipeline.pipelines) == 2
    # When all checkpoints exist
    paths_to_checkpoint = [path_to_checkpoint_exist] * 5
    inference_pipeline = ScalarEnsemblePipeline.create_from_checkpoint(paths_to_checkpoint, config)
    assert isinstance(inference_pipeline, ScalarEnsemblePipeline)
    assert len(inference_pipeline.pipelines) == 5

def test_score_image_dicom_mock_none(test_output_dirs: OutputFolderForTests) -> None:
    """
    Test that dicom in and dicom-rt out works.

    Finally, there is no mocking and full image scoring is run using the PassThroughModel.

    :param test_output_dirs: Test output directories.
    """
    model_config = PassThroughModel()
    model_config.set_output_to(test_output_dirs.root_dir)
    checkpoint_path = model_config.checkpoint_folder / "checkpoint.ckpt"
    create_model_and_store_checkpoint(model_config, checkpoint_path)
    azure_config = AzureConfig()
    project_root = Path(__file__).parent.parent
    ml_runner = MLRunner(model_config=model_config, azure_config=azure_config, project_root=project_root)
    model_folder = test_output_dirs.root_dir / "final"
    ml_runner.copy_child_paths_to_folder(model_folder=model_folder, checkpoint_paths=[checkpoint_path])
    zipped_dicom_series_path = zip_dicom_series(model_folder)
    score_pipeline_config = ScorePipelineConfig(
        data_folder=zipped_dicom_series_path.parent,
        model_folder=str(model_folder),
        image_files=[str(zipped_dicom_series_path)],
        result_image_name=HNSEGMENTATION_FILE.name,
        use_gpu=False,
        use_dicom=True)
    segmentation = score_image(score_pipeline_config)
    assert_zip_file_contents(segmentation, HN_DICOM_RT_ZIPPED, model_folder)

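# Illustrative parametrization for the test below (assumed values): a segmentation config is expected to
# yield the single-model and ensemble segmentation pipelines, and a scalar config the scalar equivalents.
# The actual pairings used in the test suite may differ.
@pytest.mark.parametrize(["config", "expected_inference_type", "expected_ensemble_type"],
                         [(DummyModel(), InferencePipeline, EnsemblePipeline),
                          (ClassificationModelForTesting(), ScalarInferencePipeline, ScalarEnsemblePipeline)])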
def test_create_inference_pipeline(config: ModelConfigBase,
                                   expected_inference_type: type,
                                   expected_ensemble_type: type,
                                   test_output_dirs: OutputFolderForTests) -> None:
    config.set_output_to(test_output_dirs.root_dir)
    checkpoint_path = test_output_dirs.root_dir / "checkpoint.ckpt"
    create_model_and_store_checkpoint(config, checkpoint_path)
    inference = create_inference_pipeline(config, [checkpoint_path])
    assert isinstance(inference, expected_inference_type)
    ensemble = create_inference_pipeline(config, [checkpoint_path] * 2)
    assert isinstance(ensemble, expected_ensemble_type)

def test_create_from_checkpoint_non_ensemble(test_output_dirs: OutputFolderForTests) -> None:
    config = ClassificationModelForTesting()
    # When the checkpoint does not exist, return None
    path_to_checkpoint = test_output_dirs.root_dir / "foo.ckpt"
    inference_pipeline = ScalarInferencePipeline.create_from_checkpoint(path_to_checkpoint, config)
    assert inference_pipeline is None
    create_model_and_store_checkpoint(config, path_to_checkpoint)
    inference_pipeline = ScalarInferencePipeline.create_from_checkpoint(path_to_checkpoint, config)
    assert isinstance(inference_pipeline, ScalarInferencePipeline)

def test_score_image_dicom_mock_run_store(test_output_dirs: OutputFolderForTests) -> None:
    """
    Test that dicom in and dicom-rt out works, by mocking out run and store functions.

    This mocks out run_inference and store_as_ubyte_nifti so that init_from_model_inference_json is tested
    in addition to the tests in test_score_image_dicom_mock_all.

    :param test_output_dirs: Test output directories.
    """
    mock_segmentation = {'mock_segmentation': True}
    model_config = DummyModel()
    model_config.set_output_to(test_output_dirs.root_dir)
    checkpoint_path = model_config.checkpoint_folder / "checkpoint.ckpt"
    create_model_and_store_checkpoint(model_config, checkpoint_path)
    azure_config = AzureConfig()
    project_root = Path(__file__).parent.parent
    ml_runner = MLRunner(model_config=model_config, azure_config=azure_config, project_root=project_root)
    model_folder = test_output_dirs.root_dir / "final"
    ml_runner.copy_child_paths_to_folder(model_folder=model_folder, checkpoint_paths=[checkpoint_path])
    zipped_dicom_series_path = test_output_dirs.root_dir / "temp_pack_dicom_series" / "dicom_series.zip"
    zip_known_dicom_series(zipped_dicom_series_path)
    score_pipeline_config = ScorePipelineConfig(
        data_folder=zipped_dicom_series_path.parent,
        model_folder=str(model_folder),
        image_files=[str(zipped_dicom_series_path)],
        result_image_name=HNSEGMENTATION_FILE.name,
        use_gpu=False,
        use_dicom=True,
        model_id="Dummy:1")
    with mock.patch('score.run_inference', return_value=mock_segmentation) as mock_run_inference:
        with mock.patch('score.store_as_ubyte_nifti',
                        return_value=HNSEGMENTATION_FILE) as mock_store_as_ubyte_nifti:
            segmentation = score_image(score_pipeline_config)
            assert_zip_file_contents(segmentation, HN_DICOM_RT_ZIPPED, model_folder)
            mock_run_inference.assert_called()
            mock_store_as_ubyte_nifti.assert_called()

def test_score_image_dicom_mock_run(test_output_dirs: OutputFolderForTests) -> None:
    """
    Test that dicom in and dicom-rt out works, by mocking out only the run scoring function.

    This mocks out run_inference so that store_as_ubyte_nifti is tested in addition to the tests in
    test_score_image_dicom_mock_run_store.

    :param test_output_dirs: Test output directories.
    """
    model_config = DummyModel()
    model_config.set_output_to(test_output_dirs.root_dir)
    checkpoint_path = model_config.checkpoint_folder / "checkpoint.ckpt"
    create_model_and_store_checkpoint(model_config, checkpoint_path)
    azure_config = AzureConfig()
    project_root = Path(__file__).parent.parent
    ml_runner = MLRunner(model_config=model_config, azure_config=azure_config, project_root=project_root)
    model_folder = test_output_dirs.root_dir / "final"
    ml_runner.copy_child_paths_to_folder(model_folder=model_folder, checkpoint_paths=[checkpoint_path])
    zipped_dicom_series_path = zip_dicom_series(model_folder)
    score_pipeline_config = ScorePipelineConfig(
        data_folder=zipped_dicom_series_path.parent,
        model_folder=str(model_folder),
        image_files=[str(zipped_dicom_series_path)],
        result_image_name=HNSEGMENTATION_FILE.name,
        use_gpu=False,
        use_dicom=True)
    image_with_header = io_util.load_nifti_image(HNSEGMENTATION_FILE)
    with mock.patch('score.run_inference',
                    return_value=image_with_header.image) as mock_run_inference:
        segmentation = score_image(score_pipeline_config)
        assert_zip_file_contents(segmentation, HN_DICOM_RT_ZIPPED, model_folder)
        mock_run_inference.assert_called()

def test_runner_restart(test_output_dirs: OutputFolderForTests) -> None:
    """
    Test that starting training from a folder whose checkpoints folder already contains recovery checkpoints
    is picked up as a recovery run. Also checks that the start epoch in the config is updated at loading time.
    """
    model_config = DummyClassification()
    model_config.set_output_to(test_output_dirs.root_dir)
    model_config.num_epochs = FIXED_EPOCH + 2
    # We save all checkpoints - if recovery works as expected we should have a new checkpoint for epoch 4, 5.
    model_config.recovery_checkpoint_save_interval = 1
    model_config.recovery_checkpoints_save_last_k = -1
    runner = MLRunner(model_config=model_config)
    runner.setup(use_mount_or_download_dataset=False)
    # Epochs are 0 based for saving
    create_model_and_store_checkpoint(model_config,
                                      runner.container.checkpoint_folder /
                                      f"{RECOVERY_CHECKPOINT_FILE_NAME}_epoch={FIXED_EPOCH - 1}{CHECKPOINT_SUFFIX}",
                                      weights_only=False)
    azure_config = get_default_azure_config()
    checkpoint_handler = CheckpointHandler(azure_config=azure_config,
                                           container=runner.container,
                                           project_root=test_output_dirs.root_dir)
    _, storing_logger = model_train(checkpoint_handler=checkpoint_handler, container=runner.container)
    # We expect 4 checkpoints: recovery checkpoints for epochs FIXED_EPOCH - 1, FIXED_EPOCH and
    # FIXED_EPOCH + 1, plus the best checkpoint.
    assert len(os.listdir(runner.container.checkpoint_folder)) == 4
    assert (runner.container.checkpoint_folder /
            f"{RECOVERY_CHECKPOINT_FILE_NAME}_epoch={FIXED_EPOCH - 1}{CHECKPOINT_SUFFIX}").exists()
    assert (runner.container.checkpoint_folder /
            f"{RECOVERY_CHECKPOINT_FILE_NAME}_epoch={FIXED_EPOCH}{CHECKPOINT_SUFFIX}").exists()
    assert (runner.container.checkpoint_folder /
            f"{RECOVERY_CHECKPOINT_FILE_NAME}_epoch={FIXED_EPOCH + 1}{CHECKPOINT_SUFFIX}").exists()
    assert (runner.container.checkpoint_folder / BEST_CHECKPOINT_FILE_NAME_WITH_SUFFIX).exists()
    # Check that training really restarted from epoch FIXED_EPOCH.
    assert list(storing_logger.epochs) == [FIXED_EPOCH, FIXED_EPOCH + 1]  # type: ignore

def test_invalid_stride_size(test_output_dirs: OutputFolderForTests) -> None:
    config = SegmentationModelBase(architecture="UNet3D",
                                   feature_channels=[1],
                                   crop_size=(64, 64, 64),
                                   test_crop_size=(80, 80, 80),
                                   image_channels=["mr"],
                                   ground_truth_ids=["tumour_mass", "subtract"],
                                   train_batch_size=8,
                                   inference_batch_size=1,
                                   inference_stride_size=(120, 120, 120),
                                   should_validate=False)
    config.set_output_to(test_output_dirs.root_dir)
    checkpoint_path = test_output_dirs.root_dir / "checkpoint.ckpt"
    create_model_and_store_checkpoint(config, checkpoint_path)
    with pytest.raises(ValueError) as ex:
        load_from_checkpoint_and_adjust_for_inference(config=config, checkpoint_path=checkpoint_path)
    assert "The inference stride size (120, 120, 120) must be smaller" in ex.value.args[0]
    assert str(config.inference_stride_size) in ex.value.args[0]
    assert str(config.test_crop_size) in ex.value.args[0]

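# Illustrative parametrization for the test below (assumed values): exercise all combinations of
# cross-validation and training-set inference. The actual values used in the test suite may differ.
@pytest.mark.parametrize("perform_cross_validation", [True, False])
@pytest.mark.parametrize("perform_training_set_inference", [True, False])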
def test_model_inference_train_and_test(test_output_dirs: OutputFolderForTests,
                                        perform_cross_validation: bool,
                                        perform_training_set_inference: bool) -> None:
    config = DummyModel()
    config.number_of_cross_validation_splits = 2 if perform_cross_validation else 0
    config.perform_training_set_inference = perform_training_set_inference
    # Plotting crashes with random TCL errors on Windows, disable that for Windows PR builds.
    config.is_plotting_enabled = common_util.is_linux()
    config.set_output_to(test_output_dirs.root_dir)
    config.local_dataset = full_ml_test_data_path()
    checkpoint_path = config.checkpoint_folder / BEST_CHECKPOINT_FILE_NAME_WITH_SUFFIX
    create_model_and_store_checkpoint(config, checkpoint_path)
    checkpoint_handler = get_default_checkpoint_handler(model_config=config,
                                                        project_root=test_output_dirs.root_dir)
    checkpoint_handler.additional_training_done()
    result, _, _ = MLRunner(config).model_inference_train_and_test(checkpoint_handler=checkpoint_handler)
    if result is None:
        raise ValueError("Error result cannot be None")
    assert isinstance(result, InferenceMetricsForSegmentation)
    epoch_folder_name = common_util.BEST_EPOCH_FOLDER_NAME
    for folder in [ModelExecutionMode.TRAIN.value, ModelExecutionMode.VAL.value, ModelExecutionMode.TEST.value]:
        results_folder = config.outputs_folder / epoch_folder_name / folder
        folder_exists = results_folder.is_dir()
        if folder in [ModelExecutionMode.TRAIN.value, ModelExecutionMode.VAL.value]:
            # Train/val results are only expected when training set inference is requested.
            if perform_training_set_inference:
                assert folder_exists
        else:
            # Inference on the test set always runs, hence its results folder must exist.
            assert folder_exists

def run_model_inference_train_and_test(test_output_dirs: OutputFolderForTests,
                                       perform_cross_validation: bool,
                                       inference_on_train_set: Optional[bool] = None,
                                       inference_on_val_set: Optional[bool] = None,
                                       inference_on_test_set: Optional[bool] = None,
                                       ensemble_inference_on_train_set: Optional[bool] = None,
                                       ensemble_inference_on_val_set: Optional[bool] = None,
                                       ensemble_inference_on_test_set: Optional[bool] = None,
                                       model_proc: ModelProcessing = ModelProcessing.DEFAULT) -> None:
    """
    Test that running inference produces the expected output metrics, files, folders, and calls to upload_folder.

    :param test_output_dirs: Test output directories.
    :param perform_cross_validation: Whether to test with cross validation.
    :param inference_on_train_set: Override for inference on train data sets.
    :param inference_on_val_set: Override for inference on validation data sets.
    :param inference_on_test_set: Override for inference on test data sets.
    :param ensemble_inference_on_train_set: Override for ensemble inference on train data sets.
    :param ensemble_inference_on_val_set: Override for ensemble inference on validation data sets.
    :param ensemble_inference_on_test_set: Override for ensemble inference on test data sets.
    :param model_proc: Model processing to test.
    :return: None.
    """
    dummy_model = DummyModel()
    config = PassThroughModel()
    # Copy settings from DummyModel
    config.image_channels = dummy_model.image_channels
    config.ground_truth_ids = dummy_model.ground_truth_ids
    config.ground_truth_ids_display_names = dummy_model.ground_truth_ids_display_names
    config.colours = dummy_model.colours
    config.fill_holes = dummy_model.fill_holes
    config.roi_interpreted_types = dummy_model.roi_interpreted_types
    config.test_crop_size = (16, 16, 16)
    config.number_of_cross_validation_splits = 2 if perform_cross_validation else 0
    config.inference_on_train_set = inference_on_train_set
    config.inference_on_val_set = inference_on_val_set
    config.inference_on_test_set = inference_on_test_set
    config.ensemble_inference_on_train_set = ensemble_inference_on_train_set
    config.ensemble_inference_on_val_set = ensemble_inference_on_val_set
    config.ensemble_inference_on_test_set = ensemble_inference_on_test_set
    # Plotting crashes with random TCL errors on Windows, disable that for Windows PR builds.
    config.is_plotting_enabled = common_util.is_linux()
    config.set_output_to(test_output_dirs.root_dir)
    train_and_test_data_small_dir = test_output_dirs.root_dir / "train_and_test_data_small"
    config.local_dataset = create_train_and_test_data_small_dataset(config.test_crop_size,
                                                                    full_ml_test_data_path(),
                                                                    "train_and_test_data",
                                                                    train_and_test_data_small_dir,
                                                                    "data")
    checkpoint_path = config.checkpoint_folder / LAST_CHECKPOINT_FILE_NAME_WITH_SUFFIX
    create_model_and_store_checkpoint(config, checkpoint_path)
    checkpoint_handler = get_default_checkpoint_handler(model_config=config,
                                                        project_root=test_output_dirs.root_dir)
    checkpoint_handler.additional_training_done()
    mock_upload_path = test_output_dirs.root_dir / "mock_upload"
    mock_upload_path.mkdir()
    run = create_mock_run(mock_upload_path, config)
    azure_config = Mock(name='mock_azure_config')
    azure_config.fetch_run.return_value = run
    runner = MLRunner(model_config=config, azure_config=azure_config)
    with mock.patch("InnerEye.ML.model_testing.PARENT_RUN_CONTEXT", run):
        metrics = runner.model_inference_train_and_test(
            checkpoint_paths=checkpoint_handler.get_checkpoints_to_test(),
            model_proc=model_proc)
        if model_proc == ModelProcessing.ENSEMBLE_CREATION:
            # Create a fake ensemble dataset.csv
            dataset_df = create_dataset_df()
            dataset_df.to_csv(config.outputs_folder / DATASET_CSV_FILE_NAME)
            with mock.patch.object(PlotCrossValidationConfig, 'azure_config', return_value=azure_config):
                with mock.patch("InnerEye.Azure.azure_util.PARENT_RUN_CONTEXT", run):
                    with mock.patch("InnerEye.ML.run_ml.PARENT_RUN_CONTEXT", run):
                        runner.plot_cross_validation_and_upload_results()
                        runner.generate_report(ModelProcessing.ENSEMBLE_CREATION)
    if model_proc == ModelProcessing.DEFAULT:
        named_metrics = {
            ModelExecutionMode.TRAIN: inference_on_train_set,
            ModelExecutionMode.TEST: inference_on_test_set,
            ModelExecutionMode.VAL: inference_on_val_set
        }
    else:
        named_metrics = {
            ModelExecutionMode.TRAIN: ensemble_inference_on_train_set,
            ModelExecutionMode.TEST: ensemble_inference_on_test_set,
            ModelExecutionMode.VAL: ensemble_inference_on_val_set
        }
    error = ''
    expected_upload_folder_count = 0
    for mode, flag in named_metrics.items():
        if mode in metrics:
            metric = metrics[mode]
            assert isinstance(metric, InferenceMetricsForSegmentation)
        if flag is None:
            # No override supplied, calculate the expected default:
            if model_proc == ModelProcessing.DEFAULT:
                if not perform_cross_validation:
                    # If a "normal" run then default to inference on the test set only.
                    flag = mode == ModelExecutionMode.TEST
                else:
                    # If an ensemble child then default to never.
                    flag = False
            else:
                # If an ensemble then default to test only.
                flag = mode == ModelExecutionMode.TEST
        if mode in metrics and not flag:
            error = error + f"Error: {mode.value} cannot be not None."
        elif mode not in metrics and flag:
            error = error + f"Error: {mode.value} cannot be None."
        results_folder = config.outputs_folder / get_best_epoch_results_path(mode, model_proc)
        folder_exists = results_folder.is_dir()
        assert folder_exists == flag
        if flag and model_proc == ModelProcessing.ENSEMBLE_CREATION:
            expected_upload_folder_count = expected_upload_folder_count + 1
            expected_name = get_best_epoch_results_path(mode, ModelProcessing.DEFAULT)
            run.upload_folder.assert_any_call(name=str(expected_name), path=str(results_folder))
    if len(error):
        raise ValueError(error)
    if model_proc == ModelProcessing.ENSEMBLE_CREATION:
        # The report should have been mock uploaded
        expected_upload_folder_count = expected_upload_folder_count + 1
    assert run.upload_folder.call_count == expected_upload_folder_count

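# Illustrative only: a minimal sketch of how the helper above could be driven from a test. The test name
# and parametrize values are assumptions; with all inference overrides left at None, the defaults computed
# inside run_model_inference_train_and_test are what gets checked.
@pytest.mark.parametrize("perform_cross_validation", [True, False])
def test_model_inference_train_and_test_default(test_output_dirs: OutputFolderForTests,
                                                perform_cross_validation: bool) -> None:
    run_model_inference_train_and_test(test_output_dirs, perform_cross_validation)
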
def test_model_test(test_output_dirs: OutputFolderForTests) -> None:
    train_and_test_data_dir = full_ml_test_data_path("train_and_test_data")
    seed_everything(42)
    config = DummyModel()
    config.set_output_to(test_output_dirs.root_dir)
    placeholder_dataset_id = "place_holder_dataset_id"
    config.azure_dataset_id = placeholder_dataset_id
    transform = config.get_full_image_sample_transforms().test
    df = pd.read_csv(full_ml_test_data_path(DATASET_CSV_FILE_NAME))
    df = df[df.subject.isin([1, 2])]
    # noinspection PyTypeHints
    config._datasets_for_inference = \
        {ModelExecutionMode.TEST: FullImageDataset(config, df, full_image_sample_transforms=transform)}  # type: ignore
    execution_mode = ModelExecutionMode.TEST
    checkpoint_handler = get_default_checkpoint_handler(model_config=config,
                                                        project_root=test_output_dirs.root_dir)
    # Mimic the behaviour that checkpoints are downloaded from blob storage into the checkpoints folder.
    create_model_and_store_checkpoint(config, config.checkpoint_folder / BEST_CHECKPOINT_FILE_NAME_WITH_SUFFIX)
    checkpoint_handler.additional_training_done()
    inference_results = model_testing.segmentation_model_test(config,
                                                              data_split=execution_mode,
                                                              checkpoint_handler=checkpoint_handler)
    epoch_dir = config.outputs_folder / get_best_epoch_results_path(execution_mode)
    assert inference_results.metrics == pytest.approx(0.66606902, abs=1e-6)
    assert config.outputs_folder.is_dir()
    assert epoch_dir.is_dir()
    patient1 = io_util.load_nifti_image(train_and_test_data_dir / "id1_channel1.nii.gz")
    patient2 = io_util.load_nifti_image(train_and_test_data_dir / "id2_channel1.nii.gz")
    assert_file_contains_string(epoch_dir / DATASET_ID_FILE, placeholder_dataset_id)
    assert_file_contains_string(epoch_dir / GROUND_TRUTH_IDS_FILE, "region")
    assert_text_files_match(epoch_dir / model_testing.SUBJECT_METRICS_FILE_NAME,
                            train_and_test_data_dir / model_testing.SUBJECT_METRICS_FILE_NAME)
    assert_text_files_match(epoch_dir / model_testing.METRICS_AGGREGATES_FILE,
                            train_and_test_data_dir / model_testing.METRICS_AGGREGATES_FILE)
    # Plotting results vary between platforms. Can only check if the file is generated, but not its contents.
    assert (epoch_dir / model_testing.BOXPLOT_FILE).exists()
    assert_nifti_content(epoch_dir / "001" / "posterior_region.nii.gz", get_image_shape(patient1),
                         patient1.header, [137], np.ubyte)
    assert_nifti_content(epoch_dir / "002" / "posterior_region.nii.gz", get_image_shape(patient2),
                         patient2.header, [137], np.ubyte)
    assert_nifti_content(epoch_dir / "001" / DEFAULT_RESULT_IMAGE_NAME, get_image_shape(patient1),
                         patient1.header, [1], np.ubyte)
    assert_nifti_content(epoch_dir / "002" / DEFAULT_RESULT_IMAGE_NAME, get_image_shape(patient2),
                         patient2.header, [1], np.ubyte)
    assert_nifti_content(epoch_dir / "001" / "posterior_background.nii.gz", get_image_shape(patient1),
                         patient1.header, [117], np.ubyte)
    assert_nifti_content(epoch_dir / "002" / "posterior_background.nii.gz", get_image_shape(patient2),
                         patient2.header, [117], np.ubyte)
    thumbnails_folder = epoch_dir / model_testing.THUMBNAILS_FOLDER
    assert thumbnails_folder.is_dir()
    png_files = list(thumbnails_folder.glob("*.png"))
    overlays = [f for f in png_files if "_region_slice_" in str(f)]
    assert len(overlays) == len(df.subject.unique()), "There should be one overlay/contour file per subject"
    # Writing dataset.csv normally happens at the beginning of training,
    # but this test reads off a saved checkpoint file.
    # Dataset.csv must be present for plot_cross_validation.
    config.write_dataset_files()
    # Test if the metrics files can be picked up correctly by the cross validation code
    config_and_files = get_config_and_results_for_offline_runs(config)
    result_files = config_and_files.files
    assert len(result_files) == 1
    for file in result_files:
        assert file.execution_mode == execution_mode
        assert file.dataset_csv_file is not None
        assert file.dataset_csv_file.exists()
        assert file.metrics_file is not None
        assert file.metrics_file.exists()

def inference_identity(test_output_dirs: OutputFolderForTests,
                       image_size: Any = (4, 5, 8),
                       crop_size: Any = (5, 5, 5),
                       shrink_by: Any = (0, 0, 0),
                       num_classes: int = 5,
                       create_mask: bool = True,
                       extract_largest_foreground_connected_component: bool = False,
                       is_ensemble: bool = False,
                       posterior_smoothing_mm: Any = None) -> None:
    """
    Test to make sure the inference pipeline is identity preserving, i.e. we can recreate deterministic
    model output, ensuring the patching and stitching is robust.
    """
    # fix random seed
    np.random.seed(0)
    ground_truth_ids = list(map(str, range(num_classes)))
    # image to run inference on: The mock model passes the input image through, hence the input
    # image must have as many channels as we have classes (plus background), such that the output is
    # also a valid posterior.
    num_channels = num_classes + 1
    image_channels = np.random.randn(num_channels, *list(image_size))
    # create a random mask if required (int rather than the removed np.int alias)
    mask = np.round(np.random.uniform(size=image_size)).astype(int) if create_mask else None
    config = InferenceIdentityModel(shrink_by=shrink_by)
    config.crop_size = crop_size
    config.test_crop_size = crop_size
    config.image_channels = list(map(str, range(num_channels)))
    config.ground_truth_ids = ground_truth_ids
    config.posterior_smoothing_mm = posterior_smoothing_mm

    # We have to set largest_connected_component_foreground_classes after creating the model config,
    # because this parameter is not overridable and hence will not be set by GenericConfig's constructor.
    if extract_largest_foreground_connected_component:
        config.largest_connected_component_foreground_classes = [(c, None) for c in ground_truth_ids]

    # set expected posteriors
    expected_posteriors = torch.nn.functional.softmax(torch.tensor(image_channels), dim=0).numpy()
    # apply the mask if required
    if mask is not None:
        expected_posteriors = image_util.apply_mask_to_posteriors(expected_posteriors, mask)
    if posterior_smoothing_mm is not None:
        expected_posteriors = image_util.gaussian_smooth_posteriors(
            posteriors=expected_posteriors,
            kernel_size_mm=posterior_smoothing_mm,
            voxel_spacing_mm=(1, 1, 1))

    # compute expected segmentation
    expected_segmentation = image_util.posteriors_to_segmentation(expected_posteriors)
    if extract_largest_foreground_connected_component:
        largest_component = image_util.extract_largest_foreground_connected_component(
            multi_label_array=expected_segmentation)
        # make sure the test data is accurate by checking if more than one component exists
        assert not np.array_equal(largest_component, expected_segmentation)
        expected_segmentation = largest_component

    # instantiate the model
    checkpoint = test_output_dirs.root_dir / "checkpoint.ckpt"
    create_model_and_store_checkpoint(config, checkpoint_path=checkpoint)

    # create single or ensemble inference pipeline
    inference_pipeline = InferencePipeline.create_from_checkpoint(path_to_checkpoint=checkpoint,
                                                                  model_config=config)
    assert inference_pipeline is not None
    full_image_inference_pipeline = EnsemblePipeline([inference_pipeline], config) \
        if is_ensemble else inference_pipeline

    # compute full image inference results
    inference_result = full_image_inference_pipeline \
        .predict_and_post_process_whole_image(image_channels=image_channels,
                                              mask=mask,
                                              voxel_spacing_mm=(1, 1, 1))

    # Segmentation must have the same size as the input image
    assert inference_result.segmentation.shape == image_size
    assert inference_result.posteriors.shape == (num_classes + 1,) + image_size

    # check that the posteriors and segmentations are as expected. Flatten to a list so that the error
    # messages are more informative.
    assert np.allclose(inference_result.posteriors, expected_posteriors)
    assert np.array_equal(inference_result.segmentation, expected_segmentation)

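# Illustrative parametrization for the partial-ground-truth variant of test_model_test below (assumed
# values): cover all combinations of the two flags. The actual values used in the test suite may differ.
@pytest.mark.parametrize("use_partial_ground_truth", [True, False])
@pytest.mark.parametrize("allow_partial_ground_truth", [True, False])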
def test_model_test(test_output_dirs: OutputFolderForTests,
                    use_partial_ground_truth: bool,
                    allow_partial_ground_truth: bool) -> None:
    """
    Check the CSVs (and image files) output by InnerEye.ML.model_testing.segmentation_model_test.

    :param test_output_dirs: The fixture in conftest.py.
    :param use_partial_ground_truth: Whether to remove some ground truth labels from some test users.
    :param allow_partial_ground_truth: What to set the allow_incomplete_labels flag to.
    """
    train_and_test_data_dir = full_ml_test_data_path("train_and_test_data")
    seed_everything(42)
    config = DummyModel()
    config.allow_incomplete_labels = allow_partial_ground_truth
    config.set_output_to(test_output_dirs.root_dir)
    placeholder_dataset_id = "place_holder_dataset_id"
    config.azure_dataset_id = placeholder_dataset_id
    transform = config.get_full_image_sample_transforms().test
    df = pd.read_csv(full_ml_test_data_path(DATASET_CSV_FILE_NAME))

    if use_partial_ground_truth:
        config.check_exclusive = False
        config.ground_truth_ids = ["region", "region_1"]
        # As in Tests.ML.pipelines.test.inference.test_evaluate_model_predictions patients 3, 4,
        # and 5 are in the test dataset with:
        # Patient 3 has one missing ground truth channel: "region"
        df = df[df["subject"].ne(3) | df["channel"].ne("region")]
        # Patient 4 has all missing ground truth channels: "region", "region_1"
        df = df[df["subject"].ne(4) | df["channel"].ne("region")]
        df = df[df["subject"].ne(4) | df["channel"].ne("region_1")]
        # Patient 5 has no missing ground truth channels.
        config.dataset_data_frame = df
        df = df[df.subject.isin([3, 4, 5])]
        config.train_subject_ids = ['1', '2']
        config.test_subject_ids = ['3', '4', '5']
        config.val_subject_ids = ['6', '7']
    else:
        df = df[df.subject.isin([1, 2])]

    if use_partial_ground_truth and not allow_partial_ground_truth:
        with pytest.raises(ValueError) as value_error:
            # noinspection PyTypeHints
            config._datasets_for_inference = {
                ModelExecutionMode.TEST: FullImageDataset(config, df, full_image_sample_transforms=transform)
            }  # type: ignore
        assert "Patient 3 does not have channel 'region'" in str(value_error.value)
        return
    else:
        # noinspection PyTypeHints
        config._datasets_for_inference = {
            ModelExecutionMode.TEST: FullImageDataset(config, df, full_image_sample_transforms=transform)
        }  # type: ignore

    execution_mode = ModelExecutionMode.TEST
    checkpoint_handler = get_default_checkpoint_handler(model_config=config,
                                                        project_root=test_output_dirs.root_dir)
    # Mimic the behaviour that checkpoints are downloaded from blob storage into the checkpoints folder.
    create_model_and_store_checkpoint(config, config.checkpoint_folder / LAST_CHECKPOINT_FILE_NAME_WITH_SUFFIX)
    checkpoint_handler.additional_training_done()
    inference_results = model_testing.segmentation_model_test(
        config,
        execution_mode=execution_mode,
        checkpoint_paths=checkpoint_handler.get_checkpoints_to_test())
    epoch_dir = config.outputs_folder / get_best_epoch_results_path(execution_mode)
    total_num_patients_column_name = f"total_{MetricsFileColumns.Patient.value}".lower()
    if not total_num_patients_column_name.endswith("s"):
        total_num_patients_column_name += "s"

    if use_partial_ground_truth:
        num_subjects = len(pd.unique(df["subject"]))
        if allow_partial_ground_truth:
            assert csv_column_contains_value(
                csv_file_path=epoch_dir / METRICS_AGGREGATES_FILE,
                column_name=total_num_patients_column_name,
                value=num_subjects,
                contains_only_value=True)
            assert csv_column_contains_value(
                csv_file_path=epoch_dir / SUBJECT_METRICS_FILE_NAME,
                column_name=MetricsFileColumns.Dice.value,
                value='',
                contains_only_value=False)
    else:
        aggregates_df = pd.read_csv(epoch_dir / METRICS_AGGREGATES_FILE)
        assert total_num_patients_column_name not in aggregates_df.columns  # Only added if using partial ground truth
        assert not csv_column_contains_value(
            csv_file_path=epoch_dir / SUBJECT_METRICS_FILE_NAME,
            column_name=MetricsFileColumns.Dice.value,
            value='',
            contains_only_value=False)
        assert inference_results.metrics == pytest.approx(0.66606902, abs=1e-6)
        assert config.outputs_folder.is_dir()
        assert epoch_dir.is_dir()
        patient1 = io_util.load_nifti_image(train_and_test_data_dir / "id1_channel1.nii.gz")
        patient2 = io_util.load_nifti_image(train_and_test_data_dir / "id2_channel1.nii.gz")
        assert_file_contains_string(epoch_dir / DATASET_ID_FILE, placeholder_dataset_id)
        assert_file_contains_string(epoch_dir / GROUND_TRUTH_IDS_FILE, "region")
        assert_text_files_match(epoch_dir / model_testing.SUBJECT_METRICS_FILE_NAME,
                                train_and_test_data_dir / model_testing.SUBJECT_METRICS_FILE_NAME)
        assert_text_files_match(epoch_dir / model_testing.METRICS_AGGREGATES_FILE,
                                train_and_test_data_dir / model_testing.METRICS_AGGREGATES_FILE)
        # Plotting results vary between platforms. Can only check if the file is generated, but not its contents.
        assert (epoch_dir / model_testing.BOXPLOT_FILE).exists()
        assert_nifti_content(epoch_dir / "001" / "posterior_region.nii.gz", get_image_shape(patient1),
                             patient1.header, [137], np.ubyte)
        assert_nifti_content(epoch_dir / "002" / "posterior_region.nii.gz", get_image_shape(patient2),
                             patient2.header, [137], np.ubyte)
        assert_nifti_content(epoch_dir / "001" / DEFAULT_RESULT_IMAGE_NAME, get_image_shape(patient1),
                             patient1.header, [1], np.ubyte)
        assert_nifti_content(epoch_dir / "002" / DEFAULT_RESULT_IMAGE_NAME, get_image_shape(patient2),
                             patient2.header, [1], np.ubyte)
        assert_nifti_content(epoch_dir / "001" / "posterior_background.nii.gz", get_image_shape(patient1),
                             patient1.header, [117], np.ubyte)
        assert_nifti_content(epoch_dir / "002" / "posterior_background.nii.gz", get_image_shape(patient2),
                             patient2.header, [117], np.ubyte)
        thumbnails_folder = epoch_dir / model_testing.THUMBNAILS_FOLDER
        assert thumbnails_folder.is_dir()
        png_files = list(thumbnails_folder.glob("*.png"))
        overlays = [f for f in png_files if "_region_slice_" in str(f)]
        assert len(overlays) == len(df.subject.unique()), "There should be one overlay/contour file per subject"
        # Writing dataset.csv normally happens at the beginning of training,
        # but this test reads off a saved checkpoint file.
        # Dataset.csv must be present for plot_cross_validation.
        config.write_dataset_files()
        # Test if the metrics files can be picked up correctly by the cross validation code
        config_and_files = get_config_and_results_for_offline_runs(config)
        result_files = config_and_files.files
        assert len(result_files) == 1
        for file in result_files:
            assert file.execution_mode == execution_mode
            assert file.dataset_csv_file is not None
            assert file.dataset_csv_file.exists()
            assert file.metrics_file is not None
            assert file.metrics_file.exists()