def test_save_outliers(test_config: PlotCrossValidationConfig, test_output_dirs: OutputFolderForTests) -> None:
    """
    Check that the outlier file written for the validation split matches the stored reference file,
    both for the full metrics table and for a table without the institution and series columns.
    """
    test_config.outputs_directory = test_output_dirs.root_dir
    test_config.outlier_range = 0
    assert test_config.run_recovery_id
    val_mode = ModelExecutionMode.VAL

    # First pass: metrics exactly as returned for the validation split.
    full_metrics = {val_mode: _get_metrics_df(test_config.run_recovery_id, val_mode)}
    save_outliers(test_config, full_metrics, test_config.outputs_directory)
    outliers_name = f"{val_mode.value}_outliers.txt"
    assert_text_files_match(full_file=test_config.outputs_directory / outliers_name,
                            expected_file=full_ml_test_data_path(outliers_name))

    # Second pass: without the CSV_INSTITUTION_HEADER and CSV_SERIES_HEADER columns, which will be
    # missing in institutions' environments.
    metrics_df = _get_metrics_df(test_config.run_recovery_id, val_mode)
    pruned_metrics = {
        val_mode: metrics_df.drop(columns=[CSV_INSTITUTION_HEADER, CSV_SERIES_HEADER], errors="ignore")
    }
    save_outliers(test_config, pruned_metrics, test_config.outputs_directory)
    # The output is read from the same file name as before; only the reference file differs.
    pruned_reference_name = f"{val_mode.value}_outliers_pruned.txt"
    assert_text_files_match(full_file=test_config.outputs_directory / outliers_name,
                            expected_file=full_ml_test_data_path(pruned_reference_name))
def test_save_outliers(test_config_ensemble: PlotCrossValidationConfig,
                       test_output_dirs: OutputFolderForTests) -> None:
    """
    Check that the outlier file written for the validation split matches the stored reference file.
    """
    test_config_ensemble.outputs_directory = test_output_dirs.root_dir
    test_config_ensemble.outlier_range = 0
    val_mode = ModelExecutionMode.VAL
    # Only the validation split is exercised here.
    metrics_per_split = {val_mode: _get_metrics_df(val_mode)}
    save_outliers(test_config_ensemble, metrics_per_split, test_config_ensemble.outputs_directory)
    outliers_name = f"{val_mode.value}_outliers.txt"
    assert_text_files_match(full_file=test_config_ensemble.outputs_directory / outliers_name,
                            expected_file=full_ml_test_data_path(outliers_name))
def test_metrics_file(test_output_dirs: OutputFolderForTests) -> None:
    """
    Test if metrics files with Dice scores are written as expected, and that boxplots can be
    generated from the collected metrics.
    """
    folder = test_output_dirs.make_sub_dir("test_metrics_file")

    def new_file(suffix: str) -> Path:
        """Return the path for `suffix` inside the test folder, deleting any file already there."""
        target = folder / suffix
        if target.is_file():
            target.unlink()
        return target

    writer = MetricsPerPatientWriter()

    def save_dice_boxplot(file_name: str) -> None:
        """Draw the per-structure Dice boxplot from the current metrics and save it as `file_name`."""
        boxplot_per_structure(writer.to_data_frame(),
                              column_name=MetricsFileColumns.DiceNumeric.value,
                              title="Dice score")
        resize_and_save(5, 4, new_file(file_name))

    p1, p2, p3 = "Patient1", "Patient2", "Patient3"
    liver, kidney = "liver", "kidney"
    # Rows are added deliberately out of order. In the expected CSV below, "kidney" comes before
    # "liver", and within each structure the row with the lowest Dice score comes first
    # (Patient3 for kidney, the 0.4 row of Patient1 for liver).
    rows = [
        (p1, liver, 1.0, 1.0, 0.5),
        (p1, liver, 0.4, 1.0, 0.4),
        (p2, liver, 0.8, 1.0, 0.3),
        (p2, kidney, 0.7, 1.0, 0.2),
        (p3, kidney, 0.4, 1.0, 0.1),
    ]
    for row in rows:
        writer.add(*row)

    metrics_file = new_file("metrics_file.csv")
    writer.to_csv(Path(metrics_file))
    # Sorting should be first by structure name alphabetically, then Dice with lowest scores first.
    expected_csv = ("Patient,Structure,Dice,HausdorffDistance_mm,MeanDistance_mm\n"
                    "Patient3,kidney,0.400,1.000,0.100\n"
                    "Patient2,kidney,0.700,1.000,0.200\n"
                    "Patient1,liver,0.400,1.000,0.400\n"
                    "Patient2,liver,0.800,1.000,0.300\n"
                    "Patient1,liver,1.000,1.000,0.500\n")
    assert_file_contains_string(metrics_file, expected_csv)

    # The aggregates file is compared against the stored reference file of the same name.
    aggregates_file = new_file(METRICS_AGGREGATES_FILE)
    writer.save_aggregates_to_csv(Path(aggregates_file))
    assert_text_files_match(Path(aggregates_file), full_ml_test_data_path() / METRICS_AGGREGATES_FILE)

    # Boxplot for the two structures added so far.
    save_dice_boxplot("boxplot_2class.png")
    plt.clf()

    # Add four more structures and plot again, giving six structures in total.
    for structure, dice in [("lung", 0.5), ("foo", 0.9), ("bar", 0.9), ("baz", 0.9)]:
        writer.add(p1, structure, dice, 2.0, 1.0)
    save_dice_boxplot("boxplot_6class.png")
def test_model_test(test_output_dirs: OutputFolderForTests) -> None:
    """
    Run segmentation_model_test on the dummy model for subjects 1 and 2 using stored checkpoints, then
    check the metric value, the files written to the epoch folder, and that the results can be picked
    up by the cross validation code.

    :param test_output_dirs: Provides the root folder that all outputs of this test are written to.
    """
    epoch = 1
    execution_mode = ModelExecutionMode.TEST
    placeholder_dataset_id = "place_holder_dataset_id"
    train_and_test_data_dir = full_ml_test_data_path("train_and_test_data")

    config = DummyModel()
    config.set_output_to(test_output_dirs.root_dir)
    config.num_epochs = epoch
    assert config.get_test_epochs() == [epoch]
    config.azure_dataset_id = placeholder_dataset_id
    transform = config.get_full_image_sample_transforms().test

    # Restrict the dataset to the two subjects for which reference outputs are checked below.
    full_df = pd.read_csv(full_ml_test_data_path(DATASET_CSV_FILE_NAME))
    df = full_df[full_df.subject.isin([1, 2])]
    test_dataset = FullImageDataset(config, df, full_image_sample_transforms=transform)
    # noinspection PyTypeHints
    config._datasets_for_inference = {execution_mode: test_dataset}  # type: ignore

    checkpoint_handler = get_default_checkpoint_handler(model_config=config,
                                                        project_root=test_output_dirs.root_dir)
    # Mimic the behaviour that checkpoints are downloaded from blob storage into the checkpoints folder.
    stored_checkpoints = full_ml_test_data_path("checkpoints")
    shutil.copytree(str(stored_checkpoints), str(config.checkpoint_folder))
    checkpoint_handler.additional_training_done()
    inference_results = model_testing.segmentation_model_test(config,
                                                              data_split=execution_mode,
                                                              checkpoint_handler=checkpoint_handler)
    epoch_dir = config.outputs_folder / get_epoch_results_path(epoch, execution_mode)
    assert inference_results.epochs[epoch] == pytest.approx(0.66606902, abs=1e-6)
    assert config.outputs_folder.is_dir()
    assert epoch_dir.is_dir()

    patient1 = io_util.load_nifti_image(train_and_test_data_dir / "id1_channel1.nii.gz")
    patient2 = io_util.load_nifti_image(train_and_test_data_dir / "id2_channel1.nii.gz")
    assert_file_contains_string(epoch_dir / DATASET_ID_FILE, placeholder_dataset_id)
    assert_file_contains_string(epoch_dir / GROUND_TRUTH_IDS_FILE, "region")
    # The metrics files must match the reference files stored alongside the test data.
    for metrics_name in (model_testing.METRICS_FILE_NAME, model_testing.METRICS_AGGREGATES_FILE):
        assert_text_files_match(epoch_dir / metrics_name, train_and_test_data_dir / metrics_name)
    # Plotting results vary between platforms. Can only check if the file is generated, but not its contents.
    assert (epoch_dir / model_testing.BOXPLOT_FILE).exists()

    # Each output image is checked against the reference image of its subject, together with the
    # expected value list passed to assert_nifti_content.
    subjects = [("001", patient1), ("002", patient2)]
    expected_images = [
        ("posterior_region.nii.gz", 136),
        (DEFAULT_RESULT_IMAGE_NAME, 1),
        ("posterior_background.nii.gz", 118),
    ]
    for image_name, expected_value in expected_images:
        for subject_folder, reference in subjects:
            assert_nifti_content(epoch_dir / subject_folder / image_name,
                                 get_image_shape(reference),
                                 reference.header,
                                 [expected_value],
                                 np.ubyte)

    thumbnails_folder = epoch_dir / model_testing.THUMBNAILS_FOLDER
    assert thumbnails_folder.is_dir()
    png_files = list(thumbnails_folder.glob("*.png"))
    overlays = [png for png in png_files if "_region_slice_" in str(png)]
    assert len(overlays) == len(df.subject.unique()), "There should be one overlay/contour file per subject"

    # Writing dataset.csv normally happens at the beginning of training,
    # but this test reads off a saved checkpoint file.
    # Dataset.csv must be present for plot_cross_validation.
    config.write_dataset_files()
    # Test if the metrics files can be picked up correctly by the cross validation code
    result_files = get_config_and_results_for_offline_runs(config).files
    assert len(result_files) == 1
    for result_file in result_files:
        assert result_file.execution_mode == execution_mode
        assert result_file.dataset_csv_file is not None
        assert result_file.dataset_csv_file.exists()
        assert result_file.metrics_file is not None
        assert result_file.metrics_file.exists()
def test_model_test(test_output_dirs: OutputFolderForTests,
                    use_partial_ground_truth: bool,
                    allow_partial_ground_truth: bool) -> None:
    """
    Check the CSVs (and image files) output by InnerEye.ML.model_testing.segmentation_model_test

    :param test_output_dirs: The fixture in conftest.py
    :param use_partial_ground_truth: Whether to remove some ground truth labels from some test users
    :param allow_partial_ground_truth: What to set the allow_incomplete_labels flag to
    """
    execution_mode = ModelExecutionMode.TEST
    placeholder_dataset_id = "place_holder_dataset_id"
    train_and_test_data_dir = full_ml_test_data_path("train_and_test_data")
    seed_everything(42)

    config = DummyModel()
    config.allow_incomplete_labels = allow_partial_ground_truth
    config.set_output_to(test_output_dirs.root_dir)
    config.azure_dataset_id = placeholder_dataset_id
    transform = config.get_full_image_sample_transforms().test
    df = pd.read_csv(full_ml_test_data_path(DATASET_CSV_FILE_NAME))

    if not use_partial_ground_truth:
        df = df[df.subject.isin([1, 2])]
    else:
        config.check_exclusive = False
        config.ground_truth_ids = ["region", "region_1"]
        # As in Tests.ML.pipelines.test.inference.test_evaluate_model_predictions patients 3, 4,
        # and 5 are in the test dataset with:
        # Patient 3 has one missing ground truth channel: "region"
        df = df[df["subject"].ne(3) | df["channel"].ne("region")]
        # Patient 4 has all missing ground truth channels: "region", "region_1"
        df = df[df["subject"].ne(4) | df["channel"].ne("region")]
        df = df[df["subject"].ne(4) | df["channel"].ne("region_1")]
        # Patient 5 has no missing ground truth channels.
        # The config receives the frame with all subjects; only the test subjects are kept locally.
        config.dataset_data_frame = df
        df = df[df.subject.isin([3, 4, 5])]
        config.train_subject_ids = ['1', '2']
        config.test_subject_ids = ['3', '4', '5']
        config.val_subject_ids = ['6', '7']

    def build_inference_datasets() -> dict:
        """Create the test-mode dataset mapping from the current config, data frame and transform."""
        return {execution_mode: FullImageDataset(config, df, full_image_sample_transforms=transform)}

    if use_partial_ground_truth and not allow_partial_ground_truth:
        # Incomplete labels that are not allowed must make the dataset creation fail; nothing else to check.
        with pytest.raises(ValueError) as value_error:
            # noinspection PyTypeHints
            config._datasets_for_inference = build_inference_datasets()  # type: ignore
        assert "Patient 3 does not have channel 'region'" in str(value_error.value)
        return

    # noinspection PyTypeHints
    config._datasets_for_inference = build_inference_datasets()  # type: ignore
    checkpoint_handler = get_default_checkpoint_handler(model_config=config,
                                                        project_root=test_output_dirs.root_dir)
    # Mimic the behaviour that checkpoints are downloaded from blob storage into the checkpoints folder.
    create_model_and_store_checkpoint(config, config.checkpoint_folder / LAST_CHECKPOINT_FILE_NAME_WITH_SUFFIX)
    checkpoint_handler.additional_training_done()
    inference_results = model_testing.segmentation_model_test(
        config,
        execution_mode=execution_mode,
        checkpoint_paths=checkpoint_handler.get_checkpoints_to_test())
    epoch_dir = config.outputs_folder / get_best_epoch_results_path(execution_mode)

    # Column name is "total_<patient column>" in lower case, with an "s" appended if not already there.
    total_num_patients_column_name = f"total_{MetricsFileColumns.Patient.value}".lower()
    if not total_num_patients_column_name.endswith("s"):
        total_num_patients_column_name = total_num_patients_column_name + "s"

    if use_partial_ground_truth:
        num_subjects = len(pd.unique(df["subject"]))
        if allow_partial_ground_truth:
            # The aggregates must report the full subject count in every row, and the per-subject
            # metrics must contain at least one empty Dice entry.
            assert csv_column_contains_value(csv_file_path=epoch_dir / METRICS_AGGREGATES_FILE,
                                             column_name=total_num_patients_column_name,
                                             value=num_subjects,
                                             contains_only_value=True)
            assert csv_column_contains_value(csv_file_path=epoch_dir / SUBJECT_METRICS_FILE_NAME,
                                             column_name=MetricsFileColumns.Dice.value,
                                             value='',
                                             contains_only_value=False)
        return

    # From here on: complete ground truth for subjects 1 and 2, compared against stored reference outputs.
    aggregates_df = pd.read_csv(epoch_dir / METRICS_AGGREGATES_FILE)
    assert total_num_patients_column_name not in aggregates_df.columns  # Only added if using partial ground truth
    assert not csv_column_contains_value(csv_file_path=epoch_dir / SUBJECT_METRICS_FILE_NAME,
                                         column_name=MetricsFileColumns.Dice.value,
                                         value='',
                                         contains_only_value=False)
    assert inference_results.metrics == pytest.approx(0.66606902, abs=1e-6)
    assert config.outputs_folder.is_dir()
    assert epoch_dir.is_dir()

    patient1 = io_util.load_nifti_image(train_and_test_data_dir / "id1_channel1.nii.gz")
    patient2 = io_util.load_nifti_image(train_and_test_data_dir / "id2_channel1.nii.gz")
    assert_file_contains_string(epoch_dir / DATASET_ID_FILE, placeholder_dataset_id)
    assert_file_contains_string(epoch_dir / GROUND_TRUTH_IDS_FILE, "region")
    # The metrics files must match the reference files stored alongside the test data.
    for metrics_name in (model_testing.SUBJECT_METRICS_FILE_NAME, model_testing.METRICS_AGGREGATES_FILE):
        assert_text_files_match(epoch_dir / metrics_name, train_and_test_data_dir / metrics_name)
    # Plotting results vary between platforms. Can only check if the file is generated, but not its contents.
    assert (epoch_dir / model_testing.BOXPLOT_FILE).exists()

    # Each output image is checked against the reference image of its subject, together with the
    # expected value list passed to assert_nifti_content.
    subjects = [("001", patient1), ("002", patient2)]
    expected_images = [
        ("posterior_region.nii.gz", 137),
        (DEFAULT_RESULT_IMAGE_NAME, 1),
        ("posterior_background.nii.gz", 117),
    ]
    for image_name, expected_value in expected_images:
        for subject_folder, reference in subjects:
            assert_nifti_content(epoch_dir / subject_folder / image_name,
                                 get_image_shape(reference),
                                 reference.header,
                                 [expected_value],
                                 np.ubyte)

    thumbnails_folder = epoch_dir / model_testing.THUMBNAILS_FOLDER
    assert thumbnails_folder.is_dir()
    png_files = list(thumbnails_folder.glob("*.png"))
    overlays = [png for png in png_files if "_region_slice_" in str(png)]
    assert len(overlays) == len(df.subject.unique()), "There should be one overlay/contour file per subject"

    # Writing dataset.csv normally happens at the beginning of training,
    # but this test reads off a saved checkpoint file.
    # Dataset.csv must be present for plot_cross_validation.
    config.write_dataset_files()
    # Test if the metrics files can be picked up correctly by the cross validation code
    result_files = get_config_and_results_for_offline_runs(config).files
    assert len(result_files) == 1
    for result_file in result_files:
        assert result_file.execution_mode == execution_mode
        assert result_file.dataset_csv_file is not None
        assert result_file.dataset_csv_file.exists()
        assert result_file.metrics_file is not None
        assert result_file.metrics_file.exists()