def test_save_file(value: Any, expected: Any) -> None:
    """
    Write lines to a scratch file via io_util.save_lines_to_file and check the file contents.

    :param value: The lines that are handed to io_util.save_lines_to_file.
    :param expected: The string that is passed to assert_file_contains_string for the written file.
    """
    file = full_ml_test_data_path("test.txt")
    try:
        io_util.save_lines_to_file(Path(file), value)
        assert_file_contains_string(file, expected)
    finally:
        # Clean up even when writing or the assertion fails, so that a failing run does not leave
        # the scratch file behind. The existence check keeps a failed write from being masked by
        # a FileNotFoundError raised during cleanup.
        if os.path.exists(str(file)):
            os.remove(str(file))
def test_metrics_file(test_output_dirs: OutputFolderForTests) -> None:
    """
    Check the per-patient metrics CSV, the aggregates CSV and the boxplots written from a
    MetricsPerPatientWriter.

    :param test_output_dirs: Provides the sub-folder that all files of this test are written to.
    """
    folder = test_output_dirs.make_sub_dir("test_metrics_file")

    def new_file(suffix: str) -> Path:
        # Return a path inside the test folder, deleting any file that is already there.
        target = folder / suffix
        if target.is_file():
            target.unlink()
        return target

    writer = MetricsPerPatientWriter()

    def save_dice_boxplot(file_name: str) -> None:
        # Plot the Dice scores currently held by the writer and save the figure at a fixed size.
        boxplot_per_structure(writer.to_data_frame(),
                              column_name=MetricsFileColumns.DiceNumeric.value,
                              title="Dice score")
        resize_and_save(5, 4, new_file(file_name))

    patient1, patient2, patient3 = "Patient1", "Patient2", "Patient3"
    liver, kidney = "liver", "kidney"
    # The rows are deliberately added out of order. In the expected CSV below, "kidney" rows come
    # before "liver" rows, Patient3 is the first kidney row (0.4) and Patient1 is the first
    # liver row (0.4).
    rows = [
        (patient1, liver, 1.0, 1.0, 0.5),
        (patient1, liver, 0.4, 1.0, 0.4),
        (patient2, liver, 0.8, 1.0, 0.3),
        (patient2, kidney, 0.7, 1.0, 0.2),
        (patient3, kidney, 0.4, 1.0, 0.1),
    ]
    for row in rows:
        writer.add(*row)

    metrics_file = new_file("metrics_file.csv")
    writer.to_csv(Path(metrics_file))
    # Sorting should be first by structure name alphabetically, then Dice with lowest scores first.
    expected_lines = [
        "Patient,Structure,Dice,HausdorffDistance_mm,MeanDistance_mm\n",
        "Patient3,kidney,0.400,1.000,0.100\n",
        "Patient2,kidney,0.700,1.000,0.200\n",
        "Patient1,liver,0.400,1.000,0.400\n",
        "Patient2,liver,0.800,1.000,0.300\n",
        "Patient1,liver,1.000,1.000,0.500\n",
    ]
    assert_file_contains_string(metrics_file, "".join(expected_lines))

    # The aggregates are compared against the reference file stored with the test data.
    aggregates_file = new_file(METRICS_AGGREGATES_FILE)
    writer.save_aggregates_to_csv(Path(aggregates_file))
    assert_text_files_match(Path(aggregates_file), full_ml_test_data_path() / METRICS_AGGREGATES_FILE)

    # First boxplot: only the two structures added so far.
    save_dice_boxplot("boxplot_2class.png")
    plt.clf()

    # Second boxplot: four more structures, six in total.
    for structure, dice in (("lung", 0.5), ("foo", 0.9), ("bar", 0.9), ("baz", 0.9)):
        writer.add(patient1, structure, dice, 2.0, 1.0)
    save_dice_boxplot("boxplot_6class.png")
def test_model_test(test_output_dirs: OutputFolderForTests) -> None:
    """
    Run model_testing.segmentation_model_test for a DummyModel on subjects 1 and 2 of the test
    dataset, using checkpoints copied from the test data folder, and check the files that end up
    in the epoch results folder.

    :param test_output_dirs: Provides the root folder that the model config writes its outputs to.
    """
    epoch = 1
    test_mode = ModelExecutionMode.TEST
    dataset_id = "place_holder_dataset_id"
    data_dir = full_ml_test_data_path("train_and_test_data")

    config = DummyModel()
    config.set_output_to(test_output_dirs.root_dir)
    config.num_epochs = epoch
    assert config.get_test_epochs() == [epoch]
    config.azure_dataset_id = dataset_id

    # Restrict inference to subjects 1 and 2 of the dataset.
    test_transform = config.get_full_image_sample_transforms().test
    full_dataset = pd.read_csv(full_ml_test_data_path(DATASET_CSV_FILE_NAME))
    df = full_dataset[full_dataset.subject.isin([1, 2])]
    test_dataset = FullImageDataset(config, df, full_image_sample_transforms=test_transform)
    # noinspection PyTypeHints
    config._datasets_for_inference = {test_mode: test_dataset}  # type: ignore

    checkpoint_handler = get_default_checkpoint_handler(model_config=config,
                                                        project_root=test_output_dirs.root_dir)
    # Mimic the behaviour that checkpoints are downloaded from blob storage into the checkpoints folder.
    shutil.copytree(str(full_ml_test_data_path("checkpoints")), str(config.checkpoint_folder))
    checkpoint_handler.additional_training_done()
    inference_results = model_testing.segmentation_model_test(config,
                                                              data_split=test_mode,
                                                              checkpoint_handler=checkpoint_handler)

    epoch_dir = config.outputs_folder / get_epoch_results_path(epoch, test_mode)
    assert inference_results.epochs[epoch] == pytest.approx(0.66606902, abs=1e-6)
    assert config.outputs_folder.is_dir()
    assert epoch_dir.is_dir()
    patient1 = io_util.load_nifti_image(data_dir / "id1_channel1.nii.gz")
    patient2 = io_util.load_nifti_image(data_dir / "id2_channel1.nii.gz")

    assert_file_contains_string(epoch_dir / DATASET_ID_FILE, dataset_id)
    assert_file_contains_string(epoch_dir / GROUND_TRUTH_IDS_FILE, "region")
    # Both metrics files must match the reference files stored next to the test images.
    for csv_name in (model_testing.METRICS_FILE_NAME, model_testing.METRICS_AGGREGATES_FILE):
        assert_text_files_match(epoch_dir / csv_name, data_dir / csv_name)
    # Plotting results vary between platforms. Can only check if the file is generated, but not its contents.
    assert (epoch_dir / model_testing.BOXPLOT_FILE).exists()

    # Every output image is checked against the shape and header of the subject's input image,
    # and against the single value handed to assert_nifti_content for that file.
    expected_image_values = [
        ("posterior_region.nii.gz", 136),
        (DEFAULT_RESULT_IMAGE_NAME, 1),
        ("posterior_background.nii.gz", 118),
    ]
    subjects = [("001", patient1), ("002", patient2)]
    for image_name, image_value in expected_image_values:
        for subject_folder, source_image in subjects:
            assert_nifti_content(epoch_dir / subject_folder / image_name,
                                 get_image_shape(source_image),
                                 source_image.header,
                                 [image_value],
                                 np.ubyte)

    thumbnails_folder = epoch_dir / model_testing.THUMBNAILS_FOLDER
    assert thumbnails_folder.is_dir()
    overlays = [png for png in thumbnails_folder.glob("*.png") if "_region_slice_" in str(png)]
    assert len(overlays) == len(df.subject.unique()), "There should be one overlay/contour file per subject"

    # Writing dataset.csv normally happens at the beginning of training,
    # but this test reads off a saved checkpoint file.
    # Dataset.csv must be present for plot_cross_validation.
    config.write_dataset_files()
    # Test if the metrics files can be picked up correctly by the cross validation code
    result_files = get_config_and_results_for_offline_runs(config).files
    assert len(result_files) == 1
    for result_file in result_files:
        assert result_file.execution_mode == test_mode
        assert result_file.dataset_csv_file is not None
        assert result_file.dataset_csv_file.exists()
        assert result_file.metrics_file is not None
        assert result_file.metrics_file.exists()
def _test_model_train(output_dirs: OutputFolderForTests,
                      image_channels: Any,
                      ground_truth_ids: Any,
                      no_mask_channel: bool = False) -> None:
    """
    Train a DummyModel via model_training.model_train and check the losses, learning rates and
    output files against stored expectations.

    :param output_dirs: Provides the root folder that the model config writes its outputs to.
    :param image_channels: The value to set as the image_channels field of the model config.
    :param ground_truth_ids: The value to set as the ground_truth_ids field of the model config.
    :param no_mask_channel: If True, mask_id of the model config is set to None. Otherwise, the
        mask_id that the DummyModel comes with is kept.
    """

    def _check_patch_centers(epoch_results: List[MetricsDict], should_equal: bool) -> None:
        # Compare the patch centers of the first epoch against those of every later epoch.
        diagnostics_per_epoch = [m.diagnostics[MetricType.PATCH_CENTER.value] for m in epoch_results]
        patch_centers_epoch1 = diagnostics_per_epoch[0]
        for diagnostic in diagnostics_per_epoch[1:]:
            assert np.array_equal(patch_centers_epoch1, diagnostic) == should_equal

    train_config = DummyModel()
    train_config.local_dataset = base_path
    train_config.set_output_to(output_dirs.root_dir)
    train_config.image_channels = image_channels
    train_config.ground_truth_ids = ground_truth_ids
    train_config.mask_id = None if no_mask_channel else train_config.mask_id
    train_config.random_seed = 42
    train_config.class_weights = [0.5, 0.25, 0.25]
    train_config.store_dataset_sample = True

    expected_train_losses = [0.455538, 0.455213]
    expected_val_losses = [0.455190, 0.455139]
    expected_stats = "Epoch\tLearningRate\tTrainLoss\tTrainDice\tValLoss\tValDice\n" \
                     "1\t1.00e-03\t0.456\t0.242\t0.455\t0.000\n" \
                     "2\t5.36e-04\t0.455\t0.247\t0.455\t0.000"
    expected_learning_rates = [[train_config.l_rate], [5.3589e-4]]
    loss_absolute_tolerance = 1e-3

    checkpoint_handler = get_default_checkpoint_handler(model_config=train_config,
                                                        project_root=Path(output_dirs.root_dir))
    model_training_result = model_training.model_train(train_config, checkpoint_handler=checkpoint_handler)
    assert isinstance(model_training_result, ModelTrainingResults)

    # check to make sure training batches are NOT all the same across epochs
    _check_patch_centers(model_training_result.train_results_per_epoch, should_equal=False)
    # check to make sure validation batches are all the same across epochs
    _check_patch_centers(model_training_result.val_results_per_epoch, should_equal=True)
    assert isinstance(model_training_result.train_results_per_epoch[0], MetricsDict)

    actual_train_losses = [m.get_single_metric(MetricType.LOSS)
                           for m in model_training_result.train_results_per_epoch]
    actual_val_losses = [m.get_single_metric(MetricType.LOSS)
                         for m in model_training_result.val_results_per_epoch]
    # Printed so that the actual values show up in the test log when an assertion below fails.
    print(f"actual_train_losses = {actual_train_losses}")
    print(f"actual_val_losses = {actual_val_losses}")
    assert np.allclose(actual_train_losses, expected_train_losses, atol=loss_absolute_tolerance)
    assert np.allclose(actual_val_losses, expected_val_losses, atol=loss_absolute_tolerance)
    assert np.allclose(model_training_result.learning_rates_per_epoch, expected_learning_rates, rtol=1e-6)

    # check output files/directories
    assert train_config.outputs_folder.is_dir()
    assert train_config.logs_folder.is_dir()

    # The train and val folder should contain Tensorflow event files.
    # Path.glob returns a generator, so it has to be materialized with list() before counting.
    # Wrapping the generator in a list literal always has length 1 and would never inspect
    # the folder contents.
    for split_name in ("train", "val"):
        event_folder = train_config.logs_folder / split_name
        assert event_folder.is_dir()
        assert len(list(event_folder.glob("*"))) == 1

    # Checkpoint folder
    # With these settings, we should see a checkpoint only at epoch 2:
    # That's the last epoch, and there should always be checkpoint at the last epoch)
    assert train_config.save_start_epoch == 1
    assert train_config.save_step_epochs == 100
    assert train_config.num_epochs == 2
    assert train_config.checkpoint_folder.is_dir()
    assert (train_config.checkpoint_folder / ("2" + CHECKPOINT_FILE_SUFFIX)).is_file()
    assert (train_config.outputs_folder / DATASET_CSV_FILE_NAME).is_file()
    assert (train_config.outputs_folder / STORED_CSV_FILE_NAMES[ModelExecutionMode.TRAIN]).is_file()
    assert (train_config.outputs_folder / STORED_CSV_FILE_NAMES[ModelExecutionMode.VAL]).is_file()
    assert_file_contains_string(train_config.outputs_folder / TRAIN_STATS_FILE, expected_stats)

    # Test for saving of example images
    assert train_config.example_images_folder.is_dir()
    example_files = list(train_config.example_images_folder.rglob("*.*"))
    assert len(example_files) == 3 * 2

    # Patch sampling visualization: the folder is expected to hold 3 PNG files for each unit of
    # show_patch_sampling (presumably one per slice orientation and subject - confirm against the
    # code that writes them).
    sampling_folder = train_config.outputs_folder / PATCH_SAMPLING_FOLDER
    assert sampling_folder.is_dir()
    assert train_config.show_patch_sampling > 0
    assert len(list(sampling_folder.rglob("*.png"))) == 3 * train_config.show_patch_sampling
def test_model_test(test_output_dirs: OutputFolderForTests,
                    use_partial_ground_truth: bool,
                    allow_partial_ground_truth: bool) -> None:
    """
    Check the CSV and image files that InnerEye.ML.model_testing.segmentation_model_test writes,
    both for a dataset with complete ground truth and for one where some labels were removed.

    :param test_output_dirs: The fixture in conftest.py
    :param use_partial_ground_truth: If True, drop some ground truth channels for subjects 3 and 4
        and run inference on subjects 3, 4 and 5. If False, run inference on subjects 1 and 2.
    :param allow_partial_ground_truth: The value to set the allow_incomplete_labels flag of the
        model config to.
    """
    test_mode = ModelExecutionMode.TEST
    dataset_id = "place_holder_dataset_id"
    data_dir = full_ml_test_data_path("train_and_test_data")
    seed_everything(42)

    config = DummyModel()
    config.allow_incomplete_labels = allow_partial_ground_truth
    config.set_output_to(test_output_dirs.root_dir)
    config.azure_dataset_id = dataset_id
    test_transform = config.get_full_image_sample_transforms().test
    df = pd.read_csv(full_ml_test_data_path(DATASET_CSV_FILE_NAME))

    if not use_partial_ground_truth:
        df = df[df.subject.isin([1, 2])]
    else:
        config.check_exclusive = False
        config.ground_truth_ids = ["region", "region_1"]
        # As in Tests.ML.pipelines.test.inference.test_evaluate_model_predictions, patients 3, 4,
        # and 5 are in the test dataset:
        # Patient 3 has one missing ground truth channel: "region"
        # Patient 4 has all missing ground truth channels: "region", "region_1"
        # Patient 5 has no missing ground truth channels.
        removed_channels = [(3, "region"), (4, "region"), (4, "region_1")]
        for subject_id, channel in removed_channels:
            df = df[df["subject"].ne(subject_id) | df["channel"].ne(channel)]
        # The config gets the data frame for all subjects; only the test subjects are kept for inference.
        config.dataset_data_frame = df
        df = df[df.subject.isin([3, 4, 5])]
        config.train_subject_ids = ['1', '2']
        config.test_subject_ids = ['3', '4', '5']
        config.val_subject_ids = ['6', '7']

    def create_inference_datasets() -> Any:
        # Build the dataset that inference reads from, keyed by execution mode.
        return {test_mode: FullImageDataset(config, df, full_image_sample_transforms=test_transform)}

    if use_partial_ground_truth and not allow_partial_ground_truth:
        # Incomplete labels are present but not allowed: creating the dataset must fail.
        with pytest.raises(ValueError) as value_error:
            # noinspection PyTypeHints
            config._datasets_for_inference = create_inference_datasets()  # type: ignore
        assert "Patient 3 does not have channel 'region'" in str(value_error.value)
        return

    # noinspection PyTypeHints
    config._datasets_for_inference = create_inference_datasets()  # type: ignore
    checkpoint_handler = get_default_checkpoint_handler(model_config=config,
                                                        project_root=test_output_dirs.root_dir)
    # Mimic the behaviour that checkpoints are downloaded from blob storage into the checkpoints folder.
    create_model_and_store_checkpoint(config, config.checkpoint_folder / LAST_CHECKPOINT_FILE_NAME_WITH_SUFFIX)
    checkpoint_handler.additional_training_done()
    inference_results = model_testing.segmentation_model_test(
        config,
        execution_mode=test_mode,
        checkpoint_paths=checkpoint_handler.get_checkpoints_to_test())
    epoch_dir = config.outputs_folder / get_best_epoch_results_path(test_mode)

    # Name of the patient-count column in the aggregates file: lower case, with a trailing "s".
    column_stem = f"total_{MetricsFileColumns.Patient.value}".lower()
    total_num_patients_column_name = column_stem if column_stem.endswith("s") else column_stem + "s"

    if use_partial_ground_truth:
        num_subjects = len(pd.unique(df["subject"]))
        if allow_partial_ground_truth:
            assert csv_column_contains_value(csv_file_path=epoch_dir / METRICS_AGGREGATES_FILE,
                                             column_name=total_num_patients_column_name,
                                             value=num_subjects,
                                             contains_only_value=True)
            assert csv_column_contains_value(csv_file_path=epoch_dir / SUBJECT_METRICS_FILE_NAME,
                                             column_name=MetricsFileColumns.Dice.value,
                                             value='',
                                             contains_only_value=False)
        # All remaining checks compare against reference outputs for subjects 1 and 2, which are
        # only used when the ground truth is complete.
        return

    aggregates_df = pd.read_csv(epoch_dir / METRICS_AGGREGATES_FILE)
    assert total_num_patients_column_name not in aggregates_df.columns  # Only added if using partial ground truth
    assert not csv_column_contains_value(csv_file_path=epoch_dir / SUBJECT_METRICS_FILE_NAME,
                                         column_name=MetricsFileColumns.Dice.value,
                                         value='',
                                         contains_only_value=False)

    assert inference_results.metrics == pytest.approx(0.66606902, abs=1e-6)
    assert config.outputs_folder.is_dir()
    assert epoch_dir.is_dir()
    patient1 = io_util.load_nifti_image(data_dir / "id1_channel1.nii.gz")
    patient2 = io_util.load_nifti_image(data_dir / "id2_channel1.nii.gz")

    assert_file_contains_string(epoch_dir / DATASET_ID_FILE, dataset_id)
    assert_file_contains_string(epoch_dir / GROUND_TRUTH_IDS_FILE, "region")
    # Both metrics files must match the reference files stored next to the test images.
    for csv_name in (model_testing.SUBJECT_METRICS_FILE_NAME, model_testing.METRICS_AGGREGATES_FILE):
        assert_text_files_match(epoch_dir / csv_name, data_dir / csv_name)
    # Plotting results vary between platforms. Can only check if the file is generated, but not its contents.
    assert (epoch_dir / model_testing.BOXPLOT_FILE).exists()

    # Every output image is checked against the shape and header of the subject's input image,
    # and against the single value handed to assert_nifti_content for that file.
    expected_image_values = [
        ("posterior_region.nii.gz", 137),
        (DEFAULT_RESULT_IMAGE_NAME, 1),
        ("posterior_background.nii.gz", 117),
    ]
    subjects = [("001", patient1), ("002", patient2)]
    for image_name, image_value in expected_image_values:
        for subject_folder, source_image in subjects:
            assert_nifti_content(epoch_dir / subject_folder / image_name,
                                 get_image_shape(source_image),
                                 source_image.header,
                                 [image_value],
                                 np.ubyte)

    thumbnails_folder = epoch_dir / model_testing.THUMBNAILS_FOLDER
    assert thumbnails_folder.is_dir()
    overlays = [png for png in thumbnails_folder.glob("*.png") if "_region_slice_" in str(png)]
    assert len(overlays) == len(df.subject.unique()), "There should be one overlay/contour file per subject"

    # Writing dataset.csv normally happens at the beginning of training,
    # but this test reads off a saved checkpoint file.
    # Dataset.csv must be present for plot_cross_validation.
    config.write_dataset_files()
    # Test if the metrics files can be picked up correctly by the cross validation code
    result_files = get_config_and_results_for_offline_runs(config).files
    assert len(result_files) == 1
    for result_file in result_files:
        assert result_file.execution_mode == test_mode
        assert result_file.dataset_csv_file is not None
        assert result_file.dataset_csv_file.exists()
        assert result_file.metrics_file is not None
        assert result_file.metrics_file.exists()