Example #1
# Method excerpt (note the self parameter); shown outside its class for brevity.
def try_compare_scores_against_baselines(self, model_proc: ModelProcessing) -> None:
    """
    Attempt to compare scores against baseline scores and create scatterplots, if possible.

    :param model_proc: The ModelProcessing mode (single model or ensemble creation).
    """
    if not isinstance(self.model_config, SegmentationModelBase):  # keep type checker happy
        return
    # Local import, presumably to avoid a circular dependency at module load time.
    from InnerEye.ML.baselines_util import compare_scores_against_baselines
    with logging_section("Comparing scores against baselines"):
        compare_scores_against_baselines(self.model_config, self.azure_config, model_proc)
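For context, logging_section (from InnerEye.Common.common_util) brackets a block of work with log markers. A minimal sketch of its behavior, assuming it simply emits INFO-level start/finish messages (the real helper may also report elapsed time):

import logging
from contextlib import contextmanager
from typing import Generator


@contextmanager
def logging_section(message: str) -> Generator[None, None, None]:
    # Sketch only: mark the start and end of the enclosed block in the log.
    logging.info("**** STARTING: %s ****", message)
    yield
    logging.info("**** FINISHED: %s ****", message)

The test below exercises compare_scores_against_baselines end to end, staging one missing input at a time.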
import logging
from typing import Any, Optional, Tuple
from unittest import mock

import pytest

# Assumed imports from the InnerEye-DeepLearning code base (exact module paths vary
# between versions): ModelProcessing, SegmentationModelBase, OutputFolderForTests,
# ModelExecutionMode, compare_scores_against_baselines, INFERENCE_DISABLED_WARNING,
# BASELINE_WILCOXON_RESULTS_FILE, FULL_METRICS_DATAFRAME_FILE, BEST_EPOCH_FOLDER_NAME,
# OTHER_RUNS_SUBDIR_NAME, ENSEMBLE_SPLIT_NAME, DATASET_CSV_FILE_NAME and
# SUBJECT_METRICS_FILE_NAME, plus the test helpers get_default_azure_config,
# create_dataset_df, create_metrics_df, create_comparison_baseline and
# check_wilcoxon_lines defined elsewhere in the test suite.


def test_compare_scores_against_baselines_throws(
        model_proc_split_infer: Tuple[ModelProcessing, int, Optional[bool]],
        test_output_dirs: OutputFolderForTests,
        caplog: Any) -> None:
    """
    Test that exceptions are raised while files required for baseline comparison are missing,
    and that the baseline comparison files are written once all required files are present.

    :param model_proc_split_infer: Tuple of (model processing mode, number of
        cross-validation splits, inference-on-test-set flag).
    :param test_output_dirs: Test output directories.
    :param caplog: Pytest log capture fixture.
    """
    (model_proc, number_of_cross_validation_splits,
     inference_on_test_set) = model_proc_split_infer
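    # comparison_blob_storage_paths pairs a human-readable baseline name with the
    # blob-storage folder holding that baseline run's Test-set outputs.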
    config = SegmentationModelBase(
        should_validate=False,
        comparison_blob_storage_paths=[
            ('Single', 'dummy_blob_single/outputs/epoch_120/Test'),
            ('5fold', 'dummy_blob_ensemble/outputs/epoch_120/Test')
        ],
        number_of_cross_validation_splits=number_of_cross_validation_splits)
    config.set_output_to(test_output_dirs.root_dir)

    azure_config = get_default_azure_config()

    # Disable inference
    config.inference_on_test_set = False
    config.ensemble_inference_on_test_set = False

    with caplog.at_level(logging.INFO):
        # With no inference output, test that a warning is written to logging
        compare_scores_against_baselines(model_config=config,
                                         azure_config=azure_config,
                                         model_proc=model_proc)
        assert INFERENCE_DISABLED_WARNING in caplog.text

    # Reset inference
    config.inference_on_test_set = inference_on_test_set
    config.ensemble_inference_on_test_set = None
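    # Inference is now considered enabled again (assumption: None falls back to the
    # default behavior), so the comparison expects real results on disk and will raise.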

    # If the BEST_EPOCH_FOLDER_NAME folder is missing, expect an exception to be raised.
    with pytest.raises(FileNotFoundError) as ex:
        compare_scores_against_baselines(model_config=config,
                                         azure_config=azure_config,
                                         model_proc=model_proc)
    assert "Cannot compare scores against baselines: no best epoch results found at" in str(
        ex)

    best_epoch_folder_path = config.outputs_folder
    if model_proc == ModelProcessing.ENSEMBLE_CREATION:
        best_epoch_folder_path = best_epoch_folder_path / OTHER_RUNS_SUBDIR_NAME / ENSEMBLE_SPLIT_NAME
    best_epoch_folder_path = best_epoch_folder_path / BEST_EPOCH_FOLDER_NAME / ModelExecutionMode.TEST.value
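    # Resulting layout: <outputs>[/OTHER_RUNS/<ensemble split>]/<best epoch>/Test.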

    best_epoch_folder_path.mkdir(parents=True)

    # If the BEST_EPOCH_FOLDER_NAME folder exists but DATASET_CSV_FILE_NAME is missing,
    # expect an exception to be raised.
    with pytest.raises(FileNotFoundError) as ex:
        compare_scores_against_baselines(model_config=config,
                                         azure_config=azure_config,
                                         model_proc=model_proc)
    assert "Not comparing with baselines because no " in str(ex)
    assert DATASET_CSV_FILE_NAME in str(ex)

    model_dataset_path = best_epoch_folder_path / DATASET_CSV_FILE_NAME
    dataset_df = create_dataset_df()
    dataset_df.to_csv(model_dataset_path)

    # If the BEST_EPOCH_FOLDER_NAME folder exists but SUBJECT_METRICS_FILE_NAME is missing,
    # expect an exception to be raised.
    with pytest.raises(FileNotFoundError) as ex:
        compare_scores_against_baselines(model_config=config,
                                         azure_config=azure_config,
                                         model_proc=model_proc)
    assert "Not comparing with baselines because no " in str(ex)
    assert SUBJECT_METRICS_FILE_NAME in str(ex)

    model_metrics_path = best_epoch_folder_path / SUBJECT_METRICS_FILE_NAME
    metrics_df = create_metrics_df()
    metrics_df.to_csv(model_metrics_path)

    baseline = create_comparison_baseline(dataset_df)

    # Patch get_comparison_baselines to return the baseline above.
    with mock.patch('InnerEye.ML.baselines_util.get_comparison_baselines',
                    return_value=[baseline]):
        compare_scores_against_baselines(model_config=config,
                                         azure_config=azure_config,
                                         model_proc=model_proc)
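    # With the dataset file, the metrics file and a mocked baseline all present, the
    # comparison runs to completion and writes its result files, checked below.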

    # Check that the Wilcoxon results file is present and has the expected contents.
    wilcoxon_path = best_epoch_folder_path / BASELINE_WILCOXON_RESULTS_FILE
    assert wilcoxon_path.is_file()

    wilcoxon_lines = [
        line.strip() for line in wilcoxon_path.read_text().splitlines()
    ]
    check_wilcoxon_lines(wilcoxon_lines, baseline)

    # Check the full metrics results file is present.
    full_metrics_path = best_epoch_folder_path / FULL_METRICS_DATAFRAME_FILE
    assert full_metrics_path.is_file()
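The test relies on a parametrized fixture and several helpers defined elsewhere in the test suite. The sketches below are illustrative assumptions about their shape, not the InnerEye implementations; the column names and fixture parameters are made up for the sketch. create_comparison_baseline and check_wilcoxon_lines are not sketched: the former wraps the baseline's dataset and metrics into the object that get_comparison_baselines is mocked to return, and the latter asserts on the lines of the Wilcoxon results file.

import pandas as pd

# ModelProcessing, pytest, Any, Optional and Tuple as imported above.


@pytest.fixture(params=[
    (ModelProcessing.DEFAULT, 0, True),            # single model, inference enabled
    (ModelProcessing.ENSEMBLE_CREATION, 2, None),  # ensemble, default inference behavior
])
def model_proc_split_infer(request: Any) -> Tuple[ModelProcessing, int, Optional[bool]]:
    # Supplies (model processing mode, number of cross-validation splits, inference flag).
    return request.param


def create_dataset_df() -> pd.DataFrame:
    # Minimal stand-in for the dataset.csv contents read by the comparison code.
    return pd.DataFrame({"subject": [1, 2], "filePath": ["subj1.nii.gz", "subj2.nii.gz"]})


def create_metrics_df() -> pd.DataFrame:
    # Minimal per-subject segmentation metrics (one Dice value per patient/structure).
    return pd.DataFrame({"Patient": [1, 2], "Structure": ["lung", "lung"], "Dice": [0.90, 0.85]})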