def compare_scores_against_baselines(model_config: SegmentationModelBase,
                                     azure_config: AzureConfig,
                                     model_proc: ModelProcessing) -> None:
    """
    Compare the test-set results of the current run against the baselines configured on the model.

    Does nothing if the model config lists no baselines. Otherwise, the subject metrics file and the dataset file
    are read from the best-epoch results folder for the test set, and handed to `download_and_compare_scores`
    together with the baseline locations. The resulting full metrics dataframe is always written to the results
    folder; if any comparisons were carried out, the Wilcoxon signed-rank test lines are logged and written to the
    Wilcoxon results file. Finally, the plots of the comparison result are written to the scatterplot directory.

    :param model_config: The segmentation model configuration; supplies the baselines and the outputs folder.
    :param azure_config: Azure-related configuration, passed through to the score download and comparison.
    :param model_proc: Whether the results belong to a single model or an ensemble; selects the results folder.
    :raises FileNotFoundError: If the results folder is missing although test set inference is required, or if
        the dataset file or the subject metrics file is missing from the results folder.
    """
    baseline_paths = model_config.comparison_blob_storage_paths
    if not baseline_paths:
        # The attribute may be None or empty: there is nothing to compare against.
        return

    run_outputs_root = model_config.outputs_folder
    best_epoch_subfolder = get_best_epoch_results_path(ModelExecutionMode.TEST, model_proc)
    outputs_path = run_outputs_root / best_epoch_subfolder
    if not outputs_path.is_dir():
        # A missing results folder is only an error when inference on the test set was supposed to run.
        if model_config.is_inference_required(model_proc, ModelExecutionMode.TEST):
            raise FileNotFoundError(
                f"Cannot compare scores against baselines: no best epoch results found at {outputs_path}"
            )
        logging.info(INFERENCE_DISABLED_WARNING)
        return

    # Both input files must be present before anything is read; the dataset file is checked first.
    model_dataset_path = outputs_path / DATASET_CSV_FILE_NAME
    if not model_dataset_path.exists():
        raise FileNotFoundError(
            f"Not comparing with baselines because no {model_dataset_path} file found for this run"
        )
    model_metrics_path = outputs_path / SUBJECT_METRICS_FILE_NAME
    if not model_metrics_path.exists():
        raise FileNotFoundError(
            f"Not comparing with baselines because no {model_metrics_path} file found for this run"
        )

    metrics_frame = pd.read_csv(model_metrics_path)
    dataset_frame = pd.read_csv(model_dataset_path)
    result = download_and_compare_scores(outputs_path,
                                         azure_config,
                                         baseline_paths,
                                         dataset_frame,
                                         metrics_frame)

    # The combined metrics are saved regardless of whether any baseline comparison took place.
    result.dataframe.to_csv(str(outputs_path / FULL_METRICS_DATAFRAME_FILE))

    if result.did_comparisons:
        wilcoxon_path = outputs_path / BASELINE_WILCOXON_RESULTS_FILE
        logging.info(
            f"Wilcoxon tests of current {model_proc.value} model against baseline(s), "
            f"written to {wilcoxon_path}:")
        for wilcoxon_line in result.wilcoxon_lines:
            logging.info(wilcoxon_line)
        logging.info("End of Wilcoxon test results")
        may_write_lines_to_file(result.wilcoxon_lines, wilcoxon_path)

    write_to_scatterplot_directory(outputs_path, result.plots)
def test_inference_required_crossval_runs() -> None:
    """
    Check whether full inference on the test set is requested for models that are trained in cross validation mode,
    both with the default settings and with the flag set explicitly.
    """
    # Every query below asks the same question: is test set inference needed for a single (non-ensemble) model?
    test_set_query = dict(model_proc=ModelProcessing.DEFAULT,
                          data_split=ModelExecutionMode.TEST)

    classifier = GlaucomaPublic()
    classifier.number_of_cross_validation_splits = 2
    segmenter = SegmentationModelBase(should_validate=False)
    segmenter.number_of_cross_validation_splits = 2

    # With two splits configured, both models must report that they run in cross validation mode.
    assert classifier.perform_cross_validation
    assert segmenter.perform_cross_validation

    # Classification models need test set inference in cross validation child runs, to ensure that the report
    # works correctly.
    assert classifier.is_inference_required(**test_set_query)

    # All other model types do not run inference on the test set by default.
    assert not segmenter.is_inference_required(**test_set_query)

    # An explicitly set flag takes precedence over the default.
    classifier.inference_on_test_set = False
    assert not classifier.is_inference_required(**test_set_query)