def plot_cross_validation_from_files(config_and_files: OfflineCrossvalConfigAndFiles,
                                     root_folder: Path) -> None:
    """
    Runs the offline analysis for a cross validation run: writes plots, outliers, aggregated
    metrics and statistical test results for the per-split result files into root_folder.
    """
    config = config_and_files.config
    if config.number_of_cross_validation_splits > 1:
        check_result_file_counts(config_and_files)
    result_files = config_and_files.files
    metrics_dfs = load_dataframes(result_files, config)
    full_csv_file = root_folder / FULL_METRICS_DATAFRAME_FILE
    initial_metrics = pd.concat(list(metrics_dfs.values()))
    if config.model_category == ModelCategory.Segmentation:
        if config.create_plots:
            plot_metrics(config, metrics_dfs, root_folder)
        save_outliers(config, metrics_dfs, root_folder)
        all_metrics, focus_splits = add_comparison_data(config, initial_metrics)
        all_metrics.to_csv(full_csv_file, index=False)
        run_statistical_tests_on_file(root_folder, full_csv_file, config, focus_splits)
    else:
        # For classification runs, we also want to compute the aggregated training metrics for
        # each fold.
        metrics = ScalarMetricsDict.load_execution_mode_metrics_from_df(
            initial_metrics,
            config.model_category == ModelCategory.Classification)
        ScalarMetricsDict.aggregate_and_save_execution_mode_metrics(
            metrics=metrics,
            data_frame_logger=DataframeLogger(csv_path=root_folder / METRICS_AGGREGATES_FILE))
        # The full metrics file saves the prediction for each individual subject. Do not include
        # the training results in this file: in cross validation, a subject that is used for
        # training in one fold re-appears in the validation sets of other folds.
        val_and_test_metrics = initial_metrics.loc[
            initial_metrics[LoggingColumns.DataSplit.value] != ModelExecutionMode.TRAIN.value]
        val_and_test_metrics.to_csv(full_csv_file, index=False)
    # Copy one instance of the dataset.csv files to the root of the results folder. It is possible
    # that the different CV folds run with different dataset files, but that is not expected for
    # classification models at the moment (could change with ensemble models).
    dataset_csv = None
    for file in result_files:
        if file.dataset_csv_file:
            dataset_csv = file.dataset_csv_file
            break
    if dataset_csv:
        shutil.copy(str(dataset_csv), str(root_folder))
    name_dct = config_and_files.config.short_names
    if name_dct:
        # Write a lookup table from short run name to full run name, sorted by short name and
        # padded so that the long names line up in a single column.
        pairs = [(val, key) for key, val in name_dct.items()]
        max_len = max(len(short_name) for short_name, _ in pairs)
        with Path(root_folder / RUN_DICTIONARY_NAME).open("w") as out:
            for short_name, long_name in sorted(pairs):
                out.write(f"{short_name:{max_len}s} {long_name}\n")
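
# A hypothetical end-to-end usage sketch for the function above, kept as a comment because the
# helper names are assumptions rather than confirmed parts of this module: it assumes the config
# type is named `PlotCrossValidationConfig` and that a `download_crossval_result_files` helper
# fetches the per-split result files for a run. Only `plot_cross_validation_from_files` and
# `OfflineCrossvalConfigAndFiles` are taken from the code above.
#
#     config = PlotCrossValidationConfig(run_recovery_id="...")  # hypothetical arguments
#     result_files, root_folder = download_crossval_result_files(config)  # hypothetical helper
#     config_and_files = OfflineCrossvalConfigAndFiles(config=config, files=result_files)
#     plot_cross_validation_from_files(config_and_files, root_folder)
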
def test_load_metrics_from_df_with_hues(test_output_dirs: TestOutputDirectories) -> None:
    """
    Test if we can re-create a MetricsDict object with model predictions and labels, when the
    data file contains a prediction target value.
    """
    df_str = """prediction_target,epoch,subject,model_output,label,cross_validation_split_index,data_split
01,1,2137.00005,0.54349,1.0,0,Val
01,1,2137.00125,0.54324,0.0,1,Val
01,1,3250.00005,0.50822,0.0,0,Val
01,1,3250.12345,0.47584,0.0,1,Val
02,1,2137.00005,0.55538,1.0,0,Val
02,1,2137.00125,0.55759,0.0,1,Val
02,1,3250.00005,0.47255,0.0,0,Val
02,1,3250.12345,0.46996,0.0,1,Val
03,1,2137.00005,0.56670,1.0,0,Val
03,1,2137.00125,0.57003,0.0,1,Val
03,1,3250.00005,0.46321,0.0,0,Val
03,1,3250.12345,0.47309,0.0,1,Val
"""
    df = pd.read_csv(StringIO(df_str), converters={LoggingColumns.Hue.value: lambda x: x})
    metrics = ScalarMetricsDict.load_execution_mode_metrics_from_df(df, is_classification_metrics=True)
    mode = ModelExecutionMode.VAL
    epoch = 1
    assert mode in metrics
    assert epoch in metrics[mode]
    metrics_dict = metrics[mode][epoch]
    expected_hues = ["01", "02", "03"]
    assert metrics_dict.get_hue_names(include_default=False) == expected_hues
    for hue in expected_hues:
        assert len(metrics_dict._get_hue(hue).get_predictions()) == 4
    logger_output_file = test_output_dirs.create_file_or_folder_path("output.csv")
    logger = DataframeLogger(csv_path=Path(logger_output_file))
    ScalarMetricsDict.aggregate_and_save_execution_mode_metrics(metrics, logger)
    output = pd.read_csv(logger_output_file, dtype=str)
    assert LoggingColumns.Hue.value in output
    assert list(output[LoggingColumns.Hue.value]) == expected_hues
    assert LoggingColumns.DataSplit.value in output
    assert list(output[LoggingColumns.DataSplit.value].unique()) == [ModelExecutionMode.VAL.value]
    assert LoggingColumns.Epoch.value in output
    assert list(output[LoggingColumns.Epoch.value].unique()) == ["1"]
    assert LoggingColumns.AreaUnderPRCurve.value in output
    assert list(output[LoggingColumns.AreaUnderPRCurve.value]) == ['1.00000', '0.25000', '0.25000']
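
# A minimal standalone sketch (not part of the test above) to sanity-check the expected AUC-PR
# values. It assumes AreaUnderPRCurve is computed as the trapezoidal area under the
# precision-recall curve (sklearn's auc over precision_recall_curve); note that
# average_precision_score would instead yield 0.5 for hues "02" and "03".
def check_expected_auc_pr_values() -> None:
    from sklearn.metrics import auc, precision_recall_curve
    # Each hue has four predictions (2 subjects x 2 cross validation splits), one of them positive.
    hues = {
        "01": ([1, 0, 0, 0], [0.54349, 0.54324, 0.50822, 0.47584]),  # positive ranked first
        "02": ([1, 0, 0, 0], [0.55538, 0.55759, 0.47255, 0.46996]),  # positive ranked second
        "03": ([1, 0, 0, 0], [0.56670, 0.57003, 0.46321, 0.47309]),  # positive ranked second
    }
    expected = {"01": 1.0, "02": 0.25, "03": 0.25}
    for hue, (labels, outputs) in hues.items():
        precision, recall, _ = precision_recall_curve(labels, outputs)
        assert round(auc(recall, precision), 5) == expected[hue]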