Example #1
def test_load_files_with_prediction_target() -> None:
    """
    For multi-week RNNs that predict at multiple sequence points: test that dataframes containing
    the prediction_target column can be loaded.
    """
    folder = "multi_label_sequence_in_crossval"
    plotting_config = PlotCrossValidationConfig(
        run_recovery_id="foo",
        epoch=1,
        model_category=ModelCategory.Classification)
    files = create_run_result_file_list(plotting_config, folder)

    downloaded_metrics = load_dataframes(files, plotting_config)
    assert ModelExecutionMode.TEST not in downloaded_metrics
    metrics = downloaded_metrics[ModelExecutionMode.VAL]
    assert metrics is not None
    assert LoggingColumns.Hue.value in metrics
    # The prediction target column should always be read as a string, because we will later use it to create
    # hue values for a MetricsDict.
    assert is_string_dtype(metrics[LoggingColumns.Hue.value].dtype)
    assert LoggingColumns.Epoch.value in metrics
    assert LoggingColumns.Patient.value in metrics
    assert len(metrics[LoggingColumns.Hue.value].unique()) == 3
    # Each of the two CV folds has 2 distinct subjects
    assert len(metrics[LoggingColumns.Patient.value].unique()) == 4
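
The comment in the test above explains that the prediction target column is read as a string because it later becomes the hue values of a MetricsDict. Below is a minimal, self-contained pandas sketch (illustrative only, not the actual loading code behind load_dataframes) of the difference the dtype makes: a numeric-looking target such as "01" would otherwise be parsed as an integer and its label would silently change.

import io

import pandas as pd
from pandas.api.types import is_string_dtype

csv_text = "Hue,Patient\n01,1\n02,1\n11,2\n"
# Without a dtype override, pandas infers int64 for the Hue column: "01" -> 1, "02" -> 2.
inferred = pd.read_csv(io.StringIO(csv_text))
assert inferred["Hue"].tolist() == [1, 2, 11]
# Forcing str preserves the original labels, and is_string_dtype (as asserted in the test) holds.
as_string = pd.read_csv(io.StringIO(csv_text), dtype={"Hue": str})
assert as_string["Hue"].tolist() == ["01", "02", "11"]
assert is_string_dtype(as_string["Hue"].dtype)
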
Example #2
@pytest.mark.parametrize("perform_sub_fold_cross_validation", [True, False])
def test_metrics_preparation_for_classification(
        perform_sub_fold_cross_validation: bool) -> None:
    """
    Test if metrics from classification models can be loaded and prepared. The files in question are checked in,
    and were downloaded from a run on AzureML.
    """
    files, plotting_config = load_result_files_for_classification(
        perform_sub_fold_cross_validation)
    downloaded_metrics = load_dataframes(files, plotting_config)
    assert ModelExecutionMode.TEST not in downloaded_metrics
    metrics = downloaded_metrics[ModelExecutionMode.VAL]
    assert metrics is not None
    expected_metrics_file = "metrics_preparation_for_sub_fold_classification_VAL.csv" \
        if perform_sub_fold_cross_validation else "metrics_preparation_for_classification_VAL.csv"
    expected_df_csv = full_ml_test_data_path(
        "plot_cross_validation") / expected_metrics_file
    metrics = metrics.sort_values(list(metrics.columns),
                                  ascending=True).reset_index(drop=True)
    # To write new test results:
    # metrics.to_csv(expected_df_csv, index=False)
    expected_df = pd.read_csv(expected_df_csv).sort_values(
        list(metrics.columns), ascending=True).reset_index(drop=True)
    pd.testing.assert_frame_equal(expected_df,
                                  metrics,
                                  check_like=True,
                                  check_dtype=False)
Example #3
def test_metrics_preparation_for_segmentation(test_config_ensemble: PlotCrossValidationConfig) -> None:
    """
    Test if metrics dataframes can be loaded and prepared. The files in question are checked in, but
    were originally downloaded from a run whose ID is given in DEFAULT_ENSEMBLE_RUN_RECOVERY_ID.
    """
    files = create_file_list_for_segmentation_recovery_run(test_config_ensemble)
    downloaded_metrics = load_dataframes(files, test_config_ensemble)
    for mode in test_config_ensemble.execution_modes_to_download():
        expected_df = _get_metrics_df(mode)
        # Drop the "mode" column, because that was added after creating the test data
        metrics = downloaded_metrics[mode]
        assert metrics is not None
        actual_df = metrics.drop(COL_MODE, axis=1)
        actual_df = actual_df.sort_values(list(actual_df.columns), ascending=True).reset_index(drop=True)
        pd.testing.assert_frame_equal(expected_df, actual_df, check_like=True, check_dtype=False)
Example #4
def test_metrics_preparation_for_segmentation(
        drop_column: Optional[str], test_config: PlotCrossValidationConfig,
        test_output_dirs: OutputFolderForTests) -> None:
    """
    Test if metrics dataframes can be loaded and prepared. The files in question are checked in, but
    were originally downloaded from a run whose ID is given in DEFAULT_ENSEMBLE_RUN_RECOVERY_ID.
    Additionally, test that CSV_INSTITUTION_HEADER or CSV_SERIES_HEADER can be dropped from the
    dataset_csv_file.
    """
    files = create_file_list_for_segmentation_recovery_run(test_config)
    if drop_column:

        def drop_csv_column(path: Path) -> None:
            """
            Load a csv file, drop a column, and save the csv file.
            :param path: Path to csv file.
            """
            df = pd.read_csv(path)
            dropped_df = df.drop(drop_column, axis=1)
            # Write without the index so that re-saving does not add a spurious "Unnamed: 0" column.
            dropped_df.to_csv(path, index=False)

        files = copy_run_result_files(files, full_ml_test_data_path(),
                                      test_output_dirs.root_dir,
                                      drop_csv_column)
    downloaded_metrics = load_dataframes(files, test_config)
    assert test_config.run_recovery_id
    for mode in test_config.execution_modes_to_download():
        expected_df = _get_metrics_df(test_config.run_recovery_id, mode)
        if drop_column:
            # The column that was dropped from the dataset CSV is loaded back as empty strings, so set
            # it to empty in the expected dataframe as well.
            expected_df[drop_column] = ''
        # Drop the "mode" column, because that was added after creating the test data
        metrics = downloaded_metrics[mode]
        assert metrics is not None
        actual_df = metrics.drop(COL_MODE, axis=1)
        actual_df = actual_df.sort_values(
            list(actual_df.columns), ascending=True).reset_index(drop=True)
        pd.testing.assert_frame_equal(expected_df,
                                      actual_df,
                                      check_like=True,
                                      check_dtype=False)
Example #5
def download_metrics(config: PlotCrossValidationConfig) -> \
        Tuple[Dict[ModelExecutionMode, Optional[pd.DataFrame]], Path]:
    """
    Download the cross-validation result files for the given configuration and load them into one
    dataframe per execution mode. Returns the dataframes and the folder the files were downloaded to.
    """
    result_files, root_folder = download_crossval_result_files(config)
    dataframes = load_dataframes(result_files, config)
    return dataframes, root_folder
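
For context, a minimal usage sketch of download_metrics, assuming the same imports as in the examples above; the run_recovery_id value and the final print are hypothetical.

config = PlotCrossValidationConfig(
    run_recovery_id="experiment_name:run_id",  # hypothetical AzureML run recovery ID
    epoch=1,
    model_category=ModelCategory.Classification)
dataframes, root_folder = download_metrics(config)
val_metrics = dataframes.get(ModelExecutionMode.VAL)
if val_metrics is not None:
    print(f"Loaded {len(val_metrics)} rows of validation metrics; files were downloaded to {root_folder}")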