Example #1
def test_download_or_get_local_file_2(
        test_output_dirs: OutputFolderForTests) -> None:
    config = PlotCrossValidationConfig(
        run_recovery_id=None,
        model_category=ModelCategory.Classification,
        epoch=None,
        should_validate=False)
    download_to_folder = test_output_dirs.root_dir / CROSSVAL_RESULTS_FOLDER
    config.outputs_directory = download_to_folder
    local_results = full_ml_test_data_path(
        "plot_cross_validation") / "HD_cfff5ceb-a227-41d6-a23c-0ebbc33b6301"
    config.local_run_results = str(local_results)
    # A file that sits in the root folder of the local_results should be downloaded into the
    # root of the download_to folder
    file1 = "dummy.txt"
    file_in_folder = config.download_or_get_local_file(None, file1,
                                                       download_to_folder)
    assert file_in_folder is not None
    assert file_in_folder == download_to_folder / file1

    # Copying a file in a sub-folder of the local_results: The full path to the file should be
    # preserved and created in the download_to folder.
    file2 = Path("0") / "Val" / "metrics.csv"
    file_in_folder = config.download_or_get_local_file(None, file2,
                                                       download_to_folder)
    assert file_in_folder is not None
    assert file_in_folder == download_to_folder / file2
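Both assertions rely on the same copy semantics for the local-results case (run=None): the relative blob path is preserved under the destination folder. A minimal sketch of that behaviour using only the standard library; copy_local_blob is a hypothetical helper name, not part of the InnerEye API.

import shutil
from pathlib import Path
from typing import Optional


def copy_local_blob(local_results: Path, blob: Path,
                    destination: Path) -> Optional[Path]:
    # Missing files yield None rather than an exception.
    source = local_results / blob
    if not source.is_file():
        return None
    # Recreate the sub-folder structure, e.g. "0/Val" for "0/Val/metrics.csv".
    target = destination / blob
    target.parent.mkdir(parents=True, exist_ok=True)
    shutil.copyfile(source, target)
    return target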
Example #2
def test_save_outliers(test_config: PlotCrossValidationConfig,
                       test_output_dirs: OutputFolderForTests) -> None:
    """Test to make sure the outlier file for a split is as expected"""
    test_config.outputs_directory = test_output_dirs.root_dir
    test_config.outlier_range = 0
    assert test_config.run_recovery_id
    dataset_split_metrics = {
        x: _get_metrics_df(test_config.run_recovery_id, x)
        for x in [ModelExecutionMode.VAL]
    }
    save_outliers(test_config, dataset_split_metrics,
                  test_config.outputs_directory)
    filename = f"{ModelExecutionMode.VAL.value}_outliers.txt"
    assert_text_files_match(full_file=test_config.outputs_directory / filename,
                            expected_file=full_ml_test_data_path(filename))
    # Now test without the CSV_INSTITUTION_HEADER and CSV_SERIES_HEADER columns,
    # which will be missing in institutions' environments
    dataset_split_metrics_pruned = {
        x: _get_metrics_df(test_config.run_recovery_id, x).drop(
            columns=[CSV_INSTITUTION_HEADER, CSV_SERIES_HEADER],
            errors="ignore")
        for x in [ModelExecutionMode.VAL]
    }
    save_outliers(test_config, dataset_split_metrics_pruned,
                  test_config.outputs_directory)
    test_data_filename = f"{ModelExecutionMode.VAL.value}_outliers_pruned.txt"
    assert_text_files_match(
        full_file=test_config.outputs_directory / filename,
        expected_file=full_ml_test_data_path(test_data_filename))
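The test sets outlier_range = 0, which makes every row that deviates from the mean count as an outlier. A hedged sketch of the per-column selection idea that save_outliers builds on; this mirrors the concept, not the exact InnerEye implementation, and extract_outliers_sketch is an illustrative name.

import pandas as pd


def extract_outliers_sketch(df: pd.DataFrame, column: str,
                            outlier_range: float) -> pd.DataFrame:
    # Flag rows deviating from the column mean by more than `outlier_range`
    # standard deviations. With outlier_range=0 (as in the test above), any
    # row that differs from the mean at all is flagged.
    mean = df[column].mean()
    std = df[column].std()
    return df[(df[column] - mean).abs() > outlier_range * std]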
Example #3
def test_save_outliers(test_config_ensemble: PlotCrossValidationConfig,
                       test_output_dirs: OutputFolderForTests) -> None:
    """Test to make sure the outlier file for a split is as expected"""
    test_config_ensemble.outputs_directory = test_output_dirs.root_dir
    test_config_ensemble.outlier_range = 0
    dataset_split_metrics = {x: _get_metrics_df(x) for x in [ModelExecutionMode.VAL]}
    save_outliers(test_config_ensemble, dataset_split_metrics, test_config_ensemble.outputs_directory)
    f = f"{ModelExecutionMode.VAL.value}_outliers.txt"
    assert_text_files_match(full_file=test_config_ensemble.outputs_directory / f,
                            expected_file=full_ml_test_data_path(f))
Example #4
def test_save_outliers(test_config_ensemble: PlotCrossValidationConfig,
                       test_output_dirs: TestOutputDirectories) -> None:
    """Test to make sure the outlier file for a split is as expected"""
    test_config_ensemble.outputs_directory = test_output_dirs.root_dir
    test_config_ensemble.outlier_range = 0
    dataset_split_metrics = {x: _get_metrics_df(x) for x in [ModelExecutionMode.VAL]}
    save_outliers(test_config_ensemble, dataset_split_metrics, Path(test_config_ensemble.outputs_directory))
    assert_file_contents_match_exactly(full_file=Path(test_config_ensemble.outputs_directory)
                                                 / f"{ModelExecutionMode.VAL.value}_outliers.txt",
                                       expected_file=Path(
                                           full_ml_test_data_path(
                                               f"{ModelExecutionMode.VAL.value}_outliers.txt")))
Example #5
def test_download_non_existing_file_in_crossval(test_output_dirs: OutputFolderForTests) -> None:
    """
    Downloading a non-existing file when trying to load cross validation results
    should not raise an exception.
    """
    run = get_most_recent_run(fallback_run_id_for_local_execution=FALLBACK_SINGLE_RUN)
    config = PlotCrossValidationConfig(run_recovery_id=None,
                                       model_category=ModelCategory.Classification,
                                       epoch=None,
                                       should_validate=False)
    config.outputs_directory = test_output_dirs.root_dir
    does_not_exist = "does_not_exist.txt"
    result = config.download_or_get_local_file(run,
                                               blob_to_download=does_not_exist,
                                               destination=test_output_dirs.root_dir)
    assert result is None
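The contract being tested is that a missing blob yields None rather than an exception, so callers can branch instead of wrapping every download in try/except. A sketch of the caller-side pattern under that assumption; try_download is a hypothetical wrapper, and the type annotations are forward references since the imports are elided.

import logging
from pathlib import Path
from typing import Optional


def try_download(config: "PlotCrossValidationConfig", run: "Run",
                 blob: str, destination: Path) -> Optional[Path]:
    # download_or_get_local_file returns None for a missing blob instead of
    # raising, so a simple branch is enough.
    result = config.download_or_get_local_file(run, blob_to_download=blob,
                                               destination=destination)
    if result is None:
        logging.info("Blob %s not found; continuing without it.", blob)
    return result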
Example #6
@pytest.mark.parametrize("is_current_run", [True, False])
def test_download_or_get_local_blobs(is_current_run: bool,
                                     test_config: PlotCrossValidationConfig,
                                     test_output_dirs: OutputFolderForTests) -> None:
    azure_config = get_default_azure_config()
    # Check that the AzureML workspace is accessible before fetching the run.
    azure_config.get_workspace()
    assert test_config.run_recovery_id is not None
    run = Run.get_context() if is_current_run else azure_config.fetch_run(test_config.run_recovery_id)
    run_outputs_dir = full_ml_test_data_path() if is_current_run else Path(DEFAULT_AML_UPLOAD_DIR)
    test_config.outputs_directory = run_outputs_dir
    dst = test_config.download_or_get_local_file(
        blob_to_download="dataset.csv",
        destination=test_output_dirs.root_dir,
        run=run
    )
    assert dst is not None
    assert dst.exists()
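The is_current_run argument is supplied by pytest parametrization (the decorator shown above), so the test covers both the current-run and fetched-run code paths. A minimal, InnerEye-independent sketch of the pattern:

import pytest


@pytest.mark.parametrize("is_current_run", [True, False])
def test_source_selection(is_current_run: bool) -> None:
    # pytest invokes this test once per parameter value, so both the
    # "current run" and the "fetched recovery run" branches are exercised.
    source = "current run context" if is_current_run else "fetched recovery run"
    assert source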