def get_comparison_baselines(outputs_folder: Path, azure_config: AzureConfig,
                             comparison_blob_storage_paths: List[Tuple[str, str]]) -> \
        List[ComparisonBaseline]:
    """
    Build one ComparisonBaseline per entry in comparison_blob_storage_paths.

    Each entry is a (name, path) tuple. The path is expected to look like
    "[experiment:]run_rec_id/blob_path": everything up to the last ":" is discarded, the part before
    the first "/" is taken as the run recovery ID (passed through strip_prefix with
    AZUREML_RUN_FOLDER_PREFIX), and the remainder is the blob path (passed through strip_prefix with
    DEFAULT_AML_UPLOAD_DIR + "/").

    :param outputs_folder: Folder that is handed to get_comparison_baseline_paths together with the
        blob path and the run.
    :param azure_config: Used to fetch the run for each run recovery ID via fetch_run.
    :param comparison_blob_storage_paths: List of (comparison name, comparison path) tuples.
    :return: A list of ComparisonBaseline objects, in the same order as the input entries. Each holds
        the comparison name, the dataset and metrics dataframes read via pd.read_csv, and the run
        recovery ID.
    :raises ValueError: If a comparison path does not contain a "/" separating the run recovery ID
        from the blob path, or if the dataset or metrics file for a run is missing.
    """
    comparison_baselines = []
    for (comparison_name, comparison_path) in comparison_blob_storage_paths:
        # Discard the experiment part of the run rec ID, if any.
        path_without_experiment = comparison_path.split(":")[-1]
        # partition (rather than unpacking a split) lets us report a malformed path explicitly
        # instead of failing with "not enough values to unpack".
        run_rec_id, separator, blob_path_str = path_without_experiment.partition("/")
        if not separator:
            raise ValueError(
                f"comparison path for '{comparison_name}' must have the form "
                f"'[experiment:]run_rec_id/blob_path', but got: {comparison_path}")
        run_rec_id = strip_prefix(run_rec_id, AZUREML_RUN_FOLDER_PREFIX)
        blob_path = Path(
            strip_prefix(blob_path_str, DEFAULT_AML_UPLOAD_DIR + "/"))
        run = azure_config.fetch_run(run_rec_id)
        (comparison_dataset_path,
         comparison_metrics_path) = get_comparison_baseline_paths(
             outputs_folder, blob_path, run, DATASET_CSV_FILE_NAME)
        # Both files must have been returned and must exist on disk; otherwise this baseline
        # cannot be used and the whole call fails.
        if comparison_dataset_path is None or comparison_metrics_path is None or \
                not comparison_dataset_path.exists() or not comparison_metrics_path.exists():
            raise ValueError(
                f"could not find comparison data for run {run_rec_id}")
        comparison_baselines.append(
            ComparisonBaseline(comparison_name,
                               pd.read_csv(comparison_dataset_path),
                               pd.read_csv(comparison_metrics_path),
                               run_rec_id))
    return comparison_baselines
Esempio n. 2
0
def test_get_comparison_data(test_output_dirs: OutputFolderForTests) -> None:
    """
    Check that get_comparison_baseline_paths returns a path for both the dataset file and the
    metrics file when pointed at the test-mode results of the most recent run.
    """
    latest_run = get_most_recent_run()
    test_results_path = get_epoch_results_path(ModelExecutionMode.TEST)
    baseline_paths = get_comparison_baseline_paths(
        test_output_dirs.root_dir, test_results_path, latest_run, DATASET_CSV_FILE_NAME)
    # Unpack explicitly so that a result that is not a pair fails the test as well.
    dataset_csv, metrics_csv = baseline_paths
    assert dataset_csv is not None
    assert metrics_csv is not None
Esempio n. 3
0
def test_get_comparison_data(test_output_dirs: OutputFolderForTests) -> None:
    """
    Check that get_comparison_baseline_paths returns a path for both the dataset file and the
    metrics file when pointed at the epoch 2 test-mode results of the most recent run.
    """
    recent_run_id = get_most_recent_run()
    # The run is fetched from the workspace described by the repository's settings YAML file.
    config = AzureConfig.from_yaml(
        fixed_paths.SETTINGS_YAML_FILE,
        project_root=fixed_paths.repository_root_directory())
    aml_workspace = config.get_workspace()
    fetched_run = fetch_run(aml_workspace, recent_run_id)
    epoch = 2
    epoch_results_path = get_epoch_results_path(epoch, ModelExecutionMode.TEST)
    baseline_paths = get_comparison_baseline_paths(
        test_output_dirs.root_dir, epoch_results_path, fetched_run, DATASET_CSV_FILE_NAME)
    # Unpack explicitly so that a result that is not a pair fails the test as well.
    dataset_csv, metrics_csv = baseline_paths
    assert dataset_csv is not None
    assert metrics_csv is not None