def get_comparison_baselines(outputs_folder: Path, azure_config: AzureConfig,
                             comparison_blob_storage_paths: List[Tuple[str, str]]) -> \
        List[ComparisonBaseline]:
    """
    Build one ComparisonBaseline for each entry of comparison_blob_storage_paths.
    Each entry is a (name, path) pair where path has the form "[<experiment>:]<run_rec_id>/<blob_path>".
    The run is fetched via azure_config.fetch_run, and the dataset and metrics CSV files located by
    get_comparison_baseline_paths are read into pandas DataFrames.
    :param outputs_folder: folder that is handed on to get_comparison_baseline_paths
    (presumably the download destination for the CSV files - TODO confirm).
    :param azure_config: Azure configuration, used to fetch the run for each run recovery ID.
    :param comparison_blob_storage_paths: list of (comparison name, path) pairs, see above.
    :return: a list with one ComparisonBaseline per entry, in input order. Empty input gives an empty list.
    :raises ValueError: if a path does not contain a "/" separating run ID and blob path, or if the
    dataset or metrics file for a run could not be found.
    """
    comparison_baselines = []
    for (comparison_name, comparison_path) in comparison_blob_storage_paths:
        # Discard the experiment part of the run rec ID, if any.
        comparison_path = comparison_path.split(":")[-1]
        # Split at the first "/" only: everything after it is the blob path, which may itself
        # contain further "/" characters.
        run_rec_id, separator, blob_path_str = comparison_path.partition("/")
        if not separator:
            # Fail with an actionable message rather than a bare tuple-unpacking error.
            raise ValueError(
                f"comparison path for '{comparison_name}' must have the form "
                f"'<run_rec_id>/<blob_path>', but got: '{comparison_path}'")
        run_rec_id = strip_prefix(run_rec_id, AZUREML_RUN_FOLDER_PREFIX)
        blob_path = Path(
            strip_prefix(blob_path_str, DEFAULT_AML_UPLOAD_DIR + "/"))
        run = azure_config.fetch_run(run_rec_id)
        (comparison_dataset_path,
         comparison_metrics_path) = get_comparison_baseline_paths(
             outputs_folder, blob_path, run, DATASET_CSV_FILE_NAME)
        # Only if both the dataset file and the metrics file are present on disk can the baseline
        # be built; anything else is treated as a hard error.
        if comparison_dataset_path is None or comparison_metrics_path is None or \
                not comparison_dataset_path.exists() or not comparison_metrics_path.exists():
            raise ValueError(
                f"could not find comparison data for run {run_rec_id}")
        comparison_baselines.append(
            ComparisonBaseline(comparison_name,
                               pd.read_csv(comparison_dataset_path),
                               pd.read_csv(comparison_metrics_path),
                               run_rec_id))
    return comparison_baselines
Example #2
0
def get_first_child_run(azure_config: AzureConfig) -> Run:
    """
    Fetch the run identified by azure_config.run_recovery_id and return the first of its
    completed child runs.
    :param azure_config: Azure configuration. Its run_recovery_id names the parent run
    (presumably a HyperDrive run - confirm against callers), which is fetched via azure_config.fetch_run.
    :return: the first element of the child runs with status COMPLETED, as returned by fetch_child_runs.
    :raises ValueError: if azure_config.run_recovery_id is empty or not set.
    """
    recovery_id = azure_config.run_recovery_id
    if recovery_id:
        parent_run = azure_config.fetch_run(recovery_id)
        completed_children = fetch_child_runs(parent_run, status=RunStatus.COMPLETED)
        # Indexing fails with an IndexError if the parent run has no completed child runs.
        return completed_children[0]
    raise ValueError("azure_config.run_recovery_id is not provided.")