def download_checkpoints_from_run(
         azure_config: AzureConfig,
         config: ModelConfigBase,
         run: Run,
         output_subdir_name: Optional[str] = None) -> RunRecovery:
     """
     Downloads checkpoints of the provided run or, if applicable, its children.
     :param azure_config: Azure related configs.
     :param config: Model related configs.
     :param run: Run whose checkpoints should be recovered
     :return: run recovery information
     """
     child_runs: List[Run] = fetch_child_runs(run)
     logging.debug(f"Run has ID {run.id} and initial child runs are:")
     for child_run in child_runs:
         logging.debug(f"     {child_run.id}")
     checkpoint_subdir_name: Optional[str]
     if output_subdir_name:
         # From e.g. parent_dir/checkpoints we want parent_dir/output_subdir_name, to which we will
         # append split_index / checkpoints below to create child_dst.
         checkpoint_path = Path(config.checkpoint_folder)
         parent_path = checkpoint_path.parent
         checkpoint_subdir_name = checkpoint_path.name
         root_output_dir = parent_path / output_subdir_name
     else:
         root_output_dir = Path(config.checkpoint_folder) / run.id
         checkpoint_subdir_name = None
     # download checkpoints for the run
     download_outputs_from_run(blobs_path=Path(CHECKPOINT_FOLDER),
                               destination=root_output_dir,
                               run=run)
     if len(child_runs) > 0:
         tag_to_use = 'cross_validation_split_index'
         can_use_split_indices = tag_values_all_distinct(
             child_runs, tag_to_use)
         # download checkpoints for the child runs in the root of the parent
         child_runs_checkpoints_roots: List[Path] = []
         for child in child_runs:
             if child.id == RUN_CONTEXT.id:
                 # We expect to find the file(s) we need in config.checkpoint_folder
                 child_dst = Path(config.checkpoint_folder)
             else:
                 subdir = str(child.tags[tag_to_use]
                              if can_use_split_indices else child.number)
                 if checkpoint_subdir_name:
                     child_dst = root_output_dir / subdir / checkpoint_subdir_name
                 else:
                     child_dst = root_output_dir / subdir
                 download_outputs_from_run(
                     blobs_path=Path(CHECKPOINT_FOLDER),
                     destination=child_dst,
                     run=child)
             child_runs_checkpoints_roots.append(child_dst)
         return RunRecovery(checkpoints_roots=child_runs_checkpoints_roots)
     else:
         return RunRecovery(checkpoints_roots=[root_output_dir])
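# The example above relies on a tag_values_all_distinct helper to decide whether the
# cross-validation split index tag can be used as a folder name. A minimal sketch of what
# such a helper might look like (an assumption, not necessarily the repository's
# implementation): every child run must carry the tag, and no two runs may share a value.
from typing import List
from azureml.core import Run

def tag_values_all_distinct(runs: List[Run], tag: str) -> bool:
    # Missing tags or duplicate values mean the tag cannot be used to name folders.
    values = [run.tags.get(tag) for run in runs]
    return None not in values and len(set(values)) == len(values)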
Example #2
def get_comparison_baselines(outputs_folder: Path, azure_config: AzureConfig,
                             comparison_blob_storage_paths: List[Tuple[str, str]]) -> \
        List[ComparisonBaseline]:
    workspace = azure_config.get_workspace()
    comparison_baselines = []
    for (comparison_name, comparison_path) in comparison_blob_storage_paths:
        # Discard the experiment part of the run rec ID, if any.
        comparison_path = comparison_path.split(":")[-1]
        run_rec_id, blob_path_str = comparison_path.split("/", 1)
        run_rec_id = strip_prefix(run_rec_id, AZUREML_RUN_FOLDER_PREFIX)
        blob_path = Path(strip_prefix(blob_path_str, DEFAULT_AML_UPLOAD_DIR + "/"))
        run = fetch_run(workspace, run_rec_id)
        # We usually find dataset.csv in the same directory as metrics.csv, but we sometimes
        # have to look higher up.
        comparison_dataset_path: Optional[Path] = None
        comparison_metrics_path: Optional[Path] = None
        destination_folder = outputs_folder / run_rec_id / blob_path
        # Look for dataset.csv inside epoch_NNN/Test, epoch_NNN/ and at top level
        for blob_path_parent in step_up_directories(blob_path):
            try:
                comparison_dataset_path = download_outputs_from_run(
                    blob_path_parent / DATASET_CSV_FILE_NAME, destination_folder, run, True)
                break
            except ValueError:
                logging.warning(f"cannot find {DATASET_CSV_FILE_NAME} at {blob_path_parent} in {run_rec_id}")
            except NotADirectoryError:
                logging.warning(f"{blob_path_parent} is not a directory")
                break
        if comparison_dataset_path is None:
            logging.warning(f"cannot find {DATASET_CSV_FILE_NAME} at or above {blob_path} in {run_rec_id}")
        # Look for epoch_NNN/Test/metrics.csv
        try:
            comparison_metrics_path = download_outputs_from_run(
                blob_path / METRICS_FILE_NAME, destination_folder, run, True)
        except ValueError:
            logging.warning(f"cannot find {METRICS_FILE_NAME} at {blob_path} in {run_rec_id}")
        # If both dataset.csv and metrics.csv were downloaded successfully, read their contents and
        # add a tuple to the comparison data.
        if comparison_dataset_path is not None and comparison_metrics_path is not None and \
                comparison_dataset_path.exists() and comparison_metrics_path.exists():
            comparison_baselines.append(ComparisonBaseline(
                comparison_name,
                pd.read_csv(comparison_dataset_path),
                pd.read_csv(comparison_metrics_path),
                run_rec_id))
        else:
            logging.warning(f"could not find comparison data for run {run_rec_id}")
            for key, path in ("dataset", comparison_dataset_path), ("metrics", comparison_metrics_path):
                logging.warning(f"path to {key} data is {path}")
                # noinspection PyUnresolvedReferences
                if path is not None and not path.exists():
                    logging.warning("    ... but it does not exist")
    return comparison_baselines
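# get_comparison_baselines steps up through the ancestors of blob_path when looking for
# dataset.csv. A minimal sketch of what the step_up_directories helper might look like
# (an assumption, not necessarily the repository's implementation): yield the path itself,
# then each parent, ending with the top-level directory ".".
from pathlib import Path
from typing import Iterator

def step_up_directories(path: Path) -> Iterator[Path]:
    while True:
        yield path
        parent = path.parent
        if parent == path:
            # Path(".").parent == Path("."), so stop once the top level has been yielded.
            break
        path = parent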
Example #3
 def download_or_get_local_file(self,
                                run: Optional[Run],
                                blob_to_download: PathOrString,
                                destination: Path,
                                local_src_subdir: Optional[Path] = None) -> Optional[Path]:
     """
     Downloads a file from the results folder of an AzureML run, or copies it from a local results folder.
     Returns the path to the downloaded file if it exists, or None if the file was not found.
     If blob_to_download contains folders, the same folder structure will be created inside the destination folder.
     For example, downloading "foo.txt" to "/c/temp" will create "/c/temp/foo.txt". Downloading "foo/bar.txt"
     to "/c/temp" will create "/c/temp/foo/bar.txt"
     :param blob_to_download: path of data to download within the run
     :param destination: directory to write to
     :param run: The AzureML run to download from.
     :param local_src_subdir: if not None, then if we copy from a local results folder, that folder is
     self.outputs_directory/local_src_subdir/blob_to_download instead of self.outputs_directory/blob_to_download
     :return: The path to the downloaded file, or None if the file was not found.
     """
     blob_path = Path(blob_to_download)
     blob_parent = blob_path.parent
     if blob_parent != Path("."):
         destination = destination / blob_parent
     downloaded_file = destination / blob_path.name
     # If we've already downloaded the data, leave it as it is
     if downloaded_file.exists():
         logging.info(f"Download of '{blob_path}' to '{downloaded_file}: not needed, already exists'")
         return downloaded_file
     logging.info(f"Download of '{blob_path}' to '{downloaded_file}': proceeding")
     # If the provided run is the current run, then there is nothing to download.
     # Just copy the provided path in the outputs directory to the destination.
     if not destination.exists():
         destination.mkdir(parents=True)
     if run is None or Run.get_context().id == run.id or is_parent_run(run) or is_offline_run_context(run):
         if run is None:
             assert self.local_run_results is not None, "Local run results must be set in unit testing"
             local_src = Path(self.local_run_results)
             if self.local_run_result_split_suffix:
                 local_src = local_src / self.local_run_result_split_suffix
         else:
             local_src = Path(self.outputs_directory)
         if local_src_subdir is not None:
             local_src = local_src / local_src_subdir
         local_src = local_src / blob_path
         if local_src.exists():
             logging.info(f"Copying files from {local_src} to {destination}")
             return Path(shutil.copy(local_src, destination))
         return None
     else:
         try:
             return download_outputs_from_run(
                 blobs_path=blob_path,
                 destination=destination,
                 run=run,
                 is_file=True
             )
         except Exception as ex:
             logging.warning(f"File {blob_to_download} not found in output of run {run.id}: {ex}")
             return None
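# Hypothetical usage sketch for the method above: fetch a metrics file either from an
# AzureML run or, when running offline, from the local outputs folder. run_recovery_handler
# is a placeholder for whatever object owns download_or_get_local_file, other_run is a
# previously fetched Run, and the epoch folder name is illustrative only.
metrics_file = run_recovery_handler.download_or_get_local_file(
    run=other_run,
    blob_to_download=Path("epoch_010") / "Test" / "metrics.csv",
    destination=Path("outputs") / "comparison")
if metrics_file is None:
    logging.warning("metrics.csv was not found for the given run")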
Example #4
 def download_checkpoints_from_run(config: DeepLearningConfig,
                                   run: Run) -> RunRecovery:
     """
     Downloads checkpoints of the provided run or, if applicable, its children.
     When downloading from a run that does not have sibling runs, a single folder inside the checkpoints folder
     will be created that contains the downloaded checkpoints.
     When downloading from a run that has sibling runs, the checkpoints for the sibling runs will go into
     folder 'OTHER_RUNS/<cross_validation_split>'
     :param config: Model related configs.
     :param run: Run whose checkpoints should be recovered
     :return: run recovery information
     """
     # TODO antonsc: Clarify how we handle the case of multiple checkpoints being downloaded.
     child_runs: List[Run] = fetch_child_runs(run)
     if child_runs:
         logging.info(f"Run has ID {run.id}, child runs: {', '.join(c.id for c in child_runs)}")
         tag_to_use = 'cross_validation_split_index'
         can_use_split_indices = tag_values_all_distinct(child_runs, tag_to_use)
         # download checkpoints for the child runs in the root of the parent
         child_runs_checkpoints_roots: List[Path] = []
         for child in child_runs:
             if child.id == RUN_CONTEXT.id:
                 # We expect to find the file(s) we need in config.checkpoint_folder
                 child_dst = config.checkpoint_folder
             else:
                 subdir = str(child.tags[tag_to_use] if can_use_split_indices else child.number)
                 child_dst = config.checkpoint_folder / OTHER_RUNS_SUBDIR_NAME / subdir
                 download_outputs_from_run(
                     blobs_path=Path(CHECKPOINT_FOLDER),
                     destination=child_dst,
                     run=child
                 )
             child_runs_checkpoints_roots.append(child_dst)
         return RunRecovery(checkpoints_roots=child_runs_checkpoints_roots)
     else:
         logging.info(f"Run with ID {run.id} has no child runs")
         root_output_dir = config.checkpoint_folder / run.id
         # download checkpoints for the run
         download_outputs_from_run(
             blobs_path=Path(CHECKPOINT_FOLDER),
             destination=root_output_dir,
             run=run
         )
         return RunRecovery(checkpoints_roots=[root_output_dir])
Example #5
    def download_all_checkpoints_from_run(config: DeepLearningConfig,
                                          run: Run) -> RunRecovery:
        """
        Downloads all checkpoints of the provided run: The best checkpoint and the recovery checkpoint.
        A single folder inside the checkpoints folder will be created that contains the downloaded checkpoints.
        :param config: Model related configs.
        :param run: Run whose checkpoints should be recovered
        :return: run recovery information
        """
        if fetch_child_runs(run):
            raise ValueError(
                f"AzureML run {run.id} has child runs, this method does not support those."
            )

        root_output_dir = config.checkpoint_folder / run.id
        download_outputs_from_run(blobs_path=Path(CHECKPOINT_FOLDER),
                                  destination=root_output_dir,
                                  run=run)
        return RunRecovery(checkpoints_roots=[root_output_dir])
Example #6
 def download_best_checkpoints_from_child_runs(config: DeepLearningConfig,
                                               run: Run) -> RunRecovery:
     """
     Downloads the best checkpoints from all child runs of the provided Hyperdrive parent run.
     The checkpoints for the sibling runs will go into folder 'OTHER_RUNS/<cross_validation_split>'
     in the checkpoint folder. There is special treatment for the child run that is equal to the present AzureML
     run: its checkpoints will be read off the checkpoint folder as-is.
     :param config: Model related configs.
     :param run: The Hyperdrive parent run to download from.
     :return: run recovery information
     """
     child_runs: List[Run] = fetch_child_runs(run)
     if not child_runs:
         raise ValueError(
             f"AzureML run {run.id} does not have any child runs.")
     logging.info(
         f"Run {run.id} has {len(child_runs)} child runs: {', '.join(c.id for c in child_runs)}"
     )
     tag_to_use = 'cross_validation_split_index'
     can_use_split_indices = tag_values_all_distinct(child_runs, tag_to_use)
     # download checkpoints for the child runs in the root of the parent
     child_runs_checkpoints_roots: List[Path] = []
     for child in child_runs:
         if child.id == RUN_CONTEXT.id:
             # We expect to find the file(s) we need in config.checkpoint_folder
             child_dst = config.checkpoint_folder
         else:
             subdir = str(child.tags[tag_to_use]
                          if can_use_split_indices else child.number)
             child_dst = config.checkpoint_folder / OTHER_RUNS_SUBDIR_NAME / subdir
             download_outputs_from_run(
                 blobs_path=Path(CHECKPOINT_FOLDER) /
                 BEST_CHECKPOINT_FILE_NAME_WITH_SUFFIX,
                 destination=child_dst,
                 run=child,
                 is_file=True)
         child_runs_checkpoints_roots.append(child_dst)
     return RunRecovery(checkpoints_roots=child_runs_checkpoints_roots)
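# Hypothetical usage sketch: download the best checkpoints of every cross-validation child
# of a HyperDrive parent run. azure_config and config are placeholders for populated config
# objects, the run recovery ID is illustrative, and fetch_run is the helper already used in
# get_comparison_baselines above.
parent_run = fetch_run(azure_config.get_workspace(), "HD_parent_run_id")
run_recovery = download_best_checkpoints_from_child_runs(config, parent_run)
logging.info(f"Downloaded checkpoints for {len(run_recovery.checkpoints_roots)} splits")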
Example #7
    def download_all_checkpoints_from_run(
            config: OutputParams,
            run: Run,
            subfolder: Optional[str] = None) -> RunRecovery:
        """
        Downloads all checkpoints of the provided run inside the checkpoints folder.
        :param config: Model related configs.
        :param run: Run whose checkpoints should be recovered
        :param subfolder: Optional subfolder name. If provided, the checkpoints will be downloaded to
        CHECKPOINT_FOLDER / subfolder. If None, the checkpoints are downloaded to the CHECKPOINT_FOLDER of the
        current run.
        :return: run recovery information
        """
        if fetch_child_runs(run):
            raise ValueError(
                f"AzureML run {run.id} has child runs, this method does not support those."
            )

        destination_folder = config.checkpoint_folder / subfolder if subfolder else config.checkpoint_folder

        download_outputs_from_run(blobs_path=Path(CHECKPOINT_FOLDER),
                                  destination=destination_folder,
                                  run=run)
        time.sleep(60)  # Needed because AML is not fast enough to download
        return RunRecovery(checkpoints_roots=[destination_folder])
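# Hypothetical usage sketch for the variant above: download the checkpoints of a finished
# run into a dedicated subfolder of the current checkpoint folder and pick up the single
# checkpoints root. The subfolder name is illustrative; in the original code base this is
# likely a static method on a checkpoint-handling class, called here as a plain function.
run_recovery = download_all_checkpoints_from_run(config, run, subfolder="recovered_checkpoints")
recovered_checkpoint_dir = run_recovery.checkpoints_roots[0]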
Example #8
def download_pytest_result(
    run: Run, destination_folder: Path = Path.cwd()) -> Path:
    """
    Downloads the pytest result file that is stored in the output folder of the given AzureML run.
    If there is no pytest result file, throw an Exception.
    :param run: The run from which the files should be read.
    :param destination_folder: The folder into which the PyTest result file is downloaded.
    :return: The path (folder and filename) of the downloaded file.
    """
    logging.info(f"Downloading pytest result file: {PYTEST_RESULTS_FILE}")
    try:
        return download_outputs_from_run(PYTEST_RESULTS_FILE,
                                         destination=destination_folder,
                                         run=run,
                                         is_file=True)
    except Exception as ex:
        raise ValueError(
            f"No pytest result file {PYTEST_RESULTS_FILE} was found for run {run.id}"
        ) from ex
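# Hypothetical usage sketch: fetch the pytest results of a completed AzureML run and surface
# a clear error if they are missing. run is a placeholder for a previously fetched Run object.
try:
    pytest_results = download_pytest_result(run, destination_folder=Path("pytest_results"))
    logging.info(f"Pytest results downloaded to {pytest_results}")
except ValueError as ex:
    logging.error(str(ex))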
def load_predictions(run_type: SurfaceDistanceRunType,
                     azure_config: AzureConfig,
                     model_config: SegmentationModelBase,
                     execution_mode: ModelExecutionMode,
                     extended_annotators: List[str],
                     outlier_range: float) -> List[Segmentation]:
    """
    For each run type (IOV or outliers), instantiate a list of predicted Segmentations and return it.
    :param run_type: Either "iov" or "outliers".
    :param azure_config: AzureConfig
    :param model_config: SegmentationModelBase
    :param execution_mode: ModelExecutionMode: either Test, Train or Val.
    :param extended_annotators: List of annotators plus model_name to load segmentations for.
    :param outlier_range: The number of standard deviations below the mean that a Dice score must fall
    to be considered an outlier.
    :return: A list of Segmentation objects.
    """
    predictions = []
    if run_type == SurfaceDistanceRunType.OUTLIERS:
        first_child_run = sd_util.get_first_child_run(azure_config)
        output_dir = sd_util.get_run_output_dir(azure_config, model_config)
        metrics_path = sd_util.get_metrics_path(azure_config, model_config)

        # Load the downloaded metrics CSV as dataframe and determine worst performing outliers for the Test run
        df = load_csv(metrics_path, [
            MetricsFileColumns.Patient.value,
            MetricsFileColumns.Structure.value
        ])
        test_run_df = df[df['mode'] == execution_mode.value]
        worst_performers = get_worst_performing_outliers(
            test_run_df,
            outlier_range,
            MetricsFileColumns.Dice.value,
            max_n_outliers=-50)

        for (subject_id, structure_name, dice_score, _) in worst_performers:
            subject_prefix = sd_util.get_subject_prefix(
                model_config, execution_mode, subject_id)
            # if not already present, download data for subject
            download_outputs_from_run(blobs_path=subject_prefix,
                                      destination=output_dir,
                                      run=first_child_run)

            # check it has been downloaded
            segmentation_path = output_dir / subject_prefix / f"{structure_name}.nii.gz"
            predictions.append(
                Segmentation(structure_name=structure_name,
                             subject_id=subject_id,
                             segmentation_path=segmentation_path,
                             dice_score=float(dice_score)))

    elif run_type == SurfaceDistanceRunType.IOV:
        subject_id = 0
        iov_dir = Path("outputs") / SurfaceDistanceRunType.IOV.value.lower()
        all_structs = model_config.class_and_index_with_background()
        structs_to_plot = [
            struct_name for struct_name in all_structs.keys()
            if struct_name not in ['background', 'external']
        ]
        for annotator in extended_annotators:
            for struct_name in structs_to_plot:
                segmentation_path = iov_dir / f"{struct_name + annotator}.nii.gz"
                if not segmentation_path.is_file():
                    logging.warning(f"No such file {segmentation_path}")
                    continue
                predictions.append(
                    Segmentation(structure_name=struct_name,
                                 subject_id=subject_id,
                                 segmentation_path=segmentation_path,
                                 annotator=annotator))
    return predictions
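# Hypothetical usage sketch: load predicted segmentations for the worst-performing Test-set
# outliers. azure_config and model_config are placeholders for fully populated config objects,
# ModelExecutionMode.TEST is assumed to be the Test-mode member of that enum, and the remaining
# arguments are illustrative values.
segmentations = load_predictions(
    run_type=SurfaceDistanceRunType.OUTLIERS,
    azure_config=azure_config,
    model_config=model_config,
    execution_mode=ModelExecutionMode.TEST,
    extended_annotators=[],
    outlier_range=1.0)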