Beispiel #1
0
    def discover_and_download_checkpoint_from_sibling_runs(self) -> None:
        """
        Download the checkpoints of the sibling runs of a hyperdrive run, so that the results of all
        splits are available locally. The resulting recovery object is stored in `self.run_recovery`.

        :raises NotADirectoryError: If any of the checkpoint roots reported by the recovery object
            is missing or is not a directory.
        """
        self.run_recovery = RunRecovery.download_checkpoints_from_run(self.model_config, self.run_context)
        # Sanity check: stop at the first checkpoint root that is not an existing directory.
        for path in self.run_recovery.checkpoints_roots:
            if path.is_dir():
                continue
            raise NotADirectoryError(f"Does not exist or is not a directory: {path}")
    def create_ensemble_model(self) -> None:
        """
        Run MLRunner once more, after the cross-validation models have been trained, to build an
        ensemble model from them.

        The method downloads the checkpoints of the sibling runs, switches the configs to a
        non-hyperdrive, non-training setup, runs inference and model registration, generates the report,
        and finally uploads the retained ensemble outputs to the parent run (when there is one) before
        removing the local copies.

        :raises NotADirectoryError: If any downloaded checkpoint root is missing or is not a directory.
        """
        # Deferred import: kept local in case of dependency issues in a reduced environment.
        from InnerEye.ML.utils.run_recovery import RunRecovery

        with logging_section("Downloading checkpoints from sibling runs"):
            sibling_checkpoints = RunRecovery.download_checkpoints_from_run(
                self.azure_config,
                self.model_config,
                PARENT_RUN_CONTEXT,
                output_subdir_name=OTHER_RUNS_SUBDIR_NAME)
            # Sanity check: stop at the first checkpoint root that is not an existing directory.
            for path in sibling_checkpoints.checkpoints_roots:
                if path.is_dir():
                    continue
                raise NotADirectoryError(f"Does not exist or is not a directory: {path}")

        # Reconfigure for a single inference-only pass: no hyperdrive, no cross-validation splits, no training.
        self.azure_config.hyperdrive = False
        self.model_config.number_of_cross_validation_splits = 0
        self.model_config.is_train = False
        ml_runner = self.create_ml_runner()
        best_epoch = ml_runner.run_inference_and_register_model(
            sibling_checkpoints,
            model_proc=ModelProcessing.ENSEMBLE_CREATION)

        crossval_dir = self.plot_cross_validation_and_upload_results()
        Runner.generate_report(self.model_config, best_epoch, ModelProcessing.ENSEMBLE_CREATION)
        # CrossValResults should have been uploaded to the parent run, so the local copy is not needed.
        remove_file_or_directory(crossval_dir)

        # OTHER_RUNS under the root only holds copies of files available elsewhere, so it is deleted at
        # the end. Before that, the relevant parts of OTHER_RUNS/ENSEMBLE are uploaded.
        other_runs_dir = self.model_config.outputs_folder / OTHER_RUNS_SUBDIR_NAME
        other_runs_ensemble_dir = other_runs_dir / ENSEMBLE_SPLIT_NAME
        if PARENT_RUN_CONTEXT is not None:
            if not other_runs_ensemble_dir.exists():
                logging.warning(f"Directory not found for upload: {other_runs_ensemble_dir}")
            else:
                # Keep only the baseline Wilcoxon results, the scatterplots and the reports.
                entries_to_keep = (
                    BASELINE_WILCOXON_RESULTS_FILE,
                    SCATTERPLOTS_SUBDIR_NAME,
                    REPORT_HTML,
                    REPORT_IPYNB,
                )
                for entry in other_runs_ensemble_dir.glob("*"):
                    if entry.name in entries_to_keep:
                        continue
                    remove_file_or_directory(entry)
                PARENT_RUN_CONTEXT.upload_folder(
                    name=BASELINE_COMPARISONS_FOLDER,
                    path=str(other_runs_ensemble_dir))
        remove_file_or_directory(other_runs_dir)