Code example #1
0
    def register_model_for_epoch(self,
                                 checkpoint_paths: List[Path],
                                 model_description: str,
                                 model_proc: ModelProcessing) -> None:
        """
        Registers the model in AzureML, using the given set of checkpoints. The AzureML run's tags
        are updated with information about ensemble creation and the parent run ID.
        :param checkpoint_paths: The set of Pytorch checkpoints that should be included.
        :param model_description: A string description of the model, usually containing accuracy numbers.
        :param model_proc: The type of model that is registered (single or ensemble)
        """
        if not checkpoint_paths:
            # Without any checkpoints there is nothing to register.
            logging.warning("Abandoning model registration - no valid checkpoint paths found")
            return

        if not self.model_config.is_offline_run:
            # Running inside AzureML: annotate the run with ensemble / parent-run information.
            cv_split_index = RUN_CONTEXT.get_tags().get(CROSS_VALIDATION_SPLIT_INDEX_TAG_KEY, None)
            if cv_split_index == DEFAULT_CROSS_VALIDATION_SPLIT_INDEX:
                is_ensemble = model_proc == ModelProcessing.ENSEMBLE_CREATION
                update_run_tags(RUN_CONTEXT, {IS_ENSEMBLE_KEY_NAME: is_ensemble})
            elif PARENT_RUN_CONTEXT is not None:
                update_run_tags(RUN_CONTEXT, {PARENT_RUN_ID_KEY_NAME: PARENT_RUN_CONTEXT.id})
        if not isinstance(self.model_config, SegmentationModelBase):
            # Only segmentation models are registered/deployed here.
            logging.info(f"No deployment done for this type of model: {type(self.model_config)}")
            return
        with logging_section(f"Registering {model_proc.value} model"):
            self.register_segmentation_model(
                checkpoint_paths=checkpoint_paths,
                model_description=model_description,
                model_proc=model_proc)
Code example #2
0
    def register_model_for_epoch(self, run_context: Run,
                                 checkpoint_handler: CheckpointHandler,
                                 best_epoch: int, best_epoch_dice: float,
                                 model_proc: ModelProcessing) -> None:
        """
        Registers a segmentation model in AzureML for the given epoch, after updating the run's
        tags with ensemble / parent-run information.
        :param run_context: The AzureML run whose tags are updated and on which the model is registered.
        :param checkpoint_handler: Provides the checkpoint files for the requested epoch.
        :param best_epoch: The epoch whose checkpoints should be registered.
        :param best_epoch_dice: Dice metric for that epoch, forwarded to model registration.
        :param model_proc: The type of model that is registered (single or ensemble).
        """
        recovered = checkpoint_handler.get_checkpoint_from_epoch(epoch=best_epoch)
        if not recovered or not recovered.checkpoint_paths:
            # Without any checkpoints there is nothing to register.
            logging.warning(
                "Abandoning model registration - no valid checkpoint paths found"
            )
            return

        if not self.model_config.is_offline_run:
            # Running inside AzureML: annotate the run with ensemble / parent-run information.
            cv_split_index = run_context.get_tags().get(
                CROSS_VALIDATION_SPLIT_INDEX_TAG_KEY, None)
            if cv_split_index == DEFAULT_CROSS_VALIDATION_SPLIT_INDEX:
                is_ensemble = model_proc == ModelProcessing.ENSEMBLE_CREATION
                update_run_tags(run_context, {IS_ENSEMBLE_KEY_NAME: is_ensemble})
            elif PARENT_RUN_CONTEXT is not None:
                update_run_tags(run_context,
                                {PARENT_RUN_ID_KEY_NAME: PARENT_RUN_CONTEXT.id})
        with logging_section(f"Registering {model_proc.value} model"):
            self.register_segmentation_model(
                run=run_context,
                best_epoch=best_epoch,
                best_epoch_dice=best_epoch_dice,
                checkpoint_paths=recovered.checkpoint_paths,
                model_proc=model_proc)
Code example #3
0
 def register_model_for_epoch(self, run_context: Run,
                              run_recovery: Optional[RunRecovery],
                              best_epoch: int, best_epoch_dice: float,
                              model_proc: ModelProcessing) -> None:
     """
     Registers a segmentation model in AzureML for the given epoch, after updating the run's
     tags with ensemble / parent-run information. Checkpoint files that do not exist on disk
     are skipped; registration is abandoned when no checkpoint remains.
     :param run_context: The AzureML run whose tags are updated and on which the model is registered.
     :param run_recovery: If provided, checkpoints are taken from the recovered run(s); otherwise
     from the current model config.
     :param best_epoch: The epoch whose checkpoints should be registered.
     :param best_epoch_dice: Dice metric for that epoch, forwarded to model registration.
     :param model_proc: The type of model that is registered (single or ensemble).
     """
     if run_recovery:
         checkpoint_paths = run_recovery.get_checkpoint_paths(best_epoch)
     else:
         checkpoint_paths = [self.model_config.get_path_to_checkpoint(best_epoch)]
     if not self.model_config.is_offline_run:
         # Running inside AzureML: annotate the run with ensemble / parent-run information.
         cv_split_index = run_context.get_tags().get(
             CROSS_VALIDATION_SPLIT_INDEX_TAG_KEY, None)
         if cv_split_index == DEFAULT_CROSS_VALIDATION_SPLIT_INDEX:
             is_ensemble = model_proc == ModelProcessing.ENSEMBLE_CREATION
             update_run_tags(run_context, {IS_ENSEMBLE_KEY_NAME: is_ensemble})
         elif PARENT_RUN_CONTEXT is not None:
             update_run_tags(run_context,
                             {PARENT_RUN_ID_KEY_NAME: PARENT_RUN_CONTEXT.id})
     # Discard any checkpoint paths that do not exist - they will make registration fail. This can happen
     # when some child runs fail; it may still be worth registering the model.
     valid_checkpoint_paths = []
     for path in checkpoint_paths:
         if not path.exists():
             logging.warning(
                 f"Discarding non-existent checkpoint path {path}")
             continue
         valid_checkpoint_paths.append(path)
     if not valid_checkpoint_paths:
         # No usable checkpoint survived the filter - nothing to register.
         logging.warning(
             "Abandoning model registration - no valid checkpoint paths found"
         )
         return
     with logging_section(f"Registering {model_proc.value} model"):
         self.register_segmentation_model(
             run=run_context,
             best_epoch=best_epoch,
             best_epoch_dice=best_epoch_dice,
             checkpoint_paths=valid_checkpoint_paths,
             model_proc=model_proc)
Code example #4
0
    def register_segmentation_model(
            self, checkpoint_paths: List[Path], model_description: str,
            model_proc: ModelProcessing
    ) -> Tuple[Optional[Model], Optional[Any]]:
        """
        Registers a new model in the workspace's model registry to be deployed further,
        and creates a model zip for portal deployment (if required). This model is the
        model checkpoint with the highest test accuracy.
        :param checkpoint_paths: Checkpoint paths to use to upload model checkpoints to AML.
        :param model_description: A string description that is added to the deployed model. It would usually contain
        the test set performance and information at which epoch the result was achieved.
        :param model_proc: whether it's a single or ensemble model.
        :returns Tuple element 1: AML model object, or None if no model could be registered.
        Tuple element 2: The result of running the model_deployment_hook, or None if no hook was supplied.
        """
        is_offline_run = is_offline_run_context(RUN_CONTEXT)
        workspace = None
        # Terminate early if this is running outside AzureML, and we can't access the AzureML workspace. This
        # saves time copying around files.
        if is_offline_run:
            try:
                workspace = self.azure_config.get_workspace()
            except Exception:
                logging.warning(
                    "Unable to retrieve AzureML workspace. Was the Azure setup completed?"
                )
                logging.info("No model was registered in AzureML.")
                return None, None
        # The files for the final model can't live in the outputs folder. If they do: when registering the model,
        # the files may not yet be uploaded by hosttools, and that may (or not) cause errors. Hence, place the folder
        # for the final models outside of "outputs", and upload manually.
        model_subfolder = FINAL_MODEL_FOLDER if model_proc == ModelProcessing.DEFAULT else FINAL_ENSEMBLE_MODEL_FOLDER
        final_model_folder = self.model_config.file_system_config.run_folder / model_subfolder
        # Copy all code from project and InnerEye into the model folder, and copy over checkpoints.
        # This increases the size of the data stored for the run. The other option would be to store all checkpoints
        # right in the final model folder - however, then that would also contain any other checkpoints that the model
        # produced or downloaded for recovery, bloating the final model file.
        self.copy_child_paths_to_folder(final_model_folder, checkpoint_paths)
        logging.info("Registering the model on the workspace.")
        if is_offline_run:
            # Offline: register directly against the workspace obtained above, and record who built it.
            model_description = model_description + f"\nModel built by {self.azure_config.build_user} outside AzureML"
            model = Model.register(workspace=workspace,
                                   model_name=self.model_config.model_name,
                                   model_path=str(final_model_folder),
                                   description=model_description)
        else:
            # This is the path under which AzureML will know the files: Either "final_model" or "final_ensemble_model"
            artifacts_path = model_subfolder
            # If the present run is a child run of a Hyperdrive parent run, and we are building an ensemble model,
            # register the model on the parent run.
            if PARENT_RUN_CONTEXT and model_proc == ModelProcessing.ENSEMBLE_CREATION:
                run_to_register_on = PARENT_RUN_CONTEXT
                logging.info(
                    f"Registering the model on the parent run {run_to_register_on.id}"
                )
            else:
                run_to_register_on = RUN_CONTEXT
                logging.info(
                    f"Registering the model on the current run {run_to_register_on.id}"
                )
            logging.info(
                f"Uploading files in {final_model_folder} with prefix '{artifacts_path}'"
            )
            # upload_folder expects a path relative to the current working directory.
            final_model_folder_relative = final_model_folder.relative_to(
                Path.cwd())
            run_to_register_on.upload_folder(
                name=artifacts_path, path=str(final_model_folder_relative))
            # When registering the model on the run, we need to provide a relative path inside of the run's output
            # folder in `model_path`
            model = run_to_register_on.register_model(
                model_name=self.model_config.model_name,
                model_path=artifacts_path,
                tags=RUN_CONTEXT.get_tags(),
                description=model_description)

        deployment_result = None
        logging.info(
            f"Registered {model_proc.value} model: {model.name}, with Id: {model.id}"
        )
        # update the run's tags with the registered model information
        if not is_offline_run:
            update_run_tags(RUN_CONTEXT, {MODEL_ID_KEY_NAME: model.id})
        # create a version of the model for deployment if the hook is provided
        if self.model_deployment_hook is not None:
            assert isinstance(self.model_config, SegmentationModelBase)
            deployment_result = self.model_deployment_hook(
                self.model_config, self.azure_config, model, model_proc)
        return model, deployment_result
Code example #5
0
    def register_segmentation_model(self,
                                    best_epoch: int,
                                    best_epoch_dice: float,
                                    checkpoint_paths: List[Path],
                                    model_proc: ModelProcessing,
                                    run: Optional[Run] = None,
                                    workspace: Optional[Workspace] = None,
                                    tags: Optional[Dict[str, str]] = None) -> \
            Tuple[Optional[Model], Optional[Path], Any]:
        """
        Registers a new model in the workspace's model registry to be deployed further,
        and creates a model zip for portal deployment (if required). This model is the
        model checkpoint with the highest test accuracy.
        :param best_epoch: The training epoch that resulted in the highest validation score.
        :param best_epoch_dice: Dice metric for the best epoch
        :param checkpoint_paths: Checkpoint paths to use to upload model checkpoints to AML.
        :param model_proc: whether it's a single or ensemble model.
        :param run: If provided then the run's workspace and tags will be used to register the model.
        :param workspace: If provided, then this workspace will be used to register the model instead of the
        workspace associated with the provided run.
        :param tags: If provided, then these will be used instead of the tags found in the provided run.
        :returns AML model object, the path to the specially-deployed model if any, and a further object
        relating to model deployment; if model_deployment_hook is None, the last two are also None.
        However if a model cannot be registered because the run is an _OfflineRun, or the model_config is not
        for a segmentation model, None is returned instead of a model.
        :raises ValueError: If both or neither of `run` and `workspace` are provided.
        """
        if not isinstance(self.model_config, SegmentationModelBase):
            logging.warning("Non-segmentation models cannot be registered")
            return None, None, None
        # Exactly one of `run` / `workspace` must be supplied.
        if (run is None) == (workspace is None):
            raise ValueError(
                "Either a run or a workspace must be provided but not both")
        elif run:
            # An _OfflineRun has no `experiment` attribute, so this guards against offline runs.
            if not hasattr(run, 'experiment'):
                logging.warning(
                    "Not registering a model, because the run has no associated experiment"
                )
                return None, None, None
            workspace = run.experiment.workspace
            tags = run.get_tags()

        # Checkpoint paths are stored relative to the project root in the inference config.
        relative_checkpoint_paths = [
            x.relative_to(self.project_root) if x.is_absolute() else x
            for x in checkpoint_paths
        ]
        model_inference_config = ModelInferenceConfig(
            model_name=self.model_config.model_name,
            structure_names=self.model_config.ground_truth_ids_display_names,
            colours=self.model_config.colours,
            fill_holes=self.model_config.fill_holes,
            model_configs_namespace=self.model_config.__class__.__module__,
            checkpoint_paths=list(map(str, relative_checkpoint_paths)))
        full_path_to_config = self.project_root / fixed_paths.MODEL_INFERENCE_JSON_FILE_NAME
        full_path_to_config.write_text(model_inference_config.to_json(),
                                       encoding='utf-8')  # type: ignore
        relative_child_paths = self.get_child_paths(checkpoint_paths)

        # Add experiment and run ID to tags
        if run is not None:
            tags = self.tags_with_run_information(run, tags)
        model = Model.register(
            workspace=workspace,
            model_path=str(self.project_root),
            child_paths=relative_child_paths,
            model_name=self.model_config.model_name,
            tags=tags,
            description="Best epoch: {}, Accuracy : {}".format(
                best_epoch, best_epoch_dice))
        logging.info(
            f"Registered {model_proc.value} model: {model.name}, with Id: {model.id}"
        )

        # update the run's tags with the registered model information
        # NOTE(review): `run` may be None here when only a workspace was provided; if the config is not
        # an offline run this call would then fail - confirm that workspace-only registration happens
        # only for offline runs.
        if not self.model_config.is_offline_run:
            update_run_tags(run, {MODEL_ID_KEY_NAME: model.id})

        # create a version of the model for deployment if the hook is provided
        if self.model_deployment_hook is not None:
            assert isinstance(self.model_config, SegmentationModelBase)
            deployment_model_path, deployment_model_spec = self.model_deployment_hook(
                self.model_config, self.azure_config, model, model_proc)
            return model, deployment_model_path, deployment_model_spec
        return model, None, None