def register_model(self,
                       checkpoint_paths: List[Path],
                       model_description: str,
                       model_proc: ModelProcessing) -> None:
        """
        Registers the model in AzureML, using the given set of checkpoints. When running inside
        AzureML, the run's tags are updated with information about ensemble creation and the
        parent run ID.
        :param checkpoint_paths: The set of Pytorch checkpoints that should be included.
        :param model_description: A string description of the model, usually containing accuracy numbers.
        :param model_proc: The type of model that is registered (single or ensemble)
        """
        if not checkpoint_paths:
            # Without at least one checkpoint there is nothing to register.
            logging.warning("Abandoning model registration - no valid checkpoint paths found")
            return

        if not self.model_config.is_offline_run:
            # Running inside AzureML: record ensemble/parent-run information as run tags.
            current_split = RUN_CONTEXT.get_tags().get(CROSS_VALIDATION_SPLIT_INDEX_TAG_KEY, None)
            if current_split == DEFAULT_CROSS_VALIDATION_SPLIT_INDEX:
                is_ensemble = model_proc == ModelProcessing.ENSEMBLE_CREATION
                RUN_CONTEXT.tag(IS_ENSEMBLE_KEY_NAME, str(is_ensemble))
            elif PARENT_RUN_CONTEXT is not None:
                RUN_CONTEXT.tag(PARENT_RUN_ID_KEY_NAME, str(PARENT_RUN_CONTEXT.id))

        if not isinstance(self.model_config, SegmentationModelBase):
            # Only segmentation models are deployable; anything else is skipped.
            logging.info(f"No deployment done for this type of model: {type(self.model_config)}")
            return
        with logging_section(f"Registering {model_proc.value} model"):
            self.register_segmentation_model(checkpoint_paths=checkpoint_paths,
                                             model_description=model_description,
                                             model_proc=model_proc)
# Example #2
 def print_git_tags(self) -> None:
     """
     Prints all the tags that describe the git repository status, so that a log file alone
     can answer the question "which code version was used".
     """
     git_tags = get_git_tags(self.azure_config)
     if is_offline_run_context(RUN_CONTEXT):
         # On a VM outside AzureML: the git information read from the current repository is printed directly.
         tags_to_print = git_tags
     else:
         # In AzureML: git information was copied into the run tags at submission time. Restrict
         # the output to the git-related subset of all run tags.
         all_run_tags = RUN_CONTEXT.get_tags()
         tags_to_print = {name: all_run_tags[name] for name in all_run_tags if name in git_tags}
     logging.info("Git repository information:")
     for key, value in tags_to_print.items():
         logging.info(f"    {key:20}: {value}")
# Example #3
    def register_segmentation_model(
            self, checkpoint_paths: List[Path], model_description: str,
            model_proc: ModelProcessing
    ) -> Tuple[Optional[Model], Optional[Any]]:
        """
        Registers a new model in the workspace's model registry to be deployed further,
        and creates a model zip for portal deployment (if required). This model is the
        model checkpoint with the highest test accuracy.
        :param checkpoint_paths: Checkpoint paths to use to upload model checkpoints to AML.
        :param model_description: A string description that is added to the deployed model. It would usually contain
        the test set performance and information at which epoch the result was achieved.
        :param model_proc: whether it's a single or ensemble model.
        :returns Tuple element 1: AML model object, or None if no model could be registered.
        Tuple element 2: The result of running the model_deployment_hook, or None if no hook was supplied.
        """
        is_offline_run = is_offline_run_context(RUN_CONTEXT)
        workspace = None
        # Terminate early if this is running outside AzureML, and we can't access the AzureML workspace. This
        # saves time copying around files.
        if is_offline_run:
            try:
                workspace = self.azure_config.get_workspace()
            except Exception:
                # Best effort: without a workspace there is nowhere to register, so return (None, None)
                # rather than failing the whole run.
                logging.warning(
                    "Unable to retrieve AzureML workspace. Was the Azure setup completed?"
                )
                logging.info("No model was registered in AzureML.")
                return None, None
        # The files for the final model can't live in the outputs folder. If they do: when registering the model,
        # the files may not yet uploaded by hosttools, and that may (or not) cause errors. Hence, place the folder
        # for the final models outside of "outputs", and upload manually.
        model_subfolder = FINAL_MODEL_FOLDER if model_proc == ModelProcessing.DEFAULT else FINAL_ENSEMBLE_MODEL_FOLDER
        final_model_folder = self.model_config.file_system_config.run_folder / model_subfolder
        # Copy all code from project and InnerEye into the model folder, and copy over checkpoints.
        # This increases the size of the data stored for the run. The other option would be to store all checkpoints
        # right in the final model folder - however, then that would also contain any other checkpoints that the model
        # produced or downloaded for recovery, bloating the final model file.
        self.copy_child_paths_to_folder(final_model_folder, checkpoint_paths)
        logging.info("Registering the model on the workspace.")
        if is_offline_run:
            model_description = model_description + f"\nModel built by {self.azure_config.build_user} outside AzureML"
            model = Model.register(workspace=workspace,
                                   model_name=self.model_config.model_name,
                                   model_path=str(final_model_folder),
                                   description=model_description)
        else:
            # This is the path under which AzureML will know the files: Either "final_model" or "final_ensemble_model"
            artifacts_path = model_subfolder
            # If the present run is a child run of a Hyperdrive parent run, and we are building an ensemble model,
            # register the model on the parent run.
            if PARENT_RUN_CONTEXT and model_proc == ModelProcessing.ENSEMBLE_CREATION:
                run_to_register_on = PARENT_RUN_CONTEXT
                logging.info(
                    f"Registering the model on the parent run {run_to_register_on.id}"
                )
            else:
                run_to_register_on = RUN_CONTEXT
                logging.info(
                    f"Registering the model on the current run {run_to_register_on.id}"
                )
            logging.info(
                f"Uploading files in {final_model_folder} with prefix '{artifacts_path}'"
            )
            # upload_folder expects a path relative to the current working directory.
            final_model_folder_relative = final_model_folder.relative_to(
                Path.cwd())
            run_to_register_on.upload_folder(
                name=artifacts_path, path=str(final_model_folder_relative))
            # When registering the model on the run, we need to provide a relative path inside of the run's output
            # folder in `model_path`
            model = run_to_register_on.register_model(
                model_name=self.model_config.model_name,
                model_path=artifacts_path,
                tags=RUN_CONTEXT.get_tags(),
                description=model_description)

        deployment_result = None
        logging.info(
            f"Registered {model_proc.value} model: {model.name}, with Id: {model.id}"
        )
        # update the run's tags with the registered model information
        if not is_offline_run:
            update_run_tags(RUN_CONTEXT, {MODEL_ID_KEY_NAME: model.id})
        # create a version of the model for deployment if the hook is provided
        if self.model_deployment_hook is not None:
            # The hook is only defined for segmentation models; fail fast otherwise.
            assert isinstance(self.model_config, SegmentationModelBase)
            deployment_result = self.model_deployment_hook(
                self.model_config, self.azure_config, model, model_proc)
        return model, deployment_result
    def register_segmentation_model(self,
                                    checkpoint_paths: List[Path],
                                    model_description: str,
                                    model_proc: ModelProcessing) -> Tuple[Model, Any]:
        """
        Registers a new model in the workspace's model registry to be deployed further,
        and creates a model zip for portal deployment (if required).
        :param checkpoint_paths: Checkpoint paths to use to upload model checkpoints to AML.
        :param model_description: A string description that is added to the deployed model. It would usually contain
        the test set performance and information at which epoch the result was achieved.
        :param model_proc: whether it's a single or ensemble model.
        :returns Tuple element 1: The registered AML model object.
        Tuple element 2: The result of running the model_deployment_hook, or None if no hook was supplied.
        """
        # The files for the final model can't live in the outputs folder. If they do: when registering the model,
        # the files may not yet uploaded by hosttools, and that may (or not) cause errors. Hence, place the folder
        # for the final models outside of "outputs", and upload manually.
        model_subfolder = FINAL_MODEL_FOLDER if model_proc == ModelProcessing.DEFAULT else FINAL_ENSEMBLE_MODEL_FOLDER
        # This is the path under which AzureML will know the files: Either "final_model" or "final_ensemble_model"
        artifacts_path = model_subfolder
        final_model_folder = self.model_config.file_system_config.run_folder / model_subfolder
        # Copy all code from project and InnerEye into the model folder, and copy over checkpoints.
        # This increases the size of the data stored for the run. The other option would be to store all checkpoints
        # right in the final model folder - however, then that would also contain any other checkpoints that the model
        # produced or downloaded for recovery, bloating the final model file.
        self.copy_child_paths_to_folder(final_model_folder, checkpoint_paths)
        # If the present run is a child run of a Hyperdrive parent run, and we are building an ensemble model,
        # register the model on the parent run.
        if PARENT_RUN_CONTEXT and model_proc == ModelProcessing.ENSEMBLE_CREATION:
            run_to_register_on = PARENT_RUN_CONTEXT
            logging.info(f"Registering the model on the parent run {run_to_register_on.id}")
        else:
            run_to_register_on = RUN_CONTEXT
            logging.info(f"Registering the model on the current run {run_to_register_on.id}")
        logging.info(f"Uploading files in {final_model_folder} with prefix '{artifacts_path}'")
        # upload_folder expects a path relative to the current working directory.
        final_model_folder_relative = final_model_folder.relative_to(Path.cwd())
        run_to_register_on.upload_folder(name=artifacts_path, path=str(final_model_folder_relative))
        # When registering the model on the run, we need to provide a relative path inside of the run's output
        # folder in `model_path`
        model = run_to_register_on.register_model(
            model_name=self.model_config.model_name,
            model_path=artifacts_path,
            tags=RUN_CONTEXT.get_tags(),
            description=model_description
        )
        # Add the name of the Python environment as a model tag, because we need it when running inference
        # on the model. We could add that as an immutable property, but with tags we have the option to modify
        # to a custom environment later.
        python_environment = RUN_CONTEXT.get_environment()
        # Bug fix: the closing quote of the environment version was missing in the assertion message,
        # which made failure output read like '1.2 instead of '1.2'.
        assert python_environment.version == ENVIRONMENT_VERSION, \
            f"Expected all Python environments to have version '{ENVIRONMENT_VERSION}', but got: " \
            f"'{python_environment.version}'"
        model.add_tags({PYTHON_ENVIRONMENT_NAME: python_environment.name})
        # update the run's tags with the registered model information
        run_to_register_on.tag(MODEL_ID_KEY_NAME, model.id)

        deployment_result = None
        logging.info(f"Registered {model_proc.value} model: {model.name}, with Id: {model.id}")
        # create a version of the model for deployment if the hook is provided
        if self.model_deployment_hook is not None:
            # The hook is only defined for segmentation models; fail fast otherwise.
            assert isinstance(self.model_config, SegmentationModelBase)
            deployment_result = self.model_deployment_hook(
                self.model_config, self.azure_config, model, model_proc)
        return model, deployment_result