Example #1
    def copy_child_paths_to_folder(self, model_folder: Path,
                                   checkpoint_paths: List[Path]) -> None:
        """
        Gets the files that are required to register a model for inference. The necessary files are copied from
        the current folder structure into the given temporary folder.
        The folder will contain all source code in the InnerEye folder, possibly additional source code from the
        extra_code_directory, and all checkpoints in a newly created "checkpoints" folder inside the model.
        :param model_folder: The folder into which all files should be copied.
        :param checkpoint_paths: A list with absolute paths to checkpoint files. They are expected to be
        inside of the model's checkpoint folder.
        """
        def copy_folder(source_folder: Path,
                        destination_folder: str = "") -> None:
            logging.info(f"Copying folder for registration: {source_folder}")
            destination_folder = destination_folder or source_folder.name
            shutil.copytree(str(source_folder),
                            str(model_folder / destination_folder),
                            ignore=shutil.ignore_patterns('*.pyc'))

        def copy_file(source: Path, destination_file: str) -> None:
            logging.info(
                f"Copying file for registration: {source} to {destination_file}"
            )
            destination = model_folder / destination_file
            if destination.is_file():
                # This could happen if there is a score.py inside the InnerEye package and also inside the calling
                # project. The latter takes precedence.
                logging.warning(
                    f"Overwriting existing {source.name} with {source}")
            destination.parent.mkdir(parents=True, exist_ok=True)
            shutil.copy(str(source), str(destination))

        relative_checkpoint_paths = []
        for checkpoint in checkpoint_paths:
            if checkpoint.is_absolute():
                try:
                    # Checkpoints live in a folder structure in the checkpoint folder. There can be multiple of
                    # them, with identical names, coming from an ensemble run. Hence, preserve their folder structure.
                    checkpoint_relative = checkpoint.relative_to(
                        self.model_config.checkpoint_folder)
                except ValueError:
                    raise ValueError(
                        f"Checkpoint file {checkpoint} was expected to be in a subfolder of "
                        f"{self.model_config.checkpoint_folder}")
                # Checkpoints go into a newly created folder "checkpoints" inside of the model folder
                relative_checkpoint_paths.append(
                    str(Path(CHECKPOINT_FOLDER) / checkpoint_relative))
            else:
                raise ValueError(
                    f"Expected an absolute path to a checkpoint file, but got: {checkpoint}"
                )
        model_folder.mkdir(parents=True, exist_ok=True)
        model_inference_config = ModelInferenceConfig(
            model_name=self.model_config.model_name,
            structure_names=self.model_config.ground_truth_ids_display_names,
            colours=self.model_config.colours,
            fill_holes=self.model_config.fill_holes,
            model_configs_namespace=self.model_config.__class__.__module__,
            checkpoint_paths=relative_checkpoint_paths)
        # Inference configuration must live in the root folder of the registered model
        full_path_to_config = model_folder / fixed_paths.MODEL_INFERENCE_JSON_FILE_NAME
        full_path_to_config.write_text(model_inference_config.to_json(),
                                       encoding='utf-8')  # type: ignore
        # Merge the conda files into one merged environment file at the root of the model
        merged_conda_file = model_folder / fixed_paths.ENVIRONMENT_YAML_FILE_NAME
        merge_conda_files(get_all_environment_files(self.project_root),
                          result_file=merged_conda_file)
        # InnerEye package: This can be either in Python's package folder, or a plain folder. In both cases,
        # we can identify it by going up the folder structure from a known file (repository_root does exactly that)
        repository_root = fixed_paths.repository_root_directory()
        copy_folder(repository_root / INNEREYE_PACKAGE_NAME)
        # Extra code directory is expected to be relative to the project root folder.
        if self.azure_config.extra_code_directory:
            extra_code_folder = self.project_root / self.azure_config.extra_code_directory
            if extra_code_folder.is_dir():
                copy_folder(extra_code_folder)
            else:
                logging.warning(
                    f"The `extra_code_directory` is set to '{self.azure_config.extra_code_directory}', "
                    "but this folder does not exist in the project root folder."
                )
        # All files at project root should be copied as-is. Those should be essential things like score.py that
        # are needed for inference to run. First try to find them at repository root (but they might not be there
        # if InnerEye is used as a package), then at project root.
        files_to_copy = list(repository_root.glob("*.py"))
        if repository_root != self.project_root:
            files_to_copy.extend(self.project_root.glob("*.py"))
        for f in files_to_copy:
            copy_file(f, destination_file=f.name)
        for (checkpoint_source,
             checkpoint_destination) in zip(checkpoint_paths,
                                            relative_checkpoint_paths):
            if checkpoint_source.is_file():
                copy_file(checkpoint_source,
                          destination_file=str(checkpoint_destination))
            else:
                raise ValueError(
                    f"Checkpoint file {checkpoint_source} does not exist")
Example #2
    def register_segmentation_model(self,
                                    best_epoch: int,
                                    best_epoch_dice: float,
                                    checkpoint_paths: List[Path],
                                    model_proc: ModelProcessing,
                                    run: Optional[Run] = None,
                                    workspace: Optional[Workspace] = None,
                                    tags: Optional[Dict[str, str]] = None) -> \
            Tuple[Optional[Model], Optional[Path], Any]:
        """
        Registers a new model in the workspace's model registry to be deployed further,
        and creates a model zip for portal deployment (if required). This model, is the
        model checkpoint with the highest test accuracy.
        :param best_epoch: The training epoch that resulted in the highest validation score.
        :param best_epoch_dice: Dice metric for the best epoch
        :param checkpoint_paths: Checkpoint paths to use to upload model checkpoints to AML.
        :param model_proc: whether it's a single or ensemble model.
        :param run: If provided then the run's workspace and tags will be used to register the model.
        :param workspace: If provided, then this workspace will be used to register the model instead of the
        workspace associated with the provided run.
        :param tags: If provided, then these will be used instead of the tags found in the provided run.
        :returns AML model object, the path to the specially-deployed model if any, and a further object
        relating to model deployment; if model_deployment_hook is None, the last two are also None.
        However if a model cannot be registered because the run is an _OfflineRun, or the model_config is not
        for a segmentation model, None is returned instead of a model.
        """
        if not isinstance(self.model_config, SegmentationModelBase):
            logging.warning("Non-segmentation models cannot be registered")
            return None, None, None
        if (run is None) == (workspace is None):
            raise ValueError(
                "Either a run or a workspace must be provided but not both")
        elif run:
            if not hasattr(run, 'experiment'):
                logging.warning(
                    "Not registering a model, because the run has no associated experiment"
                )
                return None, None, None
            workspace = run.experiment.workspace
            tags = run.get_tags()

        relative_checkpoint_paths = [
            x.relative_to(self.project_root) if x.is_absolute() else x
            for x in checkpoint_paths
        ]
        model_inference_config = ModelInferenceConfig(
            model_name=self.model_config.model_name,
            structure_names=self.model_config.ground_truth_ids_display_names,
            colours=self.model_config.colours,
            fill_holes=self.model_config.fill_holes,
            model_configs_namespace=self.model_config.__class__.__module__,
            checkpoint_paths=list(map(str, relative_checkpoint_paths)))
        full_path_to_config = self.project_root / fixed_paths.MODEL_INFERENCE_JSON_FILE_NAME
        full_path_to_config.write_text(model_inference_config.to_json(),
                                       encoding='utf-8')  # type: ignore
        relative_child_paths = self.get_child_paths(checkpoint_paths)

        # Add experiment and run ID to tags
        if run is not None:
            tags = self.tags_with_run_information(run, tags)
        model = Model.register(
            workspace=workspace,
            model_path=str(self.project_root),
            child_paths=relative_child_paths,
            model_name=self.model_config.model_name,
            tags=tags,
            description="Best epoch: {}, Accuracy : {}".format(
                best_epoch, best_epoch_dice))
        logging.info(
            f"Registered {model_proc.value} model: {model.name}, with Id: {model.id}"
        )

        # update the run's tags with the registered model information
        if not self.model_config.is_offline_run:
            update_run_tags(run, {MODEL_ID_KEY_NAME: model.id})

        # create a version of the model for deployment if the hook is provided
        if self.model_deployment_hook is not None:
            assert isinstance(self.model_config, SegmentationModelBase)
            deployment_model_path, deployment_model_spec = self.model_deployment_hook(
                self.model_config, self.azure_config, model, model_proc)
            return model, deployment_model_path, deployment_model_spec
        return model, None, None
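
A similarly hedged sketch of how the registration method above might be called. The runner, workspace, checkpoint list, and the ModelProcessing member used here are assumptions for illustration; only the parameter names and the three-element return value come from the snippet above.

# Hypothetical usage sketch: `runner`, `workspace` and `best_checkpoints` are assumptions.
model, deployment_path, deployment_spec = runner.register_segmentation_model(
    best_epoch=12,
    best_epoch_dice=0.85,
    checkpoint_paths=best_checkpoints,
    model_proc=ModelProcessing.DEFAULT,  # assumed enum member; an ensemble run would pass a different value
    workspace=workspace,                 # exactly one of `run` or `workspace` may be given
    tags={"source": "usage-sketch"})
if model is None:
    # Returned for offline runs or non-segmentation model configs.
    print("Model was not registered")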