def test_merge_conda(test_output_dirs: OutputFolderForTests) -> None:
    """
    Checks that merging two Conda environment files yields the union of their channels,
    Conda packages and pip packages, both when writing a merged file and when building
    the merged dependencies object.
    """
    # NOTE(review): the line breaks and indentation inside the YAML literals below were
    # reconstructed from a whitespace-collapsed source - confirm them against the repository.
    first_env = """
channels:
  - defaults
  - pytorch
dependencies:
  - conda1=1.0
  - conda2=2.0
  - conda_both=3.0
  - pip:
      - azureml-sdk==1.7.0
      - foo==1.0
"""
    second_env = """
channels:
  - defaults
dependencies:
  - conda1=1.1
  - conda_both=3.0
  - pip:
      - azureml-sdk==1.6.0
      - bar==2.0
"""
    output_root = test_output_dirs.root_dir
    files = []
    for file_name, contents in (("env1.yml", first_env), ("env2.yml", second_env)):
        env_file = output_root / file_name
        env_file.write_text(contents)
        files.append(env_file)
    # Spurious test failures on Linux build agents, saying that they can't read the file. Wait a bit.
    time.sleep(1)

    merged_file = output_root / "merged.yml"
    merge_conda_files(files, merged_file)
    expected_merged = """channels:
- defaults
- pytorch
dependencies:
- conda1=1.0
- conda1=1.1
- conda2=2.0
- conda_both=3.0
- pip:
  - azureml-sdk==1.6.0
  - azureml-sdk==1.7.0
  - bar==2.0
  - foo==1.0
"""
    merged_lines = merged_file.read_text().splitlines()
    assert merged_lines == expected_merged.splitlines()

    conda_dep = merge_conda_dependencies(files)
    # The channels of both files are combined.
    assert list(conda_dep.conda_channels) == ["defaults", "pytorch"]
    # No conflict resolution takes place: when the files disagree on a version, both entries are kept.
    expected_conda_packages = ["conda1=1.0", "conda1=1.1", "conda2=2.0", "conda_both=3.0"]
    assert list(conda_dep.conda_packages) == expected_conda_packages
    expected_pip_packages = ["azureml-sdk==1.6.0", "azureml-sdk==1.7.0", "bar==2.0", "foo==1.0"]
    assert list(conda_dep.pip_packages) == expected_pip_packages
def copy_child_paths_to_folder(self, model_folder: Path, checkpoint_paths: List[Path]) -> None:
    """
    Assembles in `model_folder` all files that are needed to register a model for inference:
    the inference configuration JSON, a merged Conda environment file, the folder named
    INNEREYE_PACKAGE_NAME from the repository root, the optional extra code directory, all
    `*.py` files at repository root and project root, and the given checkpoints (placed below
    a CHECKPOINT_FOLDER sub-folder, keeping their structure relative to the model's checkpoint folder).

    :param model_folder: The folder into which all files should be copied. It is created if missing.
    :param checkpoint_paths: Absolute paths to checkpoint files. Each must be located inside of
    the model's checkpoint folder.
    :raises ValueError: If a checkpoint path is not absolute, is not located inside of the model's
    checkpoint folder, or does not point to an existing file.
    """

    def registered_checkpoint_path(checkpoint: Path) -> str:
        # Maps an absolute checkpoint path to its location (relative to the model folder) after registration.
        if not checkpoint.is_absolute():
            raise ValueError(
                f"Expected an absolute path to a checkpoint file, but got: {checkpoint}"
            )
        try:
            # An ensemble run can produce several checkpoints that share a file name and only differ in
            # the folder they live in. Keeping the path relative to the checkpoint folder avoids clashes.
            inside_checkpoint_folder = checkpoint.relative_to(self.model_config.checkpoint_folder)
        except ValueError:
            raise ValueError(
                f"Checkpoint file {checkpoint} was expected to be in a subfolder of "
                f"{self.model_config.checkpoint_folder}")
        # All checkpoints end up below the CHECKPOINT_FOLDER folder of the registered model.
        return str(Path(CHECKPOINT_FOLDER) / inside_checkpoint_folder)

    def copy_tree(source_folder: Path, destination_folder: str = "") -> None:
        # Copies a whole folder into the model folder, leaving out compiled Python files.
        logging.info(f"Copying folder for registration: {source_folder}")
        target_name = destination_folder if destination_folder else source_folder.name
        shutil.copytree(str(source_folder),
                        str(model_folder / target_name),
                        ignore=shutil.ignore_patterns('*.pyc'))

    def copy_single_file(source: Path, destination_file: str) -> None:
        # Copies one file to a location given relative to the model folder, creating parent folders.
        logging.info(
            f"Copying file for registration: {source} to {destination_file}"
        )
        target = model_folder / destination_file
        if target.is_file():
            # Can occur when a file like score.py exists both in the InnerEye package and in the calling
            # project. The file that is copied last (the project's one) wins.
            logging.warning(
                f"Overwriting existing {source.name} with {source}")
        target.parent.mkdir(parents=True, exist_ok=True)
        shutil.copy(str(source), str(target))

    # Validate all checkpoint paths up front, before anything is written to disk.
    relative_checkpoint_paths = [registered_checkpoint_path(checkpoint) for checkpoint in checkpoint_paths]

    model_folder.mkdir(parents=True, exist_ok=True)
    config = self.model_config
    model_inference_config = ModelInferenceConfig(
        model_name=config.model_name,
        structure_names=config.ground_truth_ids_display_names,
        colours=config.colours,
        fill_holes=config.fill_holes,
        model_configs_namespace=config.__class__.__module__,
        checkpoint_paths=relative_checkpoint_paths)
    # The inference configuration is written to the root folder of the registered model.
    inference_json = model_inference_config.to_json()
    full_path_to_config = model_folder / fixed_paths.MODEL_INFERENCE_JSON_FILE_NAME
    full_path_to_config.write_text(inference_json, encoding='utf-8')  # type: ignore

    # A single merged Conda environment file goes to the root of the model folder.
    merged_conda_file = model_folder / fixed_paths.ENVIRONMENT_YAML_FILE_NAME
    merge_conda_files(get_all_environment_files(self.project_root), result_file=merged_conda_file)

    # The InnerEye package is located relative to the repository root, which is found by walking up
    # from a known file. That works for a pip-installed package as well as for a plain source folder.
    repository_root = fixed_paths.repository_root_directory()
    copy_tree(repository_root / INNEREYE_PACKAGE_NAME)

    # The extra code directory is given relative to the project root folder.
    extra_code_directory = self.azure_config.extra_code_directory
    if extra_code_directory:
        extra_code_folder = self.project_root / extra_code_directory
        if not extra_code_folder.is_dir():
            logging.warning(
                f"The `extra_code_directory` is set to '{extra_code_directory}', "
                "but this folder does not exist in the project root folder."
            )
        else:
            copy_tree(extra_code_folder)

    # Python files at top level (for example score.py) are copied unchanged. Repository root comes first,
    # where they may be absent if InnerEye is consumed as a package, then the project root so that
    # project files take precedence.
    top_level_scripts = list(repository_root.glob("*.py"))
    if repository_root != self.project_root:
        top_level_scripts += self.project_root.glob("*.py")
    for script in top_level_scripts:
        copy_single_file(script, destination_file=script.name)

    # Finally copy the checkpoints themselves to their registered locations.
    for checkpoint_source, checkpoint_destination in zip(checkpoint_paths, relative_checkpoint_paths):
        if not checkpoint_source.is_file():
            raise ValueError(
                f"Checkpoint file {checkpoint_source} does not exist")
        copy_single_file(checkpoint_source, destination_file=str(checkpoint_destination))