def update_azure_config(self, azure_config: AzureConfig) -> None:
    # Override parameter with different name
    azure_config.subscription_id = self.container_subscription_id
    # Override parameter with clashing name
    azure_config.tenant_id = self.tenant_id
    # Override with hard-coded value
    azure_config.experiment_name = "hardcoded-experiment-name"
def test_download_checkpoints(test_output_dirs: OutputFolderForTests, is_ensemble: bool,
                              runner_config: AzureConfig) -> None:
    output_dir = test_output_dirs.root_dir
    assert get_results_blob_path("some_run_id") == "azureml/ExperimentRun/dcid.some_run_id"
    # Any recent run ID from a PR build will do. Use a PR build because the checkpoint files are small there.
    config = ModelConfigBase(should_validate=False)
    config.set_output_to(output_dir)
    runner_config.run_recovery_id = DEFAULT_ENSEMBLE_RUN_RECOVERY_ID if is_ensemble else DEFAULT_RUN_RECOVERY_ID
    run_recovery = RunRecovery.download_checkpoints_from_recovery_run(runner_config, config)
    run_to_recover = fetch_run(workspace=runner_config.get_workspace(), run_recovery_id=runner_config.run_recovery_id)
    expected_checkpoint_file = "1" + CHECKPOINT_FILE_SUFFIX
    if is_ensemble:
        child_runs = fetch_child_runs(run_to_recover)
        expected_files = [config.checkpoint_folder
                          / OTHER_RUNS_SUBDIR_NAME
                          / str(x.get_tags()['cross_validation_split_index'])
                          / expected_checkpoint_file
                          for x in child_runs]
    else:
        expected_files = [config.checkpoint_folder / run_to_recover.id / expected_checkpoint_file]

    checkpoint_paths = run_recovery.get_checkpoint_paths(1)
    if is_ensemble:
        assert len(run_recovery.checkpoints_roots) == len(expected_files)
        assert all([(x in [y.parent for y in expected_files]) for x in run_recovery.checkpoints_roots])
        assert len(checkpoint_paths) == len(expected_files)
        assert all([x in expected_files for x in checkpoint_paths])
    else:
        assert len(checkpoint_paths) == 1
        assert checkpoint_paths[0] == expected_files[0]
    assert all([expected_file.exists() for expected_file in expected_files])
def parse_and_load_model(self) -> ParserResult:
    """
    Parses the command line arguments, and creates configuration objects for the model itself, and for the
    Azure-related parameters. Sets self.azure_config and self.model_config to their proper values. Returns the
    parser output from parsing the model commandline arguments.
    Raises a ValueError if no "model" argument is provided on the commandline.
    """
    # Create a parser that will understand only the args we need for an AzureConfig
    parser1 = create_runner_parser()
    parser_result = parse_args_and_add_yaml_variables(parser1,
                                                      yaml_config_file=self.yaml_config_file,
                                                      project_root=self.project_root,
                                                      fail_on_unknown_args=False)
    azure_config = AzureConfig(**parser_result.args)
    azure_config.project_root = self.project_root
    self.azure_config = azure_config
    self.model_config = None
    if not azure_config.model:
        raise ValueError("Parameter 'model' needs to be set to tell InnerEye which model to run.")
    model_config_loader: ModelConfigLoader = ModelConfigLoader(**parser_result.args)
    # Create the model as per the "model" commandline option. This can return either a built-in config
    # of type DeepLearningConfig, or a LightningContainer.
    config_or_container = model_config_loader.create_model_config_from_name(model_name=azure_config.model)

    def parse_overrides_and_apply(c: object, previous_parser_result: ParserResult) -> ParserResult:
        assert isinstance(c, GenericConfig)
        parser = type(c).create_argparser()
        # For each parser, feed in the unknown settings from the previous parser. All commandline args should
        # be consumed by name, hence fail if there is something that is still unknown.
        parser_result = parse_arguments(parser,
                                        settings_from_yaml=previous_parser_result.unknown_settings_from_yaml,
                                        args=previous_parser_result.unknown,
                                        fail_on_unknown_args=True)
        # Apply the overrides and validate. Overrides can come from either YAML settings or the commandline.
        c.apply_overrides(parser_result.known_settings_from_yaml)
        c.apply_overrides(parser_result.overrides)
        c.validate()
        return parser_result

    # Now create a parser that understands overrides at model/container level.
    parser_result = parse_overrides_and_apply(config_or_container, parser_result)
    if isinstance(config_or_container, LightningContainer):
        self.lightning_container = config_or_container
    elif isinstance(config_or_container, ModelConfigBase):
        # Built-in InnerEye models use a fake container
        self.model_config = config_or_container
        self.lightning_container = InnerEyeContainer(config_or_container)
    else:
        raise ValueError(f"Don't know how to handle a loaded configuration of type {type(config_or_container)}")
    if azure_config.extra_code_directory:
        exist = "exists" if Path(azure_config.extra_code_directory).exists() else "does not exist"
        logging.info(f"extra_code_directory is {azure_config.extra_code_directory}, which {exist}")
    else:
        logging.info("extra_code_directory is unset")
    return parser_result
def get_or_create_python_environment(azure_config: AzureConfig,
                                     source_config: SourceConfig,
                                     environment_name: str = "",
                                     register_environment: bool = True) -> Environment:
    """
    Creates a description for the Python execution environment in AzureML, based on the Conda environment
    definition files that are specified in `source_config`. If an environment with this Conda environment
    already exists, it is retrieved, otherwise it is created afresh.
    :param azure_config: azure related configurations to use for model scale-out behaviour
    :param source_config: configurations for model execution, such as name and execution mode
    :param environment_name: If specified, try to retrieve the existing Python environment with this name. If that
    is not found, create one from the Conda files provided. This parameter is meant to be used when running
    inference for an existing model.
    :param register_environment: If True, the Python environment will be registered in the AzureML workspace. If
    False, it will only be created, but not registered. Use this for unit testing.
    """
    # Merge the project-specific dependencies with the packages that InnerEye itself needs. This should not be
    # necessary if the innereye package is installed. It is necessary when working with an outer project and
    # InnerEye as a git submodule and submitting jobs from the local machine.
    # In case of version conflicts, the package version in the outer project is given priority.
    conda_dependencies, merged_yaml = merge_conda_dependencies(source_config.conda_dependencies_files)  # type: ignore
    if azure_config.pip_extra_index_url:
        # When an extra-index-url is supplied, swap the order in which packages are searched for.
        # This is necessary if we need to consume packages from extra-index that clash with names of packages on
        # pypi
        conda_dependencies.set_pip_option(f"--index-url {azure_config.pip_extra_index_url}")
        conda_dependencies.set_pip_option("--extra-index-url https://pypi.org/simple")
    env_variables = {
        "AZUREML_OUTPUT_UPLOAD_TIMEOUT_SEC": str(source_config.upload_timeout_seconds),
        "MKL_SERVICE_FORCE_INTEL": "1",
        **(source_config.environment_variables or {})
    }
    base_image = "mcr.microsoft.com/azureml/openmpi3.1.2-cuda10.2-cudnn8-ubuntu18.04"
    # Create a name for the environment that will likely uniquely identify it. AzureML does hashing on top of that,
    # and will re-use existing environments even if they don't have the same name.
    # Hashing should include everything that can reasonably change. Rely on hashlib here, because the built-in
    # hash function gives different results for the same string in different python instances.
    hash_string = "\n".join([merged_yaml, azure_config.docker_shm_size, base_image, str(env_variables)])
    sha1 = hashlib.sha1(hash_string.encode("utf8"))
    overall_hash = sha1.hexdigest()[:32]
    unique_env_name = f"InnerEye-{overall_hash}"
    try:
        env_name_to_find = environment_name or unique_env_name
        env = Environment.get(azure_config.get_workspace(), name=env_name_to_find, version=ENVIRONMENT_VERSION)
        logging.info(f"Using existing Python environment '{env.name}'.")
        return env
    except Exception:
        logging.info(f"Python environment '{unique_env_name}' does not yet exist, creating and registering it.")
    env = Environment(name=unique_env_name)
    env.docker.enabled = True
    env.docker.shm_size = azure_config.docker_shm_size
    env.python.conda_dependencies = conda_dependencies
    env.docker.base_image = base_image
    env.environment_variables = env_variables
    if register_environment:
        env.register(azure_config.get_workspace())
    return env
def test_download_checkpoints_hyperdrive_run(test_output_dirs: OutputFolderForTests,
                                             runner_config: AzureConfig) -> None:
    output_dir = test_output_dirs.root_dir
    config = ModelConfigBase(should_validate=False)
    config.set_output_to(output_dir)
    runner_config.run_recovery_id = DEFAULT_ENSEMBLE_RUN_RECOVERY_ID
    child_runs = fetch_child_runs(run=fetch_run(runner_config.get_workspace(), DEFAULT_ENSEMBLE_RUN_RECOVERY_ID))
    # Recover child runs separately also to test hyperdrive child run recovery functionality
    expected_checkpoint_file = "1" + CHECKPOINT_FILE_SUFFIX
    for child in child_runs:
        expected_files = [config.checkpoint_folder / child.id / expected_checkpoint_file]
        run_recovery = RunRecovery.download_checkpoints_from_recovery_run(runner_config, config, child)
        assert all([x in expected_files for x in run_recovery.get_checkpoint_paths(epoch=1)])
        assert all([expected_file.exists() for expected_file in expected_files])
def parse_and_load_model(self) -> Optional[ParserResult]:
    """
    Parses the command line arguments, and creates configuration objects for the model itself, and for the
    Azure-related parameters. Sets self.azure_config and self.model_config to their proper values. Returns the
    parser output from parsing the model commandline arguments.
    If no "model" argument is provided on the commandline, self.model_config will be set to None, and the return
    value is None.
    """
    # Create a parser that will understand only the args we need for an AzureConfig
    parser1 = create_runner_parser()
    parser1_result = parse_args_and_add_yaml_variables(parser1,
                                                       yaml_config_file=self.yaml_config_file,
                                                       project_root=self.project_root,
                                                       args=self.command_line_args,
                                                       fail_on_unknown_args=False)
    azure_config = AzureConfig(**parser1_result.args)
    azure_config.project_root = self.project_root
    self.azure_config = azure_config
    self.model_config = None  # type: ignore
    if not azure_config.model:
        return None
    model_config_loader: ModelConfigLoader = ModelConfigLoader(**parser1_result.args)
    # Create the model as per the "model" commandline option
    model_config = model_config_loader.create_model_config_from_name(model_name=azure_config.model)
    # This model will be either a classification model or a segmentation model. Those have different
    # fields that could be overridden on the command line. Create a parser that understands the fields we need
    # for the actual model type. We feed this parser with the YAML settings and commandline arguments that the
    # first parser did not recognize.
    parser2 = type(model_config).create_argparser()
    parser2_result = parse_arguments(parser2,
                                     settings_from_yaml=parser1_result.unknown_settings_from_yaml,
                                     args=parser1_result.unknown,
                                     fail_on_unknown_args=True)
    # Apply the overrides and validate. Overrides can come from either YAML settings or the commandline.
    model_config.apply_overrides(parser1_result.unknown_settings_from_yaml)
    model_config.apply_overrides(parser2_result.overrides)
    model_config.validate()
    # Set the file system related configs, they might be affected by the overrides that were applied.
    logging.info("Creating the adjusted output folder structure.")
    model_config.create_filesystem(self.project_root)
    if azure_config.extra_code_directory:
        exist = "exists" if Path(azure_config.extra_code_directory).exists() else "does not exist"
        logging.info(f"extra_code_directory is {azure_config.extra_code_directory}, which {exist}")
    else:
        logging.info("extra_code_directory is unset")
    self.model_config = model_config
    return parser2_result
def test_download_blobxfer(test_output_dirs: TestOutputDirectories, is_file: bool, runner_config: AzureConfig) -> None:
    """
    Test for a bug in early versions of download_blobs: download is happening via prefixes, but because of
    stripping leading directory names, blobs got overwritten.
    """
    root = Path(test_output_dirs.root_dir)
    account_key = runner_config.get_dataset_storage_account_key()
    assert account_key is not None
    # Expected test data in Azure blobs:
    # folder1/folder1.txt with content "folder1.txt"
    # folder1_with_suffix/folder2.txt with content "folder2.txt"
    # folder1_with_suffix/folder1.txt with content "this comes from folder2"
    # with bug present, folder1_with_suffix/folder1.txt will overwrite folder1/folder1.txt
    blobs_root_path = "data-for-testsuite/folder1"
    if is_file:
        blobs_root_path += "/folder1.txt"
    download_blobs(runner_config.datasets_storage_account, account_key, blobs_root_path, root, is_file)
    folder1 = root / "folder1.txt"
    assert folder1.exists()
    if not is_file:
        otherfile = root / "otherfile.txt"
        folder2 = root / "folder2.txt"
        assert folder1.read_text().strip() == "folder1.txt"
        assert otherfile.exists()
        assert otherfile.read_text().strip() == "folder1.txt"
        assert not folder2.exists()
def get_default_azure_config() -> AzureConfig:
    """
    Gets the Azure-related configuration options, using the default settings file settings.yaml.
    """
    return AzureConfig.from_yaml(yaml_file_path=fixed_paths.SETTINGS_YAML_FILE,
                                 project_root=fixed_paths.repository_root_directory())
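# Hedged usage sketch, not part of the original code: how the default AzureConfig is typically consumed by the
# tests in this section. It relies on get_default_azure_config (above) and fetch_run (used further up), and only
# succeeds when the settings file points to a real workspace and valid Azure credentials are available.
def example_fetch_run_from_default_config(run_recovery_id: str):
    azure_config = get_default_azure_config()
    # Resolve the AzureML workspace from the settings file, then look up a run by its recovery ID.
    workspace = azure_config.get_workspace()
    return fetch_run(workspace=workspace, run_recovery_id=run_recovery_id)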
def test_build_config(test_output_dirs: OutputFolderForTests) -> None:
    """
    Test that json with build information is created correctly.
    """
    config = AzureConfig(
        build_number=42,
        build_user="user",
        build_branch="branch",
        build_source_id="00deadbeef",
        build_source_author="author",
        tag="tag",
        model="model")
    result_location = ExperimentResultLocation(azure_job_name="job")
    net_json = build_information_to_dot_net_json(config, result_location)
    expected = '{"BuildNumber": 42, "BuildRequestedFor": "user", "BuildSourceBranchName": "branch", ' \
               '"BuildSourceVersion": "00deadbeef", "BuildSourceAuthor": "author", "ModelName": "model", ' \
               '"ResultsContainerName": null, "ResultsUri": null, "DatasetFolder": null, "DatasetFolderUri": null, ' \
               '"AzureBatchJobName": "job"}'
    assert expected == net_json
    result_folder = test_output_dirs.root_dir / "buildinfo"
    build_information_to_dot_net_json_file(config, result_location, folder=result_folder)
    result_file = result_folder / BUILDINFORMATION_JSON
    assert result_file.exists()
    assert result_file.read_text() == expected
def create_run_config(azure_config: AzureConfig,
                      source_config: SourceConfig,
                      all_azure_dataset_ids: List[str],
                      all_dataset_mountpoints: List[str],
                      environment_name: str = "") -> ScriptRunConfig:
    """
    Creates a configuration to run the InnerEye training script in AzureML.
    :param azure_config: azure related configurations to use for model scale-out behaviour
    :param source_config: configurations for model execution, such as name and execution mode
    :param all_azure_dataset_ids: The names of all datasets on blob storage that will be used for this run.
    :param all_dataset_mountpoints: When using the datasets in AzureML, these are the per-dataset mount points.
    :param environment_name: If specified, try to retrieve the existing Python environment with this name. If that
    is not found, create one from the Conda files provided in `source_config`. This parameter is meant to be used
    when running inference for an existing model.
    :return: The configured script run.
    """
    dataset_consumptions = create_dataset_consumptions(azure_config, all_azure_dataset_ids, all_dataset_mountpoints)
    # AzureML seems to sometimes expect the entry script path in Linux format, hence convert to posix path
    entry_script_relative_path = source_config.entry_script.relative_to(source_config.root_folder).as_posix()
    logging.info(f"Entry script {entry_script_relative_path} ({source_config.entry_script} relative to "
                 f"source directory {source_config.root_folder})")
    max_run_duration = None
    if azure_config.max_run_duration:
        max_run_duration = run_duration_string_to_seconds(azure_config.max_run_duration)
    workspace = azure_config.get_workspace()
    run_config = RunConfiguration(
        script=entry_script_relative_path,
        arguments=source_config.script_params,
    )
    run_config.environment = get_or_create_python_environment(azure_config, source_config,
                                                              environment_name=environment_name)
    run_config.target = azure_config.cluster
    run_config.max_run_duration_seconds = max_run_duration
    if azure_config.num_nodes > 1:
        distributed_job_config = MpiConfiguration(node_count=azure_config.num_nodes)
        run_config.mpi = distributed_job_config
        run_config.framework = "Python"
        run_config.communicator = "IntelMpi"
        run_config.node_count = distributed_job_config.node_count
    if len(dataset_consumptions) > 0:
        run_config.data = {dataset.name: dataset for dataset in dataset_consumptions}
    # Use blob storage for storing the source, rather than the FileShares section of the storage account.
    run_config.source_directory_data_store = workspace.datastores.get(WORKSPACE_DEFAULT_BLOB_STORE_NAME).name
    script_run_config = ScriptRunConfig(
        source_directory=str(source_config.root_folder),
        run_config=run_config,
    )
    if azure_config.hyperdrive:
        script_run_config = source_config.hyperdrive_config_func(script_run_config)  # type: ignore
    return script_run_config
def create_dataset_consumptions(azure_config: AzureConfig,
                                all_azure_dataset_ids: List[str],
                                all_dataset_mountpoints: List[str]) -> List[DatasetConsumptionConfig]:
    """
    Sets up all the dataset consumption objects for the datasets provided. Datasets that have an empty name will be
    skipped.
    :param azure_config: azure related configurations to use for model scale-out behaviour
    :param all_azure_dataset_ids: The names of all datasets on blob storage that will be used for this run.
    :param all_dataset_mountpoints: When using the datasets in AzureML, these are the per-dataset mount points.
    :return: A list of DatasetConsumptionConfig, in the same order as datasets were provided in
    all_azure_dataset_ids, omitting datasets with an empty name.
    """
    dataset_consumptions: List[DatasetConsumptionConfig] = []
    if len(all_dataset_mountpoints) > 0:
        if len(all_azure_dataset_ids) != len(all_dataset_mountpoints):
            raise ValueError(f"The number of dataset mount points ({len(all_dataset_mountpoints)}) "
                             f"must equal the number of Azure dataset IDs ({len(all_azure_dataset_ids)})")
    else:
        all_dataset_mountpoints = [""] * len(all_azure_dataset_ids)
    for i, (dataset_id, mount_point) in enumerate(zip(all_azure_dataset_ids, all_dataset_mountpoints)):
        if dataset_id:
            dataset_consumption = azure_config.get_dataset_consumption(dataset_id, i, mount_point)
            dataset_consumptions.append(dataset_consumption)
        elif mount_point:
            raise ValueError(f"Inconsistent setup: Dataset name at index {i} is empty, but a mount point has "
                             f"been provided ('{mount_point}')")
    return dataset_consumptions
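# Hedged illustration, not from the original source, of the contract documented above: dataset IDs and mount
# points are paired positionally, empty dataset names are skipped, and a length mismatch or a mount point without
# a dataset name raises a ValueError. `azure_config` is assumed to be a valid AzureConfig with workspace access,
# and "dataset1" is a placeholder dataset name.
def example_dataset_consumption_pairing(azure_config: AzureConfig) -> None:
    # One consumption object is created for "dataset1"; the empty second entry is skipped.
    consumptions = create_dataset_consumptions(azure_config, ["dataset1", ""], ["/datasets/ds1", ""])
    assert len(consumptions) == 1
    # A mount point without a matching dataset name is rejected.
    try:
        create_dataset_consumptions(azure_config, [""], ["/datasets/ds1"])
    except ValueError:
        pass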
def test_copy_child_paths_to_folder(is_ensemble: bool,
                                    extra_code_directory: str,
                                    test_output_dirs: OutputFolderForTests) -> None:
    azure_config = AzureConfig(extra_code_directory=extra_code_directory)
    fake_model = SegmentationModelBase(should_validate=False)
    fake_model.set_output_to(test_output_dirs.root_dir)
    # To simulate ensemble models, there are two checkpoints, one in the root dir and one in a folder
    checkpoints_absolute, checkpoints_relative = create_checkpoints(fake_model, is_ensemble)
    # Simulate a project root: We can't derive that from the repository root because that might point
    # into Python's package folder
    project_root = Path(__file__).parent.parent
    ml_runner = MLRunner(model_config=fake_model, azure_config=azure_config, project_root=project_root)
    model_folder = test_output_dirs.root_dir / "final"
    ml_runner.copy_child_paths_to_folder(model_folder=model_folder, checkpoint_paths=checkpoints_absolute)
    expected_files = [
        fixed_paths.ENVIRONMENT_YAML_FILE_NAME,
        fixed_paths.MODEL_INFERENCE_JSON_FILE_NAME,
        "InnerEye/ML/runner.py",
        "InnerEye/ML/model_testing.py",
        "InnerEye/Common/fixed_paths.py",
        "InnerEye/Common/common_util.py",
    ]
    for r in checkpoints_relative:
        expected_files.append(f"{CHECKPOINT_FOLDER}/{r}")
    for expected_file in expected_files:
        assert (model_folder / expected_file).is_file(), f"File missing: {expected_file}"
    trm = model_folder / "TestsOutsidePackage/test_register_model.py"
    if extra_code_directory:
        assert trm.is_file()
    else:
        assert not trm.is_file()
def set_run_tags(run: Run, azure_config: AzureConfig, model_config_overrides: str) -> None:
    """
    Set metadata for the run.
    :param run: Run to set metadata for.
    :param azure_config: The configurations for the present AzureML job
    :param model_config_overrides: A string that describes which model parameters were overwritten by commandline
    arguments in the present run.
    """
    git_information = azure_config.get_git_information()
    run.set_tags({
        "tag": azure_config.tag,
        "model_name": azure_config.model,
        "execution_mode": ModelExecutionMode.TRAIN.value if azure_config.train else ModelExecutionMode.TEST.value,
        RUN_RECOVERY_ID_KEY_NAME: azure_util.create_run_recovery_id(run=run),
        RUN_RECOVERY_FROM_ID_KEY_NAME: azure_config.run_recovery_id,
        "build_number": str(azure_config.build_number),
        "build_user": azure_config.build_user,
        "build_user_email": azure_config.build_user_email,
        "source_repository": git_information.repository,
        "source_branch": git_information.branch,
        "source_id": git_information.commit_id,
        "source_message": git_information.commit_message,
        "source_author": git_information.commit_author,
        "source_dirty": str(git_information.is_dirty),
        "overrides": model_config_overrides,
        CROSS_VALIDATION_SPLIT_INDEX_TAG_KEY: -1,
    })
def download_checkpoints_from_recovery_run(azure_config: AzureConfig,
                                           config: DeepLearningConfig,
                                           run_context: Optional[Run] = None) -> RunRecovery:
    """
    Downloads checkpoints of the run corresponding to the run_recovery_id in azure_config, and any checkpoints
    of the child runs if they exist.
    :param azure_config: Azure related configs.
    :param config: Model related configs.
    :param run_context: Context of the current run (will be used to find the target AML workspace)
    :return: A RunRecovery object.
    """
    run_context = run_context or RUN_CONTEXT
    workspace = azure_config.get_workspace()
    # Find the run to recover in AML workspace
    if not azure_config.run_recovery_id:
        raise ValueError("A valid run_recovery_id is required to download recovery checkpoints, found None")
    run_to_recover = fetch_run(workspace, azure_config.run_recovery_id.strip())
    # Handle recovery of a HyperDrive cross validation run (from within a successor HyperDrive run,
    # not in ensemble creation). In this case, run_recovery_id refers to the parent prior run, so we
    # need to set run_to_recover to the child of that run whose split index is the same as that of
    # the current (child) run.
    if is_cross_validation_child_run(run_context):
        run_to_recover = next(x for x in fetch_child_runs(run_to_recover)
                              if get_cross_validation_split_index(x) == get_cross_validation_split_index(run_context))
    return RunRecovery.download_checkpoints_from_run(config, run_to_recover)
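# Hedged sketch, not from the original source, of how checkpoint recovery is driven from a run recovery ID, as in
# the tests further up: set `run_recovery_id` on the AzureConfig, then ask RunRecovery for the checkpoints of a
# given epoch. The recovery ID below is a placeholder, and the call requires access to the corresponding
# AzureML workspace.
def example_recover_checkpoints(azure_config: AzureConfig, model_config: DeepLearningConfig) -> None:
    azure_config.run_recovery_id = "experiment_name:run_id"  # placeholder recovery ID
    run_recovery = RunRecovery.download_checkpoints_from_recovery_run(azure_config, model_config)
    checkpoint_paths = run_recovery.get_checkpoint_paths(1)  # paths to the epoch-1 checkpoints
    print(checkpoint_paths)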
def __init__(self,
             model_config: ModelConfigBase,
             azure_config: Optional[AzureConfig] = None,
             project_root: Optional[Path] = None,
             post_cross_validation_hook: Optional[PostCrossValidationHookSignature] = None,
             model_deployment_hook: Optional[ModelDeploymentHookSignature] = None) -> None:
    """
    Driver class to run a ML experiment. Note that the project root argument MUST be supplied when using InnerEye
    as a package!
    :param model_config: Model related configurations
    :param azure_config: Azure related configurations
    :param project_root: Project root. This should only be omitted if calling run_ml from the test suite. Supplying
    it is crucial when using InnerEye as a package or submodule!
    :param post_cross_validation_hook: A function to call after waiting for completion of cross validation runs.
    The function is called with the model configuration and the path to the downloaded and merged metrics files.
    :param model_deployment_hook: an optional function for deploying a model in an application-specific way.
    If present, it should take a model config (SegmentationModelBase), an AzureConfig, and an AzureML Model as
    arguments, and return an optional Path and a further object of any type.
    """
    self.model_config = model_config
    self.azure_config: AzureConfig = azure_config or AzureConfig()
    self.project_root: Path = project_root or fixed_paths.repository_root_directory()
    self.post_cross_validation_hook = post_cross_validation_hook
    self.model_deployment_hook = model_deployment_hook
def get_comparison_baselines(outputs_folder: Path,
                             azure_config: AzureConfig,
                             comparison_blob_storage_paths: List[Tuple[str, str]]) -> List[ComparisonBaseline]:
    comparison_baselines = []
    for (comparison_name, comparison_path) in comparison_blob_storage_paths:
        # Discard the experiment part of the run rec ID, if any.
        comparison_path = comparison_path.split(":")[-1]
        run_rec_id, blob_path_str = comparison_path.split("/", 1)
        run_rec_id = strip_prefix(run_rec_id, AZUREML_RUN_FOLDER_PREFIX)
        blob_path = Path(strip_prefix(blob_path_str, DEFAULT_AML_UPLOAD_DIR + "/"))
        run = azure_config.fetch_run(run_rec_id)
        (comparison_dataset_path, comparison_metrics_path) = get_comparison_baseline_paths(outputs_folder,
                                                                                           blob_path, run,
                                                                                           DATASET_CSV_FILE_NAME)
        # If both dataset.csv and metrics.csv were downloaded successfully, read their contents and
        # add a tuple to the comparison data.
        if comparison_dataset_path is not None and comparison_metrics_path is not None and \
                comparison_dataset_path.exists() and comparison_metrics_path.exists():
            comparison_baselines.append(ComparisonBaseline(comparison_name,
                                                           pd.read_csv(comparison_dataset_path),
                                                           pd.read_csv(comparison_metrics_path),
                                                           run_rec_id))
        else:
            raise ValueError(f"could not find comparison data for run {run_rec_id}")
    return comparison_baselines
def test_submit_for_inference(test_output_dirs: OutputFolderForTests) -> None:
    """
    Execute the submit_for_inference script on the model that was recently trained. This starts an AzureML job,
    and downloads the segmentation. Then check if the segmentation was actually produced.
    """
    model = get_most_recent_model()
    image_file = fixed_paths_for_tests.full_ml_test_data_path() / "train_and_test_data" / "id1_channel1.nii.gz"
    assert image_file.exists(), f"Image file not found: {image_file}"
    settings_file = fixed_paths.SETTINGS_YAML_FILE
    assert settings_file.exists(), f"Settings file not found: {settings_file}"
    azure_config = AzureConfig.from_yaml(settings_file, project_root=fixed_paths.repository_root_directory())
    # Read the name of the branch from environment, so that the inference experiment is also listed alongside
    # all other AzureML runs that belong to the current PR.
    build_branch = os.environ.get("BUILD_BRANCH", None)
    experiment_name = to_azure_friendly_string(build_branch) if build_branch else "model_inference"
    azure_config.get_git_information()
    args = ["--image_file", str(image_file),
            "--model_id", model.id,
            "--settings", str(settings_file),
            "--download_folder", str(test_output_dirs.root_dir),
            "--cluster", "training-nc12",
            "--experiment", experiment_name]
    seg_path = test_output_dirs.root_dir / DEFAULT_RESULT_IMAGE_NAME
    assert not seg_path.exists(), f"Result file {seg_path} should not yet exist"
    submit_for_inference.main(args, project_root=fixed_paths.repository_root_directory())
    assert seg_path.exists(), f"Result file {seg_path} was not created"
def test_score_image_dicom_mock_none(test_output_dirs: OutputFolderForTests) -> None:
    """
    Test that dicom in and dicom-rt out works. Here, finally, there is no mocking, and full image scoring is run
    using the PassThroughModel.
    :param test_output_dirs: Test output directories.
    """
    model_config = PassThroughModel()
    model_config.set_output_to(test_output_dirs.root_dir)
    checkpoint_path = model_config.checkpoint_folder / "checkpoint.ckpt"
    create_model_and_store_checkpoint(model_config, checkpoint_path)
    azure_config = AzureConfig()
    project_root = Path(__file__).parent.parent
    ml_runner = MLRunner(model_config=model_config, azure_config=azure_config, project_root=project_root)
    model_folder = test_output_dirs.root_dir / "final"
    ml_runner.copy_child_paths_to_folder(model_folder=model_folder, checkpoint_paths=[checkpoint_path])
    zipped_dicom_series_path = zip_dicom_series(model_folder)
    score_pipeline_config = ScorePipelineConfig(data_folder=zipped_dicom_series_path.parent,
                                                model_folder=str(model_folder),
                                                image_files=[str(zipped_dicom_series_path)],
                                                result_image_name=HNSEGMENTATION_FILE.name,
                                                use_gpu=False,
                                                use_dicom=True)
    segmentation = score_image(score_pipeline_config)
    assert_zip_file_contents(segmentation, HN_DICOM_RT_ZIPPED, model_folder)
def submit_for_inference(args: SubmitForInferenceConfig, azure_config: AzureConfig) -> Optional[Path]:
    """
    Create and submit an inference run to AzureML, and optionally download the resulting segmentation.
    :param azure_config: An object with all necessary information for accessing Azure.
    :param args: configuration, see SubmitForInferenceConfig
    :return: path to downloaded segmentation on local disc, or None if none.
    """
    logging.info(f"Building Azure configuration from {args.settings}")
    logging.info("Getting workspace")
    workspace = azure_config.get_workspace()
    logging.info("Identifying model")
    model = Model(workspace=workspace, id=args.model_id)
    model_id = model.id
    logging.info(f"Identified model {model_id}")
    source_directory = tempfile.TemporaryDirectory()
    source_directory_name = source_directory.name
    logging.info(f"Building inference run submission in {source_directory_name}")
    source_directory_path = Path(source_directory_name)
    copy_image_file(args.image_file, source_directory_path / DEFAULT_DATA_FOLDER)
    # We copy over run_scoring.py, and score.py as well in case the model we're using
    # does not have sufficiently recent versions of those files.
    for base in ["run_scoring.py", "score.py"]:
        shutil.copyfile(base, str(source_directory_path / base))
    source_config = SourceConfig(
        root_folder=source_directory_name,
        entry_script=str(source_directory_path / "run_scoring.py"),
        script_params={"--data-folder": ".", "--spawnprocess": "python", "--model-id": model_id, "score.py": ""},
        conda_dependencies_files=download_conda_dependency_files(model, source_directory_path))
    estimator = create_estimator_from_configs(workspace, azure_config, source_config, [])
    exp = Experiment(workspace=workspace, name=args.experiment_name)
    run = exp.submit(estimator)
    logging.info(f"Submitted run {run.id} in experiment {run.experiment.name}")
    logging.info(f"Run URL: {run.get_portal_url()}")
    if not args.keep_upload_folder:
        source_directory.cleanup()
        logging.info(f"Deleted submission directory {source_directory_name}")
    if args.download_folder is None:
        return None
    logging.info("Awaiting run completion")
    run.wait_for_completion()
    logging.info(f"Run has completed with status {run.get_status()}")
    download_path = choose_download_path(args.download_folder)
    logging.info(f"Attempting to download segmentation to {download_path}")
    run.download_file(DEFAULT_RESULT_IMAGE_NAME, str(download_path))
    if download_path.exists():
        logging.info(f"Downloaded segmentation to {download_path}")
    else:
        logging.warning("Segmentation NOT downloaded")
    return download_path
def azure_config(self) -> AzureConfig:
    """
    Gets the AzureConfig instance that the script uses.
    """
    if self._azure_config is None:
        self._azure_config = AzureConfig.from_yaml(self.settings_yaml_file, project_root=self.project_root)
    return self._azure_config
def azure_config(self) -> AzureConfig:
    """
    Gets the AzureConfig instance that the script uses.
    """
    if self._azure_config is None:
        self._azure_config = AzureConfig.from_yaml(Path(self.train_yaml_path))
    return self._azure_config
def report_structure_extremes(dataset_dir: str, yaml_file: str) -> None:
    """
    Writes structure-extreme lines for the subjects in a directory.
    If there are any structures with missing slices, a ValueError is raised after writing all the lines.
    This allows a build failure to be triggered when such structures exist.
    :param dataset_dir: directory containing subject subdirectories with integer names.
    :param yaml_file: The path to the YAML file that contains all Azure-related options.
    """
    azure_config = AzureConfig.from_yaml(yaml_file_path=Path(yaml_file))
    download_dataset_directory(azure_config, dataset_dir)
    subjects: Set[int] = set()
    series_map = None
    institution_map = None
    for subj in os.listdir(dataset_dir):
        try:
            subjects.add(int(subj))
        except ValueError:
            if subj == "dataset.csv":
                # We should find this in every dataset_dir.
                series_map, institution_map = populate_series_maps(os.path.join(dataset_dir, subj))
    if institution_map is None or series_map is None:
        raise FileNotFoundError(f"Cannot find {dataset_dir}/dataset.csv")
    if not subjects:
        print(f"No subject directories found in {dataset_dir}")
        return
    print(f"Found {len(subjects)} subjects in {dataset_dir}")
    # You could temporarily edit subjects to be an explicit list of integers here, to process only certain subjects:
    # subjects = [23, 42, 99]
    full_output_dir = os.path.join(dataset_dir, "structure_extremes_full")
    os.makedirs(full_output_dir)
    problems_output_dir = os.path.join(dataset_dir, "structure_extremes_problems")
    os.makedirs(problems_output_dir)
    n_missing = 0
    files_created: Set[str] = set()
    for (index, subj_int) in enumerate(sorted(subjects), 1):
        subj = str(subj_int)
        institution_id = institution_map.get(subj, "")
        out = open_with_header(os.path.join(full_output_dir, institution_id + ".txt"), files_created)
        err = None
        for line in report_structure_extremes_for_subject(os.path.join(dataset_dir, subj), series_map[subj]):
            out.write(line + "\n")
            if line.find(MISSING_SLICE_MARKER) > 0:
                if err is None:
                    err = open_with_header(os.path.join(problems_output_dir, institution_id + ".txt"), files_created)
                err.write(line + "\n")
                n_missing += 1
        out.close()
        if err is not None:
            err.close()
        if index % 25 == 0:
            print(f"Processed {index} subjects")
    print(f"Processed all {len(subjects)} subjects")
    upload_to_dataset_directory(azure_config, dataset_dir, files_created)
    # If we found any structures with missing slices, raise an exception, which should be
    # uncaught where necessary to make any appropriate build step fail.
    if n_missing > 0:
        raise ValueError(f"Found {n_missing} structures with missing slices")
def create_run_config(azure_config: AzureConfig,
                      source_config: SourceConfig,
                      azure_dataset_id: str = "",
                      environment_name: str = "") -> ScriptRunConfig:
    """
    Creates a configuration to run the InnerEye training script in AzureML.
    :param azure_config: azure related configurations to use for model scale-out behaviour
    :param source_config: configurations for model execution, such as name and execution mode
    :param azure_dataset_id: The name of the dataset in blob storage to be used for this run. This can be an empty
    string to not use any datasets.
    :param environment_name: If specified, try to retrieve the existing Python environment with this name. If that
    is not found, create one from the Conda files provided in `source_config`. This parameter is meant to be used
    when running inference for an existing model.
    :return: The configured script run.
    """
    if azure_dataset_id:
        azureml_dataset = get_or_create_dataset(azure_config, azure_dataset_id=azure_dataset_id)
        if not azureml_dataset:
            raise ValueError(f"AzureML dataset {azure_dataset_id} could not be found or created.")
        named_input = azureml_dataset.as_named_input(INPUT_DATA_KEY)
        dataset_consumption = named_input.as_mount() if azure_config.use_dataset_mount else named_input.as_download()
    else:
        dataset_consumption = None
    # AzureML seems to sometimes expect the entry script path in Linux format, hence convert to posix path
    entry_script_relative_path = source_config.entry_script.relative_to(source_config.root_folder).as_posix()
    logging.info(f"Entry script {entry_script_relative_path} ({source_config.entry_script} relative to "
                 f"source directory {source_config.root_folder})")
    max_run_duration = None
    if azure_config.max_run_duration:
        max_run_duration = run_duration_string_to_seconds(azure_config.max_run_duration)
    workspace = azure_config.get_workspace()
    run_config = RunConfiguration(
        script=entry_script_relative_path,
        arguments=source_config.script_params,
    )
    run_config.environment = get_or_create_python_environment(azure_config, source_config,
                                                              environment_name=environment_name)
    run_config.target = azure_config.cluster
    run_config.max_run_duration_seconds = max_run_duration
    if azure_config.num_nodes > 1:
        distributed_job_config = MpiConfiguration(node_count=azure_config.num_nodes)
        run_config.mpi = distributed_job_config
        run_config.framework = "Python"
        run_config.communicator = "IntelMpi"
        run_config.node_count = distributed_job_config.node_count
    if dataset_consumption:
        run_config.data = {dataset_consumption.name: dataset_consumption}
    # Use blob storage for storing the source, rather than the FileShares section of the storage account.
    run_config.source_directory_data_store = workspace.datastores.get(WORKSPACE_DEFAULT_BLOB_STORE_NAME).name
    script_run_config = ScriptRunConfig(
        source_directory=str(source_config.root_folder),
        run_config=run_config,
    )
    if azure_config.hyperdrive:
        script_run_config = source_config.hyperdrive_config_func(script_run_config)  # type: ignore
    return script_run_config
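# Hedged end-to-end sketch, not part of the original code, showing how the ScriptRunConfig built above is
# typically submitted, mirroring the Experiment/submit calls used elsewhere in this section. The experiment and
# dataset names are placeholders; a real SourceConfig, cluster, and workspace access are required for this to run.
def example_submit_training_run(azure_config: AzureConfig, source_config: SourceConfig) -> None:
    script_run_config = create_run_config(azure_config, source_config, azure_dataset_id="my_dataset")
    experiment = Experiment(workspace=azure_config.get_workspace(), name="my-experiment")
    run = experiment.submit(script_run_config)
    print(f"Submitted run {run.id}, portal URL: {run.get_portal_url()}")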
def main(settings_yaml_file: Optional[Path] = None, project_root: Optional[Path] = None) -> None:
    """
    Main function.
    """
    logging_to_stdout()
    config = ReportStructureExtremesConfig.parse_args()
    azure_config = AzureConfig.from_yaml(yaml_file_path=settings_yaml_file or config.settings,
                                         project_root=project_root)
    report_structure_extremes(config.dataset, azure_config)
def azure_config(self) -> AzureConfig:
    """
    Gets the AzureConfig instance that the script uses. This will either read out a value that has previously been
    set, or create a new AzureConfig object from the YAML file and project root settings that the present object
    holds.
    """
    if self._azure_config is None:
        self._azure_config = AzureConfig.from_yaml(self.settings_yaml_file, project_root=self.project_root)
    return self._azure_config
def upload_to_dataset_directory(azure_config: AzureConfig, dataset_dir: str, files: Set[str]) -> None:
    if not files:
        return
    account_key = azure_config.get_dataset_storage_account_key()
    block_blob_service = BlockBlobService(account_name=azure_config.datasets_storage_account, account_key=account_key)
    container_name = os.path.join(azure_config.datasets_container, os.path.basename(dataset_dir))
    for path in files:
        blob_name = path[len(dataset_dir) + 1:]
        block_blob_service.create_blob_from_path(container_name, blob_name, path)
        print(f"Uploaded {path} to {azure_config.datasets_storage_account}:{container_name}/{blob_name}")
def download_dataset_directory(azure_config: AzureConfig, dataset_dir: str) -> bool:
    if os.path.isdir(dataset_dir):
        return False
    account_key = azure_config.get_dataset_storage_account_key()
    blobs_root_path = os.path.join(azure_config.datasets_container, os.path.basename(dataset_dir)) + "/"
    sys.stdout.write(f"Downloading data to {dataset_dir} ...")
    assert account_key is not None  # for mypy
    download_blobs(azure_config.datasets_storage_account, account_key, blobs_root_path, Path(dataset_dir))
    sys.stdout.write("done\n")
    return True
def monitor(monitor_config: AMLTensorBoardMonitorConfig, azure_config: AzureConfig) -> None:
    """
    Starts TensorBoard monitoring as per the provided arguments.
    :param monitor_config: The config containing information on which runs need to be monitored.
    :param azure_config: An AzureConfig object with secrets/keys to access the workspace.
    """
    # Fetch AzureML workspace and the experiment runs in it
    workspace = azure_config.get_workspace()
    if monitor_config.run_ids is not None:
        if len(monitor_config.run_ids) == 0:
            print("At least one run_recovery_id must be given for monitoring.")
            sys.exit(1)
        exp_runs = [azure_util.fetch_run(workspace, run_id) for run_id in monitor_config.run_ids]
    else:
        if monitor_config.experiment_name not in workspace.experiments:
            print(f"The experiment: {monitor_config.experiment_name} doesn't "
                  f"exist in the {monitor_config.workspace_name} workspace.")
            sys.exit(1)
        experiment = Experiment(workspace, monitor_config.experiment_name)
        filters = common_util.get_items_from_string(monitor_config.run_status) if monitor_config.run_status else []
        exp_runs = azure_util.fetch_runs(experiment, filters)
        if len(exp_runs) == 0:
            _msg = "No runs to monitor"
            if monitor_config.run_status:
                _msg += f" with status [{monitor_config.run_status}]."
            print(_msg)
            sys.exit(1)
    # Start TensorBoard on executing machine
    ts = Tensorboard(exp_runs, local_root=str(monitor_config.local_root), port=monitor_config.port)
    print("==============================================================================")
    for run in exp_runs:
        print(f"Run URL: {run.get_portal_url()}")
    print(f"TensorBoard URL: {ts.start()}")
    print("==============================================================================\n\n")
    input("Press Enter to close TensorBoard...")
    ts.stop()
def get_configs(default_model_config: SegmentationModelBase,
                yaml_file_path: Path) -> Tuple[SegmentationModelBase, AzureConfig, Dict]:
    parser_result = create_parser(yaml_file_path)
    args = parser_result.args
    runner_config = AzureConfig(**args)
    logging_to_stdout(args["log_level"])
    config = default_model_config or ModelConfigLoader().create_model_config_from_name(runner_config.model)
    config.apply_overrides(parser_result.overrides, should_validate=False)
    return config, runner_config, args
def get_most_recent_model() -> Model:
    most_recent_run = get_most_recent_run()
    azure_config = AzureConfig.from_yaml(fixed_paths.SETTINGS_YAML_FILE,
                                         project_root=fixed_paths.repository_root_directory())
    workspace = azure_config.get_workspace()
    run = fetch_run(workspace, most_recent_run)
    tags = run.get_tags()
    model_id = tags.get(MODEL_ID_KEY_NAME, None)
    assert model_id, f"No model_id tag was found on run {most_recent_run}"
    return Model(workspace=workspace, id=model_id)