Example 1
 def update_azure_config(self, azure_config: AzureConfig) -> None:
     # Override parameter with different name
     azure_config.subscription_id = self.container_subscription_id
     # Override parameter with clashing name
     azure_config.tenant_id = self.tenant_id
     # Override with hard-coded value
     azure_config.experiment_name = "hardcoded-experiment-name"
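A minimal sketch of how such an override hook is invoked; container stands for a hypothetical LightningContainer subclass that defines the update_azure_config method shown above:

azure_config = AzureConfig()                   # in practice loaded from the settings YAML file
container.update_azure_config(azure_config)    # container is the hypothetical object defining the hook above
assert azure_config.experiment_name == "hardcoded-experiment-name"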
Example 2
def test_download_checkpoints(test_output_dirs: OutputFolderForTests, is_ensemble: bool,
                              runner_config: AzureConfig) -> None:
    output_dir = test_output_dirs.root_dir
    assert get_results_blob_path("some_run_id") == "azureml/ExperimentRun/dcid.some_run_id"
    # Any recent run ID from a PR build will do. Use a PR build because the checkpoint files are small there.
    config = ModelConfigBase(should_validate=False)
    config.set_output_to(output_dir)

    runner_config.run_recovery_id = DEFAULT_ENSEMBLE_RUN_RECOVERY_ID if is_ensemble else DEFAULT_RUN_RECOVERY_ID
    run_recovery = RunRecovery.download_checkpoints_from_recovery_run(runner_config, config)
    run_to_recover = fetch_run(workspace=runner_config.get_workspace(), run_recovery_id=runner_config.run_recovery_id)
    expected_checkpoint_file = "1" + CHECKPOINT_FILE_SUFFIX
    if is_ensemble:
        child_runs = fetch_child_runs(run_to_recover)
        expected_files = [config.checkpoint_folder
                          / OTHER_RUNS_SUBDIR_NAME
                          / str(x.get_tags()['cross_validation_split_index']) / expected_checkpoint_file
                          for x in child_runs]
    else:
        expected_files = [config.checkpoint_folder / run_to_recover.id / expected_checkpoint_file]

    checkpoint_paths = run_recovery.get_checkpoint_paths(1)
    if is_ensemble:
        assert len(run_recovery.checkpoints_roots) == len(expected_files)
        assert all([(x in [y.parent for y in expected_files]) for x in run_recovery.checkpoints_roots])
        assert len(checkpoint_paths) == len(expected_files)
        assert all([x in expected_files for x in checkpoint_paths])
    else:
        assert len(checkpoint_paths) == 1
        assert checkpoint_paths[0] == expected_files[0]

    assert all([expected_file.exists() for expected_file in expected_files])
Example 3
    def parse_and_load_model(self) -> ParserResult:
        """
        Parses the command line arguments, and creates configuration objects for the model itself, and for the
        Azure-related parameters. Sets self.azure_config and self.model_config to their proper values. Returns the
        parser output from parsing the model commandline arguments.
        If no "model" argument is provided on the commandline, self.model_config will be set to None, and the return
        value is None.
        """
        # Create a parser that will understand only the args we need for an AzureConfig
        parser1 = create_runner_parser()
        parser_result = parse_args_and_add_yaml_variables(parser1,
                                                          yaml_config_file=self.yaml_config_file,
                                                          project_root=self.project_root,
                                                          fail_on_unknown_args=False)
        azure_config = AzureConfig(**parser_result.args)
        azure_config.project_root = self.project_root
        self.azure_config = azure_config
        self.model_config = None
        if not azure_config.model:
            raise ValueError("Parameter 'model' needs to be set to tell InnerEye which model to run.")
        model_config_loader: ModelConfigLoader = ModelConfigLoader(**parser_result.args)
        # Create the model as per the "model" commandline option. This can return either a built-in config
        # of type DeepLearningConfig, or a LightningContainer.
        config_or_container = model_config_loader.create_model_config_from_name(model_name=azure_config.model)

        def parse_overrides_and_apply(c: object, previous_parser_result: ParserResult) -> ParserResult:
            assert isinstance(c, GenericConfig)
            parser = type(c).create_argparser()
            # For each parser, feed in the unknown settings from the previous parser. All commandline args should
            # be consumed by name, hence fail if there is something that is still unknown.
            parser_result = parse_arguments(parser,
                                            settings_from_yaml=previous_parser_result.unknown_settings_from_yaml,
                                            args=previous_parser_result.unknown,
                                            fail_on_unknown_args=True)
            # Apply the overrides and validate. Overrides can come from either YAML settings or the commandline.
            c.apply_overrides(parser_result.known_settings_from_yaml)
            c.apply_overrides(parser_result.overrides)
            c.validate()
            return parser_result

        # Now create a parser that understands overrides at model/container level.
        parser_result = parse_overrides_and_apply(config_or_container, parser_result)

        if isinstance(config_or_container, LightningContainer):
            self.lightning_container = config_or_container
        elif isinstance(config_or_container, ModelConfigBase):
            # Built-in InnerEye models use a fake container
            self.model_config = config_or_container
            self.lightning_container = InnerEyeContainer(config_or_container)
        else:
            raise ValueError(f"Don't know how to handle a loaded configuration of type {type(config_or_container)}")
        if azure_config.extra_code_directory:
            exist = "exists" if Path(azure_config.extra_code_directory).exists() else "does not exist"
            logging.info(f"extra_code_directory is {azure_config.extra_code_directory}, which {exist}")
        else:
            logging.info("extra_code_directory is unset")
        return parser_result
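A sketch of driving this method; the Runner constructor keywords below are assumptions about the surrounding class and may not match its real signature:

runner = Runner(project_root=Path.cwd(), yaml_config_file=Path("InnerEye/settings.yml"))  # assumed keywords
parser_result = runner.parse_and_load_model()
print(runner.azure_config.model, type(runner.lightning_container).__name__)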
Example 4
def get_or_create_python_environment(azure_config: AzureConfig,
                                     source_config: SourceConfig,
                                     environment_name: str = "",
                                     register_environment: bool = True) -> Environment:
    """
    Creates a description for the Python execution environment in AzureML, based on the Conda environment
    definition files that are specified in `source_config`. If an environment with this Conda definition already
    exists in the workspace, it is retrieved; otherwise it is created afresh.
    :param azure_config: azure related configurations to use for model scale-out behaviour
    :param source_config: configurations for model execution, such as name and execution mode
    :param environment_name: If specified, try to retrieve the existing Python environment with this name. If that
    is not found, create one from the Conda files provided. This parameter is meant to be used when running
    inference for an existing model.
    :param register_environment: If True, the Python environment will be registered in the AzureML workspace. If
    False, it will only be created, but not registered. Use this for unit testing.
    :return: The retrieved or newly created AzureML Environment object.
    """
    # Merge the project-specific dependencies with the packages that InnerEye itself needs. This should not be
    # necessary if the innereye package is installed. It is necessary when working with an outer project and
    # InnerEye as a git submodule and submitting jobs from the local machine.
    # In case of version conflicts, the package version in the outer project is given priority.
    conda_dependencies, merged_yaml = merge_conda_dependencies(source_config.conda_dependencies_files)  # type: ignore
    if azure_config.pip_extra_index_url:
        # When an extra-index-url is supplied, swap the order in which packages are searched for.
        # This is necessary if we need to consume packages from extra-index that clash with names of packages on
        # pypi
        conda_dependencies.set_pip_option(f"--index-url {azure_config.pip_extra_index_url}")
        conda_dependencies.set_pip_option("--extra-index-url https://pypi.org/simple")
    env_variables = {
        "AZUREML_OUTPUT_UPLOAD_TIMEOUT_SEC": str(source_config.upload_timeout_seconds),
        "MKL_SERVICE_FORCE_INTEL": "1",
        **(source_config.environment_variables or {})
    }
    base_image = "mcr.microsoft.com/azureml/openmpi3.1.2-cuda10.2-cudnn8-ubuntu18.04"
    # Create a name for the environment that will likely uniquely identify it. AzureML does hashing on top of that,
    # and will re-use existing environments even if they don't have the same name.
    # Hashing should include everything that can reasonably change. Rely on hashlib here, because the built-in
    # hash function gives different results for the same string in different python instances.
    hash_string = "\n".join([merged_yaml, azure_config.docker_shm_size, base_image, str(env_variables)])
    sha1 = hashlib.sha1(hash_string.encode("utf8"))
    overall_hash = sha1.hexdigest()[:32]
    unique_env_name = f"InnerEye-{overall_hash}"
    try:
        env_name_to_find = environment_name or unique_env_name
        env = Environment.get(azure_config.get_workspace(), name=env_name_to_find, version=ENVIRONMENT_VERSION)
        logging.info(f"Using existing Python environment '{env.name}'.")
        return env
    except Exception:
        logging.info(f"Python environment '{unique_env_name}' does not yet exist, creating and registering it.")
    env = Environment(name=unique_env_name)
    env.docker.enabled = True
    env.docker.shm_size = azure_config.docker_shm_size
    env.python.conda_dependencies = conda_dependencies
    env.docker.base_image = base_image
    env.environment_variables = env_variables
    if register_environment:
        env.register(azure_config.get_workspace())
    return env
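A usage sketch, assuming SourceConfig accepts the fields used in these examples and that the paths shown (all hypothetical) exist; register_environment=False keeps the workspace untouched:

azure_config = AzureConfig.from_yaml(Path("InnerEye/settings.yml"), project_root=Path.cwd())
source_config = SourceConfig(root_folder=Path.cwd(),
                             entry_script=Path.cwd() / "InnerEye" / "ML" / "runner.py",
                             conda_dependencies_files=[Path("environment.yml")])
env = get_or_create_python_environment(azure_config, source_config, register_environment=False)
print(env.name)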
Example 5
def test_download_checkpoints_hyperdrive_run(test_output_dirs: OutputFolderForTests,
                                             runner_config: AzureConfig) -> None:
    output_dir = test_output_dirs.root_dir
    config = ModelConfigBase(should_validate=False)
    config.set_output_to(output_dir)
    runner_config.run_recovery_id = DEFAULT_ENSEMBLE_RUN_RECOVERY_ID
    child_runs = fetch_child_runs(run=fetch_run(runner_config.get_workspace(), DEFAULT_ENSEMBLE_RUN_RECOVERY_ID))
    # recover child runs separately also to test hyperdrive child run recovery functionality
    expected_checkpoint_file = "1" + CHECKPOINT_FILE_SUFFIX
    for child in child_runs:
        expected_files = [config.checkpoint_folder / child.id / expected_checkpoint_file]
        run_recovery = RunRecovery.download_checkpoints_from_recovery_run(runner_config, config, child)
        assert all([x in expected_files for x in run_recovery.get_checkpoint_paths(epoch=1)])
        assert all([expected_file.exists() for expected_file in expected_files])
Example 6
 def parse_and_load_model(self) -> Optional[ParserResult]:
     """
     Parses the command line arguments, and creates configuration objects for the model itself, and for the
     Azure-related parameters. Sets self.azure_config and self.model_config to their proper values. Returns the
     parser output from parsing the model commandline arguments.
     If no "model" argument is provided on the commandline, self.model_config will be set to None, and the return
     value is None.
     """
     # Create a parser that will understand only the args we need for an AzureConfig
     parser1 = create_runner_parser()
     parser1_result = parse_args_and_add_yaml_variables(parser1,
                                                        yaml_config_file=self.yaml_config_file,
                                                        project_root=self.project_root,
                                                        args=self.command_line_args,
                                                        fail_on_unknown_args=False)
     azure_config = AzureConfig(**parser1_result.args)
     azure_config.project_root = self.project_root
     self.azure_config = azure_config
     self.model_config = None  # type: ignore
     if not azure_config.model:
         return None
     model_config_loader: ModelConfigLoader = ModelConfigLoader(**parser1_result.args)
     # Create the model as per the "model" commandline option
     model_config = model_config_loader.create_model_config_from_name(
         model_name=azure_config.model
     )
     # This model will be either a classification model or a segmentation model. Those have different
     # fields that could be overridden on the command line. Create a parser that understands the fields we need
     # for the actual model type. We feed this parser with the YAML settings and commandline arguments that the
     # first parser did not recognize.
     parser2 = type(model_config).create_argparser()
     parser2_result = parse_arguments(parser2,
                                      settings_from_yaml=parser1_result.unknown_settings_from_yaml,
                                      args=parser1_result.unknown,
                                      fail_on_unknown_args=True)
     # Apply the overrides and validate. Overrides can come from either YAML settings or the commandline.
     model_config.apply_overrides(parser1_result.unknown_settings_from_yaml)
     model_config.apply_overrides(parser2_result.overrides)
     model_config.validate()
     # Set the file system related configs, they might be affected by the overrides that were applied.
     logging.info("Creating the adjusted output folder structure.")
     model_config.create_filesystem(self.project_root)
     if azure_config.extra_code_directory:
         exist = "exists" if Path(azure_config.extra_code_directory).exists() else "does not exist"
         logging.info(f"extra_code_directory is {azure_config.extra_code_directory}, which {exist}")
     else:
         logging.info("extra_code_directory is unset")
     self.model_config = model_config
     return parser2_result
Example 7
def test_download_blobxfer(test_output_dirs: TestOutputDirectories, is_file: bool, runner_config: AzureConfig) -> None:
    """
    Test for a bug in early versions of download_blobs: downloads happen via blob name prefixes, and because
    leading directory names were stripped, blobs with the same file name overwrote each other.
    """
    root = Path(test_output_dirs.root_dir)
    account_key = runner_config.get_dataset_storage_account_key()
    assert account_key is not None
    # Expected test data in Azure blobs:
    # folder1/folder1.txt with content "folder1.txt"
    # folder1_with_suffix/folder2.txt with content "folder2.txt"
    # folder1_with_suffix/folder1.txt with content "this comes from folder2"
    # with bug present, folder1_with_suffix/folder1.txt will overwrite folder1/folder1.txt
    blobs_root_path = "data-for-testsuite/folder1"
    if is_file:
        blobs_root_path += "/folder1.txt"
    download_blobs(runner_config.datasets_storage_account, account_key, blobs_root_path, root, is_file)

    folder1 = root / "folder1.txt"
    assert folder1.exists()
    if not is_file:
        otherfile = root / "otherfile.txt"
        folder2 = root / "folder2.txt"
        assert folder1.read_text().strip() == "folder1.txt"
        assert otherfile.exists()
        assert otherfile.read_text().strip() == "folder1.txt"
        assert not folder2.exists()
Example 8
def get_default_azure_config() -> AzureConfig:
    """
    Gets the Azure-related configuration options, using the default settings file settings.yaml.
    """
    return AzureConfig.from_yaml(
        yaml_file_path=fixed_paths.SETTINGS_YAML_FILE,
        project_root=fixed_paths.repository_root_directory())
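For example, a quick sanity check of the loaded settings (the fields printed are ones used elsewhere in these examples):

azure_config = get_default_azure_config()
print(azure_config.subscription_id, azure_config.cluster, azure_config.docker_shm_size)
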
def test_build_config(test_output_dirs: OutputFolderForTests) -> None:
    """
    Test that json with build information is created correctly.
    """
    config = AzureConfig(build_number=42,
                         build_user="******",
                         build_branch="branch",
                         build_source_id="00deadbeef",
                         build_source_author="author",
                         tag="tag",
                         model="model")
    result_location = ExperimentResultLocation(azure_job_name="job")
    net_json = build_information_to_dot_net_json(config, result_location)
    expected = '{"BuildNumber": 42, "BuildRequestedFor": "user", "BuildSourceBranchName": "branch", ' \
               '"BuildSourceVersion": "00deadbeef", "BuildSourceAuthor": "author", "ModelName": "model", ' \
               '"ResultsContainerName": null, "ResultsUri": null, "DatasetFolder": null, "DatasetFolderUri": null, ' \
               '"AzureBatchJobName": "job"}'
    assert expected == net_json
    result_folder = test_output_dirs.root_dir / "buildinfo"
    build_information_to_dot_net_json_file(config,
                                           result_location,
                                           folder=result_folder)
    result_file = result_folder / BUILDINFORMATION_JSON
    assert result_file.exists()
    assert result_file.read_text() == expected
Example 10
def create_run_config(azure_config: AzureConfig,
                      source_config: SourceConfig,
                      all_azure_dataset_ids: List[str],
                      all_dataset_mountpoints: List[str],
                      environment_name: str = "") -> ScriptRunConfig:
    """
    Creates a configuration to run the InnerEye training script in AzureML.
    :param azure_config: azure related configurations to use for model scale-out behaviour
    :param source_config: configurations for model execution, such as name and execution mode
    :param all_azure_dataset_ids: The name of all datasets on blob storage that will be used for this run.
    :param all_dataset_mountpoints: When using the datasets in AzureML, these are the per-dataset mount points.
    :param environment_name: If specified, try to retrieve the existing Python environment with this name. If that
    is not found, create one from the Conda files provided in `source_config`. This parameter is meant to be used
    when running inference for an existing model.
    :return: The configured script run.
    """
    dataset_consumptions = create_dataset_consumptions(
        azure_config, all_azure_dataset_ids, all_dataset_mountpoints)
    # AzureML seems to sometimes expect the entry script path in Linux format, hence convert to posix path
    entry_script_relative_path = source_config.entry_script.relative_to(
        source_config.root_folder).as_posix()
    logging.info(
        f"Entry script {entry_script_relative_path} ({source_config.entry_script} relative to "
        f"source directory {source_config.root_folder})")
    max_run_duration = None
    if azure_config.max_run_duration:
        max_run_duration = run_duration_string_to_seconds(
            azure_config.max_run_duration)
    workspace = azure_config.get_workspace()
    run_config = RunConfiguration(
        script=entry_script_relative_path,
        arguments=source_config.script_params,
    )
    run_config.environment = get_or_create_python_environment(
        azure_config, source_config, environment_name=environment_name)
    run_config.target = azure_config.cluster
    run_config.max_run_duration_seconds = max_run_duration
    if azure_config.num_nodes > 1:
        distributed_job_config = MpiConfiguration(
            node_count=azure_config.num_nodes)
        run_config.mpi = distributed_job_config
        run_config.framework = "Python"
        run_config.communicator = "IntelMpi"
        run_config.node_count = distributed_job_config.node_count
    if len(dataset_consumptions) > 0:
        run_config.data = {
            dataset.name: dataset
            for dataset in dataset_consumptions
        }
    # Use blob storage for storing the source, rather than the FileShares section of the storage account.
    run_config.source_directory_data_store = workspace.datastores.get(
        WORKSPACE_DEFAULT_BLOB_STORE_NAME).name
    script_run_config = ScriptRunConfig(
        source_directory=str(source_config.root_folder),
        run_config=run_config,
    )
    if azure_config.hyperdrive:
        script_run_config = source_config.hyperdrive_config_func(
            script_run_config)  # type: ignore
    return script_run_config
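A sketch of submitting the resulting configuration; azure_config and source_config are assumed to exist as in the previous examples, and the dataset ID and experiment name are hypothetical:

script_run_config = create_run_config(azure_config, source_config,
                                      all_azure_dataset_ids=["my_dataset"],
                                      all_dataset_mountpoints=[])
experiment = Experiment(workspace=azure_config.get_workspace(), name="sketch-experiment")
run = experiment.submit(script_run_config)
print(f"Submitted run {run.id}: {run.get_portal_url()}")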
Example 11
def create_dataset_consumptions(
        azure_config: AzureConfig, all_azure_dataset_ids: List[str],
        all_dataset_mountpoints: List[str]) -> List[DatasetConsumptionConfig]:
    """
    Sets up all the dataset consumption objects for the datasets provided. Datasets that have an empty name will be
    skipped.
    :param azure_config: azure related configurations to use for model scale-out behaviour
    :param all_azure_dataset_ids: The name of all datasets on blob storage that will be used for this run.
    :param all_dataset_mountpoints: When using the datasets in AzureML, these are the per-dataset mount points.
    :return: A list of DatasetConsumptionConfig, in the same order as datasets were provided in all_azure_dataset_ids,
    omitting datasets with an empty name.
    """
    dataset_consumptions: List[DatasetConsumptionConfig] = []
    if len(all_dataset_mountpoints) > 0:
        if len(all_azure_dataset_ids) != len(all_dataset_mountpoints):
            raise ValueError(
                f"The number of dataset mount points ({len(all_dataset_mountpoints)}) "
                f"must equal the number of Azure dataset IDs ({len(all_azure_dataset_ids)})"
            )
    else:
        all_dataset_mountpoints = [""] * len(all_azure_dataset_ids)
    for i, (dataset_id, mount_point) in enumerate(
            zip(all_azure_dataset_ids, all_dataset_mountpoints)):
        if dataset_id:
            dataset_consumption = azure_config.get_dataset_consumption(
                dataset_id, i, mount_point)
            dataset_consumptions.append(dataset_consumption)
        elif mount_point:
            raise ValueError(
                f"Inconsistent setup: Dataset name at index {i} is empty, but a mount point has "
                f"been provided ('{mount_point}')")
    return dataset_consumptions
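For example, two datasets where only the second one uses an explicit mount point (the names and mount point are hypothetical):

consumptions = create_dataset_consumptions(azure_config,
                                           all_azure_dataset_ids=["dataset_a", "dataset_b"],
                                           all_dataset_mountpoints=["", "/datasets/b"])
print([d.name for d in consumptions])  # datasets with an empty name would simply be skipped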
Example 12
def test_copy_child_paths_to_folder(is_ensemble: bool,
                                    extra_code_directory: str,
                                    test_output_dirs: OutputFolderForTests) -> None:
    azure_config = AzureConfig(extra_code_directory=extra_code_directory)
    fake_model = SegmentationModelBase(should_validate=False)
    fake_model.set_output_to(test_output_dirs.root_dir)
    # To simulate ensemble models, there are two checkpoints, one in the root dir and one in a folder
    checkpoints_absolute, checkpoints_relative = create_checkpoints(fake_model, is_ensemble)
    # Simulate a project root: We can't derive that from the repository root because that might point
    # into Python's package folder
    project_root = Path(__file__).parent.parent
    ml_runner = MLRunner(model_config=fake_model, azure_config=azure_config, project_root=project_root)
    model_folder = test_output_dirs.root_dir / "final"
    ml_runner.copy_child_paths_to_folder(model_folder=model_folder, checkpoint_paths=checkpoints_absolute)
    expected_files = [
        fixed_paths.ENVIRONMENT_YAML_FILE_NAME,
        fixed_paths.MODEL_INFERENCE_JSON_FILE_NAME,
        "InnerEye/ML/runner.py",
        "InnerEye/ML/model_testing.py",
        "InnerEye/Common/fixed_paths.py",
        "InnerEye/Common/common_util.py",
    ]
    for r in checkpoints_relative:
        expected_files.append(f"{CHECKPOINT_FOLDER}/{r}")
    for expected_file in expected_files:
        assert (model_folder / expected_file).is_file(), f"File missing: {expected_file}"
    trm = model_folder / "TestsOutsidePackage/test_register_model.py"
    if extra_code_directory:
        assert trm.is_file()
    else:
        assert not trm.is_file()
Example 13
def set_run_tags(run: Run, azure_config: AzureConfig, model_config_overrides: str) -> None:
    """
    Set metadata for the run
    :param run: Run to set metadata for.
    :param azure_config: The configurations for the present AzureML job
    :param model_config_overrides: A string that describes which model parameters were overwritten by commandline
     arguments in the present run.
    """
    git_information = azure_config.get_git_information()
    run.set_tags({
        "tag": azure_config.tag,
        "model_name": azure_config.model,
        "execution_mode": ModelExecutionMode.TRAIN.value if azure_config.train else ModelExecutionMode.TEST.value,
        RUN_RECOVERY_ID_KEY_NAME: azure_util.create_run_recovery_id(run=run),
        RUN_RECOVERY_FROM_ID_KEY_NAME: azure_config.run_recovery_id,
        "build_number": str(azure_config.build_number),
        "build_user": azure_config.build_user,
        "build_user_email": azure_config.build_user_email,
        "source_repository": git_information.repository,
        "source_branch": git_information.branch,
        "source_id": git_information.commit_id,
        "source_message": git_information.commit_message,
        "source_author": git_information.commit_author,
        "source_dirty": str(git_information.is_dirty),
        "overrides": model_config_overrides,
        CROSS_VALIDATION_SPLIT_INDEX_TAG_KEY: -1,
    })
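A sketch of calling this right after job submission; experiment and script_run_config are assumed to be set up as in the Example 10 sketch, and the overrides string is hypothetical:

run = experiment.submit(script_run_config)
set_run_tags(run, azure_config, model_config_overrides="--l_rate=1e-4 --num_epochs=10")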
Example 14
    def download_checkpoints_from_recovery_run(azure_config: AzureConfig,
                                               config: DeepLearningConfig,
                                               run_context: Optional[Run] = None) -> RunRecovery:
        """
        Downloads the checkpoints of the run corresponding to the run_recovery_id in azure_config, and any
        checkpoints of its child runs if they exist.

        :param azure_config: Azure related configs.
        :param config: Model related configs.
        :param run_context: Context of the current run (will be used to find the target AML workspace)
        :return: A RunRecovery object that holds the paths of the downloaded checkpoints.
        """
        run_context = run_context or RUN_CONTEXT
        workspace = azure_config.get_workspace()

        # Find the run to recover in AML workspace
        if not azure_config.run_recovery_id:
            raise ValueError("A valid run_recovery_id is required to download recovery checkpoints, found None")

        run_to_recover = fetch_run(workspace, azure_config.run_recovery_id.strip())
        # Handle recovery of a HyperDrive cross validation run (from within a successor HyperDrive run,
        # not in ensemble creation). In this case, run_recovery_id refers to the parent prior run, so we
        # need to set run_to_recover to the child of that run whose split index is the same as that of
        # the current (child) run.
        if is_cross_validation_child_run(run_context):
            run_to_recover = next(x for x in fetch_child_runs(run_to_recover) if
                                  get_cross_validation_split_index(x) == get_cross_validation_split_index(run_context))

        return RunRecovery.download_checkpoints_from_run(config, run_to_recover)
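Typical use, assuming a recovery ID of the usual experiment_name:run_id form and an existing model config (both hypothetical):

azure_config.run_recovery_id = "my_experiment:my_run_id"      # hypothetical recovery ID
run_recovery = RunRecovery.download_checkpoints_from_recovery_run(azure_config, model_config)
print(run_recovery.get_checkpoint_paths(epoch=1))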
Example 15
 def __init__(
     self,
     model_config: ModelConfigBase,
     azure_config: Optional[AzureConfig] = None,
     project_root: Optional[Path] = None,
     post_cross_validation_hook: Optional[
         PostCrossValidationHookSignature] = None,
     model_deployment_hook: Optional[ModelDeploymentHookSignature] = None
 ) -> None:
     """
     Driver class to run an ML experiment. Note that the project root argument MUST be supplied when using InnerEye
     as a package!
     :param model_config: Model related configurations
     :param azure_config: Azure related configurations
     :param project_root: Project root. This should only be omitted if calling run_ml from the test suite. Supplying
     it is crucial when using InnerEye as a package or submodule!
     :param post_cross_validation_hook: A function to call after waiting for completion of cross validation runs.
     The function is called with the model configuration and the path to the downloaded and merged metrics files.
     :param model_deployment_hook: an optional function for deploying a model in an application-specific way.
     If present, it should take a model config (SegmentationModelBase), an AzureConfig, and an AzureML
     Model as arguments, and return an optional Path and a further object of any type.
     """
     self.model_config = model_config
     self.azure_config: AzureConfig = azure_config or AzureConfig()
     self.project_root: Path = project_root or fixed_paths.repository_root_directory()
     self.post_cross_validation_hook = post_cross_validation_hook
     self.model_deployment_hook = model_deployment_hook
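Construction mirrors what the tests in these examples do; a minimal sketch with a hypothetical model config:

ml_runner = MLRunner(model_config=my_model_config,        # hypothetical ModelConfigBase instance
                     azure_config=AzureConfig(),
                     project_root=Path(__file__).parent)  # must be supplied when using InnerEye as a package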
Example 16
def get_comparison_baselines(outputs_folder: Path, azure_config: AzureConfig,
                             comparison_blob_storage_paths: List[Tuple[str, str]]) -> \
        List[ComparisonBaseline]:
    comparison_baselines = []
    for (comparison_name, comparison_path) in comparison_blob_storage_paths:
        # Discard the experiment part of the run rec ID, if any.
        comparison_path = comparison_path.split(":")[-1]
        run_rec_id, blob_path_str = comparison_path.split("/", 1)
        run_rec_id = strip_prefix(run_rec_id, AZUREML_RUN_FOLDER_PREFIX)
        blob_path = Path(
            strip_prefix(blob_path_str, DEFAULT_AML_UPLOAD_DIR + "/"))
        run = azure_config.fetch_run(run_rec_id)
        (comparison_dataset_path,
         comparison_metrics_path) = get_comparison_baseline_paths(
             outputs_folder, blob_path, run, DATASET_CSV_FILE_NAME)
        # If both dataset.csv and metrics.csv were downloaded successfully, read their contents and
        # add a tuple to the comparison data.
        if comparison_dataset_path is not None and comparison_metrics_path is not None and \
                comparison_dataset_path.exists() and comparison_metrics_path.exists():
            comparison_baselines.append(
                ComparisonBaseline(comparison_name,
                                   pd.read_csv(comparison_dataset_path),
                                   pd.read_csv(comparison_metrics_path),
                                   run_rec_id))
        else:
            raise ValueError(
                f"could not find comparison data for run {run_rec_id}")
    return comparison_baselines
def test_submit_for_inference(test_output_dirs: OutputFolderForTests) -> None:
    """
    Execute the submit_for_inference script on the model that was recently trained. This starts an AzureML job,
    and downloads the segmentation. Then check if the segmentation was actually produced.
    """
    model = get_most_recent_model()
    image_file = fixed_paths_for_tests.full_ml_test_data_path() / "train_and_test_data" / "id1_channel1.nii.gz"
    assert image_file.exists(), f"Image file not found: {image_file}"
    settings_file = fixed_paths.SETTINGS_YAML_FILE
    assert settings_file.exists(), f"Settings file not found: {settings_file}"
    azure_config = AzureConfig.from_yaml(
        settings_file, project_root=fixed_paths.repository_root_directory())
    # Read the name of the branch from environment, so that the inference experiment is also listed alongside
    # all other AzureML runs that belong to the current PR.
    build_branch = os.environ.get("BUILD_BRANCH", None)
    experiment_name = to_azure_friendly_string(
        build_branch) if build_branch else "model_inference"
    azure_config.get_git_information()
    args = [
        "--image_file",
        str(image_file), "--model_id", model.id, "--settings",
        str(settings_file), "--download_folder",
        str(test_output_dirs.root_dir), "--cluster", "training-nc12",
        "--experiment", experiment_name
    ]
    seg_path = test_output_dirs.root_dir / DEFAULT_RESULT_IMAGE_NAME
    assert not seg_path.exists(), f"Result file {seg_path} should not yet exist"
    submit_for_inference.main(
        args, project_root=fixed_paths.repository_root_directory())
    assert seg_path.exists(), f"Result file {seg_path} was not created"
Example 18
def test_score_image_dicom_mock_none(
        test_output_dirs: OutputFolderForTests) -> None:
    """
    Test that DICOM in and DICOM-RT out works.

    In this variant there is no mocking, and full image scoring is run using the PassThroughModel.

    :param test_output_dirs: Test output directories.
    """
    model_config = PassThroughModel()
    model_config.set_output_to(test_output_dirs.root_dir)
    checkpoint_path = model_config.checkpoint_folder / "checkpoint.ckpt"
    create_model_and_store_checkpoint(model_config, checkpoint_path)

    azure_config = AzureConfig()
    project_root = Path(__file__).parent.parent
    ml_runner = MLRunner(model_config=model_config,
                         azure_config=azure_config,
                         project_root=project_root)
    model_folder = test_output_dirs.root_dir / "final"
    ml_runner.copy_child_paths_to_folder(model_folder=model_folder,
                                         checkpoint_paths=[checkpoint_path])

    zipped_dicom_series_path = zip_dicom_series(model_folder)

    score_pipeline_config = ScorePipelineConfig(
        data_folder=zipped_dicom_series_path.parent,
        model_folder=str(model_folder),
        image_files=[str(zipped_dicom_series_path)],
        result_image_name=HNSEGMENTATION_FILE.name,
        use_gpu=False,
        use_dicom=True)

    segmentation = score_image(score_pipeline_config)
    assert_zip_file_contents(segmentation, HN_DICOM_RT_ZIPPED, model_folder)
def submit_for_inference(args: SubmitForInferenceConfig,
                         azure_config: AzureConfig) -> Optional[Path]:
    """
    Create and submit an inference job to AzureML, and optionally download the resulting segmentation.
    :param args: The inference configuration, see SubmitForInferenceConfig.
    :param azure_config: An object with all necessary information for accessing Azure.
    :return: Path to the downloaded segmentation on local disk, or None if no download folder was given.
    """
    logging.info(f"Building Azure configuration from {args.settings}")
    logging.info("Getting workspace")
    workspace = azure_config.get_workspace()
    logging.info("Identifying model")
    model = Model(workspace=workspace, id=args.model_id)
    model_id = model.id
    logging.info(f"Identified model {model_id}")
    source_directory = tempfile.TemporaryDirectory()
    source_directory_name = source_directory.name
    logging.info(
        f"Building inference run submission in {source_directory_name}")
    source_directory_path = Path(source_directory_name)
    copy_image_file(args.image_file,
                    source_directory_path / DEFAULT_DATA_FOLDER)
    # We copy over run_scoring.py, and score.py as well in case the model we're using
    # does not have sufficiently recent versions of those files.
    for base in ["run_scoring.py", "score.py"]:
        shutil.copyfile(base, str(source_directory_path / base))
    source_config = SourceConfig(
        root_folder=source_directory_name,
        entry_script=str(source_directory_path / "run_scoring.py"),
        script_params={
            "--data-folder": ".",
            "--spawnprocess": "python",
            "--model-id": model_id,
            "score.py": ""
        },
        conda_dependencies_files=download_conda_dependency_files(
            model, source_directory_path))
    estimator = create_estimator_from_configs(workspace, azure_config,
                                              source_config, [])
    exp = Experiment(workspace=workspace, name=args.experiment_name)
    run = exp.submit(estimator)
    logging.info(f"Submitted run {run.id} in experiment {run.experiment.name}")
    logging.info(f"Run URL: {run.get_portal_url()}")
    if not args.keep_upload_folder:
        source_directory.cleanup()
        logging.info(f"Deleted submission directory {source_directory_name}")
    if args.download_folder is None:
        return None
    logging.info("Awaiting run completion")
    run.wait_for_completion()
    logging.info(f"Run has completed with status {run.get_status()}")
    download_path = choose_download_path(args.download_folder)
    logging.info(f"Attempting to download segmentation to {download_path}")
    run.download_file(DEFAULT_RESULT_IMAGE_NAME, str(download_path))
    if download_path.exists():
        logging.info(f"Downloaded segmentation to {download_path}")
    else:
        logging.warning("Segmentation NOT downloaded")
    return download_path
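A programmatic invocation sketch; the SubmitForInferenceConfig keywords are assumed from how args is used above, and the model ID and paths are hypothetical:

azure_config = AzureConfig.from_yaml(fixed_paths.SETTINGS_YAML_FILE,
                                     project_root=fixed_paths.repository_root_directory())
inference_args = SubmitForInferenceConfig(model_id="MyModel:1",
                                          image_file=Path("scan.nii.gz"),
                                          experiment_name="model_inference",
                                          download_folder=Path("downloads"))
print(submit_for_inference(inference_args, azure_config))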
Example 20
 def azure_config(self) -> AzureConfig:
     """
     Gets the AzureConfig instance that the script uses.
     :return: The cached AzureConfig, created from the settings YAML file on first access.
     """
     if self._azure_config is None:
         self._azure_config = AzureConfig.from_yaml(self.settings_yaml_file, project_root=self.project_root)
     return self._azure_config
 def azure_config(self) -> AzureConfig:
     """
     Gets the AzureConfig instance that the script uses.
     :return: The cached AzureConfig, created from the YAML file on first access.
     """
     if self._azure_config is None:
         self._azure_config = AzureConfig.from_yaml(Path(self.train_yaml_path))
     return self._azure_config
def report_structure_extremes(dataset_dir: str, yaml_file: str) -> None:
    """
    Writes structure-extreme lines for the subjects in a directory.
    If there are any structures with missing slices, a ValueError is raised after writing all the lines.
    This allows a build failure to be triggered when such structures exist.
    :param dataset_dir: Directory containing subject subdirectories with integer names.
    :param yaml_file: The path to the YAML file that contains all Azure-related options.
    """
    azure_config = AzureConfig.from_yaml(yaml_file_path=Path(yaml_file))
    download_dataset_directory(azure_config, dataset_dir)
    subjects: Set[int] = set()
    series_map = None
    institution_map = None
    for subj in os.listdir(dataset_dir):
        try:
            subjects.add(int(subj))
        except ValueError:
            if subj == "dataset.csv":
                # We should find this in every dataset_dir.
                series_map, institution_map = populate_series_maps(os.path.join(dataset_dir, subj))
            pass
    if institution_map is None or series_map is None:
        raise FileNotFoundError(f"Cannot find {dataset_dir}/dataset.csv")
    if not subjects:
        print(f"No subject directories found in {dataset_dir}")
        return
    print(f"Found {len(subjects)} subjects in {dataset_dir}")
    # You could temporarily edit subjects to be an explicit list of integers here, to process only certain subjects:
    # subjects = [23, 42, 99]
    full_output_dir = os.path.join(dataset_dir, "structure_extremes_full")
    os.makedirs(full_output_dir)
    problems_output_dir = os.path.join(dataset_dir, "structure_extremes_problems")
    os.makedirs(problems_output_dir)
    n_missing = 0
    files_created: Set[str] = set()
    for (index, subj_int) in enumerate(sorted(subjects), 1):
        subj = str(subj_int)
        institution_id = institution_map.get(subj, "")
        out = open_with_header(os.path.join(full_output_dir, institution_id + ".txt"), files_created)
        err = None
        for line in report_structure_extremes_for_subject(os.path.join(dataset_dir, subj), series_map[subj]):
            out.write(line + "\n")
            if line.find(MISSING_SLICE_MARKER) > 0:
                if err is None:
                    err = open_with_header(os.path.join(problems_output_dir, institution_id + ".txt"), files_created)
                err.write(line + "\n")
                n_missing += 1
        out.close()
        if err is not None:
            err.close()
        if index % 25 == 0:
            print(f"Processed {index} subjects")
    print(f"Processed all {len(subjects)} subjects")
    upload_to_dataset_directory(azure_config, dataset_dir, files_created)
    # If we found any structures with missing slices, raise an exception, which should be
    # uncaught where necessary to make any appropriate build step fail.
    if n_missing > 0:
        raise ValueError(f"Found {n_missing} structures with missing slices")
Example 23
def create_run_config(azure_config: AzureConfig,
                      source_config: SourceConfig,
                      azure_dataset_id: str = "",
                      environment_name: str = "") -> ScriptRunConfig:
    """
    Creates a configuration to run the InnerEye training script in AzureML.
    :param azure_config: azure related configurations to use for model scale-out behaviour
    :param source_config: configurations for model execution, such as name and execution mode
    :param azure_dataset_id: The name of the dataset in blob storage to be used for this run. This can be an empty
    string to not use any datasets.
    :param environment_name: If specified, try to retrieve the existing Python environment with this name. If that
    is not found, create one from the Conda files provided in `source_config`. This parameter is meant to be used
    when running inference for an existing model.
    :return: The configured script run.
    """
    if azure_dataset_id:
        azureml_dataset = get_or_create_dataset(azure_config, azure_dataset_id=azure_dataset_id)
        if not azureml_dataset:
            raise ValueError(f"AzureML dataset {azure_dataset_id} could not be found or created.")
        named_input = azureml_dataset.as_named_input(INPUT_DATA_KEY)
        dataset_consumption = named_input.as_mount() if azure_config.use_dataset_mount else named_input.as_download()
    else:
        dataset_consumption = None
    # AzureML seems to sometimes expect the entry script path in Linux format, hence convert to posix path
    entry_script_relative_path = source_config.entry_script.relative_to(source_config.root_folder).as_posix()
    logging.info(f"Entry script {entry_script_relative_path} ({source_config.entry_script} relative to "
                 f"source directory {source_config.root_folder})")
    max_run_duration = None
    if azure_config.max_run_duration:
        max_run_duration = run_duration_string_to_seconds(azure_config.max_run_duration)
    workspace = azure_config.get_workspace()
    run_config = RunConfiguration(
        script=entry_script_relative_path,
        arguments=source_config.script_params,
    )
    run_config.environment = get_or_create_python_environment(azure_config, source_config,
                                                              environment_name=environment_name)
    run_config.target = azure_config.cluster
    run_config.max_run_duration_seconds = max_run_duration
    if azure_config.num_nodes > 1:
        distributed_job_config = MpiConfiguration(node_count=azure_config.num_nodes)
        run_config.mpi = distributed_job_config
        run_config.framework = "Python"
        run_config.communicator = "IntelMpi"
        run_config.node_count = distributed_job_config.node_count
    if dataset_consumption:
        run_config.data = {dataset_consumption.name: dataset_consumption}
    # Use blob storage for storing the source, rather than the FileShares section of the storage account.
    run_config.source_directory_data_store = workspace.datastores.get(WORKSPACE_DEFAULT_BLOB_STORE_NAME).name
    script_run_config = ScriptRunConfig(
        source_directory=str(source_config.root_folder),
        run_config=run_config,
    )
    if azure_config.hyperdrive:
        script_run_config = source_config.hyperdrive_config_func(script_run_config)  # type: ignore
    return script_run_config
Example 24
def main(settings_yaml_file: Optional[Path] = None,
         project_root: Optional[Path] = None) -> None:
    """
    Main function.
    """
    logging_to_stdout()
    config = ReportStructureExtremesConfig.parse_args()
    azure_config = AzureConfig.from_yaml(yaml_file_path=settings_yaml_file or config.settings,
                                         project_root=project_root)
    report_structure_extremes(config.dataset, azure_config)
 def azure_config(self) -> AzureConfig:
     """
     Gets the AzureConfig instance that the script uses. This will either read out a value that has
     previously been set, or create a new AzureConfig object from the YAML file and project root settings that
     the present object holds.
     """
     if self._azure_config is None:
         self._azure_config = AzureConfig.from_yaml(
             self.settings_yaml_file, project_root=self.project_root)
     return self._azure_config
def upload_to_dataset_directory(azure_config: AzureConfig, dataset_dir: str, files: Set[str]) -> None:
    if not files:
        return
    account_key = azure_config.get_dataset_storage_account_key()
    block_blob_service = BlockBlobService(account_name=azure_config.datasets_storage_account, account_key=account_key)
    container_name = os.path.join(azure_config.datasets_container, os.path.basename(dataset_dir))
    for path in files:
        blob_name = path[len(dataset_dir) + 1:]
        block_blob_service.create_blob_from_path(container_name, blob_name, path)
        print(f"Uploaded {path} to {azure_config.datasets_storage_account}:{container_name}/{blob_name}")
def download_dataset_directory(azure_config: AzureConfig, dataset_dir: str) -> bool:
    if os.path.isdir(dataset_dir):
        return False
    account_key = azure_config.get_dataset_storage_account_key()
    blobs_root_path = os.path.join(azure_config.datasets_container, os.path.basename(dataset_dir)) + "/"
    sys.stdout.write(f"Downloading data to {dataset_dir} ...")
    assert account_key is not None  # for mypy
    download_blobs(azure_config.datasets_storage_account, account_key, blobs_root_path, Path(dataset_dir))
    sys.stdout.write("done\n")
    return True
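A sketch combining the two helpers above: fetch a local dataset copy if it is not there yet, then upload a newly created report file (paths are hypothetical):

if download_dataset_directory(azure_config, "/datasets/my_dataset"):
    print("Dataset downloaded")
upload_to_dataset_directory(azure_config, "/datasets/my_dataset", files={"/datasets/my_dataset/report.txt"})
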
def monitor(monitor_config: AMLTensorBoardMonitorConfig,
            azure_config: AzureConfig) -> None:
    """
    Starts TensorBoard monitoring as per the provided arguments.
    :param monitor_config: The config containing information on which runs need to be monitored.
    :param azure_config: An AzureConfig object with secrets/keys to access the workspace.
    """
    # Fetch AzureML workspace and the experiment runs in it
    workspace = azure_config.get_workspace()

    if monitor_config.run_ids is not None:
        if len(monitor_config.run_ids) == 0:
            print("At least one run_recovery_id must be given for monitoring.")
            sys.exit(1)
        exp_runs = [
            azure_util.fetch_run(workspace, run_id)
            for run_id in monitor_config.run_ids
        ]
    else:
        if monitor_config.experiment_name not in workspace.experiments:
            print(f"The experiment: {monitor_config.experiment_name} doesn't "
                  f"exist in the {monitor_config.workspace_name} workspace.")
            sys.exit(1)

        experiment = Experiment(workspace, monitor_config.experiment_name)
        filters = common_util.get_items_from_string(
            monitor_config.run_status) if monitor_config.run_status else []

        exp_runs = azure_util.fetch_runs(experiment, filters)

        if len(exp_runs) == 0:
            _msg = "No runs to monitor"
            if monitor_config.run_status:
                _msg += f"with status [{monitor_config.run_status}]."
            print(_msg)
            sys.exit(1)

    # Start TensorBoard on executing machine
    ts = Tensorboard(exp_runs,
                     local_root=str(monitor_config.local_root),
                     port=monitor_config.port)

    print(
        "=============================================================================="
    )
    for run in exp_runs:
        print(f"Run URL: {run.get_portal_url()}")
    print("TensorBoard URL: ")
    ts.start()
    print(
        "==============================================================================\n\n"
    )
    input("Press Enter to close TensorBoard...")
    ts.stop()
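A sketch of starting the monitor for two runs by recovery ID; the AMLTensorBoardMonitorConfig keywords are assumed from how the config is read above, and the run IDs are hypothetical:

monitor_config = AMLTensorBoardMonitorConfig(run_ids=["my_experiment:run_1", "my_experiment:run_2"],
                                             local_root=Path("tensorboard_logs"),
                                             port=6006)
monitor(monitor_config, azure_config)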
Example 29
def get_configs(
        default_model_config: SegmentationModelBase, yaml_file_path: Path
) -> Tuple[SegmentationModelBase, AzureConfig, Dict]:
    parser_result = create_parser(yaml_file_path)
    args = parser_result.args
    runner_config = AzureConfig(**args)
    logging_to_stdout(args["log_level"])
    config = default_model_config or ModelConfigLoader().create_model_config_from_name(runner_config.model)
    config.apply_overrides(parser_result.overrides, should_validate=False)
    return config, runner_config, args
def get_most_recent_model() -> Model:
    most_recent_run = get_most_recent_run()
    azure_config = AzureConfig.from_yaml(
        fixed_paths.SETTINGS_YAML_FILE,
        project_root=fixed_paths.repository_root_directory())
    workspace = azure_config.get_workspace()
    run = fetch_run(workspace, most_recent_run)
    tags = run.get_tags()
    model_id = tags.get(MODEL_ID_KEY_NAME, None)
    assert model_id, f"No model_id tag was found on run {most_recent_run}"
    return Model(workspace=workspace, id=model_id)