def create_run_config(azure_config: AzureConfig,
                      source_config: SourceConfig,
                      all_azure_dataset_ids: List[str],
                      all_dataset_mountpoints: List[str],
                      environment_name: str = "") -> ScriptRunConfig:
    """
    Creates a configuration to run the InnerEye training script in AzureML.
    :param azure_config: azure related configurations to use for model scale-out behaviour
    :param source_config: configurations for model execution, such as name and execution mode
    :param all_azure_dataset_ids: The names of all datasets on blob storage that will be used for this run.
    :param all_dataset_mountpoints: When using the datasets in AzureML, these are the per-dataset mount points.
    :param environment_name: If specified, try to retrieve the existing Python environment with this name. If that
        is not found, create one from the Conda files provided in `source_config`. This parameter is meant to be used
        when running inference for an existing model.
    :return: The configured script run.
    """
    dataset_consumptions = create_dataset_consumptions(azure_config, all_azure_dataset_ids, all_dataset_mountpoints)
    # AzureML seems to sometimes expect the entry script path in Linux format, hence convert to posix path
    entry_script_relative_path = source_config.entry_script.relative_to(source_config.root_folder).as_posix()
    logging.info(f"Entry script {entry_script_relative_path} ({source_config.entry_script} relative to "
                 f"source directory {source_config.root_folder})")
    max_run_duration = None
    if azure_config.max_run_duration:
        max_run_duration = run_duration_string_to_seconds(azure_config.max_run_duration)
    workspace = azure_config.get_workspace()
    run_config = RunConfiguration(
        script=entry_script_relative_path,
        arguments=source_config.script_params,
    )
    run_config.environment = get_or_create_python_environment(azure_config, source_config,
                                                              environment_name=environment_name)
    run_config.target = azure_config.cluster
    run_config.max_run_duration_seconds = max_run_duration
    if azure_config.num_nodes > 1:
        distributed_job_config = MpiConfiguration(node_count=azure_config.num_nodes)
        run_config.mpi = distributed_job_config
        run_config.framework = "Python"
        run_config.communicator = "IntelMpi"
        run_config.node_count = distributed_job_config.node_count
    if len(dataset_consumptions) > 0:
        run_config.data = {dataset.name: dataset for dataset in dataset_consumptions}
    # Use blob storage for storing the source, rather than the FileShares section of the storage account.
    run_config.source_directory_data_store = workspace.datastores.get(WORKSPACE_DEFAULT_BLOB_STORE_NAME).name
    script_run_config = ScriptRunConfig(
        source_directory=str(source_config.root_folder),
        run_config=run_config,
    )
    if azure_config.hyperdrive:
        script_run_config = source_config.hyperdrive_config_func(script_run_config)  # type: ignore
    return script_run_config
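

# Illustrative sketch (not part of the original module): one way the run configuration produced by the multi-dataset
# variant of create_run_config defined directly above might be submitted to AzureML. The dataset and experiment names
# are hypothetical placeholders; `azure_config` and `source_config` are assumed to be fully populated AzureConfig /
# SourceConfig instances, and Experiment / Experiment.submit are standard azureml-core APIs.
def _example_submit_run_config(azure_config: AzureConfig, source_config: SourceConfig):
    from azureml.core import Experiment
    script_run_config = create_run_config(azure_config,
                                          source_config,
                                          all_azure_dataset_ids=["example_dataset"],  # hypothetical dataset name
                                          all_dataset_mountpoints=[])  # assume default mount points
    experiment = Experiment(workspace=azure_config.get_workspace(), name="example_experiment")  # hypothetical name
    return experiment.submit(script_run_config)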


def create_run_config(azure_config: AzureConfig,
                      source_config: SourceConfig,
                      azure_dataset_id: str = "",
                      environment_name: str = "") -> ScriptRunConfig:
    """
    Creates a configuration to run the InnerEye training script in AzureML.
    :param azure_config: azure related configurations to use for model scale-out behaviour
    :param source_config: configurations for model execution, such as name and execution mode
    :param azure_dataset_id: The name of the dataset in blob storage to be used for this run. This can be an empty
        string to not use any datasets.
    :param environment_name: If specified, try to retrieve the existing Python environment with this name. If that
        is not found, create one from the Conda files provided in `source_config`. This parameter is meant to be used
        when running inference for an existing model.
    :return: The configured script run.
    """
    if azure_dataset_id:
        azureml_dataset = get_or_create_dataset(azure_config, azure_dataset_id=azure_dataset_id)
        if not azureml_dataset:
            raise ValueError(f"AzureML dataset {azure_dataset_id} could not be found or created.")
        named_input = azureml_dataset.as_named_input(INPUT_DATA_KEY)
        dataset_consumption = named_input.as_mount() if azure_config.use_dataset_mount else named_input.as_download()
    else:
        dataset_consumption = None
    # AzureML seems to sometimes expect the entry script path in Linux format, hence convert to posix path
    entry_script_relative_path = source_config.entry_script.relative_to(source_config.root_folder).as_posix()
    logging.info(f"Entry script {entry_script_relative_path} ({source_config.entry_script} relative to "
                 f"source directory {source_config.root_folder})")
    max_run_duration = None
    if azure_config.max_run_duration:
        max_run_duration = run_duration_string_to_seconds(azure_config.max_run_duration)
    workspace = azure_config.get_workspace()
    run_config = RunConfiguration(
        script=entry_script_relative_path,
        arguments=source_config.script_params,
    )
    run_config.environment = get_or_create_python_environment(azure_config, source_config,
                                                              environment_name=environment_name)
    run_config.target = azure_config.cluster
    run_config.max_run_duration_seconds = max_run_duration
    if azure_config.num_nodes > 1:
        distributed_job_config = MpiConfiguration(node_count=azure_config.num_nodes)
        run_config.mpi = distributed_job_config
        run_config.framework = "Python"
        run_config.communicator = "IntelMpi"
        run_config.node_count = distributed_job_config.node_count
    if dataset_consumption:
        run_config.data = {dataset_consumption.name: dataset_consumption}
    # Use blob storage for storing the source, rather than the FileShares section of the storage account.
    run_config.source_directory_data_store = workspace.datastores.get(WORKSPACE_DEFAULT_BLOB_STORE_NAME).name
    script_run_config = ScriptRunConfig(
        source_directory=str(source_config.root_folder),
        run_config=run_config,
    )
    if azure_config.hyperdrive:
        script_run_config = source_config.hyperdrive_config_func(script_run_config)  # type: ignore
    return script_run_config
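

# Illustrative sketch (not part of the original module): re-using a previously registered AzureML environment when
# building a run configuration for an existing model, via the `environment_name` parameter of the single-dataset
# create_run_config variant above. The environment and dataset names are hypothetical placeholders; if the named
# environment cannot be retrieved, create_run_config falls back to building one from the Conda files in source_config.
def _example_run_config_for_existing_model(azure_config: AzureConfig, source_config: SourceConfig) -> ScriptRunConfig:
    return create_run_config(azure_config,
                             source_config,
                             azure_dataset_id="example_inference_dataset",  # hypothetical dataset name
                             environment_name="example_registered_environment")  # hypothetical environment name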


def create_estimator_from_configs(azure_config: AzureConfig,
                                  source_config: SourceConfig,
                                  estimator_inputs: List[DatasetConsumptionConfig]) -> PyTorch:
    """
    Create and return a PyTorch estimator from the provided configuration information.
    :param azure_config: Azure configuration, used to store various values for the job to be submitted
    :param source_config: source configuration, for other needed values
    :param estimator_inputs: value for the "inputs" field of the estimator.
    :return: The configured PyTorch estimator.
    """
    # AzureML seems to sometimes expect the entry script path in Linux format, hence convert to posix path
    entry_script_relative_path = Path(source_config.entry_script).relative_to(source_config.root_folder).as_posix()
    logging.info(f"Entry script {entry_script_relative_path} ({source_config.entry_script} relative to "
                 f"source directory {source_config.root_folder})")
    environment_variables = {
        "AZUREML_OUTPUT_UPLOAD_TIMEOUT_SEC": str(source_config.upload_timeout_seconds),
        "MKL_SERVICE_FORCE_INTEL": "1",
        **(source_config.environment_variables or {})
    }
    # Merge the project-specific dependencies with the packages that InnerEye itself needs. This should not be
    # necessary if the innereye package is installed. It is necessary when working with an outer project and
    # InnerEye as a git submodule and submitting jobs from the local machine.
    # In case of version conflicts, the package version in the outer project is given priority.
    conda_dependencies = merge_conda_dependencies(source_config.conda_dependencies_files)  # type: ignore
    if azure_config.pip_extra_index_url:
        # When an extra-index-url is supplied, swap the order in which packages are searched for.
        # This is necessary if we need to consume packages from extra-index that clash with names of packages on
        # pypi.
        conda_dependencies.set_pip_option(f"--index-url {azure_config.pip_extra_index_url}")
        conda_dependencies.set_pip_option("--extra-index-url https://pypi.org/simple")
    # Create the estimator environment
    framework_version = pytorch_version_from_conda_dependencies(conda_dependencies)
    logging.info(f"PyTorch framework version: {framework_version}")
    max_run_duration = None
    if azure_config.max_run_duration:
        max_run_duration = run_duration_string_to_seconds(azure_config.max_run_duration)
    workspace = azure_config.get_workspace()
    estimator = PyTorch(
        source_directory=source_config.root_folder,
        entry_script=entry_script_relative_path,
        script_params=source_config.script_params,
        compute_target=azure_config.cluster,
        # Use blob storage for storing the source, rather than the FileShares section of the storage account.
        source_directory_data_store=workspace.datastores.get(WORKSPACE_DEFAULT_BLOB_STORE_NAME),
        inputs=estimator_inputs,
        environment_variables=environment_variables,
        shm_size=azure_config.docker_shm_size,
        use_docker=True,
        use_gpu=True,
        framework_version=framework_version,
        max_run_duration_seconds=max_run_duration)
    estimator.run_config.environment.python.conda_dependencies = conda_dependencies
    # We'd like to log the estimator config, but conversion to string fails when the Estimator has some inputs.
    # logging.info(azure_util.estimator_to_string(estimator))
    if azure_config.hyperdrive:
        estimator = source_config.hyperdrive_config_func(estimator)  # type: ignore
    return estimator
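

# Illustrative sketch (not part of the original module): building the "inputs" list for the estimator from an AzureML
# dataset and submitting the resulting job. The dataset and experiment names are hypothetical placeholders;
# get_or_create_dataset and INPUT_DATA_KEY are the same helpers already used by create_run_config above, and
# as_named_input(...).as_mount() is the standard azureml-core pattern for mounting a dataset into a run.
def _example_submit_estimator(azure_config: AzureConfig, source_config: SourceConfig):
    from azureml.core import Experiment
    azureml_dataset = get_or_create_dataset(azure_config, azure_dataset_id="example_dataset")  # hypothetical name
    estimator_inputs = [azureml_dataset.as_named_input(INPUT_DATA_KEY).as_mount()]
    estimator = create_estimator_from_configs(azure_config, source_config, estimator_inputs)
    experiment = Experiment(workspace=azure_config.get_workspace(), name="example_experiment")  # hypothetical name
    return experiment.submit(estimator)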