def submit_to_azureml(self) -> Run:
    """
    Submit a job to AzureML, returning the resulting Run object, or exiting if we were asked to wait for
    completion and the Run did not succeed.

    :return: The AzureML Run object that the submission produced.
    :raises ValueError: If the model configuration has no 'azure_dataset_id' set.
    :raises SystemExit: With exit code -1, if 'wait_for_completion' is set in the Azure configuration and the
        status reported by the run is not RunStatus.COMPLETED.
    """
    # The adal package creates a logging.info line each time it gets an authentication token, avoid that.
    logging.getLogger('adal-python').setLevel(logging.WARNING)
    # Read the dataset ID once: the check below then also narrows the type for mypy, without needing an
    # assert statement (which would be stripped when running with -O).
    azure_dataset_id = self.model_config.azure_dataset_id
    if not azure_dataset_id:
        raise ValueError("When running on AzureML, the 'azure_dataset_id' property must be set.")
    model_config_overrides = str(self.model_config.overrides)
    source_config = SourceConfig(
        root_folder=self.project_root,
        # The script that is running right now is also the entry point of the submitted job.
        entry_script=Path(sys.argv[0]).resolve(),
        conda_dependencies_files=[
            get_environment_yaml_file(),
            self.project_root / fixed_paths.ENVIRONMENT_YAML_FILE_NAME,
        ],
        hyperdrive_config_func=lambda estimator: self.model_config.get_hyperdrive_config(estimator),
        # For large jobs, upload of results times out frequently because of large checkpoint files. Default is 600
        upload_timeout_seconds=86400,
    )
    source_config.set_script_params_except_submit_flag()
    azure_run = submit_to_azureml(self.azure_config, source_config, model_config_overrides, azure_dataset_id)
    logging.info("Job submission to AzureML done.")
    if self.azure_config.pytest_mark:
        # The AzureML job can optionally run pytest. Attempt to download it to the current directory.
        # A build step will pick up that file and publish it to Azure DevOps.
        # If pytest_mark is set, this file must exist.
        logging.info("Downloading pytest result file.")
        download_pytest_result(azure_run)
    else:
        logging.info("No pytest_mark present, hence not downloading the pytest result file.")
    status = azure_run.get_status()
    # For PR builds where we wait for job completion, the job must have ended in a COMPLETED state.
    # If a pytest failed, the runner has exited with code -1.
    if self.azure_config.wait_for_completion and status != RunStatus.COMPLETED:
        logging.error(f"Job completed with status {status}. Exiting.")
        # Use sys.exit rather than the builtin exit(): the latter is a helper installed by the site module
        # for interactive sessions and is not guaranteed to exist in every interpreter setup.
        sys.exit(-1)
    return azure_run
def submit_to_azureml(self) -> Run:
    """
    Submit the currently running script as a job to AzureML and return the Run object that was created.

    :return: The AzureML Run object that the submission produced.
    :raises ValueError: If the model configuration is a DeepLearningConfig and the container has no
        'azure_dataset_id' set, or if 'wait_for_completion' is set in the Azure configuration and the run
        (or one of its child runs) is not reported as completed.
    """
    # Third-party loggers that are too chatty at INFO level: adal logs a line for every authentication token
    # it acquires, and Azure core prints full HTTP requests.
    for noisy_logger in ('adal-python', 'azure'):
        logging.getLogger(noisy_logger).setLevel(logging.WARNING)
    # PyJWT prints out warnings that are beyond our control
    warnings.filterwarnings("ignore", category=DeprecationWarning)

    if isinstance(self.model_config, DeepLearningConfig):
        if not self.lightning_container.azure_dataset_id:
            raise ValueError("When running an InnerEye built-in model in AzureML, the 'azure_dataset_id' "
                             "property must be set.")

    # The script that is running right now is also the entry point of the submitted job.
    entry_script = Path(sys.argv[0]).resolve()
    environment_files = get_all_environment_files(self.project_root)
    source_config = SourceConfig(
        root_folder=self.project_root,
        entry_script=entry_script,
        conda_dependencies_files=environment_files,
        hyperdrive_config_func=lambda run_config: self.model_config.get_hyperdrive_config(run_config),  # type: ignore
        # For large jobs, upload of results can time out because of large checkpoint files. Default is 600
        upload_timeout_seconds=86400,
    )
    source_config.set_script_params_except_submit_flag()

    dataset_ids = self.lightning_container.all_azure_dataset_ids()
    dataset_mountpoints = self.lightning_container.all_dataset_mountpoints()
    run = submit_to_azureml(self.azure_config, source_config, dataset_ids, dataset_mountpoints)
    logging.info("Job submission to AzureML done.")

    if not (self.azure_config.pytest_mark and self.azure_config.wait_for_completion):
        logging.info("No pytest_mark present, hence not downloading the pytest result file.")
    else:
        # The AzureML job can optionally run pytest. Attempt to download it to the current directory.
        # A build step will pick up that file and publish it to Azure DevOps.
        # If pytest_mark is set, this file must exist.
        logging.info("Downloading pytest result file.")
        download_pytest_result(run)

    # Nothing left to verify unless we were asked to wait for the job to finish.
    if not self.azure_config.wait_for_completion:
        return run
    # For PR builds where we wait for job completion, the job must have ended in a COMPLETED state.
    if is_run_and_child_runs_completed(run):
        return run
    raise ValueError(f"Run {run.id} in experiment {run.experiment.name} or one of its child "
                     "runs failed.")