Example #1
0
 def submit_to_azureml(self) -> Run:
     """
     Submit a job to AzureML, returning the resulting Run object, or exiting if we were asked to wait for
     completion and the Run did not succeed.

     :return: The AzureML Run object for the submitted job.
     :raises ValueError: If the model configuration has no 'azure_dataset_id' set.
     :raises SystemExit: With exit code -1, if 'wait_for_completion' is set in the Azure configuration and
         the status of the run is anything other than RunStatus.COMPLETED.
     """
     # The adal package creates a logging.info line each time it gets an authentication token, avoid that.
     logging.getLogger('adal-python').setLevel(logging.WARNING)
     # Read the dataset ID once into a local: the check below then also narrows its type for mypy, so no
     # separate assert is needed (asserts are stripped when Python runs with -O).
     azure_dataset_id = self.model_config.azure_dataset_id
     if not azure_dataset_id:
         raise ValueError(
             "When running on AzureML, the 'azure_dataset_id' property must be set."
         )
     model_config_overrides = str(self.model_config.overrides)
     source_config = SourceConfig(
         root_folder=self.project_root,
         # The script that was used to start this process is also the entry point of the AzureML job.
         entry_script=Path(sys.argv[0]).resolve(),
         conda_dependencies_files=[
             get_environment_yaml_file(),
             self.project_root / fixed_paths.ENVIRONMENT_YAML_FILE_NAME,
         ],
         # Kept as a lambda so that self.model_config is looked up only when the function is invoked.
         hyperdrive_config_func=lambda estimator: self.model_config.get_hyperdrive_config(estimator),
         # For large jobs, upload of results times out frequently because of large checkpoint files. Default is 600
         upload_timeout_seconds=86400,
     )
     source_config.set_script_params_except_submit_flag()
     azure_run = submit_to_azureml(self.azure_config, source_config,
                                   model_config_overrides,
                                   azure_dataset_id)
     logging.info("Job submission to AzureML done.")
     if self.azure_config.pytest_mark:
         # The AzureML job can optionally run pytest. Attempt to download it to the current directory.
         # A build step will pick up that file and publish it to Azure DevOps.
         # If pytest_mark is set, this file must exist.
         logging.info("Downloading pytest result file.")
         download_pytest_result(azure_run)
     else:
         logging.info(
             "No pytest_mark present, hence not downloading the pytest result file."
         )
     status = azure_run.get_status()
     # For PR builds where we wait for job completion, the job must have ended in a COMPLETED state.
     # If a pytest failed, the runner has exited with code -1.
     if self.azure_config.wait_for_completion and status != RunStatus.COMPLETED:
         logging.error(f"Job completed with status {status}. Exiting.")
         # Use sys.exit, not the bare exit(): the latter is only injected by the site module and is
         # missing when the interpreter runs with -S or in frozen/embedded environments.
         sys.exit(-1)
     return azure_run
Example #2
0
 def submit_to_azureml(self) -> Run:
     """
     Submit a job to AzureML, returning the resulting Run object. If we were asked to wait for completion and
     the Run or one of its child runs did not complete, a ValueError is raised.

     :return: The AzureML Run object for the submitted job.
     :raises ValueError: If the model configuration is a DeepLearningConfig and the lightning container has no
         'azure_dataset_id' set, or if 'wait_for_completion' is set in the Azure configuration and
         is_run_and_child_runs_completed reports that the run did not complete.
     """
     # The adal package creates a logging.info line each time it gets an authentication token, avoid that.
     logging.getLogger('adal-python').setLevel(logging.WARNING)
     # Azure core prints full HTTP requests even in INFO mode
     logging.getLogger('azure').setLevel(logging.WARNING)
     # PyJWT prints out warnings that are beyond our control.
     # Note that this filter is installed process-wide, not only for the duration of this call.
     warnings.filterwarnings("ignore", category=DeprecationWarning)
     if isinstance(self.model_config, DeepLearningConfig) and not self.lightning_container.azure_dataset_id:
         raise ValueError("When running an InnerEye built-in model in AzureML, the 'azure_dataset_id' "
                          "property must be set.")
     hyperdrive_func = lambda run_config: self.model_config.get_hyperdrive_config(run_config)  # type: ignore
     source_config = SourceConfig(
         root_folder=self.project_root,
         # The script that was used to start this process is also the entry point of the AzureML job.
         entry_script=Path(sys.argv[0]).resolve(),
         conda_dependencies_files=get_all_environment_files(self.project_root),
         hyperdrive_config_func=hyperdrive_func,
         # For large jobs, upload of results can time out because of large checkpoint files. Default is 600
         upload_timeout_seconds=86400,
     )
     source_config.set_script_params_except_submit_flag()
     azure_run = submit_to_azureml(self.azure_config, source_config,
                                   self.lightning_container.all_azure_dataset_ids(),
                                   self.lightning_container.all_dataset_mountpoints())
     logging.info("Job submission to AzureML done.")
     if self.azure_config.pytest_mark:
         if self.azure_config.wait_for_completion:
             # The AzureML job can optionally run pytest. Attempt to download it to the current directory.
             # A build step will pick up that file and publish it to Azure DevOps.
             # If pytest_mark is set, this file must exist.
             logging.info("Downloading pytest result file.")
             download_pytest_result(azure_run)
         else:
             # The download is only attempted when we also wait for the job: report the actual reason for
             # skipping it, rather than claiming that no pytest_mark was given.
             logging.info("pytest_mark is present, but not waiting for job completion, hence not downloading "
                          "the pytest result file.")
     else:
         logging.info("No pytest_mark present, hence not downloading the pytest result file.")
     # For PR builds where we wait for job completion, the job must have ended in a COMPLETED state.
     if self.azure_config.wait_for_completion and not is_run_and_child_runs_completed(azure_run):
         raise ValueError(f"Run {azure_run.id} in experiment {azure_run.experiment.name} or one of its child "
                          "runs failed.")
     return azure_run