Example #1
def create_and_submit_experiment(
        azure_config: AzureConfig,
        source_config: SourceConfig,
        model_config_overrides: str,
        azure_dataset_id: str) -> Run:
    """
    Creates an AzureML experiment in the workspace and submits it for execution.
    :param azure_config: Azure-related configuration for setting up a valid workspace.
    :param source_config: The information about which code should be submitted, and which arguments should be used.
    :param model_config_overrides: A string that describes which model parameters were overridden by command-line
     arguments in the present run. This is only used for diagnostic purposes (it is set as a tag on the run).
    :param azure_dataset_id: The name of the dataset in blob storage to be used for this run.
    :returns: Run object for the submitted AzureML run
    """
    workspace = azure_config.get_workspace()
    experiment_name = create_experiment_name(azure_config)
    exp = Experiment(workspace=workspace, name=azure_util.to_azure_friendly_string(experiment_name))
    script_run_config = create_run_config(azure_config, source_config, azure_dataset_id)

    # submit a training/testing run associated with the experiment
    run: Run = exp.submit(script_run_config)

    # set metadata for the run
    set_run_tags(run, azure_config, model_config_overrides)

    print("\n==============================================================================")
    print(f"Successfully queued new run {run.id} in experiment: {exp.name}")

    if azure_config.run_recovery_id:
        print(f"\nRecovered from: {azure_config.run_recovery_id}")

    recovery_id = azure_util.create_run_recovery_id(run)
    recovery_file = Path(RUN_RECOVERY_FILE)
    if recovery_file.exists():
        recovery_file.unlink()
    recovery_file.write_text(recovery_id)

    print("Experiment URL: {}".format(exp.get_portal_url()))
    print("Run URL: {}".format(run.get_portal_url()))
    print("If this run fails, re-start runner.py and supply these additional arguments: "
          f"--run_recovery_id={recovery_id}")
    print(f"The run recovery ID has been written to this file: {recovery_file}")
    print("==============================================================================")
    if azure_config.tensorboard and azure_config.azureml:
        print("Starting TensorBoard now because you specified --tensorboard")
        monitor(monitor_config=AMLTensorBoardMonitorConfig(run_ids=[run.id]), azure_config=azure_config)
    else:
        print(f"To monitor this run locally using TensorBoard, run the script: "
              f"InnerEye/Azure/tensorboard_monitor.py --run_ids={run.id}")
        print("==============================================================================")
    return run
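A minimal call-site sketch (not part of the source; `azure_config` and `source_config` are assumed to have been built elsewhere by the runner, and the override string and dataset name are hypothetical):

run = create_and_submit_experiment(
    azure_config=azure_config,                 # assumption: parsed from settings/commandline elsewhere
    source_config=source_config,               # assumption: describes the entry script and its arguments
    model_config_overrides="--l_rate=1e-3",    # hypothetical diagnostic string, stored only as a run tag
    azure_dataset_id="my_azure_dataset")       # hypothetical dataset name in blob storage
print(f"Submitted run {run.id}")               # Run.id is provided by the azureml-core SDK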
Example #2
def after_submission_hook(azure_run: Run) -> None:
    """
    A function that will be called right after job submission.
    """
    # Set the default display name to what was provided as the "tag". This will affect single runs
    # and Hyperdrive parent runs.
    if self.azure_config.tag:
        azure_run.display_name = self.azure_config.tag
    # Add an extra tag that depends on the run that was actually submitted. This is used for later
    # filtering of runs in cross-validation analysis.
    recovery_id = create_run_recovery_id(azure_run)
    azure_run.tag(RUN_RECOVERY_ID_KEY_NAME, recovery_id)
    print("If this run fails, re-start runner.py and supply these additional arguments: "
          f"--run_recovery_id={recovery_id}")
    if self.azure_config.tensorboard:
        print("Starting TensorBoard now because you specified --tensorboard")
        monitor(monitor_config=AMLTensorBoardMonitorConfig(run_ids=[azure_run.id]),
                azure_config=self.azure_config)
    else:
        print("To monitor this run locally using TensorBoard, run the script: "
              f"InnerEye/Azure/tensorboard_monitor.py --run_ids={azure_run.id}")

    if self.azure_config.wait_for_completion:
        # We want the job output to be visible on the console. Do not exit yet if the job fails, because we
        # may need to download the pytest result file.
        azure_run.wait_for_completion(show_output=True, raise_on_error=False)
        if self.azure_config.pytest_mark:
            # The AzureML job can optionally run pytest. Attempt to download the result file to the current
            # directory. A build step will pick up that file and publish it to Azure DevOps.
            # If pytest_mark is set, this file must exist.
            logging.info("Downloading pytest result file.")
            download_pytest_result(azure_run)
        if azure_run.status == RunStatus.FAILED:
            raise ValueError(f"The AzureML run failed. Please check this URL for details: "
                             f"{azure_run.get_portal_url()}")
Example #3
def create_and_submit_experiment(azure_config: AzureConfig,
                                 script_run_config: ScriptRunConfig,
                                 commandline_args: str) -> Run:
    """
    Creates an AzureML experiment in the workspace and submits it for execution.
    :param azure_config: Azure-related configuration for setting up a valid workspace.
    :param script_run_config: The configuration for the script that should be run inside of AzureML.
    :param commandline_args: A string with all commandline arguments that were provided to the runner. These are only
    used to set a tag on the submitted AzureML run.
    :returns: Run object for the submitted AzureML run
    """
    workspace = azure_config.get_workspace()
    experiment_name = create_experiment_name(azure_config)
    exp = Experiment(workspace=workspace,
                     name=azure_util.to_azure_friendly_string(experiment_name))

    # submit a training/testing run associated with the experiment
    run: Run = exp.submit(script_run_config)

    if is_offline_run_context(run):
        # This codepath will only be executed in unit tests, when exp.submit is mocked.
        return run

    # Set metadata for the run.
    set_run_tags(run, azure_config, commandline_args=commandline_args)

    print("\n==============================================================================")
    print(f"Successfully queued new run {run.id} in experiment: {exp.name}")

    if azure_config.run_recovery_id:
        print(f"\nRecovered from: {azure_config.run_recovery_id}")

    recovery_id = azure_util.create_run_recovery_id(run)
    recovery_file = Path(RUN_RECOVERY_FILE)
    if recovery_file.exists():
        recovery_file.unlink()
    recovery_file.write_text(recovery_id)

    print("Experiment URL: {}".format(exp.get_portal_url()))
    print("Run URL: {}".format(run.get_portal_url()))
    print(
        "If this run fails, re-start runner.py and supply these additional arguments: "
        f"--run_recovery_id={recovery_id}")
    print(
        f"The run recovery ID has been written to this file: {recovery_file}")
    print(
        "=============================================================================="
    )
    if azure_config.tensorboard and azure_config.azureml:
        print("Starting TensorBoard now because you specified --tensorboard")
        monitor(monitor_config=AMLTensorBoardMonitorConfig(run_ids=[run.id]),
                azure_config=azure_config)
    else:
        print("To monitor this run locally using TensorBoard, run the script: "
              f"InnerEye/Azure/tensorboard_monitor.py --run_ids={run.id}")
        print("==============================================================================")
    return run
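A minimal call-site sketch for this variant (assumptions: `azure_config` is built elsewhere by the runner, and the source directory, entry script, and arguments passed to the azureml-core ScriptRunConfig are placeholders):

from azureml.core import ScriptRunConfig

script_run_config = ScriptRunConfig(
    source_directory=".",                      # assumption: repository root
    script="InnerEye/ML/runner.py",            # assumption: entry script
    arguments=["--model=Prostate"])            # assumption: example arguments
run = create_and_submit_experiment(
    azure_config=azure_config,
    script_run_config=script_run_config,
    commandline_args="--model=Prostate")       # only used to tag the run, per the docstring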