Esempio n. 1
0
def _wait_for(submitted_run_obj):
    """Wait on the passed-in submitted run, reporting its status to the tracking server."""
    run_id = submitted_run_obj.run_id
    active_run = None
    # Note: there's a small chance we fail to report the run's status to the tracking server if
    # we're interrupted before we reach the try block below
    try:
        active_run = tracking._get_existing_run(run_id) if run_id is not None else None
        if submitted_run_obj.wait():
            eprint("=== Run (ID '%s') succeeded ===" % run_id)
            _maybe_set_run_terminated(active_run, "FINISHED")
        else:
            _maybe_set_run_terminated(active_run, "FAILED")
            raise ExecutionException("=== Run (ID '%s') failed ===" % run_id)
    except KeyboardInterrupt:
        eprint("=== Run (ID '%s') === interrupted, cancelling run ===" % run_id)
        submitted_run_obj.cancel()
        _maybe_set_run_terminated(active_run, "FAILED")
        raise
Esempio n. 2
0
def _run(uri, entry_point="main", version=None, parameters=None, experiment_id=None,
         mode=None, cluster_spec=None, git_username=None, git_password=None, use_conda=True,
         use_temp_cwd=False, storage_dir=None, block=True, run_id=None):
    """
    Helper that delegates to the project-running method corresponding to the passed-in mode.
    Returns a ``SubmittedRun`` corresponding to the project run.
    """
    exp_id = experiment_id or tracking._get_experiment_id()
    parameters = parameters or {}
    if mode == "databricks":
        from mlflow.projects.databricks import run_databricks
        return run_databricks(
            uri=uri, entry_point=entry_point, version=version, parameters=parameters,
            experiment_id=exp_id, cluster_spec=cluster_spec, git_username=git_username,
            git_password=git_password)
    elif mode == "local" or mode is None:
        work_dir = _fetch_project(uri, use_temp_cwd, version, git_username, git_password)
        project = _load_project(project_dir=work_dir)
        project.get_entry_point(entry_point)._validate_parameters(parameters)
        # Synchronously create a conda environment (even though this may take some time) to avoid
        # failures due to multiple concurrent attempts to create the same conda env.
        if use_conda:
            _maybe_create_conda_env(conda_env_path=os.path.join(work_dir, project.conda_env))
        if run_id:
            active_run = tracking._get_existing_run(run_id)
        else:
            active_run = _create_run(uri, exp_id, work_dir, entry_point, parameters)
        # In blocking mode, run the entry point command in blocking fashion, sending status updates
        # to the tracking server when finished. Note that the run state may not be persisted to the
        # tracking server if interrupted
        if block:
            command = _get_entry_point_command(
                work_dir, entry_point, use_conda, parameters, storage_dir)
            return _run_entry_point(command, work_dir, exp_id, run_id=active_run.run_info.run_uuid)
        # Otherwise, invoke `mlflow run` in a subprocess
        return _invoke_mlflow_run_subprocess(
            work_dir=work_dir, entry_point=entry_point, parameters=parameters, experiment_id=exp_id,
            use_conda=use_conda, storage_dir=storage_dir, run_id=active_run.run_info.run_uuid)
    supported_modes = ["local", "databricks"]
    raise ExecutionException("Got unsupported execution mode %s. Supported "
                             "values: %s" % (mode, supported_modes))