def _wait_for(submitted_run_obj):
    """
    Wait on the passed-in submitted run, reporting its status to the tracking server.

    :param submitted_run_obj: Run handle; this function reads its ``run_id`` attribute and
                              calls its ``wait()`` and ``cancel()`` methods.
    :raises ExecutionException: If ``submitted_run_obj.wait()`` returns a falsy value.
    :raises KeyboardInterrupt: Re-raised after the run has been cancelled.
    """
    run_id = submitted_run_obj.run_id
    active_run = None
    # Note: there's a small chance we fail to report the run's status to the tracking server if
    # we're interrupted before we reach the try block below
    try:
        # Without a run ID there is no tracked run to look up; ``active_run`` stays None and is
        # passed as-is to ``_maybe_set_run_terminated``.
        if run_id is not None:
            active_run = tracking._get_existing_run(run_id)
        if not submitted_run_obj.wait():
            _maybe_set_run_terminated(active_run, "FAILED")
            raise ExecutionException("=== Run (ID '%s') failed ===" % run_id)
        eprint("=== Run (ID '%s') succeeded ===" % run_id)
        _maybe_set_run_terminated(active_run, "FINISHED")
    except KeyboardInterrupt:
        # The lookup above also runs inside the try block, so ``active_run`` may still be None
        # here if the interrupt arrived before it completed.
        eprint("=== Run (ID '%s') interrupted, cancelling run ===" % run_id)
        submitted_run_obj.cancel()
        _maybe_set_run_terminated(active_run, "FAILED")
        raise
def _run(uri, entry_point="main", version=None, parameters=None, experiment_id=None,
         mode=None, cluster_spec=None, git_username=None, git_password=None, use_conda=True,
         use_temp_cwd=False, storage_dir=None, block=True, run_id=None):
    """
    Dispatch a project run to the execution backend selected by ``mode``.

    ``mode == "databricks"`` hands the run to ``run_databricks``; ``mode == "local"`` or
    ``mode is None`` runs the project locally. Any other value raises ``ExecutionException``.
    Returns a ``SubmittedRun`` corresponding to the project run.
    """
    exp_id = experiment_id or tracking._get_experiment_id()
    parameters = parameters or {}

    if mode == "databricks":
        # Imported lazily, only when the Databricks backend is actually requested.
        from mlflow.projects.databricks import run_databricks
        return run_databricks(
            uri=uri, entry_point=entry_point, version=version, parameters=parameters,
            experiment_id=exp_id, cluster_spec=cluster_spec, git_username=git_username,
            git_password=git_password)

    if not (mode == "local" or mode is None):
        supported_modes = ["local", "databricks"]
        raise ExecutionException("Got unsupported execution mode %s. Supported "
                                 "values: %s" % (mode, supported_modes))

    # Local execution: fetch and load the project, then validate the supplied parameters
    # against the requested entry point.
    work_dir = _fetch_project(uri, use_temp_cwd, version, git_username, git_password)
    project = _load_project(project_dir=work_dir)
    project.get_entry_point(entry_point)._validate_parameters(parameters)

    # The conda environment is created synchronously (even though this may take some time) so
    # that concurrent runs do not race to create the same environment.
    if use_conda:
        conda_env_path = os.path.join(work_dir, project.conda_env)
        _maybe_create_conda_env(conda_env_path=conda_env_path)

    # Reuse the run identified by ``run_id`` when one was supplied; otherwise create a new one.
    active_run = (
        tracking._get_existing_run(run_id) if run_id
        else _create_run(uri, exp_id, work_dir, entry_point, parameters)
    )

    if not block:
        # Non-blocking mode: invoke `mlflow run` in a subprocess.
        return _invoke_mlflow_run_subprocess(
            work_dir=work_dir, entry_point=entry_point, parameters=parameters,
            experiment_id=exp_id, use_conda=use_conda, storage_dir=storage_dir,
            run_id=active_run.run_info.run_uuid)

    # Blocking mode: run the entry point command in blocking fashion, sending status updates to
    # the tracking server when finished. Note that the run state may not be persisted to the
    # tracking server if interrupted.
    command = _get_entry_point_command(
        work_dir, entry_point, use_conda, parameters, storage_dir)
    return _run_entry_point(command, work_dir, exp_id, run_id=active_run.run_info.run_uuid)