def _get_docker_command(image, active_run, docker_args=None, volumes=None, user_env_vars=None):
    """Build the ``docker run`` argument list used to execute a project run.

    :param image: Docker image object; ``image.tags[0]`` is used as the image
        reference in the command.
    :param active_run: MLflow run whose run ID, experiment ID and artifact URI
        are exported into the container environment.
    :param docker_args: Optional dict of docker CLI options. A ``True`` boolean
        value emits a bare flag (``-t`` / ``--rm`` style); a ``False`` boolean
        is skipped entirely; any other value is emitted as ``--name value``.
        Single-character names get a short ``-`` prefix, longer names ``--``.
    :param volumes: Optional list of ``-v`` volume-mount specification strings.
    :param user_env_vars: Optional list of environment-variable entries: a
        two-item list ``[name, value]`` defines a new variable; a bare string
        copies that variable from the host environment.
    :raises MlflowException: if a requested host environment variable is unset.
    :return: List of command tokens suitable for ``subprocess``.
    """
    from mlflow.projects.docker import get_docker_tracking_cmd_and_envs

    docker_path = "docker"
    cmd = [docker_path, "run", "--rm"]
    if docker_args:
        for name, value in docker_args.items():
            # Short option for one-letter names, long option otherwise.
            prefix = "-" if len(name) == 1 else "--"
            if isinstance(value, bool):
                # True booleans are bare flags; False booleans are omitted.
                # (Previously a False value fell through and the non-string
                # object False was passed to subprocess as an argument.)
                if value:
                    cmd += [prefix + name]
            else:
                # Passed name=value; cast so numeric values don't break Popen.
                cmd += [prefix + name, str(value)]
    env_vars = get_run_env_vars(
        run_id=active_run.info.run_id, experiment_id=active_run.info.experiment_id
    )
    tracking_uri = tracking.get_tracking_uri()
    tracking_cmds, tracking_envs = get_docker_tracking_cmd_and_envs(tracking_uri)
    artifact_cmds, artifact_envs = _get_docker_artifact_storage_cmd_and_envs(
        active_run.info.artifact_uri
    )
    cmd += tracking_cmds + artifact_cmds
    env_vars.update(tracking_envs)
    env_vars.update(artifact_envs)
    if user_env_vars is not None:
        for user_entry in user_env_vars:
            if isinstance(user_entry, list):
                # User has defined a new environment variable for the docker environment
                env_vars[user_entry[0]] = user_entry[1]
            else:
                # User wants to copy an environment variable from system environment
                system_var = os.environ.get(user_entry)
                if system_var is None:
                    raise MlflowException(
                        "This project expects the %s environment variables to "
                        "be set on the machine running the project, but %s was "
                        "not set. Please ensure all expected environment variables "
                        "are set" % (", ".join(user_env_vars), user_entry)
                    )
                env_vars[user_entry] = system_var
    if volumes is not None:
        for v in volumes:
            cmd += ["-v", v]
    for key, value in env_vars.items():
        cmd += ["-e", "{key}={value}".format(key=key, value=value)]
    cmd += [image.tags[0]]
    return cmd
def _run_entry_point(command, work_dir, experiment_id, run_id):
    """
    Run an entry point command in a subprocess, returning a SubmittedRun that can be used
    to query the run's status.

    :param command: Entry point command to run
    :param work_dir: Working directory in which to run the command
    :param experiment_id: MLflow experiment ID associated with the entry point execution.
    :param run_id: MLflow run ID associated with the entry point execution.
    :return: A ``LocalSubmittedRun`` wrapping the launched subprocess.
    """
    # Child processes inherit the current environment plus the MLflow run /
    # tracking variables so the entry point reports to the same run.
    env = os.environ.copy()
    env.update(get_run_env_vars(run_id, experiment_id))
    env.update(get_databricks_env_vars(tracking_uri=mlflow.get_tracking_uri()))
    _logger.info("=== Running command '%s' in run with ID '%s' === ", command, run_id)
    # Run through a shell so project commands with shell syntax work:
    # bash on POSIX systems, cmd on Windows (os.name == "nt").
    if os.name != "nt":
        process = subprocess.Popen(["bash", "-c", command], close_fds=True, cwd=work_dir, env=env)
    else:
        process = subprocess.Popen(["cmd", "/c", command], close_fds=True, cwd=work_dir, env=env)
    return LocalSubmittedRun(run_id, process)
def _invoke_mlflow_run_subprocess(work_dir, entry_point, parameters, experiment_id, use_conda, storage_dir, run_id):
    """
    Launch an MLflow project asynchronously by invoking ``mlflow run`` in a
    subprocess, returning a SubmittedRun that can be used to query run status.
    """
    _logger.info("=== Asynchronously launching MLflow run with ID %s ===", run_id)
    run_cmd = _build_mlflow_run_cmd(
        uri=work_dir,
        entry_point=entry_point,
        storage_dir=storage_dir,
        use_conda=use_conda,
        run_id=run_id,
        parameters=parameters,
    )
    # Propagate the run/experiment identifiers and Databricks auth into the
    # child process environment so it reports to the same run.
    subprocess_env = get_run_env_vars(run_id, experiment_id)
    subprocess_env.update(get_databricks_env_vars(mlflow.get_tracking_uri()))
    child_process = _run_mlflow_run_cmd(run_cmd, subprocess_env)
    return LocalSubmittedRun(run_id, child_process)
def _run(
    uri,
    experiment_id,
    entry_point,
    version,
    parameters,
    docker_args,
    backend_name,
    backend_config,
    use_conda,
    storage_dir,
    synchronous,
):
    """
    Helper that delegates to the project-running method corresponding to the passed-in backend.
    Returns a ``SubmittedRun`` corresponding to the project run.
    """
    tracking_store_uri = tracking.get_tracking_uri()
    # Fold per-run execution options into backend_config so plugin backends
    # receive everything through a single config dict. NOTE(review): this
    # mutates the caller's dict in place — presumably intentional, confirm.
    backend_config[PROJECT_USE_CONDA] = use_conda
    backend_config[PROJECT_SYNCHRONOUS] = synchronous
    backend_config[PROJECT_DOCKER_ARGS] = docker_args
    backend_config[PROJECT_STORAGE_DIR] = storage_dir
    # TODO: remove this check once kubernetes execution has been refactored
    if backend_name not in {"databricks", "kubernetes"}:
        # Pluggable backends: resolve via the loader and hand off the run.
        backend = loader.load_backend(backend_name)
        if backend:
            submitted_run = backend.run(
                uri,
                entry_point,
                parameters,
                version,
                backend_config,
                tracking_store_uri,
                experiment_id,
            )
            # Record which backend executed the run for later inspection.
            tracking.MlflowClient().set_tag(submitted_run.run_id, MLFLOW_PROJECT_BACKEND, backend_name)
            return submitted_run
    # Built-in backends: materialize the project locally and create the run
    # record before dispatching.
    work_dir = fetch_and_validate_project(uri, version, entry_point, parameters)
    project = load_project(work_dir)
    _validate_execution_environment(project, backend_name)
    active_run = get_or_create_run(None, uri, experiment_id, work_dir, version, entry_point, parameters)
    if backend_name == "databricks":
        tracking.MlflowClient().set_tag(active_run.info.run_id, MLFLOW_PROJECT_BACKEND, "databricks")
        # Imported lazily so the databricks dependency is only pulled in when used.
        from mlflow.projects.databricks import run_databricks
        return run_databricks(
            remote_run=active_run,
            uri=uri,
            entry_point=entry_point,
            work_dir=work_dir,
            parameters=parameters,
            experiment_id=experiment_id,
            cluster_spec=backend_config,
        )
    elif backend_name == "kubernetes":
        from mlflow.projects.docker import (
            build_docker_image,
            validate_docker_env,
            validate_docker_installation,
        )
        from mlflow.projects import kubernetes as kb
        tracking.MlflowClient().set_tag(active_run.info.run_id, MLFLOW_PROJECT_ENV, "docker")
        tracking.MlflowClient().set_tag(active_run.info.run_id, MLFLOW_PROJECT_BACKEND, "kubernetes")
        # Fail fast if the project's docker setup or the local docker
        # installation is unusable before building an image.
        validate_docker_env(project)
        validate_docker_installation()
        kube_config = _parse_kubernetes_config(backend_config)
        image = build_docker_image(
            work_dir=work_dir,
            repository_uri=kube_config["repository-uri"],
            base_image=project.docker_env.get("image"),
            run_id=active_run.info.run_id,
        )
        image_digest = kb.push_image_to_registry(image.tags[0])
        # NOTE(review): uses run_uuid here while the rest of this function
        # uses run_id — presumably aliases of the same value; confirm before
        # normalizing.
        submitted_run = kb.run_kubernetes_job(
            project.name,
            active_run,
            image.tags[0],
            image_digest,
            get_entry_point_command(project, entry_point, parameters, storage_dir),
            get_run_env_vars(run_id=active_run.info.run_uuid, experiment_id=active_run.info.experiment_id),
            kube_config.get("kube-context", None),
            kube_config["kube-job-template"],
        )
        return submitted_run
    # No built-in or plugin backend matched: report what is supported.
    supported_backends = ["databricks", "kubernetes"] + list(loader.MLFLOW_BACKENDS.keys())
    raise ExecutionException("Got unsupported execution mode %s. Supported "
                             "values: %s" % (backend_name, supported_backends))