def run(self, project_uri, entry_point, params, version, backend_config, tracking_uri,
        experiment_id):
    """
    Fetch and validate the project, register a run for it, and hand back a
    ``DummySubmittedRun`` wrapping that run's ID.

    ``backend_config`` and ``tracking_uri`` are accepted but not read by this method.
    """
    project_dir = fetch_and_validate_project(project_uri, version, entry_point, params)
    # No existing run ID is supplied (first argument is ``None``).
    created_run = get_or_create_run(
        None, project_uri, experiment_id, project_dir, version, entry_point, params
    )
    new_run_id = created_run.info.run_id
    return DummySubmittedRun(new_run_id)
def test_fetch_create_and_log(tmpdir):
    """
    With project fetching and loading patched out, check that
    ``fetch_and_validate_project`` / ``load_project`` return the patched values and
    that ``get_or_create_run`` records the entry point, source URI and parameters
    on the resulting run.
    """
    ep_name = "entry_point"
    declared_params = {"method_name": "string"}
    ep_spec = _project_spec.EntryPoint(ep_name, declared_params, "run_model.sh")
    fake_project = _project_spec.Project(
        env_type="local",
        env_config_path=None,
        entry_points={ep_name: ep_spec},
        docker_env=None,
        name="my_project",
    )
    exp_id = mlflow.create_experiment("test_fetch_project")
    fetched_dir = tmpdir
    source_uri = "http://someuri/myproject.git"
    supplied_params = {"method_name": "newton"}

    # Both patches are entered in the same order as the original nested blocks.
    with mock.patch(
        "mlflow.projects.utils._fetch_project", return_value=fetched_dir
    ), mock.patch(
        "mlflow.projects._project_spec.load_project", return_value=fake_project
    ):
        work_dir = fetch_and_validate_project("", "", ep_name, supplied_params)
        project = load_project(work_dir)
        assert fake_project == project
        assert fetched_dir == work_dir

        # Register a run for the fetched project.
        created_run = get_or_create_run(
            run_id=None,
            uri=source_uri,
            experiment_id=exp_id,
            work_dir=work_dir,
            version=None,
            entry_point=ep_name,
            parameters=supplied_params,
        )

        # Verify the tags and params stored on the run.
        stored_run = mlflow.get_run(created_run.info.run_id)
        assert MLFLOW_PROJECT_ENTRY_POINT in stored_run.data.tags
        assert MLFLOW_SOURCE_NAME in stored_run.data.tags
        assert ep_name == stored_run.data.tags[MLFLOW_PROJECT_ENTRY_POINT]
        assert source_uri == stored_run.data.tags[MLFLOW_SOURCE_NAME]
        assert supplied_params == stored_run.data.params
def run(self, project_uri, entry_point, params, version, backend_config, tracking_uri,
        experiment_id):
    """
    Execute a project locally, inside a docker container or a conda environment
    depending on the project definition and ``backend_config``.

    Reads ``PROJECT_USE_CONDA``, ``PROJECT_SYNCHRONOUS``, ``PROJECT_DOCKER_ARGS`` and
    ``PROJECT_STORAGE_DIR`` from ``backend_config`` (all four keys are required), plus
    an optional ``MLFLOW_LOCAL_BACKEND_RUN_ID_CONFIG`` entry naming a run to reuse.
    ``tracking_uri`` is accepted but not read by this method.

    Returns whatever ``_run_entry_point`` (synchronous mode) or
    ``_invoke_mlflow_run_subprocess`` (otherwise) returns.
    """
    work_dir = fetch_and_validate_project(project_uri, version, entry_point, params)
    project = load_project(work_dir)

    # Reuse the run named in the backend config when one is given.
    requested_run_id = (
        backend_config[MLFLOW_LOCAL_BACKEND_RUN_ID_CONFIG]
        if MLFLOW_LOCAL_BACKEND_RUN_ID_CONFIG in backend_config
        else None
    )
    active_run = get_or_create_run(
        requested_run_id, project_uri, experiment_id, work_dir, version, entry_point, params
    )

    cmd_parts = []
    joiner = " "
    use_conda = backend_config[PROJECT_USE_CONDA]
    synchronous = backend_config[PROJECT_SYNCHRONOUS]
    docker_args = backend_config[PROJECT_DOCKER_ARGS]
    storage_dir = backend_config[PROJECT_STORAGE_DIR]

    if project.docker_env:
        # A docker_env declared in MLproject wins over conda yaml environments, so the
        # project is executed inside a docker container.
        tracking.MlflowClient().set_tag(active_run.info.run_id, MLFLOW_PROJECT_ENV, "docker")
        validate_docker_env(project)
        validate_docker_installation()
        docker_image = build_docker_image(
            work_dir=work_dir,
            repository_uri=project.name,
            base_image=project.docker_env.get('image'),
            run_id=active_run.info.run_id,
        )
        cmd_parts.extend(
            _get_docker_command(
                image=docker_image,
                active_run=active_run,
                docker_args=docker_args,
                volumes=project.docker_env.get("volumes"),
                user_env_vars=project.docker_env.get("environment"),
            )
        )
    elif use_conda:
        # The conda environment is created synchronously (even though this may take some
        # time) to avoid failures from concurrent attempts to create the same conda env.
        tracking.MlflowClient().set_tag(active_run.info.run_id, MLFLOW_PROJECT_ENV, "conda")
        joiner = " && "
        conda_env_name = get_or_create_conda_env(project.conda_env_path)
        cmd_parts.extend(get_conda_command(conda_env_name))

    if not synchronous:
        # Asynchronous mode: invoke `mlflow run` in a subprocess.
        return _invoke_mlflow_run_subprocess(
            work_dir=work_dir,
            entry_point=entry_point,
            parameters=params,
            experiment_id=experiment_id,
            use_conda=use_conda,
            storage_dir=storage_dir,
            run_id=active_run.info.run_id,
        )

    # Synchronous mode: run the entry point command in a blocking fashion, sending status
    # updates to the tracking server when finished. Note that the run state may not be
    # persisted to the tracking server if interrupted.
    cmd_parts.extend(get_entry_point_command(project, entry_point, params, storage_dir))
    full_command = joiner.join(cmd_parts)
    return _run_entry_point(
        full_command, work_dir, experiment_id, run_id=active_run.info.run_id
    )
def _run(uri, experiment_id, entry_point="main", version=None, parameters=None,
         docker_args=None, backend_name=None, backend_config=None, use_conda=True,
         storage_dir=None, synchronous=True):
    """
    Helper that delegates to the project-running method corresponding to the passed-in backend.
    Returns a ``SubmittedRun`` corresponding to the project run.

    :param uri: URI of the project to run.
    :param experiment_id: ID of the experiment under which the run is recorded.
    :param entry_point: Name of the project entry point to execute.
    :param version: Project version forwarded to ``fetch_and_validate_project``.
    :param parameters: Parameters for the entry point command.
    :param docker_args: Extra arguments forwarded to ``_get_docker_command``.
    :param backend_name: ``None``/``"local"``, ``"databricks"``, ``"kubernetes"``, or the
                         name of a backend that ``loader.load_backend`` can resolve.
    :param backend_config: Backend-specific configuration. May be ``None``.
    :param use_conda: If truthy (and the project has no ``docker_env``), run inside a
                      conda environment.
    :param storage_dir: Forwarded to the entry point command builder.
    :param synchronous: If truthy, run the entry point in a blocking fashion; otherwise
                        invoke ``mlflow run`` in a subprocess.
    :raises ExecutionException: if ``backend_name`` is not a supported execution mode.
    """
    tracking_store_uri = tracking.get_tracking_uri()
    if backend_name:
        backend = loader.load_backend(backend_name)
        if backend:
            # Backend ``run`` implementations take ``tracking_uri`` before
            # ``experiment_id``; pass the two trailing arguments in that order.
            submitted_run = backend.run(uri, entry_point, parameters, version,
                                        backend_config, tracking_store_uri, experiment_id)
            tracking.MlflowClient().set_tag(submitted_run.run_id, MLFLOW_PROJECT_BACKEND,
                                            backend_name)
            return submitted_run

    work_dir = fetch_and_validate_project(uri, version, entry_point, parameters)
    project = load_project(work_dir)
    _validate_execution_environment(project, backend_name)

    # ``backend_config`` defaults to None, so guard the membership test instead of
    # letting it raise TypeError for a local run without a config.
    existing_run_id = None
    if (backend_name == "local" and backend_config is not None
            and _MLFLOW_LOCAL_BACKEND_RUN_ID_CONFIG in backend_config):
        existing_run_id = backend_config[_MLFLOW_LOCAL_BACKEND_RUN_ID_CONFIG]
    active_run = get_or_create_run(existing_run_id, uri, experiment_id, work_dir, version,
                                   entry_point, parameters)

    if backend_name == "databricks":
        tracking.MlflowClient().set_tag(active_run.info.run_id, MLFLOW_PROJECT_BACKEND,
                                        "databricks")
        from mlflow.projects.databricks import run_databricks
        return run_databricks(
            remote_run=active_run,
            uri=uri, entry_point=entry_point, work_dir=work_dir, parameters=parameters,
            experiment_id=experiment_id, cluster_spec=backend_config)

    elif backend_name == "local" or backend_name is None:
        tracking.MlflowClient().set_tag(active_run.info.run_id, MLFLOW_PROJECT_BACKEND,
                                        "local")
        command_args = []
        command_separator = " "
        # If a docker_env attribute is defined in MLproject then it takes precedence over
        # conda yaml environments, so the project will be executed inside a docker container.
        if project.docker_env:
            tracking.MlflowClient().set_tag(active_run.info.run_id, MLFLOW_PROJECT_ENV,
                                            "docker")
            _validate_docker_env(project)
            _validate_docker_installation()
            image = _build_docker_image(work_dir=work_dir,
                                        repository_uri=project.name,
                                        base_image=project.docker_env.get('image'),
                                        run_id=active_run.info.run_id)
            command_args += _get_docker_command(
                image=image, active_run=active_run, docker_args=docker_args,
                volumes=project.docker_env.get("volumes"),
                user_env_vars=project.docker_env.get("environment"))
        # Synchronously create a conda environment (even though this may take some time)
        # to avoid failures due to multiple concurrent attempts to create the same conda env.
        elif use_conda:
            tracking.MlflowClient().set_tag(active_run.info.run_id, MLFLOW_PROJECT_ENV,
                                            "conda")
            command_separator = " && "
            conda_env_name = _get_or_create_conda_env(project.conda_env_path)
            command_args += _get_conda_command(conda_env_name)
        # In synchronous mode, run the entry point command in a blocking fashion, sending
        # status updates to the tracking server when finished. Note that the run state may
        # not be persisted to the tracking server if interrupted.
        if synchronous:
            command_args += _get_entry_point_command(project, entry_point, parameters,
                                                     storage_dir)
            command_str = command_separator.join(command_args)
            return _run_entry_point(command_str, work_dir, experiment_id,
                                    run_id=active_run.info.run_id)
        # Otherwise, invoke `mlflow run` in a subprocess
        return _invoke_mlflow_run_subprocess(
            work_dir=work_dir, entry_point=entry_point, parameters=parameters,
            experiment_id=experiment_id, use_conda=use_conda, storage_dir=storage_dir,
            run_id=active_run.info.run_id)

    elif backend_name == "kubernetes":
        from mlflow.projects import kubernetes as kb
        tracking.MlflowClient().set_tag(active_run.info.run_id, MLFLOW_PROJECT_ENV, "docker")
        tracking.MlflowClient().set_tag(active_run.info.run_id, MLFLOW_PROJECT_BACKEND,
                                        "kubernetes")
        _validate_docker_env(project)
        _validate_docker_installation()
        kube_config = _parse_kubernetes_config(backend_config)
        image = _build_docker_image(work_dir=work_dir,
                                    repository_uri=kube_config["repository-uri"],
                                    base_image=project.docker_env.get('image'),
                                    run_id=active_run.info.run_id)
        # The image is pushed to the registry before the job is submitted; the job
        # receives both the image tag and the digest returned by the push.
        image_digest = kb.push_image_to_registry(image.tags[0])
        submitted_run = kb.run_kubernetes_job(
            project.name,
            active_run,
            image.tags[0],
            image_digest,
            _get_entry_point_command(project, entry_point, parameters, storage_dir),
            _get_run_env_vars(
                run_id=active_run.info.run_uuid,
                experiment_id=active_run.info.experiment_id),
            kube_config.get('kube-context', None),
            kube_config['kube-job-template'])
        return submitted_run

    supported_backends = ["local", "databricks", "kubernetes"]
    raise ExecutionException("Got unsupported execution mode %s. Supported "
                             "values: %s" % (backend_name, supported_backends))
def _run(
    uri,
    experiment_id,
    entry_point,
    version,
    parameters,
    docker_args,
    backend_name,
    backend_config,
    use_conda,
    storage_dir,
    synchronous,
):
    """
    Dispatch a project run to the backend named by ``backend_name`` and return the
    ``SubmittedRun`` for it.

    The ``use_conda``, ``synchronous``, ``docker_args`` and ``storage_dir`` options are
    written into ``backend_config`` (mutating the mapping passed in) before dispatch.
    Backends other than "databricks" and "kubernetes" are resolved through
    ``loader.load_backend``; those two are handled inline below.

    Raises ``ExecutionException`` when no backend handles ``backend_name``.
    """
    tracking_store_uri = tracking.get_tracking_uri()
    backend_config[PROJECT_USE_CONDA] = use_conda
    backend_config[PROJECT_SYNCHRONOUS] = synchronous
    backend_config[PROJECT_DOCKER_ARGS] = docker_args
    backend_config[PROJECT_STORAGE_DIR] = storage_dir

    # TODO: remove this check once kubernetes execution has been refactored
    if backend_name not in {"databricks", "kubernetes"}:
        plugin = loader.load_backend(backend_name)
        if plugin:
            submission = plugin.run(
                uri,
                entry_point,
                parameters,
                version,
                backend_config,
                tracking_store_uri,
                experiment_id,
            )
            tracking.MlflowClient().set_tag(
                submission.run_id, MLFLOW_PROJECT_BACKEND, backend_name
            )
            return submission

    work_dir = fetch_and_validate_project(uri, version, entry_point, parameters)
    project = load_project(work_dir)
    _validate_execution_environment(project, backend_name)

    # No existing run ID is supplied (first argument is ``None``).
    active_run = get_or_create_run(
        None, uri, experiment_id, work_dir, version, entry_point, parameters
    )

    if backend_name == "databricks":
        tracking.MlflowClient().set_tag(
            active_run.info.run_id, MLFLOW_PROJECT_BACKEND, "databricks"
        )
        from mlflow.projects.databricks import run_databricks

        return run_databricks(
            remote_run=active_run,
            uri=uri,
            entry_point=entry_point,
            work_dir=work_dir,
            parameters=parameters,
            experiment_id=experiment_id,
            cluster_spec=backend_config,
        )

    if backend_name == "kubernetes":
        from mlflow.projects.docker import (
            build_docker_image,
            validate_docker_env,
            validate_docker_installation,
        )
        from mlflow.projects import kubernetes as kb

        tracking.MlflowClient().set_tag(active_run.info.run_id, MLFLOW_PROJECT_ENV, "docker")
        tracking.MlflowClient().set_tag(
            active_run.info.run_id, MLFLOW_PROJECT_BACKEND, "kubernetes"
        )
        validate_docker_env(project)
        validate_docker_installation()
        kube_config = _parse_kubernetes_config(backend_config)
        built_image = build_docker_image(
            work_dir=work_dir,
            repository_uri=kube_config["repository-uri"],
            base_image=project.docker_env.get("image"),
            run_id=active_run.info.run_id,
        )
        # Push first; the job receives both the image tag and the digest from the push.
        image_digest = kb.push_image_to_registry(built_image.tags[0])
        return kb.run_kubernetes_job(
            project.name,
            active_run,
            built_image.tags[0],
            image_digest,
            get_entry_point_command(project, entry_point, parameters, storage_dir),
            get_run_env_vars(
                run_id=active_run.info.run_uuid,
                experiment_id=active_run.info.experiment_id,
            ),
            kube_config.get("kube-context", None),
            kube_config["kube-job-template"],
        )

    # Nothing handled the request: report the backends that could have.
    supported_backends = ["databricks", "kubernetes"] + list(loader.MLFLOW_BACKENDS.keys())
    raise ExecutionException(
        "Got unsupported execution mode %s. Supported "
        "values: %s" % (backend_name, supported_backends)
    )
def run(self, project_uri: str, entry_point: str, params: Dict, version: str,
        backend_config: Dict, tracking_uri: str, experiment_id: str) -> SubmittedRun:
    """
    Submit a project entry point through ``skein_launcher`` and return a
    ``YarnSubmittedRun`` for the submitted application.

    :param project_uri: URI of the project to run.
    :param entry_point: Name of the project entry point to execute.
    :param params: Parameters used to compute the entry point command.
    :param version: Project version forwarded to ``fetch_and_validate_project``.
    :param backend_config: Must contain ``PROJECT_STORAGE_DIR``. Any key that also
                           appears in the project's backend settings
                           (``_get_backend_dict``) overrides the project value.
    :param tracking_uri: Accepted but not read; the submitted environment uses
                         ``mlflow.get_tracking_uri()``.
    :param experiment_id: Experiment under which the run is recorded.
    :raises ValueError: if the project defines no conda environment and has no
                        ``requirements.txt`` in its working directory.
    """
    _logger.info('using yarn backend')
    _logger.info(locals())
    work_dir = fetch_and_validate_project(project_uri, version, entry_point, params)
    active_run = get_or_create_run(None, project_uri, experiment_id, work_dir,
                                   version, entry_point, params)
    _logger.info(f"run_id={active_run.info.run_id}")
    _logger.info(f"work_dir={work_dir}")
    project = load_project(work_dir)

    storage_dir = backend_config[PROJECT_STORAGE_DIR]
    entry_point_command = project.get_entry_point(entry_point)\
        .compute_command(params, storage_dir)
    _logger.info(f"entry_point_command={entry_point_command}")

    # Dependency spec: the project's conda env if it has one, else requirements.txt.
    if project.conda_env_path:
        spec_file = project.conda_env_path
    else:
        spec_file = os.path.join(work_dir, "requirements.txt")
        if not os.path.exists(spec_file):
            raise ValueError(
                "No conda environment is defined for the project and "
                f"'{spec_file}' does not exist"
            )

    package_path = cluster_pack.upload_spec(spec_file)
    _logger.info(package_path)

    # Only the regular files directly inside the work dir are passed along
    # (sub-directories are skipped).
    candidate_paths = (os.path.join(work_dir, name) for name in os.listdir(work_dir))
    additional_files = [path for path in candidate_paths if os.path.isfile(path)]

    # Use a distinct name so the ``entry_point`` parameter is not shadowed.
    module_name, args = try_split_cmd(entry_point_command)
    _logger.info(f"args {module_name} {args}")

    if "MLFLOW_YARN_TESTS" in os.environ:
        # we need to have a real tracking server setup to be able to push the run id here
        env = {"MLFLOW_TRACKING_URI": "file:/tmp/mlflow"}
    else:
        env = {
            "MLFLOW_RUN_ID": active_run.info.run_id,
            "MLFLOW_TRACKING_URI": mlflow.get_tracking_uri(),
            "MLFLOW_EXPERIMENT_ID": experiment_id
        }

    _backend_dict = _get_backend_dict(work_dir)
    # update config with what has been passed with --backend-config <json-new-config>;
    # keys unknown to the project's backend settings are ignored.
    _backend_dict.update(
        {key: backend_config[key] for key in _backend_dict if key in backend_config}
    )
    _logger.info(f"backend config: {_backend_dict}")

    app_id = skein_launcher.submit(
        self._skein_client,
        module_name=module_name,
        args=args,
        package_path=package_path,
        additional_files=additional_files,
        env_vars=env,
        process_logs=_upload_logs,
        **_backend_dict
    )

    # Record the application ID on the run.
    MlflowClient().set_tag(active_run.info.run_id, "skein_application_id", app_id)
    return YarnSubmittedRun(self._skein_client, app_id, active_run.info.run_id)
def run(
    self, project_uri, entry_point, params, version, backend_config, tracking_uri, experiment_id
):
    """
    Execute a project locally using docker, virtualenv or conda, as determined by the
    project definition and the ``PROJECT_ENV_MANAGER`` entry of ``backend_config``.

    ``backend_config`` must contain ``PROJECT_ENV_MANAGER``, ``PROJECT_SYNCHRONOUS``,
    ``PROJECT_DOCKER_ARGS`` and ``PROJECT_STORAGE_DIR``; it may also contain
    ``MLFLOW_LOCAL_BACKEND_RUN_ID_CONFIG`` naming a run to reuse. ``tracking_uri`` is
    accepted but not read by this method.

    Raises ``MlflowException`` (invalid parameter value) when a python_env project is
    requested to run under the conda env manager.
    """
    work_dir = fetch_and_validate_project(project_uri, version, entry_point, params)
    project = load_project(work_dir)

    # Reuse the run named in the backend config when one is given.
    requested_run_id = (
        backend_config[MLFLOW_LOCAL_BACKEND_RUN_ID_CONFIG]
        if MLFLOW_LOCAL_BACKEND_RUN_ID_CONFIG in backend_config
        else None
    )
    active_run = get_or_create_run(
        requested_run_id, project_uri, experiment_id, work_dir, version, entry_point, params
    )

    cmd_parts = []
    joiner = " "
    env_manager = backend_config[PROJECT_ENV_MANAGER]
    synchronous = backend_config[PROJECT_SYNCHRONOUS]
    docker_args = backend_config[PROJECT_DOCKER_ARGS]
    storage_dir = backend_config[PROJECT_STORAGE_DIR]

    # Select an appropriate env manager for the project env type
    if env_manager is None:
        env_manager = _env_type_to_env_manager(project.env_type)
    elif project.env_type == env_type.PYTHON and env_manager == _EnvManager.CONDA:
        raise MlflowException.invalid_parameter_value(
            "python_env project cannot be executed using conda. Set `--env-manager` to "
            "'virtualenv' or 'local' to execute this project."
        )

    if project.docker_env:
        # A docker_env declared in MLproject wins over conda yaml environments, so the
        # project is executed inside a docker container.
        from mlflow.projects.docker import (
            validate_docker_env,
            validate_docker_installation,
            build_docker_image,
        )

        tracking.MlflowClient().set_tag(active_run.info.run_id, MLFLOW_PROJECT_ENV, "docker")
        validate_docker_env(project)
        validate_docker_installation()
        docker_image = build_docker_image(
            work_dir=work_dir,
            repository_uri=project.name,
            base_image=project.docker_env.get("image"),
            run_id=active_run.info.run_id,
        )
        cmd_parts.extend(
            _get_docker_command(
                image=docker_image,
                active_run=active_run,
                docker_args=docker_args,
                volumes=project.docker_env.get("volumes"),
                user_env_vars=project.docker_env.get("environment"),
            )
        )
    elif env_manager == _EnvManager.VIRTUALENV:
        tracking.MlflowClient().set_tag(
            active_run.info.run_id, MLFLOW_PROJECT_ENV, "virtualenv"
        )
        joiner = " && "
        # The python env spec is read from the conda yaml for conda-type projects,
        # and from the project's own env yaml otherwise.
        read_python_env = (
            _PythonEnv.from_conda_yaml
            if project.env_type == env_type.CONDA
            else _PythonEnv.from_yaml
        )
        python_env = read_python_env(project.env_config_path)
        python_bin_path = _install_python(python_env.python)
        env_root = _get_mlflow_virtualenv_root()
        work_dir_path = Path(work_dir)
        env_name = _get_virtualenv_name(python_env, work_dir_path)
        env_dir = Path(env_root) / env_name
        activate_cmd = _create_virtualenv(work_dir_path, python_bin_path, env_dir, python_env)
        cmd_parts.append(activate_cmd)
    elif env_manager == _EnvManager.CONDA:
        # The conda environment is created synchronously (even though this may take some
        # time) to avoid failures from concurrent attempts to create the same conda env.
        tracking.MlflowClient().set_tag(active_run.info.run_id, MLFLOW_PROJECT_ENV, "conda")
        joiner = " && "
        conda_env_name = get_or_create_conda_env(project.env_config_path)
        cmd_parts.extend(get_conda_command(conda_env_name))

    if not synchronous:
        # Asynchronous mode: invoke `mlflow run` in a subprocess.
        return _invoke_mlflow_run_subprocess(
            work_dir=work_dir,
            entry_point=entry_point,
            parameters=params,
            experiment_id=experiment_id,
            env_manager=env_manager,
            docker_args=docker_args,
            storage_dir=storage_dir,
            run_id=active_run.info.run_id,
        )

    # Synchronous mode: run the entry point command in a blocking fashion, sending status
    # updates to the tracking server when finished. Note that the run state may not be
    # persisted to the tracking server if interrupted.
    cmd_parts.extend(get_entry_point_command(project, entry_point, params, storage_dir))
    full_command = joiner.join(cmd_parts)
    return _run_entry_point(
        full_command, work_dir, experiment_id, run_id=active_run.info.run_id
    )