Example #1
def _execute_in_conda_env(conda_env_path, command, install_mlflow, command_env=None):
    if command_env is None:
        command_env = os.environ
    env_id = os.environ.get("MLFLOW_HOME", VERSION) if install_mlflow else None
    conda_env_name = get_or_create_conda_env(conda_env_path, env_id=env_id)
    activate_conda_env = get_conda_command(conda_env_name)
    if install_mlflow:
        if "MLFLOW_HOME" in os.environ:  # dev version
            install_mlflow = "pip install -e {} 1>&2".format(os.environ["MLFLOW_HOME"])
        else:
            install_mlflow = "pip install mlflow=={} 1>&2".format(VERSION)

        activate_conda_env += [install_mlflow]
    if os.name != "nt":
        separator = " && "
    else:
        separator = " & "

    command = separator.join(activate_conda_env + [command])
    _logger.info("=== Running command '%s'", command)

    if os.name != "nt":
        child = subprocess.Popen(["bash", "-c", command], close_fds=True, env=command_env)
    else:
        child = subprocess.Popen(["cmd", "/c", command], close_fds=True, env=command_env)
    rc = child.wait()
    if rc != 0:
        raise Exception(
            "Command '{0}' returned non zero return code. Return code = {1}".format(command, rc)
        )
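
A minimal usage sketch for the helper above (the path and command are hypothetical; _execute_in_conda_env is an internal MLflow helper, not a public API). It restores the environment described by the conda.yaml, runs the command inside it, and raises if the command exits non-zero:

    _execute_in_conda_env(
        conda_env_path="/path/to/model/conda.yaml",  # hypothetical environment file
        command='python -c "import sys; print(sys.executable)"',  # prints the env's interpreter
        install_mlflow=False,  # skip installing mlflow into the environment
    )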
Example #2
 def run(self, project_uri, entry_point, params, version, backend_config,
         tracking_uri, experiment_id):
     work_dir = fetch_and_validate_project(project_uri, version,
                                           entry_point, params)
     project = load_project(work_dir)
     if MLFLOW_LOCAL_BACKEND_RUN_ID_CONFIG in backend_config:
         run_id = backend_config[MLFLOW_LOCAL_BACKEND_RUN_ID_CONFIG]
     else:
         run_id = None
     active_run = get_or_create_run(run_id, project_uri, experiment_id,
                                    work_dir, version, entry_point, params)
     command_args = []
     command_separator = " "
     use_conda = backend_config[PROJECT_USE_CONDA]
     synchronous = backend_config[PROJECT_SYNCHRONOUS]
     docker_args = backend_config[PROJECT_DOCKER_ARGS]
     storage_dir = backend_config[PROJECT_STORAGE_DIR]
     # If a docker_env attribute is defined in MLproject then it takes precedence over conda yaml
     # environments, so the project will be executed inside a docker container.
     if project.docker_env:
         tracking.MlflowClient().set_tag(active_run.info.run_id,
                                         MLFLOW_PROJECT_ENV, "docker")
         validate_docker_env(project)
         validate_docker_installation()
         image = build_docker_image(
             work_dir=work_dir,
             repository_uri=project.name,
             base_image=project.docker_env.get('image'),
             run_id=active_run.info.run_id)
         command_args += _get_docker_command(
             image=image,
             active_run=active_run,
             docker_args=docker_args,
             volumes=project.docker_env.get("volumes"),
             user_env_vars=project.docker_env.get("environment"))
     # Synchronously create a conda environment (even though this may take some time)
     # to avoid failures due to multiple concurrent attempts to create the same conda env.
     elif use_conda:
         tracking.MlflowClient().set_tag(active_run.info.run_id,
                                         MLFLOW_PROJECT_ENV, "conda")
         command_separator = " && "
         conda_env_name = get_or_create_conda_env(project.conda_env_path)
         command_args += get_conda_command(conda_env_name)
     # In synchronous mode, run the entry point command in a blocking fashion, sending status
     # updates to the tracking server when finished. Note that the run state may not be
     # persisted to the tracking server if interrupted
     if synchronous:
         command_args += get_entry_point_command(project, entry_point,
                                                 params, storage_dir)
         command_str = command_separator.join(command_args)
         return _run_entry_point(command_str,
                                 work_dir,
                                 experiment_id,
                                 run_id=active_run.info.run_id)
     # Otherwise, invoke `mlflow run` in a subprocess
     return _invoke_mlflow_run_subprocess(work_dir=work_dir,
                                          entry_point=entry_point,
                                          parameters=params,
                                          experiment_id=experiment_id,
                                          use_conda=use_conda,
                                          storage_dir=storage_dir,
                                          run_id=active_run.info.run_id)
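
This backend method is normally not called directly; it is reached through the public mlflow.projects.run API. A hedged sketch of a caller that would exercise the code path above (the URI and parameters are illustrative, and the use_conda keyword is assumed to correspond to the PROJECT_USE_CONDA setting read in this version of the backend):

    import mlflow.projects

    # Hypothetical project URI and parameters; backend="local" routes the run to
    # the run() method shown above.
    submitted = mlflow.projects.run(
        uri="https://github.com/mlflow/mlflow-example",
        entry_point="main",
        parameters={"alpha": 0.5},
        backend="local",
        use_conda=True,     # assumed to end up in backend_config[PROJECT_USE_CONDA]
        synchronous=True,   # block and report status to the tracking server
    )
    print(submitted.run_id)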
Example #3
def _execute_in_conda_env(
    conda_env_name,
    command,
    install_mlflow,
    command_env=None,
    synchronous=True,
    preexec_fn=None,
    stdout=None,
    stderr=None,
    env_root_dir=None,
):
    """
    :param conda_env_path conda: conda environment file path
    :param command: command to run on the restored conda environment.
    :param install_mlflow: whether to install mlflow
    :param command_env: environment for child process.
    :param synchronous: If True, wait until server process exit and return 0, if process exit
                        with non-zero return code, raise exception.
                        If False, return the server process `Popen` instance immediately.
    :param stdout: Redirect server stdout
    :param stderr: Redirect server stderr
    :param env_root_dir: See doc of PyFuncBackend constructor argument `env_root_dir`.
    """
    if command_env is None:
        command_env = os.environ.copy()

    if env_root_dir is not None:
        command_env = {
            **command_env,
            **_get_conda_extra_env_vars(env_root_dir)
        }

    activate_conda_env = get_conda_command(conda_env_name)
    if install_mlflow:
        pip_install_mlflow = _get_pip_install_mlflow()
        activate_conda_env += [pip_install_mlflow]
    if _IS_UNIX:
        separator = " && "
    else:
        separator = " & "

    command = separator.join(activate_conda_env + [command])
    _logger.info("=== Running command '%s'", command)

    if _IS_UNIX:
        child = subprocess.Popen(
            ["bash", "-c", command],
            close_fds=True,
            env=command_env,
            preexec_fn=preexec_fn,
            stdout=stdout,
            stderr=stderr,
        )
    else:
        child = subprocess.Popen(
            ["cmd", "/c", command],
            close_fds=True,
            env=command_env,
            preexec_fn=preexec_fn,
            stdout=stdout,
            stderr=stderr,
        )

    if synchronous:
        rc = child.wait()
        if rc != 0:
            raise Exception(
                "Command '{0}' returned non zero return code. Return code = {1}"
                .format(command, rc))
        return 0
    else:
        return child
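
A hedged sketch of the asynchronous mode described in the docstring: with synchronous=False the helper returns the Popen object immediately and the caller owns the process lifecycle (the environment name and command below are hypothetical):

    proc = _execute_in_conda_env(
        conda_env_name="mlflow-1234abcd",          # hypothetical restored env name
        command='python -c "print(\'serving\')"',  # hypothetical long-running command
        install_mlflow=False,
        synchronous=False,                         # return the Popen without waiting
    )
    # In asynchronous mode the caller is responsible for waiting and error handling.
    if proc.wait() != 0:
        raise RuntimeError("child process failed")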
Example #4
def _execute_in_conda_env(
    conda_env_path,
    command,
    install_mlflow,
    command_env=None,
    synchronous=True,
    preexec_fn=None,
    stdout=None,
    stderr=None,
):
    """
    :param conda_env_path conda: conda environment file path
    :param command: command to run on the restored conda environment.
    :install_mlflow: whether to install mlflow
    :command_env: environment for child process.
    :param synchronous: If True, wait until server process exit and return 0, if process exit
                        with non-zero return code, raise exception.
                        If False, return the server process `Popen` instance immediately.
    :param stdout: Redirect server stdout
    :param stderr: Redirect server stderr
    """
    if command_env is None:
        # Copy so that setting PIP_NO_INPUT below does not mutate the parent
        # process environment.
        command_env = os.environ.copy()

    # PIP_NO_INPUT=1 makes pip run in non-interactive mode; otherwise pip might
    # prompt "yes or no" and wait for input on stdin.
    command_env["PIP_NO_INPUT"] = "1"

    env_id = os.environ.get("MLFLOW_HOME", VERSION) if install_mlflow else None
    conda_env_name = get_or_create_conda_env(conda_env_path, env_id=env_id)
    activate_conda_env = get_conda_command(conda_env_name)
    if install_mlflow:
        if "MLFLOW_HOME" in os.environ:  # dev version
            install_mlflow = "pip install -e {} 1>&2".format(os.environ["MLFLOW_HOME"])
        else:
            install_mlflow = "pip install mlflow=={} 1>&2".format(VERSION)

        activate_conda_env += [install_mlflow]
    if os.name != "nt":
        separator = " && "
        # Add "exec" before the starting scoring server command, so that the scoring server
        # process replaces the bash process, otherwise the scoring server process is created
        # as a child process of the bash process.
        # Note we in `mlflow.pyfunc.spark_udf`, use prctl PR_SET_PDEATHSIG to ensure scoring
        # server process being killed when UDF process exit. The PR_SET_PDEATHSIG can only
        # send signal to the bash process, if the scoring server process is created as a
        # child process of the bash process, then it cannot receive the signal sent by prctl.
        # TODO: For Windows, there's no equivalent things of Unix shell's exec. Windows also
        #  does not support prctl. We need to find an approach to address it.
        command = "exec " + command
    else:
        separator = " & "

    command = separator.join(activate_conda_env + [command])
    _logger.info("=== Running command '%s'", command)

    if os.name != "nt":
        child = subprocess.Popen(
            ["bash", "-c", command],
            close_fds=True,
            env=command_env,
            preexec_fn=preexec_fn,
            stdout=stdout,
            stderr=stderr,
        )
    else:
        child = subprocess.Popen(
            ["cmd", "/c", command],
            close_fds=True,
            env=command_env,
            preexec_fn=preexec_fn,
            stdout=stdout,
            stderr=stderr,
        )

    if synchronous:
        rc = child.wait()
        if rc != 0:
            raise Exception(
                "Command '{0}' returned non zero return code. Return code = {1}".format(command, rc)
            )
        return 0
    else:
        return child
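
The "exec"/prctl comment in this example is easier to follow with a concrete preexec_fn. Below is a minimal Linux-only sketch (the helper name is hypothetical and the real mlflow.pyfunc.spark_udf implementation may differ) of asking the kernel to terminate the child when its parent dies:

    import ctypes
    import signal

    def _kill_child_when_parent_dies():
        # Hypothetical preexec_fn: PR_SET_PDEATHSIG (value 1 on Linux) asks the
        # kernel to deliver SIGTERM to this process when its parent exits.
        libc = ctypes.CDLL("libc.so.6", use_errno=True)
        PR_SET_PDEATHSIG = 1
        libc.prctl(PR_SET_PDEATHSIG, signal.SIGTERM)

    # Because the command above is prefixed with "exec", the scoring server replaces
    # the bash process started by Popen, so the death signal reaches the server itself:
    # _execute_in_conda_env(..., preexec_fn=_kill_child_when_parent_dies)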
Example #5
    def run(
        self, project_uri, entry_point, params, version, backend_config, tracking_uri, experiment_id
    ):
        work_dir = fetch_and_validate_project(project_uri, version, entry_point, params)
        project = load_project(work_dir)
        if MLFLOW_LOCAL_BACKEND_RUN_ID_CONFIG in backend_config:
            run_id = backend_config[MLFLOW_LOCAL_BACKEND_RUN_ID_CONFIG]
        else:
            run_id = None
        active_run = get_or_create_run(
            run_id, project_uri, experiment_id, work_dir, version, entry_point, params
        )
        command_args = []
        command_separator = " "
        env_manager = backend_config[PROJECT_ENV_MANAGER]
        synchronous = backend_config[PROJECT_SYNCHRONOUS]
        docker_args = backend_config[PROJECT_DOCKER_ARGS]
        storage_dir = backend_config[PROJECT_STORAGE_DIR]

        # Select an appropriate env manager for the project env type
        if env_manager is None:
            env_manager = _env_type_to_env_manager(project.env_type)
        else:
            if project.env_type == env_type.PYTHON and env_manager == _EnvManager.CONDA:
                raise MlflowException.invalid_parameter_value(
                    "python_env project cannot be executed using conda. Set `--env-manager` to "
                    "'virtualenv' or 'local' to execute this project."
                )

        # If a docker_env attribute is defined in MLproject then it takes precedence over conda yaml
        # environments, so the project will be executed inside a docker container.
        if project.docker_env:
            from mlflow.projects.docker import (
                validate_docker_env,
                validate_docker_installation,
                build_docker_image,
            )

            tracking.MlflowClient().set_tag(active_run.info.run_id, MLFLOW_PROJECT_ENV, "docker")
            validate_docker_env(project)
            validate_docker_installation()
            image = build_docker_image(
                work_dir=work_dir,
                repository_uri=project.name,
                base_image=project.docker_env.get("image"),
                run_id=active_run.info.run_id,
            )
            command_args += _get_docker_command(
                image=image,
                active_run=active_run,
                docker_args=docker_args,
                volumes=project.docker_env.get("volumes"),
                user_env_vars=project.docker_env.get("environment"),
            )
        # Synchronously create a conda environment (even though this may take some time)
        # to avoid failures due to multiple concurrent attempts to create the same conda env.
        elif env_manager == _EnvManager.VIRTUALENV:
            tracking.MlflowClient().set_tag(
                active_run.info.run_id, MLFLOW_PROJECT_ENV, "virtualenv"
            )
            command_separator = " && "
            if project.env_type == env_type.CONDA:
                python_env = _PythonEnv.from_conda_yaml(project.env_config_path)
            else:
                python_env = _PythonEnv.from_yaml(project.env_config_path)
            python_bin_path = _install_python(python_env.python)
            env_root = _get_mlflow_virtualenv_root()
            work_dir_path = Path(work_dir)
            env_name = _get_virtualenv_name(python_env, work_dir_path)
            env_dir = Path(env_root).joinpath(env_name)
            activate_cmd = _create_virtualenv(work_dir_path, python_bin_path, env_dir, python_env)
            command_args += [activate_cmd]
        elif env_manager == _EnvManager.CONDA:
            tracking.MlflowClient().set_tag(active_run.info.run_id, MLFLOW_PROJECT_ENV, "conda")
            command_separator = " && "
            conda_env_name = get_or_create_conda_env(project.env_config_path)
            command_args += get_conda_command(conda_env_name)

        # In synchronous mode, run the entry point command in a blocking fashion, sending status
        # updates to the tracking server when finished. Note that the run state may not be
        # persisted to the tracking server if interrupted
        if synchronous:
            command_args += get_entry_point_command(project, entry_point, params, storage_dir)
            command_str = command_separator.join(command_args)
            return _run_entry_point(
                command_str, work_dir, experiment_id, run_id=active_run.info.run_id
            )
        # Otherwise, invoke `mlflow run` in a subprocess
        return _invoke_mlflow_run_subprocess(
            work_dir=work_dir,
            entry_point=entry_point,
            parameters=params,
            experiment_id=experiment_id,
            env_manager=env_manager,
            docker_args=docker_args,
            storage_dir=storage_dir,
            run_id=active_run.info.run_id,
        )
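
The env-manager fallback at the top of this example chooses an environment manager from the project's declared environment type when --env-manager is not supplied. A minimal sketch of that mapping (not MLflow's actual implementation; the "local" fallback is an assumption), consistent with the error raised above for python_env projects combined with conda:

    def _env_type_to_env_manager_sketch(project_env_type):
        # Assumed mapping: python_env projects run under virtualenv,
        # conda projects run under conda.
        if project_env_type == "python_env":
            return "virtualenv"
        if project_env_type == "conda":
            return "conda"
        # Assumption: anything else falls back to the local environment.
        return "local"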