Code example #1
def test_get_or_create_conda_env_capture_output_mode(tmp_path):
    conda_yaml_file = tmp_path / "conda.yaml"
    conda_yaml_file.write_text("""
channels:
- conda-forge
dependencies:
- pip:
  - scikit-learn==99.99.99
""")
    with pytest.raises(
        ShellCommandException,
        match="Could not find a version that satisfies the requirement scikit-learn==99.99.99",
    ):
        get_or_create_conda_env(str(conda_yaml_file), capture_output=True)
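
For contrast, a minimal happy-path sketch (not part of the original test): the same call succeeds when the pinned dependency actually exists. This assumes conda is on PATH and an MLflow version whose get_or_create_conda_env accepts capture_output, as in the test above; the import path may vary by version.

from pathlib import Path

from mlflow.utils.conda import get_or_create_conda_env  # location may vary by MLflow version

conda_yaml = Path("conda.yaml")
conda_yaml.write_text("""
channels:
- conda-forge
dependencies:
- pip:
  - scikit-learn==1.0.2
""")
# In the MLflow versions shown on this page, the return value is the conda
# environment name, which can then be activated with `conda activate`.
env_name = get_or_create_conda_env(str(conda_yaml), capture_output=True)
print(env_name)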
Code example #2
File: backend.py  Project: mdneuzerling/mlflow
def _execute_in_conda_env(conda_env_path, command, install_mlflow, command_env=None):
    if command_env is None:
        command_env = os.environ
    env_id = os.environ.get("MLFLOW_HOME", VERSION) if install_mlflow else None
    conda_env_name = get_or_create_conda_env(conda_env_path, env_id=env_id)
    activate_conda_env = get_conda_command(conda_env_name)
    if install_mlflow:
        if "MLFLOW_HOME" in os.environ:  # dev version
            install_mlflow = "pip install -e {} 1>&2".format(os.environ["MLFLOW_HOME"])
        else:
            install_mlflow = "pip install mlflow=={} 1>&2".format(VERSION)

        activate_conda_env += [install_mlflow]
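    # "&&" makes a POSIX shell stop at the first failing command, while cmd.exe's
    # "&" runs the next command regardless of the previous exit status.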
    if os.name != "nt":
        separator = " && "
    else:
        separator = " & "

    command = separator.join(activate_conda_env + [command])
    _logger.info("=== Running command '%s'", command)

    if os.name != "nt":
        child = subprocess.Popen(["bash", "-c", command], close_fds=True, env=command_env)
    else:
        child = subprocess.Popen(["cmd", "/c", command], close_fds=True, env=command_env)
    rc = child.wait()
    if rc != 0:
        raise Exception(
            "Command '{0}' returned non zero return code. Return code = {1}".format(command, rc)
        )
Code example #3
File: backend.py  Project: bkbonde/mlflow
    def prepare_env(self, model_uri, capture_output=False):
        local_path = _download_artifact_from_uri(model_uri)

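        # A no-op python command: running it in the restored environment is enough
        # to force environment creation and verify it activates, without running
        # any real workload.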
        command = 'python -c ""'
        if self._env_manager == _EnvManager.VIRTUALENV:
            activate_cmd = _get_or_create_virtualenv(
                local_path,
                self._env_id,
                env_root_dir=self._env_root_dir,
                capture_output=capture_output,
            )
            return _execute_in_virtualenv(
                activate_cmd,
                command,
                self._install_mlflow,
                env_root_dir=self._env_root_dir,
                capture_output=capture_output,
            )
        elif self._env_manager == _EnvManager.LOCAL or ENV not in self._config:
            return 0

        conda_env_path = os.path.join(local_path, self._config[ENV])
        conda_env_name = get_or_create_conda_env(
            conda_env_path,
            env_id=self._env_id,
            capture_output=capture_output,
            env_root_dir=self._env_root_dir,
        )

        command = 'python -c ""'
        return _execute_in_conda_env(conda_env_name,
                                     command,
                                     self._install_mlflow,
                                     env_root_dir=self._env_root_dir)
Code example #4
    def predict(self, model_uri, input_path, output_path, content_type,
                json_format):
        """
        Generate predictions using generic python model saved with MLflow.
        Return the prediction results as a JSON.
        """
        local_path = _download_artifact_from_uri(model_uri)
        # NB: Absolute windows paths do not work with mlflow apis, use file uri to ensure
        # platform compatibility.
        local_uri = path_to_local_file_uri(local_path)
        if self._env_manager is _EnvManager.CONDA and ENV in self._config:
            conda_env_path = os.path.join(local_path, self._config[ENV])

            conda_env_name = get_or_create_conda_env(conda_env_path,
                                                     env_id=self._env_id,
                                                     capture_output=False)

            command = (
                'python -c "from mlflow.pyfunc.scoring_server import _predict; _predict('
                "model_uri={model_uri}, "
                "input_path={input_path}, "
                "output_path={output_path}, "
                "content_type={content_type}, "
                'json_format={json_format})"').format(
                    model_uri=repr(local_uri),
                    input_path=repr(input_path),
                    output_path=repr(output_path),
                    content_type=repr(content_type),
                    json_format=repr(json_format),
                )
            return _execute_in_conda_env(conda_env_name, command,
                                         self._install_mlflow)
        else:
            scoring_server._predict(local_uri, input_path, output_path,
                                    content_type, json_format)
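
The repr() calls above are what make the generated `python -c` snippet parse: each argument is embedded as a Python string literal rather than as raw text. A minimal sketch of the same trick, with illustrative values (not MLflow API):

input_path = "/tmp/input.csv"
snippet = "print({path})".format(path=repr(input_path))
# snippet == "print('/tmp/input.csv')" -- a valid Python expression that can be
# passed to `python -c` verbatim.
print(snippet)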
Code example #5
    def prepare_env(self, model_uri, capture_output=False):
        local_path = _download_artifact_from_uri(model_uri)
        if self._env_manager is _EnvManager.LOCAL or ENV not in self._config:
            return 0
        conda_env_path = os.path.join(local_path, self._config[ENV])

        conda_env_name = get_or_create_conda_env(conda_env_path,
                                                 env_id=self._env_id,
                                                 capture_output=capture_output)

        command = 'python -c ""'
        return _execute_in_conda_env(conda_env_name, command,
                                     self._install_mlflow)
Code example #6
    def run(self, project_uri, entry_point, params, version, backend_config,
            tracking_uri, experiment_id):
        work_dir = fetch_and_validate_project(project_uri, version,
                                              entry_point, params)
        project = load_project(work_dir)
        if MLFLOW_LOCAL_BACKEND_RUN_ID_CONFIG in backend_config:
            run_id = backend_config[MLFLOW_LOCAL_BACKEND_RUN_ID_CONFIG]
        else:
            run_id = None
        active_run = get_or_create_run(run_id, project_uri, experiment_id,
                                       work_dir, version, entry_point, params)
        command_args = []
        command_separator = " "
        use_conda = backend_config[PROJECT_USE_CONDA]
        synchronous = backend_config[PROJECT_SYNCHRONOUS]
        docker_args = backend_config[PROJECT_DOCKER_ARGS]
        storage_dir = backend_config[PROJECT_STORAGE_DIR]
        # If a docker_env attribute is defined in MLproject then it takes precedence over conda
        # yaml environments, so the project will be executed inside a docker container.
        if project.docker_env:
            tracking.MlflowClient().set_tag(active_run.info.run_id,
                                            MLFLOW_PROJECT_ENV, "docker")
            validate_docker_env(project)
            validate_docker_installation()
            image = build_docker_image(
                work_dir=work_dir,
                repository_uri=project.name,
                base_image=project.docker_env.get('image'),
                run_id=active_run.info.run_id)
            command_args += _get_docker_command(
                image=image,
                active_run=active_run,
                docker_args=docker_args,
                volumes=project.docker_env.get("volumes"),
                user_env_vars=project.docker_env.get("environment"))
        # Synchronously create a conda environment (even though this may take some time)
        # to avoid failures due to multiple concurrent attempts to create the same conda env.
        elif use_conda:
            tracking.MlflowClient().set_tag(active_run.info.run_id,
                                            MLFLOW_PROJECT_ENV, "conda")
            command_separator = " && "
            conda_env_name = get_or_create_conda_env(project.conda_env_path)
            command_args += get_conda_command(conda_env_name)
        # In synchronous mode, run the entry point command in a blocking fashion, sending status
        # updates to the tracking server when finished. Note that the run state may not be
        # persisted to the tracking server if interrupted
        if synchronous:
            command_args += get_entry_point_command(project, entry_point,
                                                    params, storage_dir)
            command_str = command_separator.join(command_args)
            return _run_entry_point(command_str,
                                    work_dir,
                                    experiment_id,
                                    run_id=active_run.info.run_id)
        # Otherwise, invoke `mlflow run` in a subprocess
        return _invoke_mlflow_run_subprocess(work_dir=work_dir,
                                             entry_point=entry_point,
                                             parameters=params,
                                             experiment_id=experiment_id,
                                             use_conda=use_conda,
                                             storage_dir=storage_dir,
                                             run_id=active_run.info.run_id)
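
To make the join concrete, a hedged sketch of what the conda branch above assembles, assuming (hypothetically) that get_conda_command returns a single "source activate ..." string and the entry point resolves to "python train.py":

# Hypothetical values; the env name and entry point command are illustrative.
command_args = ["source activate mlflow-3f8e2a", "python train.py"]
command_str = " && ".join(command_args)
print(command_str)  # source activate mlflow-3f8e2a && python train.py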
Code example #7
def _execute_in_conda_env(
    conda_env_path,
    command,
    install_mlflow,
    command_env=None,
    synchronous=True,
    preexec_fn=None,
    stdout=None,
    stderr=None,
):
    """
    :param conda_env_path: conda environment file path.
    :param command: command to run in the restored conda environment.
    :param install_mlflow: whether to install mlflow.
    :param command_env: environment variables for the child process.
    :param synchronous: If True, wait until the server process exits and return 0; if the
                        process exits with a non-zero return code, raise an exception.
                        If False, return the server process's `Popen` instance immediately.
    :param stdout: Redirect the server's stdout.
    :param stderr: Redirect the server's stderr.
    """
    if command_env is None:
        command_env = os.environ

    # PIP_NO_INPUT=1 makes pip run in non-interactive mode; otherwise pip might
    # prompt "yes or no" and wait for input on stdin.
    command_env["PIP_NO_INPUT"] = "1"

    env_id = os.environ.get("MLFLOW_HOME", VERSION) if install_mlflow else None
    conda_env_name = get_or_create_conda_env(conda_env_path, env_id=env_id)
    activate_conda_env = get_conda_command(conda_env_name)
    if install_mlflow:
        if "MLFLOW_HOME" in os.environ:  # dev version
            install_mlflow = "pip install -e {} 1>&2".format(os.environ["MLFLOW_HOME"])
        else:
            install_mlflow = "pip install mlflow=={} 1>&2".format(VERSION)

        activate_conda_env += [install_mlflow]
    if os.name != "nt":
        separator = " && "
        # Add "exec" before the starting scoring server command, so that the scoring server
        # process replaces the bash process, otherwise the scoring server process is created
        # as a child process of the bash process.
        # Note we in `mlflow.pyfunc.spark_udf`, use prctl PR_SET_PDEATHSIG to ensure scoring
        # server process being killed when UDF process exit. The PR_SET_PDEATHSIG can only
        # send signal to the bash process, if the scoring server process is created as a
        # child process of the bash process, then it cannot receive the signal sent by prctl.
        # TODO: For Windows, there's no equivalent things of Unix shell's exec. Windows also
        #  does not support prctl. We need to find an approach to address it.
        command = "exec " + command
    else:
        separator = " & "

    command = separator.join(activate_conda_env + [command])
    _logger.info("=== Running command '%s'", command)

    if os.name != "nt":
        child = subprocess.Popen(
            ["bash", "-c", command],
            close_fds=True,
            env=command_env,
            preexec_fn=preexec_fn,
            stdout=stdout,
            stderr=stderr,
        )
    else:
        child = subprocess.Popen(
            ["cmd", "/c", command],
            close_fds=True,
            env=command_env,
            preexec_fn=preexec_fn,
            stdout=stdout,
            stderr=stderr,
        )

    if synchronous:
        rc = child.wait()
        if rc != 0:
            raise Exception(
                "Command '{0}' returned non zero return code. Return code = {1}".format(command, rc)
            )
        return 0
    else:
        return child
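
A sketch of the asynchronous path, assuming the function above is importable (it is a private helper, so the import location depends on the MLflow version); the environment file and command are hypothetical:

import time

proc = _execute_in_conda_env(
    "conda.yaml",                  # hypothetical conda environment file
    "python -m http.server 8080",  # hypothetical long-running command
    install_mlflow=False,
    synchronous=False,             # returns the Popen instance immediately
)
try:
    time.sleep(5)  # interact with the server here
finally:
    proc.terminate()
    proc.wait()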
Code example #8
File: backend.py  Project: bkbonde/mlflow
    def serve(
        self,
        model_uri,
        port,
        host,
        timeout,
        enable_mlserver,
        synchronous=True,
        stdout=None,
        stderr=None,
    ):  # pylint: disable=W0221
        """
        Serve pyfunc model locally.
        """
        local_path = _download_artifact_from_uri(model_uri)

        server_implementation = mlserver if enable_mlserver else scoring_server
        command, command_env = server_implementation.get_cmd(
            local_path, port, host, timeout, self._nworkers)

        if sys.platform.startswith("linux"):

            def setup_sigterm_on_parent_death():
                """
                Uses prctl to automatically send SIGTERM to the command process when its
                parent process dies.

                This handles the case when the parent is a PySpark worker process.
                If a user cancels the PySpark job, the worker process gets killed, regardless of
                PySpark daemon and worker reuse settings.
                We use prctl to ensure the command process receives SIGTERM after spark job
                cancellation.
                The command process itself should handle SIGTERM properly.
                This is a no-op on macOS because prctl is not supported.

                Note:
                When a PySpark job is canceled, the UDF Python process is killed with SIGKILL.
                In that case neither "atexit" handlers nor signal handlers can intercept the
                signal, so prctl is the only way to react to the parent's death.
                """
                try:
                    import ctypes
                    import signal

                    libc = ctypes.CDLL("libc.so.6")
                    # Set the parent process death signal of the command process to SIGTERM.
                    libc.prctl(1,
                               signal.SIGTERM)  # PR_SET_PDEATHSIG, see prctl.h
                except OSError as e:
                    # TODO: find approach for supporting MacOS/Windows system which does
                    #  not support prctl.
                    warnings.warn(
                        f"Setup libc.prctl PR_SET_PDEATHSIG failed, error {repr(e)}."
                    )

        else:
            setup_sigterm_on_parent_death = None

        if _IS_UNIX:
            # Add "exec" before the starting scoring server command, so that the scoring server
            # process replaces the bash process, otherwise the scoring server process is created
            # as a child process of the bash process.
            # Note we in `mlflow.pyfunc.spark_udf`, use prctl PR_SET_PDEATHSIG to ensure scoring
            # server process being killed when UDF process exit. The PR_SET_PDEATHSIG can only
            # send signal to the bash process, if the scoring server process is created as a
            # child process of the bash process, then it cannot receive the signal sent by prctl.
            # TODO: For Windows, there's no equivalent things of Unix shell's exec. Windows also
            #  does not support prctl. We need to find an approach to address it.
            command = "exec " + command

        if self._env_manager == _EnvManager.CONDA and ENV in self._config:
            conda_env_path = os.path.join(local_path, self._config[ENV])

            conda_env_name = get_or_create_conda_env(
                conda_env_path,
                env_id=self._env_id,
                capture_output=False,
                env_root_dir=self._env_root_dir,
            )

            child_proc = _execute_in_conda_env(
                conda_env_name,
                command,
                self._install_mlflow,
                command_env=command_env,
                synchronous=False,
                preexec_fn=setup_sigterm_on_parent_death,
                stdout=stdout,
                stderr=stderr,
                env_root_dir=self._env_root_dir,
            )
        elif self._env_manager == _EnvManager.VIRTUALENV:
            activate_cmd = _get_or_create_virtualenv(
                local_path, self._env_id, env_root_dir=self._env_root_dir)
            child_proc = _execute_in_virtualenv(
                activate_cmd,
                command,
                self._install_mlflow,
                command_env=command_env,
                capture_output=False,
                synchronous=False,
                env_root_dir=self._env_root_dir,
                preexec_fn=setup_sigterm_on_parent_death,
                stdout=stdout,
                stderr=stderr,
            )
        else:
            _logger.info("=== Running command '%s'", command)

            if os.name != "nt":
                command = ["bash", "-c", command]

            child_proc = subprocess.Popen(
                command,
                env=command_env,
                preexec_fn=setup_sigterm_on_parent_death,
                stdout=stdout,
                stderr=stderr,
            )

        if synchronous:
            rc = child_proc.wait()
            if rc != 0:
                raise Exception(
                    "Command '{0}' returned non zero return code. Return code = {1}"
                    .format(command, rc))
            return 0
        else:
            return child_proc
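
The prctl trick in setup_sigterm_on_parent_death also works standalone; a minimal Linux-only sketch (the constant 1 is PR_SET_PDEATHSIG, as noted in prctl.h):

import ctypes
import signal
import subprocess

def _sigterm_on_parent_death():
    # Ask the kernel to deliver SIGTERM to this child process when its
    # parent dies. Linux only; 1 == PR_SET_PDEATHSIG (see prctl.h).
    libc = ctypes.CDLL("libc.so.6")
    libc.prctl(1, signal.SIGTERM)

# The child would sleep for an hour, but receives SIGTERM as soon as the
# parent Python process exits. preexec_fn runs in the child before exec.
child = subprocess.Popen(["sleep", "3600"], preexec_fn=_sigterm_on_parent_death)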
Code example #9
    def run(
        self, project_uri, entry_point, params, version, backend_config, tracking_uri, experiment_id
    ):
        work_dir = fetch_and_validate_project(project_uri, version, entry_point, params)
        project = load_project(work_dir)
        if MLFLOW_LOCAL_BACKEND_RUN_ID_CONFIG in backend_config:
            run_id = backend_config[MLFLOW_LOCAL_BACKEND_RUN_ID_CONFIG]
        else:
            run_id = None
        active_run = get_or_create_run(
            run_id, project_uri, experiment_id, work_dir, version, entry_point, params
        )
        command_args = []
        command_separator = " "
        env_manager = backend_config[PROJECT_ENV_MANAGER]
        synchronous = backend_config[PROJECT_SYNCHRONOUS]
        docker_args = backend_config[PROJECT_DOCKER_ARGS]
        storage_dir = backend_config[PROJECT_STORAGE_DIR]

        # Select an appropriate env manager for the project env type
        if env_manager is None:
            env_manager = _env_type_to_env_manager(project.env_type)
        else:
            if project.env_type == env_type.PYTHON and env_manager == _EnvManager.CONDA:
                raise MlflowException.invalid_parameter_value(
                    "python_env project cannot be executed using conda. Set `--env-manager` to "
                    "'virtualenv' or 'local' to execute this project."
                )

        # If a docker_env attribute is defined in MLproject then it takes precedence over conda yaml
        # environments, so the project will be executed inside a docker container.
        if project.docker_env:
            from mlflow.projects.docker import (
                validate_docker_env,
                validate_docker_installation,
                build_docker_image,
            )

            tracking.MlflowClient().set_tag(active_run.info.run_id, MLFLOW_PROJECT_ENV, "docker")
            validate_docker_env(project)
            validate_docker_installation()
            image = build_docker_image(
                work_dir=work_dir,
                repository_uri=project.name,
                base_image=project.docker_env.get("image"),
                run_id=active_run.info.run_id,
            )
            command_args += _get_docker_command(
                image=image,
                active_run=active_run,
                docker_args=docker_args,
                volumes=project.docker_env.get("volumes"),
                user_env_vars=project.docker_env.get("environment"),
            )
        # Synchronously create a conda environment (even though this may take some time)
        # to avoid failures due to multiple concurrent attempts to create the same conda env.
        elif env_manager == _EnvManager.VIRTUALENV:
            tracking.MlflowClient().set_tag(
                active_run.info.run_id, MLFLOW_PROJECT_ENV, "virtualenv"
            )
            command_separator = " && "
            if project.env_type == env_type.CONDA:
                python_env = _PythonEnv.from_conda_yaml(project.env_config_path)
            else:
                python_env = _PythonEnv.from_yaml(project.env_config_path)
            python_bin_path = _install_python(python_env.python)
            env_root = _get_mlflow_virtualenv_root()
            work_dir_path = Path(work_dir)
            env_name = _get_virtualenv_name(python_env, work_dir_path)
            env_dir = Path(env_root).joinpath(env_name)
            activate_cmd = _create_virtualenv(work_dir_path, python_bin_path, env_dir, python_env)
            command_args += [activate_cmd]
        elif env_manager == _EnvManager.CONDA:
            tracking.MlflowClient().set_tag(active_run.info.run_id, MLFLOW_PROJECT_ENV, "conda")
            command_separator = " && "
            conda_env_name = get_or_create_conda_env(project.env_config_path)
            command_args += get_conda_command(conda_env_name)

        # In synchronous mode, run the entry point command in a blocking fashion, sending status
        # updates to the tracking server when finished. Note that the run state may not be
        # persisted to the tracking server if interrupted
        if synchronous:
            command_args += get_entry_point_command(project, entry_point, params, storage_dir)
            command_str = command_separator.join(command_args)
            return _run_entry_point(
                command_str, work_dir, experiment_id, run_id=active_run.info.run_id
            )
        # Otherwise, invoke `mlflow run` in a subprocess
        return _invoke_mlflow_run_subprocess(
            work_dir=work_dir,
            entry_point=entry_point,
            parameters=params,
            experiment_id=experiment_id,
            env_manager=env_manager,
            docker_args=docker_args,
            storage_dir=storage_dir,
            run_id=active_run.info.run_id,
        )