Example #1
0
def test_process_pip_requirements(tmpdir):
    """
    Verify the conda env pip deps, requirements and constraints produced by
    `_process_pip_requirements` for default, explicit, extra and constrained inputs.
    """

    def verify(outcome, expected_reqs, expected_cons):
        # `outcome` is the (conda_env, requirements, constraints) triple under test.
        env, requirements, constraints = outcome
        assert _get_pip_deps(env) == expected_reqs
        assert requirements == expected_reqs
        assert constraints == expected_cons

    # Default requirements only.
    verify(_process_pip_requirements(["a"]), ["mlflow", "a"], [])

    # `pip_requirements` is expected to take the place of the defaults.
    verify(
        _process_pip_requirements(["a"], pip_requirements=["b"]),
        ["mlflow", "b"],
        [],
    )

    # Ensure a requirement for mlflow is preserved
    verify(
        _process_pip_requirements(["a"], pip_requirements=["mlflow==1.2.3"]),
        ["mlflow==1.2.3"],
        [],
    )

    # `extra_pip_requirements` is expected to be appended to the defaults.
    verify(
        _process_pip_requirements(["a"], extra_pip_requirements=["b"]),
        ["mlflow", "a", "b"],
        [],
    )

    # A `-c <path>` entry is expected to be rewritten to the bare file name, with the
    # content of the referenced file returned as the constraints.
    constraints_path = tmpdir.join("constraints.txt")
    constraints_path.write("c")
    verify(
        _process_pip_requirements(
            ["a"], pip_requirements=["b", f"-c {constraints_path.strpath}"]
        ),
        ["mlflow", "b", "-c constraints.txt"],
        ["c"],
    )
Example #2
0
def test_process_conda_env(tmpdir):
    """
    Verify `_process_conda_env` for a conda env given as a dict or as a yaml file path,
    for a pinned mlflow requirement, for a constraints file, and for an invalid input type.
    """

    def build_env(pip_deps):
        # Minimal conda environment whose pip section holds `pip_deps`.
        dependencies = ["python=3.7.9", "pip", {"pip": pip_deps}]
        return {
            "name": "mlflow-env",
            "channels": ["conda-forge"],
            "dependencies": dependencies,
        }

    def verify(outcome, expected_reqs, expected_cons):
        # `outcome` is the (conda_env, requirements, constraints) triple under test.
        env, requirements, constraints = outcome
        assert _get_pip_deps(env) == expected_reqs
        assert requirements == expected_reqs
        assert constraints == expected_cons

    # Conda environment passed as a dictionary.
    verify(_process_conda_env(build_env(["a"])), ["mlflow", "a"], [])

    # Conda environment passed as a path to a yaml file.
    env_yaml = tmpdir.join("conda_env.yaml")
    env_yaml.write(yaml.dump(build_env(["a"])))
    verify(_process_conda_env(env_yaml.strpath), ["mlflow", "a"], [])

    # Ensure a requirement for mlflow is preserved
    verify(
        _process_conda_env(build_env(["mlflow==1.2.3"])),
        ["mlflow==1.2.3"],
        [],
    )

    # A `-c <path>` entry is expected to be rewritten to the bare file name, with the
    # content of the referenced file returned as the constraints.
    constraints_path = tmpdir.join("constraints.txt")
    constraints_path.write("c")
    verify(
        _process_conda_env(build_env(["a", f"-c {constraints_path.strpath}"])),
        ["mlflow", "a", "-c constraints.txt"],
        ["c"],
    )

    # Anything other than the supported input types must be rejected.
    with pytest.raises(TypeError, match=r"Expected .+, but got `int`"):
        _process_conda_env(0)
Example #3
0
def _assert_pip_requirements(model_uri,
                             requirements,
                             constraints=None,
                             strict=False):
    """
    Download the model at `model_uri` and check its saved pip requirements (and, when
    `constraints` is given, its constraints) against the expected values.

    The requirements are read both from the requirements file and from the pip section
    of the conda environment file; each must satisfy the comparison.

    With `strict=True` the comparison is `set(expected) == set(loaded)`;
    otherwise it is `set(expected) <= set(loaded)`.
    """

    def satisfied(expected, loaded):
        # Exact match in strict mode, subset check otherwise.
        if strict:
            return expected == loaded
        return expected <= loaded

    model_dir = _download_artifact_from_uri(model_uri)
    requirements_lines = _read_lines(
        os.path.join(model_dir, _REQUIREMENTS_FILE_NAME))
    conda_spec = _read_yaml(os.path.join(model_dir, _CONDA_ENV_FILE_NAME))
    conda_pip_deps = _get_pip_deps(conda_spec)

    expected_reqs = set(requirements)
    assert satisfied(expected_reqs, set(requirements_lines))
    assert satisfied(expected_reqs, set(conda_pip_deps))

    if constraints is None:
        return

    # Both requirement sources must reference the constraints file.
    constraints_entry = f"-c {_CONSTRAINTS_FILE_NAME}"
    assert constraints_entry in requirements_lines
    assert constraints_entry in conda_pip_deps
    constraints_lines = _read_lines(
        os.path.join(model_dir, _CONSTRAINTS_FILE_NAME))
    assert satisfied(set(constraints), set(constraints_lines))
Example #4
0
def _assert_pip_requirements(model_uri, requirements, constraints=None):
    """
    Download the model at `model_uri` and assert that both `requirements.txt` and the
    pip section of `conda.yaml` equal `requirements` exactly.

    When `constraints` is truthy, also assert that both sources reference the
    constraints file and that its lines equal `constraints`.
    """
    model_dir = _download_artifact_from_uri(model_uri)
    requirements_lines = _read_lines(os.path.join(model_dir, "requirements.txt"))
    conda_spec = _read_yaml(os.path.join(model_dir, "conda.yaml"))
    conda_pip_deps = _get_pip_deps(conda_spec)
    assert requirements_lines == requirements
    assert conda_pip_deps == requirements

    if not constraints:
        return

    constraints_entry = f"-c {_CONSTRAINTS_FILE_NAME}"
    assert constraints_entry in requirements_lines
    assert constraints_entry in conda_pip_deps
    constraints_lines = _read_lines(os.path.join(model_dir, _CONSTRAINTS_FILE_NAME))
    assert constraints_lines == constraints
Example #5
0
def test_process_pip_requirements(tmpdir):
    """
    Verify the conda env pip deps, requirements and constraints produced by
    `_process_pip_requirements` for default, explicit, hash-pinned, extra and
    constrained inputs.
    """

    def verify(outcome, expected_reqs, expected_cons):
        # `outcome` is the (conda_env, requirements, constraints) triple under test.
        env, requirements, constraints = outcome
        assert _get_pip_deps(env) == expected_reqs
        assert requirements == expected_reqs
        assert constraints == expected_cons

    # Default requirements only.
    verify(_process_pip_requirements(["a"]), ["mlflow", "a"], [])

    # `pip_requirements` is expected to take the place of the defaults.
    verify(
        _process_pip_requirements(["a"], pip_requirements=["b"]),
        ["mlflow", "b"],
        [],
    )

    # Ensure a requirement for mlflow is preserved
    verify(
        _process_pip_requirements(["a"], pip_requirements=["mlflow==1.2.3"]),
        ["mlflow==1.2.3"],
        [],
    )

    # Ensure a requirement for mlflow is preserved when package hashes are specified
    hash1 = "sha256:963c22532e82a93450674ab97d62f9e528ed0906b580fadb7c003e696197557c"
    hash2 = "sha256:b15ff0c7e5e64f864a0b40c99b9a582227315eca2065d9f831db9aeb8f24637b"
    hashed_mlflow = f"mlflow==1.20.2 --hash={hash1} --hash={hash2}"
    verify(
        _process_pip_requirements(["a"], pip_requirements=[hashed_mlflow]),
        [hashed_mlflow],
        [],
    )

    # `extra_pip_requirements` is expected to be appended to the defaults.
    verify(
        _process_pip_requirements(["a"], extra_pip_requirements=["b"]),
        ["mlflow", "a", "b"],
        [],
    )

    # A `-c <path>` entry is expected to be rewritten to the bare file name, with the
    # content of the referenced file returned as the constraints.
    constraints_path = tmpdir.join("constraints.txt")
    constraints_path.write("c")
    verify(
        _process_pip_requirements(
            ["a"], pip_requirements=["b", f"-c {constraints_path.strpath}"]
        ),
        ["mlflow", "b", "-c constraints.txt"],
        ["c"],
    )
Example #6
0
def save_explainer(
    explainer,
    path,
    serialize_model_using_mlflow=True,
    conda_env=None,
    code_paths=None,
    mlflow_model=None,
    signature: ModelSignature = None,
    input_example: ModelInputExample = None,
    pip_requirements=None,
    extra_pip_requirements=None,
):
    """
    Save a SHAP explainer to a path on the local file system. Produces an MLflow Model
    containing the following flavors:

        - :py:mod:`mlflow.shap`
        - :py:mod:`mlflow.pyfunc`

    :param explainer: SHAP explainer to be saved.
    :param path: Local path where the explainer is to be saved.
    :param serialize_model_using_mlflow: When set to True, MLflow will extract the underlying
                                         model and serialize it as an MLmodel, otherwise it
                                         uses SHAP's internal serialization. Defaults to True.
                                         Currently MLflow serialization is only supported for
                                         models of 'sklearn' or 'pytorch' flavors.

    :param conda_env: {{ conda_env }}
    :param code_paths: A list of local filesystem paths to Python file dependencies (or directories
                       containing file dependencies). These files are *prepended* to the system
                       path when the model is loaded.
    :param mlflow_model: :py:mod:`mlflow.models.Model` this flavor is being added to.
    :param signature: :py:class:`ModelSignature <mlflow.models.ModelSignature>`
                      describes model input and output :py:class:`Schema <mlflow.types.Schema>`.
                      The model signature can be :py:func:`inferred <mlflow.models.infer_signature>`
                      from datasets with valid model input (e.g. the training dataset with target
                      column omitted) and valid model output (e.g. model predictions generated on
                      the training dataset), for example:

                      .. code-block:: python

                        from mlflow.models.signature import infer_signature
                        train = df.drop_column("target_label")
                        predictions = ... # compute model predictions
                        signature = infer_signature(train, predictions)
    :param input_example: Input example provides one or several instances of valid
                          model input. The example can be used as a hint of what data to feed the
                          model. The given example will be converted to a Pandas DataFrame and then
                          serialized to json using the Pandas split-oriented format. Bytes are
                          base64-encoded.
    :param pip_requirements: {{ pip_requirements }}
    :param extra_pip_requirements: {{ extra_pip_requirements }}
    :return: None. All outputs are written as files under ``path``.
    """
    # Imported inside the function; `shap.__version__` is recorded in the flavor below.
    import shap

    # NOTE(review): presumably rejects conflicting combinations of the three environment
    # arguments -- confirm against `_validate_env_arguments`.
    _validate_env_arguments(conda_env, pip_requirements,
                            extra_pip_requirements)

    # NOTE(review): presumably checks/creates the target directory -- confirm against helper.
    _validate_and_prepare_target_save_path(path)
    # The returned sub-path is recorded as `code` in both flavor entries below.
    code_dir_subpath = _validate_and_copy_code_paths(code_paths, path)

    if mlflow_model is None:
        mlflow_model = Model()
    if signature is not None:
        mlflow_model.signature = signature
    if input_example is not None:
        _save_example(mlflow_model, input_example, path)

    # These stay at their initial values unless the underlying model is handled by MLflow below.
    underlying_model_flavor = None
    underlying_model_path = None
    serializable_by_mlflow = False

    # saving the underlying model if required
    if serialize_model_using_mlflow:
        underlying_model_flavor = get_underlying_model_flavor(explainer.model)

        if underlying_model_flavor != _UNKNOWN_MODEL_FLAVOR:
            serializable_by_mlflow = True  # prevents SHAP from serializing the underlying model
            underlying_model_path = os.path.join(path,
                                                 _UNDERLYING_MODEL_SUBPATH)
        else:
            warnings.warn(
                "Unable to serialize underlying model using MLflow, will use SHAP serialization"
            )

        # Only the sklearn and pytorch flavors are saved through MLflow here.
        if underlying_model_flavor == mlflow.sklearn.FLAVOR_NAME:
            mlflow.sklearn.save_model(explainer.model.inner_model.__self__,
                                      underlying_model_path)
        elif underlying_model_flavor == mlflow.pytorch.FLAVOR_NAME:
            mlflow.pytorch.save_model(explainer.model.inner_model,
                                      underlying_model_path)

    # saving the explainer object
    explainer_data_subpath = "explainer.shap"
    explainer_output_path = os.path.join(path, explainer_data_subpath)
    with open(explainer_output_path, "wb") as explainer_output_file_handle:
        if serialize_model_using_mlflow and serializable_by_mlflow:
            # The underlying model was identified as MLflow-serializable above, so SHAP is
            # told not to save it.
            explainer.save(explainer_output_file_handle, model_saver=False)
        else:
            explainer.save(explainer_output_file_handle)

    # Register the pyfunc flavor, pointing at the serialized explainer and the conda env file.
    pyfunc.add_to_model(
        mlflow_model,
        loader_module="mlflow.shap",
        model_path=explainer_data_subpath,
        underlying_model_flavor=underlying_model_flavor,
        env=_CONDA_ENV_FILE_NAME,
        code=code_dir_subpath,
    )

    # Register the SHAP flavor with the installed shap version.
    mlflow_model.add_flavor(
        FLAVOR_NAME,
        shap_version=shap.__version__,
        serialized_explainer=explainer_data_subpath,
        underlying_model_flavor=underlying_model_flavor,
        code=code_dir_subpath,
    )

    # Must happen before the requirement inference below (see the comment there).
    mlflow_model.save(os.path.join(path, MLMODEL_FILE_NAME))

    if conda_env is None:
        if pip_requirements is None:
            default_reqs = get_default_pip_requirements()
            # To ensure `_load_pyfunc` can successfully load the model during the dependency
            # inference, `mlflow_model.save` must be called beforehand to save an MLmodel file.
            inferred_reqs = mlflow.models.infer_pip_requirements(
                path,
                FLAVOR_NAME,
                fallback=default_reqs,
            )
            # Union of inferred and default requirements, de-duplicated and sorted.
            default_reqs = sorted(set(inferred_reqs).union(default_reqs))
        else:
            # Explicit `pip_requirements` were given, so no defaults are computed.
            default_reqs = None
        conda_env, pip_requirements, pip_constraints = _process_pip_requirements(
            default_reqs,
            pip_requirements,
            extra_pip_requirements,
        )
    else:
        conda_env, pip_requirements, pip_constraints = _process_conda_env(
            conda_env)

    # `underlying_model_path` is only set when the underlying model flavor was recognized above.
    if underlying_model_path is not None:
        underlying_model_conda_env = _get_conda_env_for_underlying_model(
            underlying_model_path)
        conda_env = _merge_environments(conda_env, underlying_model_conda_env)
        # Re-derive the pip requirements so the file written below comes from the merged env.
        pip_requirements = _get_pip_deps(conda_env)

    with open(os.path.join(path, _CONDA_ENV_FILE_NAME), "w") as f:
        yaml.safe_dump(conda_env, stream=f, default_flow_style=False)

    # Save `constraints.txt` if necessary
    if pip_constraints:
        write_to(os.path.join(path, _CONSTRAINTS_FILE_NAME),
                 "\n".join(pip_constraints))

    # Save `requirements.txt`
    write_to(os.path.join(path, _REQUIREMENTS_FILE_NAME),
             "\n".join(pip_requirements))

    # NOTE(review): presumably records the current Python environment as yaml -- confirm
    # against `_PythonEnv`.
    _PythonEnv.current().to_yaml(os.path.join(path, _PYTHON_ENV_FILE_NAME))
Example #7
0
def _compare_conda_env_requirements(env_path, req_path):
    assert os.path.exists(req_path)
    custom_env_parsed = _read_yaml(env_path)
    requirements = _read_lines(req_path)
    assert _get_pip_deps(custom_env_parsed) == requirements
Example #8
0
def save_explainer(
    explainer,
    path,
    serialize_model_using_mlflow=True,
    conda_env=None,
    mlflow_model=None,
    signature: ModelSignature = None,
    input_example: ModelInputExample = None,
    pip_requirements=None,
    extra_pip_requirements=None,
):
    """
    Save a SHAP explainer to a path on the local file system. Produces an MLflow Model
    containing the following flavors:

        - :py:mod:`mlflow.shap`
        - :py:mod:`mlflow.pyfunc`

    :param explainer: SHAP explainer to be saved.
    :param path: Local path where the explainer is to be saved. The path must not exist yet;
                 otherwise an ``MlflowException`` is raised.
    :param serialize_model_using_mlflow: When set to True, MLflow will extract the underlying
                                         model and serialize it as an MLmodel, otherwise it
                                         uses SHAP's internal serialization. Defaults to True.
                                         Currently MLflow serialization is only supported for
                                         models of 'sklearn' or 'pytorch' flavors.

    :param conda_env: Either a dictionary representation of a Conda environment or the path to a
                      Conda environment yaml file. If provided, this describes the environment
                      this model should be run in. At minimum, it should specify the dependencies
                      contained in :func:`get_default_conda_env()`. If `None`, the default
                      :func:`get_default_conda_env()` environment is added to the model.
                      The following is an *example* dictionary representation of a Conda
                      environment::

                        {
                            'name': 'mlflow-env',
                            'channels': ['defaults'],
                            'dependencies': [
                                'python=3.6.0',
                                'shap=0.37.0'
                            ]
                        }

    :param mlflow_model: :py:mod:`mlflow.models.Model` this flavor is being added to.
    :param signature: (Experimental) :py:class:`ModelSignature <mlflow.models.ModelSignature>`
                      describes model input and output :py:class:`Schema <mlflow.types.Schema>`.
                      The model signature can be :py:func:`inferred <mlflow.models.infer_signature>`
                      from datasets with valid model input (e.g. the training dataset with target
                      column omitted) and valid model output (e.g. model predictions generated on
                      the training dataset), for example:

                      .. code-block:: python

                        from mlflow.models.signature import infer_signature
                        train = df.drop_column("target_label")
                        predictions = ... # compute model predictions
                        signature = infer_signature(train, predictions)
    :param input_example: (Experimental) Input example provides one or several instances of valid
                          model input. The example can be used as a hint of what data to feed the
                          model. The given example will be converted to a Pandas DataFrame and then
                          serialized to json using the Pandas split-oriented format. Bytes are
                          base64-encoded.
    :param pip_requirements: {{ pip_requirements }}
    :param extra_pip_requirements: {{ extra_pip_requirements }}
    """
    # Imported inside the function; `shap.__version__` is recorded in the flavor below.
    import shap

    _validate_env_arguments(conda_env, pip_requirements, extra_pip_requirements)

    # Refuse to write into an existing location.
    if os.path.exists(path):
        raise MlflowException(
            message="Path '{}' already exists".format(path),
            error_code=RESOURCE_ALREADY_EXISTS,
        )
    os.makedirs(path)

    if mlflow_model is None:
        mlflow_model = Model()
    if signature is not None:
        mlflow_model.signature = signature
    if input_example is not None:
        _save_example(mlflow_model, input_example, path)

    # These stay at their initial values unless the underlying model is handled by MLflow.
    underlying_model_flavor = None
    underlying_model_path = None
    serializable_by_mlflow = False

    # Save the underlying model through MLflow when requested and supported.
    if serialize_model_using_mlflow:
        underlying_model_flavor = get_underlying_model_flavor(explainer.model)

        if underlying_model_flavor == _UNKNOWN_MODEL_FLAVOR:
            warnings.warn(
                "Unable to serialize underlying model using MLflow, will use SHAP serialization"
            )
        else:
            # Prevents SHAP from serializing the underlying model.
            serializable_by_mlflow = True
            underlying_model_path = os.path.join(path, _UNDERLYING_MODEL_SUBPATH)

        if underlying_model_flavor == mlflow.sklearn.FLAVOR_NAME:
            sklearn_model = explainer.model.inner_model.__self__
            mlflow.sklearn.save_model(sklearn_model, underlying_model_path)
        elif underlying_model_flavor == mlflow.pytorch.FLAVOR_NAME:
            pytorch_model = explainer.model.inner_model
            mlflow.pytorch.save_model(pytorch_model, underlying_model_path)

    # Serialize the explainer object itself.
    explainer_data_subpath = "explainer.shap"
    with open(os.path.join(path, explainer_data_subpath), "wb") as explainer_file:
        shap_saves_model = not (serialize_model_using_mlflow and serializable_by_mlflow)
        if shap_saves_model:
            explainer.save(explainer_file)
        else:
            explainer.save(explainer_file, model_saver=False)

    # Resolve the environment either from pip requirements or from the given conda env.
    if conda_env is None:
        resolved_env = _process_pip_requirements(
            get_default_pip_requirements(),
            pip_requirements,
            extra_pip_requirements,
        )
    else:
        resolved_env = _process_conda_env(conda_env)
    conda_env, pip_requirements, pip_constraints = resolved_env

    # Merge in the conda environment generated by serializing the underlying model.
    if underlying_model_path is not None:
        conda_env = _merge_environments(
            conda_env,
            _get_conda_env_for_underlying_model(underlying_model_path),
        )
        pip_requirements = _get_pip_deps(conda_env)

    conda_env_path = os.path.join(path, _CONDA_ENV_FILE_NAME)
    with open(conda_env_path, "w") as conda_env_file:
        yaml.safe_dump(conda_env, stream=conda_env_file, default_flow_style=False)

    # Save `constraints.txt` if necessary
    if pip_constraints:
        constraints_path = os.path.join(path, _CONSTRAINTS_FILE_NAME)
        write_to(constraints_path, "\n".join(pip_constraints))

    # Save `requirements.txt`
    requirements_path = os.path.join(path, _REQUIREMENTS_FILE_NAME)
    write_to(requirements_path, "\n".join(pip_requirements))

    # Register the pyfunc flavor, pointing at the serialized explainer and the conda env file.
    pyfunc.add_to_model(
        mlflow_model,
        loader_module="mlflow.shap",
        model_path=explainer_data_subpath,
        underlying_model_flavor=underlying_model_flavor,
        env=_CONDA_ENV_FILE_NAME,
    )

    # Register the SHAP flavor with the installed shap version.
    mlflow_model.add_flavor(
        FLAVOR_NAME,
        shap_version=shap.__version__,
        serialized_explainer=explainer_data_subpath,
        underlying_model_flavor=underlying_model_flavor,
    )

    mlflow_model.save(os.path.join(path, MLMODEL_FILE_NAME))