def test_process_pip_requirements(tmpdir):
    """
    Exercise `_process_pip_requirements` with default requirements only, with
    `pip_requirements`, with `extra_pip_requirements`, and with a constraints file.
    """

    def _check(result, expected_reqs, expected_cons):
        # The conda env's pip section and the returned requirements must agree.
        env, requirements, constraints = result
        assert _get_pip_deps(env) == expected_reqs
        assert requirements == expected_reqs
        assert constraints == expected_cons

    # Default requirements only
    _check(_process_pip_requirements(["a"]), ["mlflow", "a"], [])

    # `pip_requirements` takes the place of the default requirements
    _check(
        _process_pip_requirements(["a"], pip_requirements=["b"]),
        ["mlflow", "b"],
        [],
    )

    # Ensure a requirement for mlflow is preserved
    _check(
        _process_pip_requirements(["a"], pip_requirements=["mlflow==1.2.3"]),
        ["mlflow==1.2.3"],
        [],
    )

    # `extra_pip_requirements` is appended after the default requirements
    _check(
        _process_pip_requirements(["a"], extra_pip_requirements=["b"]),
        ["mlflow", "a", "b"],
        [],
    )

    # A `-c <path>` entry is rewritten to point at `constraints.txt` and its content is returned
    con_file = tmpdir.join("constraints.txt")
    con_file.write("c")
    _check(
        _process_pip_requirements(["a"], pip_requirements=["b", f"-c {con_file.strpath}"]),
        ["mlflow", "b", "-c constraints.txt"],
        ["c"],
    )
def test_process_conda_env(tmpdir):
    """
    Exercise `_process_conda_env` with a conda env given as a dict, as a YAML file path,
    with a pinned mlflow requirement, with a constraints file, and with an invalid type.
    """

    def _build_env(pip_deps):
        return {
            "name": "mlflow-env",
            "channels": ["conda-forge"],
            "dependencies": ["python=3.7.9", "pip", {"pip": pip_deps}],
        }

    def _check(result, expected_reqs, expected_cons):
        # The conda env's pip section and the returned requirements must agree.
        env, requirements, constraints = result
        assert _get_pip_deps(env) == expected_reqs
        assert requirements == expected_reqs
        assert constraints == expected_cons

    # Conda env passed as a dictionary
    _check(_process_conda_env(_build_env(["a"])), ["mlflow", "a"], [])

    # Conda env passed as a path to a YAML file
    conda_env_file = tmpdir.join("conda_env.yaml")
    conda_env_file.write(yaml.dump(_build_env(["a"])))
    _check(_process_conda_env(conda_env_file.strpath), ["mlflow", "a"], [])

    # Ensure a requirement for mlflow is preserved
    _check(
        _process_conda_env(_build_env(["mlflow==1.2.3"])),
        ["mlflow==1.2.3"],
        [],
    )

    # A `-c <path>` entry is rewritten to point at `constraints.txt` and its content is returned
    con_file = tmpdir.join("constraints.txt")
    con_file.write("c")
    _check(
        _process_conda_env(_build_env(["a", f"-c {con_file.strpath}"])),
        ["mlflow", "a", "-c constraints.txt"],
        ["c"],
    )

    # Anything other than the accepted types is rejected
    with pytest.raises(TypeError, match=r"Expected .+, but got `int`"):
        _process_conda_env(0)
def _assert_pip_requirements(model_uri, requirements, constraints=None, strict=False):
    """
    Download the model at `model_uri` and check the pip requirements stored in both its
    requirements file and its conda environment file against `requirements`. When
    `constraints` is not None, also check the stored constraints file.

    With `strict=True` the expected and loaded sets must be equal; otherwise the expected
    set only needs to be a subset of the loaded one.
    """
    model_dir = _download_artifact_from_uri(model_uri)
    reqs_from_txt = _read_lines(os.path.join(model_dir, _REQUIREMENTS_FILE_NAME))
    reqs_from_conda = _get_pip_deps(_read_yaml(os.path.join(model_dir, _CONDA_ENV_FILE_NAME)))

    def _matches(expected, loaded):
        # Equality in strict mode, subset check otherwise.
        return expected == loaded if strict else expected <= loaded

    expected_reqs = set(requirements)
    assert _matches(expected_reqs, set(reqs_from_txt))
    assert _matches(expected_reqs, set(reqs_from_conda))

    if constraints is None:
        return

    # Both requirement listings must reference the constraints file.
    constraints_entry = f"-c {_CONSTRAINTS_FILE_NAME}"
    assert constraints_entry in reqs_from_txt
    assert constraints_entry in reqs_from_conda
    loaded_constraints = _read_lines(os.path.join(model_dir, _CONSTRAINTS_FILE_NAME))
    assert _matches(set(constraints), set(loaded_constraints))
def _assert_pip_requirements(model_uri, requirements, constraints=None):
    """
    Download the model at `model_uri` and assert that the pip requirements listed in
    `requirements.txt` and in `conda.yaml` both equal `requirements` exactly. When
    `constraints` is truthy, also assert that the constraints file is referenced by both
    listings and that its lines equal `constraints`.
    """
    model_dir = _download_artifact_from_uri(model_uri)
    reqs_from_txt = _read_lines(os.path.join(model_dir, "requirements.txt"))
    reqs_from_conda = _get_pip_deps(_read_yaml(os.path.join(model_dir, "conda.yaml")))

    assert reqs_from_txt == requirements
    assert reqs_from_conda == requirements

    if not constraints:
        return

    constraints_entry = f"-c {_CONSTRAINTS_FILE_NAME}"
    assert constraints_entry in reqs_from_txt
    assert constraints_entry in reqs_from_conda
    loaded_constraints = _read_lines(os.path.join(model_dir, _CONSTRAINTS_FILE_NAME))
    assert loaded_constraints == constraints
def test_process_pip_requirements(tmpdir):
    """
    Exercise `_process_pip_requirements` with default requirements only, with
    `pip_requirements` (including a hash-pinned mlflow requirement), with
    `extra_pip_requirements`, and with a constraints file.
    """

    def _check(result, expected_reqs, expected_cons):
        # The conda env's pip section and the returned requirements must agree.
        env, requirements, constraints = result
        assert _get_pip_deps(env) == expected_reqs
        assert requirements == expected_reqs
        assert constraints == expected_cons

    # Default requirements only
    _check(_process_pip_requirements(["a"]), ["mlflow", "a"], [])

    # `pip_requirements` takes the place of the default requirements
    _check(
        _process_pip_requirements(["a"], pip_requirements=["b"]),
        ["mlflow", "b"],
        [],
    )

    # Ensure a requirement for mlflow is preserved
    _check(
        _process_pip_requirements(["a"], pip_requirements=["mlflow==1.2.3"]),
        ["mlflow==1.2.3"],
        [],
    )

    # Ensure a requirement for mlflow is preserved when package hashes are specified
    hash1 = "sha256:963c22532e82a93450674ab97d62f9e528ed0906b580fadb7c003e696197557c"
    hash2 = "sha256:b15ff0c7e5e64f864a0b40c99b9a582227315eca2065d9f831db9aeb8f24637b"
    hashed_mlflow = f"mlflow==1.20.2 --hash={hash1} --hash={hash2}"
    _check(
        _process_pip_requirements(["a"], pip_requirements=[hashed_mlflow]),
        [hashed_mlflow],
        [],
    )

    # `extra_pip_requirements` is appended after the default requirements
    _check(
        _process_pip_requirements(["a"], extra_pip_requirements=["b"]),
        ["mlflow", "a", "b"],
        [],
    )

    # A `-c <path>` entry is rewritten to point at `constraints.txt` and its content is returned
    con_file = tmpdir.join("constraints.txt")
    con_file.write("c")
    _check(
        _process_pip_requirements(["a"], pip_requirements=["b", f"-c {con_file.strpath}"]),
        ["mlflow", "b", "-c constraints.txt"],
        ["c"],
    )
def save_explainer(
    explainer,
    path,
    serialize_model_using_mlflow=True,
    conda_env=None,
    code_paths=None,
    mlflow_model=None,
    signature: ModelSignature = None,
    input_example: ModelInputExample = None,
    pip_requirements=None,
    extra_pip_requirements=None,
):
    """
    Save a SHAP explainer to a path on the local file system. The saved MLflow Model
    contains the following flavors:

        - :py:mod:`mlflow.shap`
        - :py:mod:`mlflow.pyfunc`

    :param explainer: SHAP explainer to be saved.
    :param path: Local path where the explainer is to be saved.
    :param serialize_model_using_mlflow: When True (the default), MLflow extracts the
                                         underlying model and serializes it as an MLmodel;
                                         otherwise SHAP's internal serialization is used.
                                         MLflow serialization is currently only supported for
                                         models of 'sklearn' or 'pytorch' flavors.
    :param conda_env: {{ conda_env }}
    :param code_paths: A list of local filesystem paths to Python file dependencies (or
                       directories containing file dependencies). These files are *prepended*
                       to the system path when the model is loaded.
    :param mlflow_model: :py:mod:`mlflow.models.Model` this flavor is being added to.
    :param signature: :py:class:`ModelSignature <mlflow.models.ModelSignature>`
                      describes model input and output :py:class:`Schema <mlflow.types.Schema>`.
                      The model signature can be :py:func:`inferred <mlflow.models.infer_signature>`
                      from datasets with valid model input (e.g. the training dataset with target
                      column omitted) and valid model output (e.g. model predictions generated on
                      the training dataset), for example:

                      .. code-block:: python

                        from mlflow.models.signature import infer_signature
                        train = df.drop_column("target_label")
                        predictions = ... # compute model predictions
                        signature = infer_signature(train, predictions)
    :param input_example: Input example provides one or several instances of valid
                          model input. The example can be used as a hint of what data to feed the
                          model. The given example will be converted to a Pandas DataFrame and then
                          serialized to json using the Pandas split-oriented format. Bytes are
                          base64-encoded.
    :param pip_requirements: {{ pip_requirements }}
    :param extra_pip_requirements: {{ extra_pip_requirements }}
    """
    import shap

    _validate_env_arguments(conda_env, pip_requirements, extra_pip_requirements)

    _validate_and_prepare_target_save_path(path)
    code_dir_subpath = _validate_and_copy_code_paths(code_paths, path)

    if mlflow_model is None:
        mlflow_model = Model()
    if signature is not None:
        mlflow_model.signature = signature
    if input_example is not None:
        _save_example(mlflow_model, input_example, path)

    underlying_model_flavor = None
    underlying_model_path = None
    serializable_by_mlflow = False

    # Save the underlying model with MLflow when requested and supported
    if serialize_model_using_mlflow:
        underlying_model_flavor = get_underlying_model_flavor(explainer.model)

        if underlying_model_flavor == _UNKNOWN_MODEL_FLAVOR:
            warnings.warn(
                "Unable to serialize underlying model using MLflow, will use SHAP serialization"
            )
        else:
            # Prevents SHAP from serializing the underlying model
            serializable_by_mlflow = True
            underlying_model_path = os.path.join(path, _UNDERLYING_MODEL_SUBPATH)

        if underlying_model_flavor == mlflow.sklearn.FLAVOR_NAME:
            mlflow.sklearn.save_model(explainer.model.inner_model.__self__, underlying_model_path)
        elif underlying_model_flavor == mlflow.pytorch.FLAVOR_NAME:
            mlflow.pytorch.save_model(explainer.model.inner_model, underlying_model_path)

    # Save the explainer object itself
    explainer_data_subpath = "explainer.shap"
    if serialize_model_using_mlflow and serializable_by_mlflow:
        explainer_save_options = {"model_saver": False}
    else:
        explainer_save_options = {}
    with open(os.path.join(path, explainer_data_subpath), "wb") as explainer_file:
        explainer.save(explainer_file, **explainer_save_options)

    pyfunc.add_to_model(
        mlflow_model,
        loader_module="mlflow.shap",
        model_path=explainer_data_subpath,
        underlying_model_flavor=underlying_model_flavor,
        env=_CONDA_ENV_FILE_NAME,
        code=code_dir_subpath,
    )
    mlflow_model.add_flavor(
        FLAVOR_NAME,
        shap_version=shap.__version__,
        serialized_explainer=explainer_data_subpath,
        underlying_model_flavor=underlying_model_flavor,
        code=code_dir_subpath,
    )
    mlflow_model.save(os.path.join(path, MLMODEL_FILE_NAME))

    if conda_env is not None:
        conda_env, pip_requirements, pip_constraints = _process_conda_env(conda_env)
    else:
        default_reqs = None
        if pip_requirements is None:
            default_reqs = get_default_pip_requirements()
            # The MLmodel file has to exist before this point (hence the
            # `mlflow_model.save` call above) so that `_load_pyfunc` can load the model
            # during the dependency inference.
            inferred_reqs = mlflow.models.infer_pip_requirements(
                path,
                FLAVOR_NAME,
                fallback=default_reqs,
            )
            default_reqs = sorted(set(inferred_reqs).union(default_reqs))
        conda_env, pip_requirements, pip_constraints = _process_pip_requirements(
            default_reqs,
            pip_requirements,
            extra_pip_requirements,
        )

    # Fold in the conda environment produced while saving the underlying model
    if underlying_model_path is not None:
        underlying_model_conda_env = _get_conda_env_for_underlying_model(underlying_model_path)
        conda_env = _merge_environments(conda_env, underlying_model_conda_env)
        pip_requirements = _get_pip_deps(conda_env)

    with open(os.path.join(path, _CONDA_ENV_FILE_NAME), "w") as conda_env_file:
        yaml.safe_dump(conda_env, stream=conda_env_file, default_flow_style=False)

    # Save `constraints.txt` if necessary
    if pip_constraints:
        write_to(os.path.join(path, _CONSTRAINTS_FILE_NAME), "\n".join(pip_constraints))

    # Save `requirements.txt`
    write_to(os.path.join(path, _REQUIREMENTS_FILE_NAME), "\n".join(pip_requirements))

    _PythonEnv.current().to_yaml(os.path.join(path, _PYTHON_ENV_FILE_NAME))
def _compare_conda_env_requirements(env_path, req_path):
    """
    Assert that the requirements file at `req_path` exists and that its lines equal the
    pip dependencies of the conda environment YAML at `env_path`.
    """
    assert os.path.exists(req_path)
    parsed_env = _read_yaml(env_path)
    requirement_lines = _read_lines(req_path)
    pip_deps = _get_pip_deps(parsed_env)
    assert pip_deps == requirement_lines
def save_explainer(
    explainer,
    path,
    serialize_model_using_mlflow=True,
    conda_env=None,
    mlflow_model=None,
    signature: ModelSignature = None,
    input_example: ModelInputExample = None,
    pip_requirements=None,
    extra_pip_requirements=None,
):
    """
    Save a SHAP explainer to a path on the local file system. The saved MLflow Model
    contains the following flavors:

        - :py:mod:`mlflow.shap`
        - :py:mod:`mlflow.pyfunc`

    :param explainer: SHAP explainer to be saved.
    :param path: Local path where the explainer is to be saved.
    :param serialize_model_using_mlflow: When True (the default), MLflow extracts the
                                         underlying model and serializes it as an MLmodel;
                                         otherwise SHAP's internal serialization is used.
                                         MLflow serialization is currently only supported for
                                         models of 'sklearn' or 'pytorch' flavors.
    :param conda_env: Either a dictionary representation of a Conda environment or the path to a
                      Conda environment yaml file. If provided, this describes the environment
                      this model should be run in. At minimum, it should specify the dependencies
                      contained in :func:`get_default_conda_env()`. If `None`, the default
                      :func:`get_default_conda_env()` environment is added to the model.
                      The following is an *example* dictionary representation of a Conda
                      environment::

                        {
                            'name': 'mlflow-env',
                            'channels': ['defaults'],
                            'dependencies': [
                                'python=3.6.0',
                                'shap=0.37.0'
                            ]
                        }
    :param mlflow_model: :py:mod:`mlflow.models.Model` this flavor is being added to.
    :param signature: (Experimental) :py:class:`ModelSignature <mlflow.models.ModelSignature>`
                      describes model input and output :py:class:`Schema <mlflow.types.Schema>`.
                      The model signature can be :py:func:`inferred <mlflow.models.infer_signature>`
                      from datasets with valid model input (e.g. the training dataset with target
                      column omitted) and valid model output (e.g. model predictions generated on
                      the training dataset), for example:

                      .. code-block:: python

                        from mlflow.models.signature import infer_signature
                        train = df.drop_column("target_label")
                        predictions = ... # compute model predictions
                        signature = infer_signature(train, predictions)
    :param input_example: (Experimental) Input example provides one or several instances of valid
                          model input. The example can be used as a hint of what data to feed the
                          model. The given example will be converted to a Pandas DataFrame and then
                          serialized to json using the Pandas split-oriented format. Bytes are
                          base64-encoded.
    :param pip_requirements: {{ pip_requirements }}
    :param extra_pip_requirements: {{ extra_pip_requirements }}
    """
    import shap

    _validate_env_arguments(conda_env, pip_requirements, extra_pip_requirements)

    # Refuse to overwrite an existing location
    if os.path.exists(path):
        raise MlflowException(
            message="Path '{}' already exists".format(path),
            error_code=RESOURCE_ALREADY_EXISTS,
        )
    os.makedirs(path)

    if mlflow_model is None:
        mlflow_model = Model()
    if signature is not None:
        mlflow_model.signature = signature
    if input_example is not None:
        _save_example(mlflow_model, input_example, path)

    underlying_model_flavor = None
    underlying_model_path = None
    serializable_by_mlflow = False

    # Save the underlying model with MLflow when requested and supported
    if serialize_model_using_mlflow:
        underlying_model_flavor = get_underlying_model_flavor(explainer.model)

        if underlying_model_flavor == _UNKNOWN_MODEL_FLAVOR:
            warnings.warn(
                "Unable to serialize underlying model using MLflow, will use SHAP serialization"
            )
        else:
            # Prevents SHAP from serializing the underlying model
            serializable_by_mlflow = True
            underlying_model_path = os.path.join(path, _UNDERLYING_MODEL_SUBPATH)

        if underlying_model_flavor == mlflow.sklearn.FLAVOR_NAME:
            mlflow.sklearn.save_model(explainer.model.inner_model.__self__, underlying_model_path)
        elif underlying_model_flavor == mlflow.pytorch.FLAVOR_NAME:
            mlflow.pytorch.save_model(explainer.model.inner_model, underlying_model_path)

    # Save the explainer object itself
    explainer_data_subpath = "explainer.shap"
    if serialize_model_using_mlflow and serializable_by_mlflow:
        explainer_save_options = {"model_saver": False}
    else:
        explainer_save_options = {}
    with open(os.path.join(path, explainer_data_subpath), "wb") as explainer_file:
        explainer.save(explainer_file, **explainer_save_options)

    if conda_env is None:
        conda_env, pip_requirements, pip_constraints = _process_pip_requirements(
            get_default_pip_requirements(),
            pip_requirements,
            extra_pip_requirements,
        )
    else:
        conda_env, pip_requirements, pip_constraints = _process_conda_env(conda_env)

    # Fold in the conda environment produced while saving the underlying model
    if underlying_model_path is not None:
        underlying_model_conda_env = _get_conda_env_for_underlying_model(underlying_model_path)
        conda_env = _merge_environments(conda_env, underlying_model_conda_env)
        pip_requirements = _get_pip_deps(conda_env)

    with open(os.path.join(path, _CONDA_ENV_FILE_NAME), "w") as conda_env_file:
        yaml.safe_dump(conda_env, stream=conda_env_file, default_flow_style=False)

    # Save `constraints.txt` if necessary
    if pip_constraints:
        write_to(os.path.join(path, _CONSTRAINTS_FILE_NAME), "\n".join(pip_constraints))

    # Save `requirements.txt`
    write_to(os.path.join(path, _REQUIREMENTS_FILE_NAME), "\n".join(pip_requirements))

    pyfunc.add_to_model(
        mlflow_model,
        loader_module="mlflow.shap",
        model_path=explainer_data_subpath,
        underlying_model_flavor=underlying_model_flavor,
        env=_CONDA_ENV_FILE_NAME,
    )
    mlflow_model.add_flavor(
        FLAVOR_NAME,
        shap_version=shap.__version__,
        serialized_explainer=explainer_data_subpath,
        underlying_model_flavor=underlying_model_flavor,
    )
    mlflow_model.save(os.path.join(path, MLMODEL_FILE_NAME))