def save_model(cb_model, path, conda_env=None, mlflow_model=None, signature: ModelSignature = None, input_example: ModelInputExample = None, **kwargs): """ Save a CatBoost model to a path on the local file system. :param cb_model: CatBoost model (an instance of `CatBoost`_, `CatBoostClassifier`_, or `CatBoostRegressor`_) to be saved. :param path: Local path where the model is to be saved. :param conda_env: Either a dictionary representation of a Conda environment or the path to a Conda environment yaml file. If provided, this describes the environment this model should be run in. At minimum, it should specify the dependencies contained in :func:`get_default_conda_env()`. If ``None``, the default :func:`get_default_conda_env()` environment is added to the model. The following is an *example* dictionary representation of a Conda environment:: { 'name': 'mlflow-env', 'channels': ['defaults'], 'dependencies': [ 'python=3.7.0', 'pip': [ 'catboost==0.19.1' ] ] } :param mlflow_model: :py:mod:`mlflow.models.Model` this flavor is being added to. :param signature: (Experimental) :py:class:`ModelSignature <mlflow.models.ModelSignature>` describes model input and output :py:class:`Schema <mlflow.types.Schema>`. The model signature can be :py:func:`inferred <mlflow.models.infer_signature>` from datasets with valid model input (e.g. the training dataset with target column omitted) and valid model output (e.g. model predictions generated on the training dataset), for example: .. code-block:: python from mlflow.models.signature import infer_signature train = df.drop_column("target_label") predictions = ... # compute model predictions signature = infer_signature(train, predictions) :param input_example: (Experimental) Input example provides one or several instances of valid model input. The example can be used as a hint of what data to feed the model. The given example will be converted to a Pandas DataFrame and then serialized to json using the Pandas split-oriented format. Bytes are base64-encoded. :param kwargs: kwargs to pass to `CatBoost.save_model`_ method. """ import catboost as cb path = os.path.abspath(path) if os.path.exists(path): raise MlflowException("Path '{}' already exists".format(path)) os.makedirs(path) if mlflow_model is None: mlflow_model = Model() if signature is not None: mlflow_model.signature = signature if input_example is not None: _save_example(mlflow_model, input_example, path) model_data_path = os.path.join(path, _MODEL_BINARY_FILE_NAME) cb_model.save_model(model_data_path, **kwargs) conda_env_subpath = "conda.yaml" if conda_env is None: conda_env = get_default_conda_env() elif not isinstance(conda_env, dict): with open(conda_env, "r") as f: conda_env = yaml.safe_load(f) with open(os.path.join(path, conda_env_subpath), "w") as f: yaml.safe_dump(conda_env, stream=f, default_flow_style=False) model_bin_kwargs = {_MODEL_BINARY_KEY: _MODEL_BINARY_FILE_NAME} pyfunc.add_to_model( mlflow_model, loader_module="mlflow.catboost", env=conda_env_subpath, **model_bin_kwargs, ) flavor_conf = { _MODEL_TYPE_KEY: cb_model.__class__.__name__, _SAVE_FORMAT_KEY: kwargs.get("format", "cbm"), **model_bin_kwargs, } mlflow_model.add_flavor( FLAVOR_NAME, catboost_version=cb.__version__, **flavor_conf, ) mlflow_model.save(os.path.join(path, MLMODEL_FILE_NAME))
def save_model( tf_saved_model_dir, tf_meta_graph_tags, tf_signature_def_key, path, mlflow_model=None, conda_env=None, signature: ModelSignature = None, input_example: ModelInputExample = None, ): """ Save a *serialized* collection of TensorFlow graphs and variables as an MLflow model to a local path. This method operates on TensorFlow variables and graphs that have been serialized in TensorFlow's ``SavedModel`` format. For more information about ``SavedModel`` format, see the TensorFlow documentation: https://www.tensorflow.org/guide/saved_model#save_and_restore_models. :param tf_saved_model_dir: Path to the directory containing serialized TensorFlow variables and graphs in ``SavedModel`` format. :param tf_meta_graph_tags: A list of tags identifying the model's metagraph within the serialized ``SavedModel`` object. For more information, see the ``tags`` parameter of the ``tf.saved_model.builder.savedmodelbuilder`` method. :param tf_signature_def_key: A string identifying the input/output signature associated with the model. This is a key within the serialized ``savedmodel`` signature definition mapping. For more information, see the ``signature_def_map`` parameter of the ``tf.saved_model.builder.savedmodelbuilder`` method. :param path: Local path where the MLflow model is to be saved. :param mlflow_model: MLflow model configuration to which to add the ``tensorflow`` flavor. :param conda_env: Either a dictionary representation of a Conda environment or the path to a Conda environment yaml file. If provided, this decsribes the environment this model should be run in. At minimum, it should specify the dependencies contained in :func:`get_default_conda_env()`. If ``None``, the default :func:`get_default_conda_env()` environment is added to the model. The following is an *example* dictionary representation of a Conda environment:: { 'name': 'mlflow-env', 'channels': ['defaults'], 'dependencies': [ 'python=3.7.0', 'tensorflow=1.8.0' ] } :param signature: (Experimental) :py:class:`ModelSignature <mlflow.models.ModelSignature>` describes model input and output :py:class:`Schema <mlflow.types.Schema>`. The model signature can be :py:func:`inferred <mlflow.models.infer_signature>` from datasets with valid model input (e.g. the training dataset with target column omitted) and valid model output (e.g. model predictions generated on the training dataset), for example: .. code-block:: python from mlflow.models.signature import infer_signature train = df.drop_column("target_label") predictions = ... # compute model predictions signature = infer_signature(train, predictions) :param input_example: (Experimental) Input example provides one or several instances of valid model input. The example can be used as a hint of what data to feed the model. The given example will be converted to a Pandas DataFrame and then serialized to json using the Pandas split-oriented format. Bytes are base64-encoded. """ _logger.info( "Validating the specified TensorFlow model by attempting to load it in a new TensorFlow" " graph...") _validate_saved_model( tf_saved_model_dir=tf_saved_model_dir, tf_meta_graph_tags=tf_meta_graph_tags, tf_signature_def_key=tf_signature_def_key, ) _logger.info("Validation succeeded!") if os.path.exists(path): raise MlflowException("Path '{}' already exists".format(path), DIRECTORY_NOT_EMPTY) os.makedirs(path) if mlflow_model is None: mlflow_model = Model() if signature is not None: mlflow_model.signature = signature if input_example is not None: _save_example(mlflow_model, input_example, path) root_relative_path = _copy_file_or_tree(src=tf_saved_model_dir, dst=path, dst_dir=None) model_dir_subpath = "tfmodel" shutil.move(os.path.join(path, root_relative_path), os.path.join(path, model_dir_subpath)) conda_env_subpath = "conda.yaml" if conda_env is None: conda_env = get_default_conda_env() elif not isinstance(conda_env, dict): with open(conda_env, "r") as f: conda_env = yaml.safe_load(f) with open(os.path.join(path, conda_env_subpath), "w") as f: yaml.safe_dump(conda_env, stream=f, default_flow_style=False) mlflow_model.add_flavor( FLAVOR_NAME, saved_model_dir=model_dir_subpath, meta_graph_tags=tf_meta_graph_tags, signature_def_key=tf_signature_def_key, ) pyfunc.add_to_model(mlflow_model, loader_module="mlflow.tensorflow", env=conda_env_subpath) mlflow_model.save(os.path.join(path, MLMODEL_FILE_NAME))
def save_model( lgb_model, path, conda_env=None, mlflow_model=None, signature: ModelSignature = None, input_example: ModelInputExample = None, ): """ Save a LightGBM model to a path on the local file system. :param lgb_model: LightGBM model (an instance of `lightgbm.Booster`_) to be saved. Note that models that implement the `scikit-learn API`_ are not supported. :param path: Local path where the model is to be saved. :param conda_env: Either a dictionary representation of a Conda environment or the path to a Conda environment yaml file. If provided, this describes the environment this model should be run in. At minimum, it should specify the dependencies contained in :func:`get_default_conda_env()`. If ``None``, the default :func:`get_default_conda_env()` environment is added to the model. The following is an *example* dictionary representation of a Conda environment:: { 'name': 'mlflow-env', 'channels': ['defaults'], 'dependencies': [ 'python=3.7.0', 'pip': [ 'lightgbm==2.3.0' ] ] } :param mlflow_model: :py:mod:`mlflow.models.Model` this flavor is being added to. :param signature: (Experimental) :py:class:`ModelSignature <mlflow.models.ModelSignature>` describes model input and output :py:class:`Schema <mlflow.types.Schema>`. The model signature can be :py:func:`inferred <mlflow.models.infer_signature>` from datasets with valid model input (e.g. the training dataset with target column omitted) and valid model output (e.g. model predictions generated on the training dataset), for example: .. code-block:: python from mlflow.models.signature import infer_signature train = df.drop_column("target_label") predictions = ... # compute model predictions signature = infer_signature(train, predictions) :param input_example: (Experimental) Input example provides one or several instances of valid model input. The example can be used as a hint of what data to feed the model. The given example will be converted to a Pandas DataFrame and then serialized to json using the Pandas split-oriented format. Bytes are base64-encoded. """ import lightgbm as lgb path = os.path.abspath(path) if os.path.exists(path): raise MlflowException("Path '{}' already exists".format(path)) model_data_subpath = "model.lgb" model_data_path = os.path.join(path, model_data_subpath) os.makedirs(path) if mlflow_model is None: mlflow_model = Model() if signature is not None: mlflow_model.signature = signature if input_example is not None: _save_example(mlflow_model, input_example, path) # Save a LightGBM model lgb_model.save_model(model_data_path) conda_env_subpath = "conda.yaml" if conda_env is None: conda_env = get_default_conda_env() elif not isinstance(conda_env, dict): with open(conda_env, "r") as f: conda_env = yaml.safe_load(f) with open(os.path.join(path, conda_env_subpath), "w") as f: yaml.safe_dump(conda_env, stream=f, default_flow_style=False) pyfunc.add_to_model( mlflow_model, loader_module="mlflow.lightgbm", data=model_data_subpath, env=conda_env_subpath, ) mlflow_model.add_flavor(FLAVOR_NAME, lgb_version=lgb.__version__, data=model_data_subpath) mlflow_model.save(os.path.join(path, MLMODEL_FILE_NAME))
def save_model( pd_model, path, training=False, conda_env=None, mlflow_model=None, signature: ModelSignature = None, input_example: ModelInputExample = None, pip_requirements=None, extra_pip_requirements=None, ): """ Save a paddle model to a path on the local file system. Produces an MLflow Model containing the following flavors: - :py:mod:`mlflow.paddle` - :py:mod:`mlflow.pyfunc`. NOTE: This flavor is only included for paddle models that define `predict()`, since `predict()` is required for pyfunc model inference. :param pd_model: paddle model to be saved. :param path: Local path where the model is to be saved. :param training: Only valid when saving a model trained using the PaddlePaddle high level API. If set to True, the saved model supports both re-training and inference. If set to False, it only supports inference. :param conda_env: {{ conda_env }} :param mlflow_model: :py:mod:`mlflow.models.Model` this flavor is being added to. :param signature: :py:class:`ModelSignature <mlflow.models.ModelSignature>` describes model input and output :py:class:`Schema <mlflow.types.Schema>`. The model signature can be :py:func:`inferred <mlflow.models.infer_signature>` from datasets with valid model input (e.g. the training dataset with target column omitted) and valid model output (e.g. model predictions generated on the training dataset), for example: .. code-block:: python from mlflow.models.signature import infer_signature train = df.drop_column("target_label") predictions = ... # compute model predictions signature = infer_signature(train, predictions) :param input_example: Input example provides one or several instances of valid model input. The example can be used as a hint of what data to feed the model. The given example will be converted to a Pandas DataFrame and then serialized to json using the Pandas split-oriented format. Bytes are base64-encoded. :param pip_requirements: {{ pip_requirements }} :param extra_pip_requirements: {{ extra_pip_requirements }} .. code-block:: python :caption: Example import mlflow.paddle import paddle from paddle.nn import Linear import paddle.nn.functional as F import numpy as np import os import random from sklearn.datasets import load_boston from sklearn.model_selection import train_test_split from sklearn import preprocessing def load_data(): # dataset on boston housing prediction X, y = load_boston(return_X_y=True) min_max_scaler = preprocessing.MinMaxScaler() X_min_max = min_max_scaler.fit_transform(X) X_normalized = preprocessing.scale(X_min_max, with_std=False) X_train, X_test, y_train, y_test = train_test_split( X_normalized, y, test_size=0.2, random_state=42) y_train = y_train.reshape(-1, 1) y_test = y_test.reshape(-1, 1) return np.concatenate( (X_train, y_train), axis=1),np.concatenate((X_test, y_test), axis=1 ) class Regressor(paddle.nn.Layer): def __init__(self): super(Regressor, self).__init__() self.fc = Linear(in_features=13, out_features=1) @paddle.jit.to_static def forward(self, inputs): x = self.fc(inputs) return x model = Regressor() model.train() training_data, test_data = load_data() opt = paddle.optimizer.SGD(learning_rate=0.01, parameters=model.parameters()) EPOCH_NUM = 10 BATCH_SIZE = 10 for epoch_id in range(EPOCH_NUM): np.random.shuffle(training_data) mini_batches = [training_data[k : k + BATCH_SIZE] for k in range(0, len(training_data), BATCH_SIZE)] for iter_id, mini_batch in enumerate(mini_batches): x = np.array(mini_batch[:, :-1]).astype('float32') y = np.array(mini_batch[:, -1:]).astype('float32') house_features = paddle.to_tensor(x) prices = paddle.to_tensor(y) predicts = model(house_features) loss = F.square_error_cost(predicts, label=prices) avg_loss = paddle.mean(loss) if iter_id%20==0: print("epoch: {}, iter: {}, loss is: {}".format( epoch_id, iter_id, avg_loss.numpy())) avg_loss.backward() opt.step() opt.clear_grad() mlflow.log_param('learning_rate', 0.01) mlflow.paddle.log_model(model, "model") sk_path_dir = './test-out' mlflow.paddle.save_model(model, sk_path_dir) print("Model saved in run %s" % mlflow.active_run().info.run_uuid) """ import paddle _validate_env_arguments(conda_env, pip_requirements, extra_pip_requirements) if os.path.exists(path): raise MlflowException(message="Path '{}' already exists".format(path), error_code=RESOURCE_ALREADY_EXISTS) os.makedirs(path) if mlflow_model is None: mlflow_model = Model() if signature is not None: mlflow_model.signature = signature if input_example is not None: _save_example(mlflow_model, input_example, path) model_data_subpath = "model" output_path = os.path.join(path, model_data_subpath) if isinstance(pd_model, paddle.Model): pd_model.save(output_path, training=training) else: paddle.jit.save(pd_model, output_path) # `PyFuncModel` only works for paddle models that define `predict()`. pyfunc.add_to_model( mlflow_model, loader_module="mlflow.paddle", model_path=model_data_subpath, env=_CONDA_ENV_FILE_NAME, ) mlflow_model.add_flavor( FLAVOR_NAME, pickled_model=model_data_subpath, paddle_version=paddle.__version__, ) mlflow_model.save(os.path.join(path, MLMODEL_FILE_NAME)) if conda_env is None: if pip_requirements is None: default_reqs = get_default_pip_requirements() # To ensure `_load_pyfunc` can successfully load the model during the dependency # inference, `mlflow_model.save` must be called beforehand to save an MLmodel file. inferred_reqs = mlflow.models.infer_pip_requirements( path, FLAVOR_NAME, fallback=default_reqs, ) default_reqs = sorted(set(inferred_reqs).union(default_reqs)) else: default_reqs = None conda_env, pip_requirements, pip_constraints = _process_pip_requirements( default_reqs, pip_requirements, extra_pip_requirements, ) else: conda_env, pip_requirements, pip_constraints = _process_conda_env( conda_env) with open(os.path.join(path, _CONDA_ENV_FILE_NAME), "w") as f: yaml.safe_dump(conda_env, stream=f, default_flow_style=False) # Save `constraints.txt` if necessary if pip_constraints: write_to(os.path.join(path, _CONSTRAINTS_FILE_NAME), "\n".join(pip_constraints)) # Save `requirements.txt` write_to(os.path.join(path, _REQUIREMENTS_FILE_NAME), "\n".join(pip_requirements))
def save_model(pytorch_model, path, conda_env=None, mlflow_model=None, code_paths=None, pickle_module=None, signature: ModelSignature=None, input_example: ModelInputExample=None, **kwargs): """ Save a PyTorch model to a path on the local file system. :param pytorch_model: PyTorch model to be saved. Must accept a single ``torch.FloatTensor`` as input and produce a single output tensor. Any code dependencies of the model's class, including the class definition itself, should be included in one of the following locations: - The package(s) listed in the model's Conda environment, specified by the ``conda_env`` parameter. - One or more of the files specified by the ``code_paths`` parameter. :param path: Local path where the model is to be saved. :param conda_env: Either a dictionary representation of a Conda environment or the path to a Conda environment yaml file. If provided, this decsribes the environment this model should be run in. At minimum, it should specify the dependencies contained in :func:`get_default_conda_env()`. If ``None``, the default :func:`get_default_conda_env()` environment is added to the model. The following is an *example* dictionary representation of a Conda environment:: { 'name': 'mlflow-env', 'channels': ['defaults'], 'dependencies': [ 'python=3.7.0', 'pytorch=0.4.1', 'torchvision=0.2.1' ] } :param mlflow_model: :py:mod:`mlflow.models.Model` this flavor is being added to. :param code_paths: A list of local filesystem paths to Python file dependencies (or directories containing file dependencies). These files are *prepended* to the system path when the model is loaded. :param pickle_module: The module that PyTorch should use to serialize ("pickle") the specified ``pytorch_model``. This is passed as the ``pickle_module`` parameter to ``torch.save()``. By default, this module is also used to deserialize ("unpickle") the PyTorch model at load time. :param signature: (Experimental) :py:class:`ModelSignature <mlflow.models.ModelSignature>` describes model input and output :py:class:`Schema <mlflow.types.Schema>`. The model signature can be :py:func:`inferred <mlflow.models.infer_signature>` from datasets with valid model input (e.g. the training dataset with target column omitted) and valid model output (e.g. model predictions generated on the training dataset), for example: .. code-block:: python from mlflow.models.signature import infer_signature train = df.drop_column("target_label") predictions = ... # compute model predictions signature = infer_signature(train, predictions) :param input_example: (Experimental) Input example provides one or several instances of valid model input. The example can be used as a hint of what data to feed the model. The given example will be converted to a Pandas DataFrame and then serialized to json using the Pandas split-oriented format. Bytes are base64-encoded. :param kwargs: kwargs to pass to ``torch.save`` method. .. code-block:: python :caption: Example import torch import mlflow import mlflow.pytorch # Create model and set values pytorch_model = Model() pytorch_model_path = ... # train our model for epoch in range(500): y_pred = pytorch_model(x_data) ... # Save the model with mlflow.start_run() as run: mlflow.log_param("epochs", 500) mlflow.pytorch.save_model(pytorch_model, pytorch_model_path) """ import torch pickle_module = pickle_module or mlflow_pytorch_pickle_module if not isinstance(pytorch_model, torch.nn.Module): raise TypeError("Argument 'pytorch_model' should be a torch.nn.Module") if code_paths is not None: if not isinstance(code_paths, list): raise TypeError('Argument code_paths should be a list, not {}'.format(type(code_paths))) path = os.path.abspath(path) if os.path.exists(path): raise RuntimeError("Path '{}' already exists".format(path)) if mlflow_model is None: mlflow_model = Model() os.makedirs(path) if signature is not None: mlflow_model.signature = signature if input_example is not None: _save_example(mlflow_model, input_example, path) model_data_subpath = "data" model_data_path = os.path.join(path, model_data_subpath) os.makedirs(model_data_path) # Persist the pickle module name as a file in the model's `data` directory. This is necessary # because the `data` directory is the only available parameter to `_load_pyfunc`, and it # does not contain the MLmodel configuration; therefore, it is not sufficient to place # the module name in the MLmodel # # TODO: Stop persisting this information to the filesystem once we have a mechanism for # supplying the MLmodel configuration to `mlflow.pytorch._load_pyfunc` pickle_module_path = os.path.join(model_data_path, _PICKLE_MODULE_INFO_FILE_NAME) with open(pickle_module_path, "w") as f: f.write(pickle_module.__name__) # Save pytorch model model_path = os.path.join(model_data_path, _SERIALIZED_TORCH_MODEL_FILE_NAME) torch.save(pytorch_model, model_path, pickle_module=pickle_module, **kwargs) conda_env_subpath = "conda.yaml" if conda_env is None: conda_env = get_default_conda_env() elif not isinstance(conda_env, dict): with open(conda_env, "r") as f: conda_env = yaml.safe_load(f) with open(os.path.join(path, conda_env_subpath), "w") as f: yaml.safe_dump(conda_env, stream=f, default_flow_style=False) if code_paths is not None: code_dir_subpath = "code" for code_path in code_paths: _copy_file_or_tree(src=code_path, dst=path, dst_dir=code_dir_subpath) else: code_dir_subpath = None mlflow_model.add_flavor( FLAVOR_NAME, model_data=model_data_subpath, pytorch_version=torch.__version__) pyfunc.add_to_model(mlflow_model, loader_module="mlflow.pytorch", data=model_data_subpath, pickle_module_name=pickle_module.__name__, code=code_dir_subpath, env=conda_env_subpath) mlflow_model.save(os.path.join(path, MLMODEL_FILE_NAME))
def save_explainer( explainer, path, serialize_model_using_mlflow=True, conda_env=None, code_paths=None, mlflow_model=None, signature: ModelSignature = None, input_example: ModelInputExample = None, pip_requirements=None, extra_pip_requirements=None, ): """ Save a SHAP explainer to a path on the local file system. Produces an MLflow Model containing the following flavors: - :py:mod:`mlflow.shap` - :py:mod:`mlflow.pyfunc` :param explainer: SHAP explainer to be saved. :param path: Local path where the explainer is to be saved. :param serialize_model_using_mlflow: When set to True, MLflow will extract the underlying model and serialize it as an MLmodel, otherwise it uses SHAP's internal serialization. Defaults to True. Currently MLflow serialization is only supported for models of 'sklearn' or 'pytorch' flavors. :param conda_env: {{ conda_env }} :param code_paths: A list of local filesystem paths to Python file dependencies (or directories containing file dependencies). These files are *prepended* to the system path when the model is loaded. :param mlflow_model: :py:mod:`mlflow.models.Model` this flavor is being added to. :param signature: :py:class:`ModelSignature <mlflow.models.ModelSignature>` describes model input and output :py:class:`Schema <mlflow.types.Schema>`. The model signature can be :py:func:`inferred <mlflow.models.infer_signature>` from datasets with valid model input (e.g. the training dataset with target column omitted) and valid model output (e.g. model predictions generated on the training dataset), for example: .. code-block:: python from mlflow.models.signature import infer_signature train = df.drop_column("target_label") predictions = ... # compute model predictions signature = infer_signature(train, predictions) :param input_example: Input example provides one or several instances of valid model input. The example can be used as a hint of what data to feed the model. The given example will be converted to a Pandas DataFrame and then serialized to json using the Pandas split-oriented format. Bytes are base64-encoded. :param pip_requirements: {{ pip_requirements }} :param extra_pip_requirements: {{ extra_pip_requirements }} """ import shap _validate_env_arguments(conda_env, pip_requirements, extra_pip_requirements) if os.path.exists(path): raise MlflowException( message="Path '{}' already exists".format(path), error_code=RESOURCE_ALREADY_EXISTS, ) os.makedirs(path) code_dir_subpath = _validate_and_copy_code_paths(code_paths, path) if mlflow_model is None: mlflow_model = Model() if signature is not None: mlflow_model.signature = signature if input_example is not None: _save_example(mlflow_model, input_example, path) underlying_model_flavor = None underlying_model_path = None serializable_by_mlflow = False # saving the underlying model if required if serialize_model_using_mlflow: underlying_model_flavor = get_underlying_model_flavor(explainer.model) if underlying_model_flavor != _UNKNOWN_MODEL_FLAVOR: serializable_by_mlflow = True # prevents SHAP from serializing the underlying model underlying_model_path = os.path.join(path, _UNDERLYING_MODEL_SUBPATH) else: warnings.warn( "Unable to serialize underlying model using MLflow, will use SHAP serialization" ) if underlying_model_flavor == mlflow.sklearn.FLAVOR_NAME: mlflow.sklearn.save_model(explainer.model.inner_model.__self__, underlying_model_path) elif underlying_model_flavor == mlflow.pytorch.FLAVOR_NAME: mlflow.pytorch.save_model(explainer.model.inner_model, underlying_model_path) # saving the explainer object explainer_data_subpath = "explainer.shap" explainer_output_path = os.path.join(path, explainer_data_subpath) with open(explainer_output_path, "wb") as explainer_output_file_handle: if serialize_model_using_mlflow and serializable_by_mlflow: explainer.save(explainer_output_file_handle, model_saver=False) else: explainer.save(explainer_output_file_handle) pyfunc.add_to_model( mlflow_model, loader_module="mlflow.shap", model_path=explainer_data_subpath, underlying_model_flavor=underlying_model_flavor, env=_CONDA_ENV_FILE_NAME, code=code_dir_subpath, ) mlflow_model.add_flavor( FLAVOR_NAME, shap_version=shap.__version__, serialized_explainer=explainer_data_subpath, underlying_model_flavor=underlying_model_flavor, code=code_dir_subpath, ) mlflow_model.save(os.path.join(path, MLMODEL_FILE_NAME)) if conda_env is None: if pip_requirements is None: default_reqs = get_default_pip_requirements() # To ensure `_load_pyfunc` can successfully load the model during the dependency # inference, `mlflow_model.save` must be called beforehand to save an MLmodel file. inferred_reqs = mlflow.models.infer_pip_requirements( path, FLAVOR_NAME, fallback=default_reqs, ) default_reqs = sorted(set(inferred_reqs).union(default_reqs)) else: default_reqs = None conda_env, pip_requirements, pip_constraints = _process_pip_requirements( default_reqs, pip_requirements, extra_pip_requirements, ) else: conda_env, pip_requirements, pip_constraints = _process_conda_env( conda_env) if underlying_model_path is not None: underlying_model_conda_env = _get_conda_env_for_underlying_model( underlying_model_path) conda_env = _merge_environments(conda_env, underlying_model_conda_env) pip_requirements = _get_pip_deps(conda_env) with open(os.path.join(path, _CONDA_ENV_FILE_NAME), "w") as f: yaml.safe_dump(conda_env, stream=f, default_flow_style=False) # Save `constraints.txt` if necessary if pip_constraints: write_to(os.path.join(path, _CONSTRAINTS_FILE_NAME), "\n".join(pip_constraints)) # Save `requirements.txt` write_to(os.path.join(path, _REQUIREMENTS_FILE_NAME), "\n".join(pip_requirements))
def save_model( keras_model, path, conda_env=None, mlflow_model=None, custom_objects=None, keras_module=None, signature: ModelSignature = None, input_example: ModelInputExample = None, **kwargs ): """ Save a Keras model to a path on the local file system. :param keras_model: Keras model to be saved. :param path: Local path where the model is to be saved. :param conda_env: Either a dictionary representation of a Conda environment or the path to a Conda environment yaml file. If provided, this decsribes the environment this model should be run in. At minimum, it should specify the dependencies contained in :func:`get_default_conda_env()`. If ``None``, the default :func:`get_default_conda_env()` environment is added to the model. The following is an *example* dictionary representation of a Conda environment:: { 'name': 'mlflow-env', 'channels': ['defaults'], 'dependencies': [ 'python=3.7.0', 'keras=2.2.4', 'tensorflow=1.8.0' ] } :param mlflow_model: MLflow model config this flavor is being added to. :param custom_objects: A Keras ``custom_objects`` dictionary mapping names (strings) to custom classes or functions associated with the Keras model. MLflow saves these custom layers using CloudPickle and restores them automatically when the model is loaded with :py:func:`mlflow.keras.load_model` and :py:func:`mlflow.pyfunc.load_model`. :param keras_module: Keras module to be used to save / load the model (``keras`` or ``tf.keras``). If not provided, MLflow will attempt to infer the Keras module based on the given model. :param kwargs: kwargs to pass to ``keras_model.save`` method. :param signature: (Experimental) :py:class:`ModelSignature <mlflow.models.ModelSignature>` describes model input and output :py:class:`Schema <mlflow.types.Schema>`. The model signature can be :py:func:`inferred <mlflow.models.infer_signature>` from datasets with valid model input (e.g. the training dataset with target column omitted) and valid model output (e.g. model predictions generated on the training dataset), for example: .. code-block:: python from mlflow.models.signature import infer_signature train = df.drop_column("target_label") predictions = ... # compute model predictions signature = infer_signature(train, predictions) :param input_example: (Experimental) Input example provides one or several instances of valid model input. The example can be used as a hint of what data to feed the model. The given example will be converted to a Pandas DataFrame and then serialized to json using the Pandas split-oriented format. Bytes are base64-encoded. .. code-block:: python :caption: Example import mlflow # Build, compile, and train your model keras_model = ... keras_model_path = ... keras_model.compile(optimizer="rmsprop", loss="mse", metrics=["accuracy"]) results = keras_model.fit( x_train, y_train, epochs=20, batch_size = 128, validation_data=(x_val, y_val)) # Save the model as an MLflow Model mlflow.keras.save_model(keras_model, keras_model_path) """ if keras_module is None: def _is_plain_keras(model): try: # NB: Network is the first parent with save method import keras.engine.network return isinstance(model, keras.engine.network.Network) except ImportError: return False def _is_tf_keras(model): try: # NB: Network is not exposed in tf.keras, we check for Model instead. import tensorflow.keras.models return isinstance(model, tensorflow.keras.models.Model) except ImportError: return False if _is_plain_keras(keras_model): keras_module = importlib.import_module("keras") elif _is_tf_keras(keras_model): keras_module = importlib.import_module("tensorflow.keras") else: raise MlflowException( "Unable to infer keras module from the model, please specify " "which keras module ('keras' or 'tensorflow.keras') is to be " "used to save and load the model." ) elif type(keras_module) == str: keras_module = importlib.import_module(keras_module) # check if path exists path = os.path.abspath(path) if os.path.exists(path): raise MlflowException("Path '{}' already exists".format(path)) # construct new data folder in existing path data_subpath = "data" data_path = os.path.join(path, data_subpath) os.makedirs(data_path) if mlflow_model is None: mlflow_model = Model() if signature is not None: mlflow_model.signature = signature if input_example is not None: _save_example(mlflow_model, input_example, path) # save custom objects if there are custom objects if custom_objects is not None: _save_custom_objects(data_path, custom_objects) # save keras module spec to path/data/keras_module.txt with open(os.path.join(data_path, _KERAS_MODULE_SPEC_PATH), "w") as f: f.write(keras_module.__name__) # By default, Keras uses the SavedModel format -- specified by "tf" # However, we choose to align with prior default of mlflow, HDF5 save_format = kwargs.get("save_format", "h5") # save keras save_format to path/data/save_format.txt with open(os.path.join(data_path, _KERAS_SAVE_FORMAT_PATH), "w") as f: f.write(save_format) # save keras model # To maintain prior behavior, when the format is HDF5, we save # with the h5 file extension. Otherwise, model_path is a directory # where the saved_model.pb will be stored (for SavedModel format) file_extension = ".h5" if save_format == "h5" else "" model_subpath = os.path.join(data_subpath, _MODEL_SAVE_PATH) model_path = os.path.join(path, model_subpath) + file_extension if path.startswith("/dbfs/"): # The Databricks Filesystem uses a FUSE implementation that does not support # random writes. It causes an error. with tempfile.NamedTemporaryFile(suffix=".h5") as f: keras_model.save(f.name, **kwargs) f.flush() # force flush the data shutil.copyfile(src=f.name, dst=model_path) else: keras_model.save(model_path, **kwargs) # update flavor info to mlflow_model mlflow_model.add_flavor( FLAVOR_NAME, keras_module=keras_module.__name__, keras_version=keras_module.__version__, save_format=save_format, data=data_subpath, ) # save conda.yaml info to path/conda.yml if conda_env is None: conda_env = get_default_conda_env( include_cloudpickle=custom_objects is not None, keras_module=keras_module ) elif not isinstance(conda_env, dict): with open(conda_env, "r") as f: conda_env = yaml.safe_load(f) with open(os.path.join(path, _CONDA_ENV_SUBPATH), "w") as f: yaml.safe_dump(conda_env, stream=f, default_flow_style=False) # append loader_module, data and env data to mlflow_model pyfunc.add_to_model( mlflow_model, loader_module="mlflow.keras", data=data_subpath, env=_CONDA_ENV_SUBPATH ) # save mlflow_model to path/MLmodel mlflow_model.save(os.path.join(path, MLMODEL_FILE_NAME))
def save_model( h2o_model, path, conda_env=None, mlflow_model=None, settings=None, signature: ModelSignature = None, input_example: ModelInputExample = None, pip_requirements=None, extra_pip_requirements=None, ): """ Save an H2O model to a path on the local file system. :param h2o_model: H2O model to be saved. :param path: Local path where the model is to be saved. :param conda_env: {{ conda_env }} :param signature: :py:class:`ModelSignature <mlflow.models.ModelSignature>` describes model input and output :py:class:`Schema <mlflow.types.Schema>`. The model signature can be :py:func:`inferred <mlflow.models.infer_signature>` from datasets with valid model input (e.g. the training dataset with target column omitted) and valid model output (e.g. model predictions generated on the training dataset), for example: .. code-block:: python from mlflow.models.signature import infer_signature train = df.drop_column("target_label") predictions = ... # compute model predictions signature = infer_signature(train, predictions) :param input_example: Input example provides one or several instances of valid model input. The example can be used as a hint of what data to feed the model. The given example will be converted to a Pandas DataFrame and then serialized to json using the Pandas split-oriented format. Bytes are base64-encoded. :param mlflow_model: :py:mod:`mlflow.models.Model` this flavor is being added to. """ import h2o _validate_env_arguments(conda_env, pip_requirements, extra_pip_requirements) path = os.path.abspath(path) if os.path.exists(path): raise Exception("Path '{}' already exists".format(path)) model_data_subpath = "model.h2o" model_data_path = os.path.join(path, model_data_subpath) os.makedirs(model_data_path) if mlflow_model is None: mlflow_model = Model() if signature is not None: mlflow_model.signature = signature if input_example is not None: _save_example(mlflow_model, input_example, path) # Save h2o-model if hasattr(h2o, "download_model"): h2o_save_location = h2o.download_model(model=h2o_model, path=model_data_path) else: warnings.warn( "If your cluster is remote, H2O may not store the model correctly. " "Please upgrade H2O version to a newer version") h2o_save_location = h2o.save_model(model=h2o_model, path=model_data_path, force=True) model_file = os.path.basename(h2o_save_location) # Save h2o-settings if settings is None: settings = {} settings["full_file"] = h2o_save_location settings["model_file"] = model_file settings["model_dir"] = model_data_path with open(os.path.join(model_data_path, "h2o.yaml"), "w") as settings_file: yaml.safe_dump(settings, stream=settings_file) pyfunc.add_to_model(mlflow_model, loader_module="mlflow.h2o", data=model_data_subpath, env=_CONDA_ENV_FILE_NAME) mlflow_model.add_flavor(FLAVOR_NAME, h2o_version=h2o.__version__, data=model_data_subpath) mlflow_model.save(os.path.join(path, MLMODEL_FILE_NAME)) if conda_env is None: if pip_requirements is None: default_reqs = get_default_pip_requirements() # To ensure `_load_pyfunc` can successfully load the model during the dependency # inference, `mlflow_model.save` must be called beforehand to save an MLmodel file. inferred_reqs = mlflow.models.infer_pip_requirements( path, FLAVOR_NAME, fallback=default_reqs, ) default_reqs = sorted(set(inferred_reqs).union(default_reqs)) else: default_reqs = None conda_env, pip_requirements, pip_constraints = _process_pip_requirements( default_reqs, pip_requirements, extra_pip_requirements, ) else: conda_env, pip_requirements, pip_constraints = _process_conda_env( conda_env) with open(os.path.join(path, _CONDA_ENV_FILE_NAME), "w") as f: yaml.safe_dump(conda_env, stream=f, default_flow_style=False) # Save `constraints.txt` if necessary if pip_constraints: write_to(os.path.join(path, _CONSTRAINTS_FILE_NAME), "\n".join(pip_constraints)) # Save `requirements.txt` write_to(os.path.join(path, _REQUIREMENTS_FILE_NAME), "\n".join(pip_requirements))
def save_model( keras_model, path, conda_env=None, code_paths=None, mlflow_model=None, custom_objects=None, keras_module=None, signature: ModelSignature = None, input_example: ModelInputExample = None, pip_requirements=None, extra_pip_requirements=None, **kwargs, ): """ Save a Keras model to a path on the local file system. :param keras_model: Keras model to be saved. :param path: Local path where the model is to be saved. :param conda_env: {{ conda_env }} :param code_paths: A list of local filesystem paths to Python file dependencies (or directories containing file dependencies). These files are *prepended* to the system path when the model is loaded. :param mlflow_model: MLflow model config this flavor is being added to. :param custom_objects: A Keras ``custom_objects`` dictionary mapping names (strings) to custom classes or functions associated with the Keras model. MLflow saves these custom layers using CloudPickle and restores them automatically when the model is loaded with :py:func:`mlflow.keras.load_model` and :py:func:`mlflow.pyfunc.load_model`. :param keras_module: Keras module to be used to save / load the model (``keras`` or ``tf.keras``). If not provided, MLflow will attempt to infer the Keras module based on the given model. :param kwargs: kwargs to pass to ``keras_model.save`` method. :param signature: :py:class:`ModelSignature <mlflow.models.ModelSignature>` describes model input and output :py:class:`Schema <mlflow.types.Schema>`. The model signature can be :py:func:`inferred <mlflow.models.infer_signature>` from datasets with valid model input (e.g. the training dataset with target column omitted) and valid model output (e.g. model predictions generated on the training dataset), for example: .. code-block:: python from mlflow.models.signature import infer_signature train = df.drop_column("target_label") predictions = ... # compute model predictions signature = infer_signature(train, predictions) :param input_example: Input example provides one or several instances of valid model input. The example can be used as a hint of what data to feed the model. The given example can be a Pandas DataFrame where the given example will be serialized to json using the Pandas split-oriented format, or a numpy array where the example will be serialized to json by converting it to a list. Bytes are base64-encoded. :param pip_requirements: {{ pip_requirements }} :param extra_pip_requirements: {{ extra_pip_requirements }} .. code-block:: python :caption: Example import mlflow # Build, compile, and train your model keras_model = ... keras_model_path = ... keras_model.compile(optimizer="rmsprop", loss="mse", metrics=["accuracy"]) results = keras_model.fit( x_train, y_train, epochs=20, batch_size = 128, validation_data=(x_val, y_val)) # Save the model as an MLflow Model mlflow.keras.save_model(keras_model, keras_model_path) """ _validate_env_arguments(conda_env, pip_requirements, extra_pip_requirements) if keras_module is None: def _is_plain_keras(model): try: import keras # NB: Network is the first parent with save method import keras.engine.network return isinstance(model, keras.engine.network.Network) except ImportError: return False def _is_tf_keras(model): try: # NB: Network is not exposed in tf.keras, we check for Model instead. import tensorflow.keras.models return isinstance(model, tensorflow.keras.models.Model) except ImportError: return False if _is_plain_keras(keras_model): keras_module = importlib.import_module("keras") elif _is_tf_keras(keras_model): keras_module = importlib.import_module("tensorflow.keras") else: raise MlflowException( "Unable to infer keras module from the model, please specify " "which keras module ('keras' or 'tensorflow.keras') is to be " "used to save and load the model.") elif type(keras_module) == str: keras_module = importlib.import_module(keras_module) # check if path exists path = os.path.abspath(path) _validate_and_prepare_target_save_path(path) # construct new data folder in existing path data_subpath = "data" data_path = os.path.join(path, data_subpath) os.makedirs(data_path) code_dir_subpath = _validate_and_copy_code_paths(code_paths, path) if mlflow_model is None: mlflow_model = Model() if signature is not None: mlflow_model.signature = signature if input_example is not None: _save_example(mlflow_model, input_example, path) # save custom objects if there are custom objects if custom_objects is not None: _save_custom_objects(data_path, custom_objects) # save keras module spec to path/data/keras_module.txt with open(os.path.join(data_path, _KERAS_MODULE_SPEC_PATH), "w") as f: f.write(keras_module.__name__) # Use the SavedModel format if `save_format` is unspecified save_format = kwargs.get("save_format", "tf") # save keras save_format to path/data/save_format.txt with open(os.path.join(data_path, _KERAS_SAVE_FORMAT_PATH), "w") as f: f.write(save_format) # save keras model # To maintain prior behavior, when the format is HDF5, we save # with the h5 file extension. Otherwise, model_path is a directory # where the saved_model.pb will be stored (for SavedModel format) file_extension = ".h5" if save_format == "h5" else "" model_subpath = os.path.join(data_subpath, _MODEL_SAVE_PATH) model_path = os.path.join(path, model_subpath) + file_extension if path.startswith("/dbfs/"): # The Databricks Filesystem uses a FUSE implementation that does not support # random writes. It causes an error. with tempfile.NamedTemporaryFile(suffix=".h5") as f: keras_model.save(f.name, **kwargs) f.flush() # force flush the data shutil.copyfile(src=f.name, dst=model_path) else: keras_model.save(model_path, **kwargs) # update flavor info to mlflow_model mlflow_model.add_flavor( FLAVOR_NAME, keras_module=keras_module.__name__, keras_version=keras_module.__version__, save_format=save_format, data=data_subpath, code=code_dir_subpath, ) # append loader_module, data and env data to mlflow_model pyfunc.add_to_model( mlflow_model, loader_module="mlflow.keras", data=data_subpath, env=_CONDA_ENV_FILE_NAME, code=code_dir_subpath, ) # save mlflow_model to path/MLmodel mlflow_model.save(os.path.join(path, MLMODEL_FILE_NAME)) include_cloudpickle = custom_objects is not None if conda_env is None: if pip_requirements is None: default_reqs = get_default_pip_requirements( include_cloudpickle, keras_module) # To ensure `_load_pyfunc` can successfully load the model during the dependency # inference, `mlflow_model.save` must be called beforehand to save an MLmodel file. inferred_reqs = mlflow.models.infer_pip_requirements( path, FLAVOR_NAME, fallback=default_reqs) default_reqs = sorted(set(inferred_reqs).union(default_reqs)) else: default_reqs = None conda_env, pip_requirements, pip_constraints = _process_pip_requirements( default_reqs, pip_requirements, extra_pip_requirements, ) else: conda_env, pip_requirements, pip_constraints = _process_conda_env( conda_env) with open(os.path.join(path, _CONDA_ENV_FILE_NAME), "w") as f: yaml.safe_dump(conda_env, stream=f, default_flow_style=False) # Save `constraints.txt` if necessary if pip_constraints: write_to(os.path.join(path, _CONSTRAINTS_FILE_NAME), "\n".join(pip_constraints)) # Save `requirements.txt` write_to(os.path.join(path, _REQUIREMENTS_FILE_NAME), "\n".join(pip_requirements))
def save_model( spacy_model, path, conda_env=None, code_paths=None, mlflow_model=None, signature: ModelSignature = None, input_example: ModelInputExample = None, pip_requirements=None, extra_pip_requirements=None, ): """ Save a spaCy model to a path on the local file system. :param spacy_model: spaCy model to be saved. :param path: Local path where the model is to be saved. :param conda_env: {{ conda_env }} :param code_paths: A list of local filesystem paths to Python file dependencies (or directories containing file dependencies). These files are *prepended* to the system path when the model is loaded. :param mlflow_model: :py:mod:`mlflow.models.Model` this flavor is being added to. :param signature: :py:class:`ModelSignature <mlflow.models.ModelSignature>` describes model input and output :py:class:`Schema <mlflow.types.Schema>`. The model signature can be :py:func:`inferred <mlflow.models.infer_signature>` from datasets with valid model input (e.g. the training dataset with target column omitted) and valid model output (e.g. model predictions generated on the training dataset), for example: .. code-block:: python from mlflow.models.signature import infer_signature train = df.drop_column("target_label") predictions = ... # compute model predictions signature = infer_signature(train, predictions) :param input_example: Input example provides one or several instances of valid model input. The example can be used as a hint of what data to feed the model. The given example will be converted to a Pandas DataFrame and then serialized to json using the Pandas split-oriented format. Bytes are base64-encoded. :param pip_requirements: {{ pip_requirements }} :param extra_pip_requirements: {{ extra_pip_requirements }} """ import spacy _validate_env_arguments(conda_env, pip_requirements, extra_pip_requirements) path = os.path.abspath(path) _validate_and_prepare_target_save_path(path) model_data_subpath = "model.spacy" model_data_path = os.path.join(path, model_data_subpath) os.makedirs(model_data_path) code_dir_subpath = _validate_and_copy_code_paths(code_paths, path) if mlflow_model is None: mlflow_model = Model() if signature is not None: mlflow_model.signature = signature if input_example is not None: _save_example(mlflow_model, input_example, path) # Save spacy-model spacy_model.to_disk(path=model_data_path) # Save the pyfunc flavor if at least one text categorizer in spaCy pipeline if any( isinstance(pipe_component[1], spacy.pipeline.TextCategorizer) for pipe_component in spacy_model.pipeline): pyfunc.add_to_model( mlflow_model, loader_module="mlflow.spacy", data=model_data_subpath, env=_CONDA_ENV_FILE_NAME, code=code_dir_subpath, ) else: _logger.warning( "Generating only the spacy flavor for the provided spacy model. This means the model " "can be loaded back via `mlflow.spacy.load_model`, but cannot be loaded back using " "pyfunc APIs like `mlflow.pyfunc.load_model` or via the `mlflow models` CLI commands. " "MLflow will only generate the pyfunc flavor for spacy models containing a pipeline " "component that is an instance of spacy.pipeline.TextCategorizer.") mlflow_model.add_flavor(FLAVOR_NAME, spacy_version=spacy.__version__, data=model_data_subpath, code=code_dir_subpath) mlflow_model.save(os.path.join(path, MLMODEL_FILE_NAME)) if conda_env is None: if pip_requirements is None: default_reqs = get_default_pip_requirements() # To ensure `_load_pyfunc` can successfully load the model during the dependency # inference, `mlflow_model.save` must be called beforehand to save an MLmodel file. inferred_reqs = mlflow.models.infer_pip_requirements( model_data_path, FLAVOR_NAME, fallback=default_reqs, ) default_reqs = sorted(set(inferred_reqs).union(default_reqs)) else: default_reqs = None conda_env, pip_requirements, pip_constraints = _process_pip_requirements( default_reqs, pip_requirements, extra_pip_requirements, ) else: conda_env, pip_requirements, pip_constraints = _process_conda_env( conda_env) with open(os.path.join(path, _CONDA_ENV_FILE_NAME), "w") as f: yaml.safe_dump(conda_env, stream=f, default_flow_style=False) # Save `constraints.txt` if necessary if pip_constraints: write_to(os.path.join(path, _CONSTRAINTS_FILE_NAME), "\n".join(pip_constraints)) # Save `requirements.txt` write_to(os.path.join(path, _REQUIREMENTS_FILE_NAME), "\n".join(pip_requirements)) _PythonEnv.current().to_yaml(os.path.join(path, _PYTHON_ENV_FILE_NAME))
def save_model( h2o_model, path, conda_env=None, mlflow_model=None, settings=None, signature: ModelSignature = None, input_example: ModelInputExample = None, pip_requirements=None, extra_pip_requirements=None, ): """ Save an H2O model to a path on the local file system. :param h2o_model: H2O model to be saved. :param path: Local path where the model is to be saved. :param conda_env: Either a dictionary representation of a Conda environment or the path to a Conda environment yaml file. If provided, this describes the environment this model should be run in. At minimum, it should specify the dependencies contained in :func:`get_default_conda_env()`. If ``None``, the default :func:`get_default_conda_env()` environment is added to the model. The following is an *example* dictionary representation of a Conda environment:: { 'name': 'mlflow-env', 'channels': ['defaults'], 'dependencies': [ 'python=3.7.0', 'pip': [ 'h2o==3.20.0.8' ] ] } :param signature: (Experimental) :py:class:`ModelSignature <mlflow.models.ModelSignature>` describes model input and output :py:class:`Schema <mlflow.types.Schema>`. The model signature can be :py:func:`inferred <mlflow.models.infer_signature>` from datasets with valid model input (e.g. the training dataset with target column omitted) and valid model output (e.g. model predictions generated on the training dataset), for example: .. code-block:: python from mlflow.models.signature import infer_signature train = df.drop_column("target_label") predictions = ... # compute model predictions signature = infer_signature(train, predictions) :param input_example: (Experimental) Input example provides one or several instances of valid model input. The example can be used as a hint of what data to feed the model. The given example will be converted to a Pandas DataFrame and then serialized to json using the Pandas split-oriented format. Bytes are base64-encoded. :param mlflow_model: :py:mod:`mlflow.models.Model` this flavor is being added to. """ import h2o _validate_env_arguments(conda_env, pip_requirements, extra_pip_requirements) path = os.path.abspath(path) if os.path.exists(path): raise Exception("Path '{}' already exists".format(path)) model_data_subpath = "model.h2o" model_data_path = os.path.join(path, model_data_subpath) os.makedirs(model_data_path) if mlflow_model is None: mlflow_model = Model() if signature is not None: mlflow_model.signature = signature if input_example is not None: _save_example(mlflow_model, input_example, path) # Save h2o-model if hasattr(h2o, "download_model"): h2o_save_location = h2o.download_model(model=h2o_model, path=model_data_path) else: warnings.warn( "If your cluster is remote, H2O may not store the model correctly. " "Please upgrade H2O version to a newer version") h2o_save_location = h2o.save_model(model=h2o_model, path=model_data_path, force=True) model_file = os.path.basename(h2o_save_location) # Save h2o-settings if settings is None: settings = {} settings["full_file"] = h2o_save_location settings["model_file"] = model_file settings["model_dir"] = model_data_path with open(os.path.join(model_data_path, "h2o.yaml"), "w") as settings_file: yaml.safe_dump(settings, stream=settings_file) conda_env, pip_requirements, pip_constraints = (_process_pip_requirements( get_default_pip_requirements(), pip_requirements, extra_pip_requirements, ) if conda_env is None else _process_conda_env(conda_env)) with open(os.path.join(path, _CONDA_ENV_FILE_NAME), "w") as f: yaml.safe_dump(conda_env, stream=f, default_flow_style=False) # Save `constraints.txt` if necessary if pip_constraints: write_to(os.path.join(path, _CONSTRAINTS_FILE_NAME), "\n".join(pip_constraints)) # Save `requirements.txt` write_to(os.path.join(path, _REQUIREMENTS_FILE_NAME), "\n".join(pip_requirements)) pyfunc.add_to_model(mlflow_model, loader_module="mlflow.h2o", data=model_data_subpath, env=_CONDA_ENV_FILE_NAME) mlflow_model.add_flavor(FLAVOR_NAME, h2o_version=h2o.__version__, data=model_data_subpath) mlflow_model.save(os.path.join(path, MLMODEL_FILE_NAME))
def save_model( lgb_model, path, conda_env=None, code_paths=None, mlflow_model=None, signature: ModelSignature = None, input_example: ModelInputExample = None, pip_requirements=None, extra_pip_requirements=None, ): """ Save a LightGBM model to a path on the local file system. :param lgb_model: LightGBM model (an instance of `lightgbm.Booster`_) or models that implement the `scikit-learn API`_ to be saved. :param path: Local path where the model is to be saved. :param conda_env: {{ conda_env }} :param code_paths: A list of local filesystem paths to Python file dependencies (or directories containing file dependencies). These files are *prepended* to the system path when the model is loaded. :param mlflow_model: :py:mod:`mlflow.models.Model` this flavor is being added to. :param signature: :py:class:`ModelSignature <mlflow.models.ModelSignature>` describes model input and output :py:class:`Schema <mlflow.types.Schema>`. The model signature can be :py:func:`inferred <mlflow.models.infer_signature>` from datasets with valid model input (e.g. the training dataset with target column omitted) and valid model output (e.g. model predictions generated on the training dataset), for example: .. code-block:: python from mlflow.models.signature import infer_signature train = df.drop_column("target_label") predictions = ... # compute model predictions signature = infer_signature(train, predictions) :param input_example: Input example provides one or several instances of valid model input. The example can be used as a hint of what data to feed the model. The given example will be converted to a Pandas DataFrame and then serialized to json using the Pandas split-oriented format. Bytes are base64-encoded. :param pip_requirements: {{ pip_requirements }} :param extra_pip_requirements: {{ extra_pip_requirements }} """ import lightgbm as lgb _validate_env_arguments(conda_env, pip_requirements, extra_pip_requirements) path = os.path.abspath(path) _validate_and_prepare_target_save_path(path) model_data_subpath = "model.lgb" if isinstance( lgb_model, lgb.Booster) else "model.pkl" model_data_path = os.path.join(path, model_data_subpath) code_dir_subpath = _validate_and_copy_code_paths(code_paths, path) if mlflow_model is None: mlflow_model = Model() if signature is not None: mlflow_model.signature = signature if input_example is not None: _save_example(mlflow_model, input_example, path) # Save a LightGBM model _save_model(lgb_model, model_data_path) lgb_model_class = _get_fully_qualified_class_name(lgb_model) pyfunc.add_to_model( mlflow_model, loader_module="mlflow.lightgbm", data=model_data_subpath, env=_CONDA_ENV_FILE_NAME, code=code_dir_subpath, ) mlflow_model.add_flavor( FLAVOR_NAME, lgb_version=lgb.__version__, data=model_data_subpath, model_class=lgb_model_class, code=code_dir_subpath, ) mlflow_model.save(os.path.join(path, MLMODEL_FILE_NAME)) if conda_env is None: if pip_requirements is None: default_reqs = get_default_pip_requirements( include_cloudpickle=not isinstance(lgb_model, lgb.Booster)) # To ensure `_load_pyfunc` can successfully load the model during the dependency # inference, `mlflow_model.save` must be called beforehand to save an MLmodel file. inferred_reqs = mlflow.models.infer_pip_requirements( path, FLAVOR_NAME, fallback=default_reqs, ) default_reqs = sorted(set(inferred_reqs).union(default_reqs)) else: default_reqs = None conda_env, pip_requirements, pip_constraints = _process_pip_requirements( default_reqs, pip_requirements, extra_pip_requirements, ) else: conda_env, pip_requirements, pip_constraints = _process_conda_env( conda_env) with open(os.path.join(path, _CONDA_ENV_FILE_NAME), "w") as f: yaml.safe_dump(conda_env, stream=f, default_flow_style=False) # Save `constraints.txt` if necessary if pip_constraints: write_to(os.path.join(path, _CONSTRAINTS_FILE_NAME), "\n".join(pip_constraints)) # Save `requirements.txt` write_to(os.path.join(path, _REQUIREMENTS_FILE_NAME), "\n".join(pip_requirements)) _PythonEnv.current().to_yaml(os.path.join(path, _PYTHON_ENV_FILE_NAME))
def save_model( fonduer_model: FonduerModel, path: str, conn_string: str, mlflow_model: Model = Model(), code_paths: Optional[List[str]] = None, parallel: Optional[int] = 1, model_type: Optional[str] = "discriminative", labeler: Optional[Labeler] = None, gen_models: Optional[List[LabelModel]] = None, featurizer: Optional[Featurizer] = None, disc_model: Optional[Classifier] = None, ) -> None: """Save a custom MLflow model to a path on the local file system. :param fonduer_model: the model to be saved. :param path: the path on the local file system. :param conn_string: the connection string. :param mlflow_model: model configuration. :param code_paths: A list of local filesystem paths to Python file dependencies (or directories containing file dependencies). These files are prepended to the system path when the model is loaded. :param parallel: the number of parallelism. :param model_type: the model type, either "discriminative" or "generative", defaults to "discriminative". :param labeler: a labeler, defaults to None. :param gen_models: a list of generative models, defaults to None. :param featurizer: a featurizer, defaults to None. :param disc_model: a discriminative model, defaults to None. """ os.makedirs(path) model_code_path = os.path.join(path, pyfunc.CODE) os.makedirs(model_code_path) with open(os.path.join(path, "fonduer_model.pkl"), "wb") as f: pickle.dump(fonduer_model, f) if model_type == "discriminative": key_names = [key.name for key in featurizer.get_keys()] with open(os.path.join(path, "feature_keys.pkl"), "wb") as f: pickle.dump(key_names, f) disc_model.save(model_file="best_model.pt", save_dir=path) else: for candidate_class, gen_model in zip(labeler.candidate_classes, gen_models): gen_model.save( os.path.join(path, candidate_class.__name__ + ".pkl")) key_names = [key.name for key in labeler.get_keys()] with open(os.path.join(path, "labeler_keys.pkl"), "wb") as f: pickle.dump(key_names, f) _copy_file_or_tree(src=__file__, dst=model_code_path) if code_paths is not None: for code_path in code_paths: _copy_file_or_tree(src=code_path, dst=model_code_path) mlflow_model.add_flavor( pyfunc.FLAVOR_NAME, code=pyfunc.CODE, loader_module=__name__, conn_string=conn_string, parallel=parallel, model_type=model_type, ) mlflow_model.save(os.path.join(path, "MLmodel"))
def save_model( diviner_model, path, conda_env=None, code_paths=None, mlflow_model=None, signature: ModelSignature = None, input_example: ModelInputExample = None, pip_requirements=None, extra_pip_requirements=None, ): """ Save a ``Diviner`` model object to a path on the local file system. :param diviner_model: ``Diviner`` model that has been ``fit`` on a grouped temporal ``DataFrame``. :param path: Local path destination for the serialized model is to be saved. :param conda_env: {{ conda_env }} :param code_paths: A list of local filesystem paths to Python file dependencies (or directories containing file dependencies). These files are *prepended* to the system path when the model is loaded. :param mlflow_model: :py:mod:`mlflow.models.Model` the flavor that this model is being added to. :param signature: :py:class:`Model Signature <mlflow.models.ModelSignature>` describes model input and output :py:class:`Schema <mlflow.types.Schema>`. The model signature can be :py:func:`inferred <mlflow.models.infer_signature>` from datasets with valid model input (e.g. the training dataset with target column omitted) and valid model output (e.g. model predictions generated on the training dataset), for example: .. code-block:: py from mlflow.models.signature import infer_signature model = diviner.GroupedProphet().fit(data, ("region", "state")) predictions = model.predict(prediction_config) signature = infer_signature(data, predictions) :param input_example: Input example provides one or several instances of valid model input. The example can be used as a hint of what data to feed the model. The given example will be converted to a ``Pandas DataFrame`` and then serialized to json using the ``Pandas`` split-oriented format. Bytes are base64-encoded. :param pip_requirements: {{ pip_requirements }} :param extra_pip_requirements: {{ extra_pip_requirements }} """ import diviner _validate_env_arguments(conda_env, pip_requirements, extra_pip_requirements) path = pathlib.Path(path).absolute() _validate_and_prepare_target_save_path(str(path)) # NB: When moving to native pathlib implementations, path encoding as string will not be needed. code_dir_subpath = _validate_and_copy_code_paths(code_paths, str(path)) if mlflow_model is None: mlflow_model = Model() if signature is not None: mlflow_model.signature = signature if input_example is not None: _save_example(mlflow_model, input_example, str(path)) diviner_model.save(str(path.joinpath(_MODEL_BINARY_FILE_NAME))) model_bin_kwargs = {_MODEL_BINARY_KEY: _MODEL_BINARY_FILE_NAME} pyfunc.add_to_model( mlflow_model, loader_module="mlflow.diviner", env=_CONDA_ENV_FILE_NAME, code=code_dir_subpath, **model_bin_kwargs, ) flavor_conf = { _MODEL_TYPE_KEY: diviner_model.__class__.__name__, **model_bin_kwargs } mlflow_model.add_flavor(FLAVOR_NAME, diviner_version=diviner.__version__, code=code_dir_subpath, **flavor_conf) mlflow_model.save(str(path.joinpath(MLMODEL_FILE_NAME))) if conda_env is None: if pip_requirements is None: default_reqs = get_default_pip_requirements() inferred_reqs = mlflow.models.infer_pip_requirements( str(path), FLAVOR_NAME, fallback=default_reqs) default_reqs = sorted(set(inferred_reqs).union(default_reqs)) else: default_reqs = None conda_env, pip_requirements, pip_constraints = _process_pip_requirements( default_reqs, pip_requirements, extra_pip_requirements) else: conda_env, pip_requirements, pip_constraints = _process_conda_env( conda_env) with path.joinpath(_CONDA_ENV_FILE_NAME).open("w") as f: yaml.safe_dump(conda_env, stream=f, default_flow_style=False) if pip_constraints: write_to(str(path.joinpath(_CONSTRAINTS_FILE_NAME)), "\n".join(pip_constraints)) write_to(str(path.joinpath(_REQUIREMENTS_FILE_NAME)), "\n".join(pip_requirements))
def save_model( pr_model, path, conda_env=None, mlflow_model=None, signature: ModelSignature = None, input_example: ModelInputExample = None, pip_requirements=None, extra_pip_requirements=None, ): """ Save a Prophet model to a path on the local file system. :param pr_model: Prophet model (an instance of Prophet() forecaster that has been fit on a temporal series. :param path: Local path where the serialized model (as JSON) is to be saved. :param conda_env: {{ conda_env }} :param mlflow_model: :py:mod:`mlflow.models.Model` this flavor is being added to. :param signature: :py:class:`ModelSignature <mlflow.models.ModelSignature>` describes model input and output :py:class:`Schema <mlflow.types.Schema>`. The model signature can be :py:func:`inferred <mlflow.models.infer_signature>` from datasets with valid model input (e.g. the training dataset with target column omitted) and valid model output (e.g. model predictions generated on the training dataset), for example: .. code-block:: python from mlflow.models.signature import infer_signature model = Prophet().fit(df) train = model.history predictions = model.predict(model.make_future_dataframe(30)) signature = infer_signature(train, predictions) :param input_example: Input example provides one or several instances of valid model input. The example can be used as a hint of what data to feed the model. The given example will be converted to a Pandas DataFrame and then serialized to json using the Pandas split-oriented format. Bytes are base64-encoded. :param pip_requirements: {{ pip_requirements }} :param extra_pip_requirements: {{ extra_pip_requirements }} """ import prophet _validate_env_arguments(conda_env, pip_requirements, extra_pip_requirements) path = os.path.abspath(path) if os.path.exists(path): raise MlflowException(f"Path '{path}' already exists") os.makedirs(path) if mlflow_model is None: mlflow_model = Model() if signature is not None: mlflow_model.signature = signature if input_example is not None: _save_example(mlflow_model, input_example, path) model_data_path = os.path.join(path, _MODEL_BINARY_FILE_NAME) _save_model(pr_model, model_data_path) model_bin_kwargs = {_MODEL_BINARY_KEY: _MODEL_BINARY_FILE_NAME} pyfunc.add_to_model(mlflow_model, loader_module="mlflow.prophet", env=_CONDA_ENV_FILE_NAME, **model_bin_kwargs) flavor_conf = { _MODEL_TYPE_KEY: pr_model.__class__.__name__, **model_bin_kwargs, } mlflow_model.add_flavor( FLAVOR_NAME, prophet_version=prophet.__version__, **flavor_conf, ) mlflow_model.save(os.path.join(path, MLMODEL_FILE_NAME)) if conda_env is None: if pip_requirements is None: # cannot use inferred requirements due to prophet's build process # as the package installation of pystan requires Cython to be present # in the path. Prophet's installation itself requires imports of # existing libraries, preventing the execution of a batched pip install # and instead using a a strictly defined list of dependencies. # NOTE: if Prophet .whl build architecture is changed, this should be # modified to a standard inferred approach. default_reqs = get_default_pip_requirements() else: default_reqs = None conda_env, pip_requirements, pip_constraints = _process_pip_requirements( default_reqs, pip_requirements, extra_pip_requirements, ) else: conda_env, pip_requirements, pip_constraints = _process_conda_env( conda_env) with open(os.path.join(path, _CONDA_ENV_FILE_NAME), "w") as f: yaml.safe_dump(conda_env, stream=f, default_flow_style=False) if pip_constraints: write_to(os.path.join(path, _CONSTRAINTS_FILE_NAME), "\n".join(pip_constraints)) write_to(os.path.join(path, _REQUIREMENTS_FILE_NAME), "\n".join(pip_requirements))
def save_model(fastai_learner, path, conda_env=None, mlflow_model=None, signature: ModelSignature = None, input_example: ModelInputExample = None, pip_requirements=None, extra_pip_requirements=None, **kwargs): """ Save a fastai Learner to a path on the local file system. :param fastai_learner: fastai Learner to be saved. :param path: Local path where the model is to be saved. :param conda_env: {{ conda_env }} :param mlflow_model: MLflow model config this flavor is being added to. :param signature: :py:class:`ModelSignature <mlflow.models.ModelSignature>` describes model input and output :py:class:`Schema <mlflow.types.Schema>`. The model signature can be :py:func:`inferred <mlflow.models.infer_signature>` from datasets with valid model input (e.g. the training dataset with target column omitted) and valid model output (e.g. model predictions generated on the training dataset), for example: .. code-block:: python from mlflow.models.signature import infer_signature train = df.drop_column("target_label") predictions = ... # compute model predictions signature = infer_signature(train, predictions) :param input_example: Input example provides one or several instances of valid model input. The example can be used as a hint of what data to feed the model. The given example will be converted to a Pandas DataFrame and then serialized to json using the Pandas split-oriented format. Bytes are base64-encoded. :param pip_requirements: {{ pip_requirements }} :param extra_pip_requirements: {{ extra_pip_requirements }} :param kwargs: kwargs to pass to ``Learner.save`` method. .. code-block:: python :caption: Example import os import mlflow.fastai # Create a fastai Learner model model = ... # Start MLflow session and save model to current working directory with mlflow.start_run(): model.fit(epochs, learning_rate) mlflow.fastai.save_model(model, 'model') # Load saved model for inference model_uri = "{}/{}".format(os.getcwd(), 'model') loaded_model = mlflow.fastai.load_model(model_uri) results = loaded_model.predict(predict_data) """ import fastai from pathlib import Path _validate_env_arguments(conda_env, pip_requirements, extra_pip_requirements) path = os.path.abspath(path) if os.path.exists(path): raise MlflowException("Path '{}' already exists".format(path)) model_data_subpath = "model.fastai" model_data_path = os.path.join(path, model_data_subpath) model_data_path = Path(model_data_path) os.makedirs(path) if mlflow_model is None: mlflow_model = Model() if signature is not None: mlflow_model.signature = signature if input_example is not None: _save_example(mlflow_model, input_example, path) # Save an Learner fastai_learner.export(model_data_path, **kwargs) pyfunc.add_to_model( mlflow_model, loader_module="mlflow.fastai", data=model_data_subpath, env=_CONDA_ENV_FILE_NAME, ) mlflow_model.add_flavor(FLAVOR_NAME, fastai_version=fastai.__version__, data=model_data_subpath) mlflow_model.save(os.path.join(path, MLMODEL_FILE_NAME)) if conda_env is None: if pip_requirements is None: default_reqs = get_default_pip_requirements() # To ensure `_load_pyfunc` can successfully load the model during the dependency # inference, `mlflow_model.save` must be called beforehand to save an MLmodel file. inferred_reqs = mlflow.models.infer_pip_requirements( path, FLAVOR_NAME, fallback=default_reqs, ) default_reqs = sorted(set(inferred_reqs).union(default_reqs)) else: default_reqs = None conda_env, pip_requirements, pip_constraints = _process_pip_requirements( default_reqs, pip_requirements, extra_pip_requirements, ) else: conda_env, pip_requirements, pip_constraints = _process_conda_env( conda_env) with open(os.path.join(path, _CONDA_ENV_FILE_NAME), "w") as f: yaml.safe_dump(conda_env, stream=f, default_flow_style=False) # Save `constraints.txt` if necessary if pip_constraints: write_to(os.path.join(path, _CONSTRAINTS_FILE_NAME), "\n".join(pip_constraints)) # Save `requirements.txt` write_to(os.path.join(path, _REQUIREMENTS_FILE_NAME), "\n".join(pip_requirements))
def save_model( pytorch_model, path, conda_env=None, mlflow_model=None, code_paths=None, pickle_module=None, signature: ModelSignature = None, input_example: ModelInputExample = None, requirements_file=None, extra_files=None, **kwargs ): """ Save a PyTorch model to a path on the local file system. :param pytorch_model: PyTorch model to be saved. Can be either an eager model (subclass of ``torch.nn.Module``) or scripted model prepared via ``torch.jit.script`` or ``torch.jit.trace``. The model accept a single ``torch.FloatTensor`` as input and produce a single output tensor. If saving an eager model, any code dependencies of the model's class, including the class definition itself, should be included in one of the following locations: - The package(s) listed in the model's Conda environment, specified by the ``conda_env`` parameter. - One or more of the files specified by the ``code_paths`` parameter. :param path: Local path where the model is to be saved. :param conda_env: Either a dictionary representation of a Conda environment or the path to a Conda environment yaml file. If provided, this decsribes the environment this model should be run in. At minimum, it should specify the dependencies contained in :func:`get_default_conda_env()`. If ``None``, the default :func:`get_default_conda_env()` environment is added to the model. The following is an *example* dictionary representation of a Conda environment:: { 'name': 'mlflow-env', 'channels': ['defaults'], 'dependencies': [ 'python=3.7.0', 'pytorch=0.4.1', 'torchvision=0.2.1' ] } :param mlflow_model: :py:mod:`mlflow.models.Model` this flavor is being added to. :param code_paths: A list of local filesystem paths to Python file dependencies (or directories containing file dependencies). These files are *prepended* to the system path when the model is loaded. :param pickle_module: The module that PyTorch should use to serialize ("pickle") the specified ``pytorch_model``. This is passed as the ``pickle_module`` parameter to ``torch.save()``. By default, this module is also used to deserialize ("unpickle") the PyTorch model at load time. :param signature: (Experimental) :py:class:`ModelSignature <mlflow.models.ModelSignature>` describes model input and output :py:class:`Schema <mlflow.types.Schema>`. The model signature can be :py:func:`inferred <mlflow.models.infer_signature>` from datasets with valid model input (e.g. the training dataset with target column omitted) and valid model output (e.g. model predictions generated on the training dataset), for example: .. code-block:: python from mlflow.models.signature import infer_signature train = df.drop_column("target_label") predictions = ... # compute model predictions signature = infer_signature(train, predictions) :param input_example: (Experimental) Input example provides one or several instances of valid model input. The example can be used as a hint of what data to feed the model. The given example can be a Pandas DataFrame where the given example will be serialized to json using the Pandas split-oriented format, or a numpy array where the example will be serialized to json by converting it to a list. Bytes are base64-encoded. :param requirements_file: A string containing the path to requirements file. Remote URIs are resolved to absolute filesystem paths. For example, consider the following ``requirements_file`` string - requirements_file = "s3://my-bucket/path/to/my_file" In this case, the ``"my_file"`` requirements file is downloaded from S3. If ``None``, no requirements file is added to the model. :param extra_files: A list containing the paths to corresponding extra files. Remote URIs are resolved to absolute filesystem paths. For example, consider the following ``extra_files`` list - extra_files = ["s3://my-bucket/path/to/my_file1", "s3://my-bucket/path/to/my_file2"] In this case, the ``"my_file1 & my_file2"`` extra file is downloaded from S3. If ``None``, no extra files are added to the model. :param kwargs: kwargs to pass to ``torch.save`` method. .. code-block:: python :caption: Example import os import torch import mlflow.pytorch # Class defined here class LinearNNModel(torch.nn.Module): ... # Initialize our model, criterion and optimizer ... # Training loop ... # Save PyTorch models to current working directory with mlflow.start_run() as run: mlflow.pytorch.save_model(model, "model") # Convert to a scripted model and save it scripted_pytorch_model = torch.jit.script(model) mlflow.pytorch.save_model(scripted_pytorch_model, "scripted_model") # Load each saved model for inference for model_path in ["model", "scripted_model"]: model_uri = "{}/{}".format(os.getcwd(), model_path) loaded_model = mlflow.pytorch.load_model(model_uri) print("Loaded {}:".format(model_path)) for x in [6.0, 8.0, 12.0, 30.0]: X = torch.Tensor([[x]]) y_pred = loaded_model(X) print("predict X: {}, y_pred: {:.2f}".format(x, y_pred.data.item())) print("--") .. code-block:: text :caption: Output Loaded model: predict X: 6.0, y_pred: 11.90 predict X: 8.0, y_pred: 15.92 predict X: 12.0, y_pred: 23.96 predict X: 30.0, y_pred: 60.13 -- Loaded scripted_model: predict X: 6.0, y_pred: 11.90 predict X: 8.0, y_pred: 15.92 predict X: 12.0, y_pred: 23.96 predict X: 30.0, y_pred: 60.13 """ import torch pickle_module = pickle_module or mlflow_pytorch_pickle_module if not isinstance(pytorch_model, torch.nn.Module): raise TypeError("Argument 'pytorch_model' should be a torch.nn.Module") if code_paths is not None: if not isinstance(code_paths, list): raise TypeError("Argument code_paths should be a list, not {}".format(type(code_paths))) path = os.path.abspath(path) if os.path.exists(path): raise RuntimeError("Path '{}' already exists".format(path)) if mlflow_model is None: mlflow_model = Model() os.makedirs(path) if signature is not None: mlflow_model.signature = signature if input_example is not None: _save_example(mlflow_model, input_example, path) model_data_subpath = "data" model_data_path = os.path.join(path, model_data_subpath) os.makedirs(model_data_path) # Persist the pickle module name as a file in the model's `data` directory. This is necessary # because the `data` directory is the only available parameter to `_load_pyfunc`, and it # does not contain the MLmodel configuration; therefore, it is not sufficient to place # the module name in the MLmodel # # TODO: Stop persisting this information to the filesystem once we have a mechanism for # supplying the MLmodel configuration to `mlflow.pytorch._load_pyfunc` pickle_module_path = os.path.join(model_data_path, _PICKLE_MODULE_INFO_FILE_NAME) with open(pickle_module_path, "w") as f: f.write(pickle_module.__name__) # Save pytorch model model_path = os.path.join(model_data_path, _SERIALIZED_TORCH_MODEL_FILE_NAME) if isinstance(pytorch_model, torch.jit.ScriptModule): torch.jit.ScriptModule.save(pytorch_model, model_path) else: torch.save(pytorch_model, model_path, pickle_module=pickle_module, **kwargs) torchserve_artifacts_config = {} if requirements_file: if not isinstance(requirements_file, str): raise TypeError("Path to requirements file should be a string") with TempDir() as tmp_requirements_dir: _download_artifact_from_uri( artifact_uri=requirements_file, output_path=tmp_requirements_dir.path() ) rel_path = os.path.basename(requirements_file) torchserve_artifacts_config[_REQUIREMENTS_FILE_KEY] = {"path": rel_path} shutil.move(tmp_requirements_dir.path(rel_path), path) if extra_files: torchserve_artifacts_config[_EXTRA_FILES_KEY] = [] if not isinstance(extra_files, list): raise TypeError("Extra files argument should be a list") with TempDir() as tmp_extra_files_dir: for extra_file in extra_files: _download_artifact_from_uri( artifact_uri=extra_file, output_path=tmp_extra_files_dir.path() ) rel_path = posixpath.join(_EXTRA_FILES_KEY, os.path.basename(extra_file),) torchserve_artifacts_config[_EXTRA_FILES_KEY].append({"path": rel_path}) shutil.move( tmp_extra_files_dir.path(), posixpath.join(path, _EXTRA_FILES_KEY), ) conda_env_subpath = "conda.yaml" if conda_env is None: conda_env = get_default_conda_env() elif not isinstance(conda_env, dict): with open(conda_env, "r") as f: conda_env = yaml.safe_load(f) with open(os.path.join(path, conda_env_subpath), "w") as f: yaml.safe_dump(conda_env, stream=f, default_flow_style=False) if code_paths is not None: code_dir_subpath = "code" for code_path in code_paths: _copy_file_or_tree(src=code_path, dst=path, dst_dir=code_dir_subpath) else: code_dir_subpath = None mlflow_model.add_flavor( FLAVOR_NAME, model_data=model_data_subpath, pytorch_version=torch.__version__, **torchserve_artifacts_config, ) pyfunc.add_to_model( mlflow_model, loader_module="mlflow.pytorch", data=model_data_subpath, pickle_module_name=pickle_module.__name__, code=code_dir_subpath, env=conda_env_subpath, ) mlflow_model.save(os.path.join(path, MLMODEL_FILE_NAME))
def save_model( tf_saved_model_dir, tf_meta_graph_tags, tf_signature_def_key, path, mlflow_model=None, conda_env=None, signature: ModelSignature = None, input_example: ModelInputExample = None, pip_requirements=None, extra_pip_requirements=None, ): """ Save a *serialized* collection of TensorFlow graphs and variables as an MLflow model to a local path. This method operates on TensorFlow variables and graphs that have been serialized in TensorFlow's ``SavedModel`` format. For more information about ``SavedModel`` format, see the TensorFlow documentation: https://www.tensorflow.org/guide/saved_model#save_and_restore_models. :param tf_saved_model_dir: Path to the directory containing serialized TensorFlow variables and graphs in ``SavedModel`` format. :param tf_meta_graph_tags: A list of tags identifying the model's metagraph within the serialized ``SavedModel`` object. For more information, see the ``tags`` parameter of the ``tf.saved_model.builder.savedmodelbuilder`` method. :param tf_signature_def_key: A string identifying the input/output signature associated with the model. This is a key within the serialized ``savedmodel`` signature definition mapping. For more information, see the ``signature_def_map`` parameter of the ``tf.saved_model.builder.savedmodelbuilder`` method. :param path: Local path where the MLflow model is to be saved. :param mlflow_model: MLflow model configuration to which to add the ``tensorflow`` flavor. :param conda_env: {{ conda_env }} :param signature: :py:class:`ModelSignature <mlflow.models.ModelSignature>` describes model input and output :py:class:`Schema <mlflow.types.Schema>`. The model signature can be :py:func:`inferred <mlflow.models.infer_signature>` from datasets with valid model input (e.g. the training dataset with target column omitted) and valid model output (e.g. model predictions generated on the training dataset), for example: .. code-block:: python from mlflow.models.signature import infer_signature train = df.drop_column("target_label") predictions = ... # compute model predictions signature = infer_signature(train, predictions) :param input_example: Input example provides one or several instances of valid model input. The example can be used as a hint of what data to feed the model. The given example can be a Pandas DataFrame where the given example will be serialized to json using the Pandas split-oriented format, or a numpy array where the example will be serialized to json by converting it to a list. Bytes are base64-encoded. :param pip_requirements: {{ pip_requirements }} :param extra_pip_requirements: {{ extra_pip_requirements }} """ _validate_env_arguments(conda_env, pip_requirements, extra_pip_requirements) _logger.info( "Validating the specified TensorFlow model by attempting to load it in a new TensorFlow" " graph..." ) _validate_saved_model( tf_saved_model_dir=tf_saved_model_dir, tf_meta_graph_tags=tf_meta_graph_tags, tf_signature_def_key=tf_signature_def_key, ) _logger.info("Validation succeeded!") if os.path.exists(path): raise MlflowException("Path '{}' already exists".format(path), DIRECTORY_NOT_EMPTY) os.makedirs(path) if mlflow_model is None: mlflow_model = Model() if signature is not None: mlflow_model.signature = signature if input_example is not None: _save_example(mlflow_model, input_example, path) root_relative_path = _copy_file_or_tree(src=tf_saved_model_dir, dst=path, dst_dir=None) model_dir_subpath = "tfmodel" model_dir_path = os.path.join(path, model_dir_subpath) shutil.move(os.path.join(path, root_relative_path), model_dir_path) flavor_conf = dict( saved_model_dir=model_dir_subpath, meta_graph_tags=tf_meta_graph_tags, signature_def_key=tf_signature_def_key, ) mlflow_model.add_flavor(FLAVOR_NAME, **flavor_conf) pyfunc.add_to_model(mlflow_model, loader_module="mlflow.tensorflow", env=_CONDA_ENV_FILE_NAME) mlflow_model.save(os.path.join(path, MLMODEL_FILE_NAME)) if conda_env is None: if pip_requirements is None: default_reqs = get_default_pip_requirements() # To ensure `_load_pyfunc` can successfully load the model during the dependency # inference, `mlflow_model.save` must be called beforehand to save an MLmodel file. inferred_reqs = mlflow.models.infer_pip_requirements( path, FLAVOR_NAME, fallback=default_reqs, ) default_reqs = sorted(set(inferred_reqs).union(default_reqs)) else: default_reqs = None conda_env, pip_requirements, pip_constraints = _process_pip_requirements( default_reqs, pip_requirements, extra_pip_requirements, ) else: conda_env, pip_requirements, pip_constraints = _process_conda_env(conda_env) with open(os.path.join(path, _CONDA_ENV_FILE_NAME), "w") as f: yaml.safe_dump(conda_env, stream=f, default_flow_style=False) # Save `constraints.txt` if necessary if pip_constraints: write_to(os.path.join(path, _CONSTRAINTS_FILE_NAME), "\n".join(pip_constraints)) # Save `requirements.txt` write_to(os.path.join(path, _REQUIREMENTS_FILE_NAME), "\n".join(pip_requirements))
def _save_model_with_class_artifacts_params(path, python_model, artifacts=None, conda_env=None, code_paths=None, mlflow_model=None): """ :param path: The path to which to save the Python model. :param python_model: An instance of a subclass of :class:`~PythonModel`. ``python_model`` defines how the model loads artifacts and how it performs inference. :param artifacts: A dictionary containing ``<name, artifact_uri>`` entries. Remote artifact URIs are resolved to absolute filesystem paths, producing a dictionary of ``<name, absolute_path>`` entries. ``python_model`` can reference these resolved entries as the ``artifacts`` property of the ``context`` attribute. If ``None``, no artifacts are added to the model. :param conda_env: Either a dictionary representation of a Conda environment or the path to a Conda environment yaml file. If provided, this decsribes the environment this model should be run in. At minimum, it should specify the dependencies contained in :func:`get_default_conda_env()`. If ``None``, the default :func:`get_default_conda_env()` environment is added to the model. :param code_paths: A list of local filesystem paths to Python file dependencies (or directories containing file dependencies). These files are *prepended* to the system path before the model is loaded. :param mlflow_model: The model configuration to which to add the ``mlflow.pyfunc`` flavor. """ if mlflow_model is None: mlflow_model = Model() custom_model_config_kwargs = { CONFIG_KEY_CLOUDPICKLE_VERSION: cloudpickle.__version__, } if isinstance(python_model, PythonModel): saved_python_model_subpath = "python_model.pkl" with open(os.path.join(path, saved_python_model_subpath), "wb") as out: cloudpickle.dump(python_model, out) custom_model_config_kwargs[ CONFIG_KEY_PYTHON_MODEL] = saved_python_model_subpath else: raise MlflowException( message= ("`python_model` must be a subclass of `PythonModel`. Instead, found an" " object of type: {python_model_type}".format( python_model_type=type(python_model))), error_code=INVALID_PARAMETER_VALUE, ) if artifacts: saved_artifacts_config = {} with TempDir() as tmp_artifacts_dir: tmp_artifacts_config = {} saved_artifacts_dir_subpath = "artifacts" for artifact_name, artifact_uri in artifacts.items(): tmp_artifact_path = _download_artifact_from_uri( artifact_uri=artifact_uri, output_path=tmp_artifacts_dir.path()) tmp_artifacts_config[artifact_name] = tmp_artifact_path saved_artifact_subpath = posixpath.join( saved_artifacts_dir_subpath, os.path.relpath(path=tmp_artifact_path, start=tmp_artifacts_dir.path()), ) saved_artifacts_config[artifact_name] = { CONFIG_KEY_ARTIFACT_RELATIVE_PATH: saved_artifact_subpath, CONFIG_KEY_ARTIFACT_URI: artifact_uri, } shutil.move(tmp_artifacts_dir.path(), os.path.join(path, saved_artifacts_dir_subpath)) custom_model_config_kwargs[ CONFIG_KEY_ARTIFACTS] = saved_artifacts_config conda_env_subpath = "conda.yaml" if conda_env is None: conda_env = get_default_conda_env() elif not isinstance(conda_env, dict): with open(conda_env, "r") as f: conda_env = yaml.safe_load(f) with open(os.path.join(path, conda_env_subpath), "w") as f: yaml.safe_dump(conda_env, stream=f, default_flow_style=False) _log_pip_requirements(conda_env, path) saved_code_subpath = None if code_paths is not None: saved_code_subpath = "code" for code_path in code_paths: _copy_file_or_tree(src=code_path, dst=path, dst_dir=saved_code_subpath) mlflow.pyfunc.add_to_model(model=mlflow_model, loader_module=__name__, code=saved_code_subpath, env=conda_env_subpath, **custom_model_config_kwargs) mlflow_model.save(os.path.join(path, MLMODEL_FILE_NAME))
def save_explainer( explainer, path, serialize_model_using_mlflow=True, conda_env=None, mlflow_model=None, signature: ModelSignature = None, input_example: ModelInputExample = None, ): """ Save a SHAP explainer to a path on the local file system. Produces an MLflow Model containing the following flavors: - :py:mod:`mlflow.shap` - :py:mod:`mlflow.pyfunc` :param explainer: SHAP explainer to be saved. :param path: Local path where the explainer is to be saved. :param serialize_model_using_mlflow: When set to True, MLflow will extract the underlying model and serialize it as an MLmodel, otherwise it uses SHAP's internal serialization. Defaults to True. Currently MLflow serialization is only supported for models of 'sklearn' or 'pytorch' flavors. :param conda_env: Either a dictionary representation of a Conda environment or the path to a Conda environment yaml file. If provided, this decsribes the environment this model should be run in. At minimum, it should specify the dependencies contained in :func:`get_default_conda_env()`. If `None`, the default :func:`get_default_conda_env()` environment is added to the model. The following is an *example* dictionary representation of a Conda environment:: { 'name': 'mlflow-env', 'channels': ['defaults'], 'dependencies': [ 'python=3.6.0', 'shap=0.37.0' ] } :param mlflow_model: :py:mod:`mlflow.models.Model` this flavor is being added to. :param signature: (Experimental) :py:class:`ModelSignature <mlflow.models.ModelSignature>` describes model input and output :py:class:`Schema <mlflow.types.Schema>`. The model signature can be :py:func:`inferred <mlflow.models.infer_signature>` from datasets with valid model input (e.g. the training dataset with target column omitted) and valid model output (e.g. model predictions generated on the training dataset), for example: .. code-block:: python from mlflow.models.signature import infer_signature train = df.drop_column("target_label") predictions = ... # compute model predictions signature = infer_signature(train, predictions) :param input_example: (Experimental) Input example provides one or several instances of valid model input. The example can be used as a hint of what data to feed the model. The given example will be converted to a Pandas DataFrame and then serialized to json using the Pandas split-oriented format. Bytes are base64-encoded. """ import shap if os.path.exists(path): raise MlflowException( message="Path '{}' already exists".format(path), error_code=RESOURCE_ALREADY_EXISTS, ) os.makedirs(path) if mlflow_model is None: mlflow_model = Model() if signature is not None: mlflow_model.signature = signature if input_example is not None: _save_example(mlflow_model, input_example, path) underlying_model_flavor = None underlying_model_path = None serializable_by_mlflow = False # saving the underlying model if required if serialize_model_using_mlflow: underlying_model_flavor = get_underlying_model_flavor(explainer.model) if underlying_model_flavor != _UNKNOWN_MODEL_FLAVOR: serializable_by_mlflow = True # prevents SHAP from serializing the underlying model underlying_model_path = os.path.join(path, _UNDERLYING_MODEL_SUBPATH) else: warnings.warn( "Unable to serialize underlying model using MLflow, will use SHAP serialization" ) if underlying_model_flavor == mlflow.sklearn.FLAVOR_NAME: mlflow.sklearn.save_model(explainer.model.inner_model.__self__, underlying_model_path) elif underlying_model_flavor == mlflow.pytorch.FLAVOR_NAME: mlflow.pytorch.save_model(explainer.model.inner_model, underlying_model_path) # saving the explainer object explainer_data_subpath = "explainer.shap" explainer_output_path = os.path.join(path, explainer_data_subpath) with open(explainer_output_path, "wb") as explainer_output_file_handle: if serialize_model_using_mlflow and serializable_by_mlflow: explainer.save(explainer_output_file_handle, model_saver=False) else: explainer.save(explainer_output_file_handle) conda_env_subpath = "conda.yaml" if conda_env is None: conda_env = get_default_conda_env() elif not isinstance(conda_env, dict): with open(conda_env, "r") as f: conda_env = yaml.safe_load(f) # merging the conda environment generated by serializing the underlying model if underlying_model_path is not None: underlying_model_conda_path = os.path.join(underlying_model_path, "conda.yaml") with open(underlying_model_conda_path, "r") as underlying_model_conda_file: underlying_model_conda_env = yaml.safe_load( underlying_model_conda_file) conda_env = _merge_environments(conda_env, underlying_model_conda_env) with open(os.path.join(path, conda_env_subpath), "w") as f: yaml.safe_dump(conda_env, stream=f, default_flow_style=False) pyfunc.add_to_model( mlflow_model, loader_module="mlflow.shap", model_path=explainer_data_subpath, underlying_model_flavor=underlying_model_flavor, env=conda_env_subpath, ) mlflow_model.add_flavor( FLAVOR_NAME, shap_version=shap.__version__, serialized_explainer=explainer_data_subpath, underlying_model_flavor=underlying_model_flavor, ) mlflow_model.save(os.path.join(path, MLMODEL_FILE_NAME))
def save_model( gluon_model, path, mlflow_model=None, conda_env=None, signature: ModelSignature = None, input_example: ModelInputExample = None, pip_requirements=None, extra_pip_requirements=None, ): """ Save a Gluon model to a path on the local file system. :param gluon_model: Gluon model to be saved. Must be already hybridized. :param path: Local path where the model is to be saved. :param mlflow_model: MLflow model config this flavor is being added to. :param conda_env: {{ conda_env }} :param signature: :py:class:`ModelSignature <mlflow.models.ModelSignature>` describes model input and output :py:class:`Schema <mlflow.types.Schema>`. The model signature can be :py:func:`inferred <mlflow.models.infer_signature>` from datasets with valid model input (e.g. the training dataset with target column omitted) and valid model output (e.g. model predictions generated on the training dataset), for example: .. code-block:: python from mlflow.models.signature import infer_signature train = df.drop_column("target_label") predictions = ... # compute model predictions signature = infer_signature(train, predictions) :param input_example: Input example provides one or several instances of valid model input. The example can be used as a hint of what data to feed the model. The given example can be a Pandas DataFrame where the given example will be serialized to json using the Pandas split-oriented format, or a numpy array where the example will be serialized to json by converting it to a list. Bytes are base64-encoded. :param pip_requirements: {{ pip_requirements }} :param extra_pip_requirements: {{ extra_pip_requirements }} .. code-block:: python :caption: Example from mxnet.gluon import Trainer from mxnet.gluon.contrib import estimator from mxnet.gluon.loss import SoftmaxCrossEntropyLoss from mxnet.gluon.nn import HybridSequential from mxnet.metric import Accuracy import mlflow # Build, compile, and train your model gluon_model_path = ... net = HybridSequential() with net.name_scope(): ... net.hybridize() net.collect_params().initialize() softmax_loss = SoftmaxCrossEntropyLoss() trainer = Trainer(net.collect_params()) est = estimator.Estimator(net=net, loss=softmax_loss, metrics=Accuracy(), trainer=trainer) est.fit(train_data=train_data, epochs=100, val_data=validation_data) # Save the model as an MLflow Model mlflow.gluon.save_model(net, gluon_model_path) """ import mxnet as mx _validate_env_arguments(conda_env, pip_requirements, extra_pip_requirements) path = os.path.abspath(path) if os.path.exists(path): raise MlflowException("Path '{}' already exists".format(path)) data_subpath = "data" data_path = os.path.join(path, data_subpath) os.makedirs(data_path) if mlflow_model is None: mlflow_model = Model() if signature is not None: mlflow_model.signature = signature if input_example is not None: _save_example(mlflow_model, input_example, path) # The epoch argument of the export method does not play any role in selecting # a specific epoch's parameters, and is there only for display purposes. gluon_model.export(os.path.join(data_path, _MODEL_SAVE_PATH)) pyfunc.add_to_model(mlflow_model, loader_module="mlflow.gluon", env=_CONDA_ENV_FILE_NAME) mlflow_model.add_flavor(FLAVOR_NAME, mxnet_version=mx.__version__) mlflow_model.save(os.path.join(path, MLMODEL_FILE_NAME)) if conda_env is None: if pip_requirements is None: default_reqs = get_default_pip_requirements() inferred_reqs = mlflow.models.infer_pip_requirements( path, FLAVOR_NAME, fallback=default_reqs, ) default_reqs = sorted(set(inferred_reqs).union(default_reqs)) else: default_reqs = None conda_env, pip_requirements, pip_constraints = _process_pip_requirements( default_reqs, pip_requirements, extra_pip_requirements, ) else: conda_env, pip_requirements, pip_constraints = _process_conda_env( conda_env) with open(os.path.join(path, _CONDA_ENV_FILE_NAME), "w") as f: yaml.safe_dump(conda_env, stream=f, default_flow_style=False) # Save `constraints.txt` if necessary if pip_constraints: write_to(os.path.join(path, _CONSTRAINTS_FILE_NAME), "\n".join(pip_constraints)) # Save `requirements.txt` write_to(os.path.join(path, _REQUIREMENTS_FILE_NAME), "\n".join(pip_requirements))
def test_model_save_load(): m = Model( artifact_path="some/path", run_id="123", flavors={ "flavor1": { "a": 1, "b": 2 }, "flavor2": { "x": 1, "y": 2 } }, signature=ModelSignature( inputs=Schema([ColSpec("integer", "x"), ColSpec("integer", "y")]), outputs=Schema([ColSpec(name=None, type="double")]), ), saved_input_example_info={ "x": 1, "y": 2 }, ) assert m.get_input_schema() == m.signature.inputs assert m.get_output_schema() == m.signature.outputs x = Model(artifact_path="some/other/path", run_id="1234") assert x.get_input_schema() is None assert x.get_output_schema() is None n = Model( artifact_path="some/path", run_id="123", flavors={ "flavor1": { "a": 1, "b": 2 }, "flavor2": { "x": 1, "y": 2 } }, signature=ModelSignature( inputs=Schema([ColSpec("integer", "x"), ColSpec("integer", "y")]), outputs=Schema([ColSpec(name=None, type="double")]), ), saved_input_example_info={ "x": 1, "y": 2 }, ) n.utc_time_created = m.utc_time_created n.model_uuid = m.model_uuid assert m == n n.signature = None assert m != n with TempDir() as tmp: m.save(tmp.path("model")) o = Model.load(tmp.path("model")) assert m == o assert m.to_json() == o.to_json() assert m.to_yaml() == o.to_yaml()
def save_model( gluon_model, path, mlflow_model=None, conda_env=None, signature: ModelSignature = None, input_example: ModelInputExample = None, ): """ Save a Gluon model to a path on the local file system. :param gluon_model: Gluon model to be saved. Must be already hybridized. :param path: Local path where the model is to be saved. :param mlflow_model: MLflow model config this flavor is being added to. :param conda_env: Either a dictionary representation of a Conda environment or the path to a Conda environment yaml file. If provided, this decribes the environment this model should be run in. At minimum, it should specify the dependencies contained in :func:`get_default_conda_env()`. If ``None``, the default :func:`mlflow.gluon.get_default_conda_env()` environment is added to the model. The following is an *example* dictionary representation of a Conda environment:: { 'name': 'mlflow-env', 'channels': ['defaults'], 'dependencies': [ 'python=3.7.0', 'mxnet=1.5.0' ] } :param signature: (Experimental) :py:class:`ModelSignature <mlflow.models.ModelSignature>` describes model input and output :py:class:`Schema <mlflow.types.Schema>`. The model signature can be :py:func:`inferred <mlflow.models.infer_signature>` from datasets with valid model input (e.g. the training dataset with target column omitted) and valid model output (e.g. model predictions generated on the training dataset), for example: .. code-block:: python from mlflow.models.signature import infer_signature train = df.drop_column("target_label") predictions = ... # compute model predictions signature = infer_signature(train, predictions) :param input_example: (Experimental) Input example provides one or several instances of valid model input. The example can be used as a hint of what data to feed the model. The given example will be converted to a Pandas DataFrame and then serialized to json using the Pandas split-oriented format. Bytes are base64-encoded. .. code-block:: python :caption: Example from mxnet.gluon import Trainer from mxnet.gluon.contrib import estimator from mxnet.gluon.loss import SoftmaxCrossEntropyLoss from mxnet.gluon.nn import HybridSequential from mxnet.metric import Accuracy import mlflow # Build, compile, and train your model gluon_model_path = ... net = HybridSequential() with net.name_scope(): ... net.hybridize() net.collect_params().initialize() softmax_loss = SoftmaxCrossEntropyLoss() trainer = Trainer(net.collect_params()) est = estimator.Estimator(net=net, loss=softmax_loss, metrics=Accuracy(), trainer=trainer) est.fit(train_data=train_data, epochs=100, val_data=validation_data) # Save the model as an MLflow Model mlflow.gluon.save_model(net, gluon_model_path) """ path = os.path.abspath(path) if os.path.exists(path): raise MlflowException("Path '{}' already exists".format(path)) data_subpath = "data" data_path = os.path.join(path, data_subpath) os.makedirs(data_path) if mlflow_model is None: mlflow_model = Model() if signature is not None: mlflow_model.signature = signature if input_example is not None: _save_example(mlflow_model, input_example, path) # The epoch argument of the export method does not play any role in selecting # a specific epoch's paramaters, and is there only for display purposes. gluon_model.export(os.path.join(data_path, _MODEL_SAVE_PATH)) with open(os.path.join(path, "architecture.txt"), "w") as fp: fp.write(str(gluon_model)) conda_env_subpath = "conda.yaml" if conda_env is None: conda_env = get_default_conda_env() elif not isinstance(conda_env, dict): with open(conda_env, "r") as f: conda_env = yaml.safe_load(f) with open(os.path.join(path, conda_env_subpath), "w") as f: yaml.safe_dump(conda_env, stream=f, default_flow_style=False) pyfunc.add_to_model(mlflow_model, loader_module="mlflow.gluon", env=conda_env_subpath) mlflow_model.save(os.path.join(path, MLMODEL_FILE_NAME))
def save_model( spacy_model, path, conda_env=None, mlflow_model=None, signature: ModelSignature = None, input_example: ModelInputExample = None, ): """ Save a spaCy model to a path on the local file system. :param spacy_model: spaCy model to be saved. :param path: Local path where the model is to be saved. :param conda_env: Either a dictionary representation of a Conda environment or the path to a Conda environment yaml file. If provided, this describes the environment this model should be run in. At minimum, it should specify the dependencies contained in :func:`get_default_conda_env()`. If ``None``, the default :func:`get_default_conda_env()` environment is added to the model. The following is an *example* dictionary representation of a Conda environment:: { 'name': 'mlflow-env', 'channels': ['defaults'], 'dependencies': [ 'python=3.7.0', 'pip': [ 'spacy==2.2.3' ] ] } :param mlflow_model: :py:mod:`mlflow.models.Model` this flavor is being added to. :param signature: (Experimental) :py:class:`ModelSignature <mlflow.models.ModelSignature>` describes model input and output :py:class:`Schema <mlflow.types.Schema>`. The model signature can be :py:func:`inferred <mlflow.models.infer_signature>` from datasets with valid model input (e.g. the training dataset with target column omitted) and valid model output (e.g. model predictions generated on the training dataset), for example: .. code-block:: python from mlflow.models.signature import infer_signature train = df.drop_column("target_label") predictions = ... # compute model predictions signature = infer_signature(train, predictions) :param input_example: (Experimental) Input example provides one or several instances of valid model input. The example can be used as a hint of what data to feed the model. The given example will be converted to a Pandas DataFrame and then serialized to json using the Pandas split-oriented format. Bytes are base64-encoded. """ import spacy path = os.path.abspath(path) if os.path.exists(path): raise MlflowException( "Unable to save MLflow model to {path} - path '{path}' " "already exists".format(path=path)) model_data_subpath = "model.spacy" model_data_path = os.path.join(path, model_data_subpath) os.makedirs(model_data_path) if mlflow_model is None: mlflow_model = Model() if signature is not None: mlflow_model.signature = signature if input_example is not None: _save_example(mlflow_model, input_example, path) # Save spacy-model spacy_model.to_disk(path=model_data_path) conda_env_subpath = "conda.yaml" if conda_env is None: conda_env = get_default_conda_env() elif not isinstance(conda_env, dict): with open(conda_env, "r") as f: conda_env = yaml.safe_load(f) with open(os.path.join(path, conda_env_subpath), "w") as f: yaml.safe_dump(conda_env, stream=f, default_flow_style=False) # Save the pyfunc flavor if at least one text categorizer in spaCy pipeline if any([ isinstance(pipe_component[1], spacy.pipeline.TextCategorizer) for pipe_component in spacy_model.pipeline ]): pyfunc.add_to_model( mlflow_model, loader_module="mlflow.spacy", data=model_data_subpath, env=conda_env_subpath, ) else: _logger.warning( "Generating only the spacy flavor for the provided spacy model. This means the model " "can be loaded back via `mlflow.spacy.load_model`, but cannot be loaded back using " "pyfunc APIs like `mlflow.pyfunc.load_model` or via the `mlflow models` CLI commands. " "MLflow will only generate the pyfunc flavor for spacy models containing a pipeline " "component that is an instance of spacy.pipeline.TextCategorizer.") mlflow_model.add_flavor(FLAVOR_NAME, spacy_version=spacy.__version__, data=model_data_subpath) mlflow_model.save(os.path.join(path, MLMODEL_FILE_NAME))
def save_model( sk_model, path, conda_env=None, mlflow_model=None, serialization_format=SERIALIZATION_FORMAT_CLOUDPICKLE, signature: ModelSignature = None, input_example: ModelInputExample = None, ): """ Save a scikit-learn model to a path on the local file system. :param sk_model: scikit-learn model to be saved. :param path: Local path where the model is to be saved. :param conda_env: Either a dictionary representation of a Conda environment or the path to a Conda environment yaml file. If provided, this decsribes the environment this model should be run in. At minimum, it should specify the dependencies contained in :func:`get_default_conda_env()`. If `None`, the default :func:`get_default_conda_env()` environment is added to the model. The following is an *example* dictionary representation of a Conda environment:: { 'name': 'mlflow-env', 'channels': ['defaults'], 'dependencies': [ 'python=3.7.0', 'scikit-learn=0.19.2' ] } :param mlflow_model: :py:mod:`mlflow.models.Model` this flavor is being added to. :param serialization_format: The format in which to serialize the model. This should be one of the formats listed in ``mlflow.sklearn.SUPPORTED_SERIALIZATION_FORMATS``. The Cloudpickle format, ``mlflow.sklearn.SERIALIZATION_FORMAT_CLOUDPICKLE``, provides better cross-system compatibility by identifying and packaging code dependencies with the serialized model. :param signature: (Experimental) :py:class:`ModelSignature <mlflow.models.ModelSignature>` describes model input and output :py:class:`Schema <mlflow.types.Schema>`. The model signature can be :py:func:`inferred <mlflow.models.infer_signature>` from datasets with valid model input (e.g. the training dataset with target column omitted) and valid model output (e.g. model predictions generated on the training dataset), for example: .. code-block:: python from mlflow.models.signature import infer_signature train = df.drop_column("target_label") predictions = ... # compute model predictions signature = infer_signature(train, predictions) :param input_example: (Experimental) Input example provides one or several instances of valid model input. The example can be used as a hint of what data to feed the model. The given example will be converted to a Pandas DataFrame and then serialized to json using the Pandas split-oriented format. Bytes are base64-encoded. .. code-block:: python :caption: Example import mlflow.sklearn from sklearn.datasets import load_iris from sklearn import tree iris = load_iris() sk_model = tree.DecisionTreeClassifier() sk_model = sk_model.fit(iris.data, iris.target) # Save the model in cloudpickle format # set path to location for persistence sk_path_dir_1 = ... mlflow.sklearn.save_model( sk_model, sk_path_dir_1, serialization_format=mlflow.sklearn.SERIALIZATION_FORMAT_CLOUDPICKLE) # save the model in pickle format # set path to location for persistence sk_path_dir_2 = ... mlflow.sklearn.save_model(sk_model, sk_path_dir_2, serialization_format=mlflow.sklearn.SERIALIZATION_FORMAT_PICKLE) """ import sklearn if serialization_format not in SUPPORTED_SERIALIZATION_FORMATS: raise MlflowException( message= ("Unrecognized serialization format: {serialization_format}. Please specify one" " of the following supported formats: {supported_formats}.". format( serialization_format=serialization_format, supported_formats=SUPPORTED_SERIALIZATION_FORMATS, )), error_code=INVALID_PARAMETER_VALUE, ) if os.path.exists(path): raise MlflowException(message="Path '{}' already exists".format(path), error_code=RESOURCE_ALREADY_EXISTS) os.makedirs(path) if mlflow_model is None: mlflow_model = Model() if signature is not None: mlflow_model.signature = signature if input_example is not None: _save_example(mlflow_model, input_example, path) model_data_subpath = "model.pkl" _save_model( sk_model=sk_model, output_path=os.path.join(path, model_data_subpath), serialization_format=serialization_format, ) conda_env_subpath = "conda.yaml" if conda_env is None: conda_env = get_default_conda_env( include_cloudpickle=serialization_format == SERIALIZATION_FORMAT_CLOUDPICKLE) elif not isinstance(conda_env, dict): with open(conda_env, "r") as f: conda_env = yaml.safe_load(f) with open(os.path.join(path, conda_env_subpath), "w") as f: yaml.safe_dump(conda_env, stream=f, default_flow_style=False) # `PyFuncModel` only works for sklearn models that define `predict()`. if hasattr(sk_model, "predict"): pyfunc.add_to_model( mlflow_model, loader_module="mlflow.sklearn", model_path=model_data_subpath, env=conda_env_subpath, ) mlflow_model.add_flavor( FLAVOR_NAME, pickled_model=model_data_subpath, sklearn_version=sklearn.__version__, serialization_format=serialization_format, ) mlflow_model.save(os.path.join(path, MLMODEL_FILE_NAME))
def save_model( statsmodels_model, path, conda_env=None, mlflow_model=None, remove_data: bool = False, signature: ModelSignature = None, input_example: ModelInputExample = None, ): """ Save a statsmodels model to a path on the local file system. :param statsmodels_model: statsmodels model (an instance of `statsmodels.base.model.Results`_) to be saved. :param path: Local path where the model is to be saved. :param conda_env: Either a dictionary representation of a Conda environment or the path to a Conda environment yaml file. If provided, this describes the environment this model should be run in. At minimum, it should specify the dependencies contained in :func:`get_default_conda_env()`. If ``None``, the default :func:`get_default_conda_env()` environment is added to the model. The following is an *example* dictionary representation of a Conda environment:: { 'name': 'mlflow-env', 'channels': ['defaults'], 'dependencies': [ 'python=3.7.0', 'statsmodels=0.11.1' ] } :param mlflow_model: :py:mod:`mlflow.models.Model` this flavor is being added to. :param remove_data: bool. If False (default), then the instance is pickled without changes. If True, then all arrays with length nobs are set to None before pickling. See the remove_data method. In some cases not all arrays will be set to None. :param signature: (Experimental) :py:class:`ModelSignature <mlflow.models.ModelSignature>` describes model input and output :py:class:`Schema <mlflow.types.Schema>`. The model signature can be :py:func:`inferred <mlflow.models.infer_signature>` from datasets with valid model input (e.g. the training dataset with target column omitted) and valid model output (e.g. model predictions generated on the training dataset), for example: .. code-block:: python from mlflow.models.signature import infer_signature train = df.drop_column("target_label") predictions = ... # compute model predictions signature = infer_signature(train, predictions) :param input_example: (Experimental) Input example provides one or several instances of valid model input. The example can be used as a hint of what data to feed the model. The given example will be converted to a Pandas DataFrame and then serialized to json using the Pandas split-oriented format. Bytes are base64-encoded. """ import statsmodels path = os.path.abspath(path) if os.path.exists(path): raise MlflowException("Path '{}' already exists".format(path)) model_data_path = os.path.join(path, STATSMODELS_DATA_SUBPATH) os.makedirs(path) if mlflow_model is None: mlflow_model = Model() if signature is not None: mlflow_model.signature = signature if input_example is not None: _save_example(mlflow_model, input_example, path) # Save a statsmodels model statsmodels_model.save(model_data_path, remove_data) conda_env_subpath = "conda.yaml" if conda_env is None: conda_env = get_default_conda_env() elif not isinstance(conda_env, dict): with open(conda_env, "r") as f: conda_env = yaml.safe_load(f) with open(os.path.join(path, conda_env_subpath), "w") as f: yaml.safe_dump(conda_env, stream=f, default_flow_style=False) pyfunc.add_to_model( mlflow_model, loader_module="mlflow.statsmodels", data=STATSMODELS_DATA_SUBPATH, env=conda_env_subpath, ) mlflow_model.add_flavor( FLAVOR_NAME, statsmodels_version=statsmodels.__version__, data=STATSMODELS_DATA_SUBPATH ) mlflow_model.save(os.path.join(path, MLMODEL_FILE_NAME))
def save_model( pmdarima_model, path, conda_env=None, mlflow_model=None, signature: ModelSignature = None, input_example: ModelInputExample = None, pip_requirements=None, extra_pip_requirements=None, ): """ Save a pmdarima ``ARIMA`` model or ``Pipeline`` object to a path on the local file system. :param pmdarima_model: pmdarima ``ARIMA`` or ``Pipeline`` model that has been ``fit`` on a temporal series. :param path: Local path destination for the serialized model (in pickle format) is to be saved. :param conda_env: {{ conda_env }} :param mlflow_model: :py:mod:`mlflow.models.Model` this flavor is being added to. :param signature: :py:class:`Model Signature <mlflow.models.ModelSignature>` describes model input and output :py:class:`Schema <mlflow.types.Schema>`. The model signature can be :py:func:`inferred <mlflow.models.infer_signature>` from datasets with valid model input (e.g. the training dataset with target column omitted) and valid model output (e.g. model predictions generated on the training dataset), for example: .. code-block:: py from mlflow.models.signature import infer_signature model = pmdarima.auto_arima(data) predictions = model.predict(n_periods=30, return_conf_int=False) signature = infer_signature(data, predictions) .. Warning:: if utilizing confidence interval generation in the ``predict`` method of a ``pmdarima`` model (``return_conf_int=True``), the signature will not be inferred due to the complex tuple return type when using the native ``ARIMA.predict()`` API. ``infer_schema`` will function correctly if using the ``pyfunc`` flavor of the model, though. :param input_example: Input example provides one or several instances of valid model input. The example can be used as a hint of what data to feed the model. The given example will be converted to a ``Pandas DataFrame`` and then serialized to json using the ``Pandas`` split-oriented format. Bytes are base64-encoded. :param pip_requirements: {{ pip_requirements }} :param extra_pip_requirements: {{ extra_pip_requirements }} """ import pmdarima _validate_env_arguments(conda_env, pip_requirements, extra_pip_requirements) path = os.path.abspath(path) if os.path.exists(path): raise MlflowException(f"Path '{path}' already exists") os.makedirs(path) if mlflow_model is None: mlflow_model = Model() if signature is not None: mlflow_model.signature = signature if input_example is not None: _save_example(mlflow_model, input_example, path) model_data_path = os.path.join(path, _MODEL_BINARY_FILE_NAME) _save_model(pmdarima_model, model_data_path) model_bin_kwargs = {_MODEL_BINARY_KEY: _MODEL_BINARY_FILE_NAME} pyfunc.add_to_model(mlflow_model, loader_module="mlflow.pmdarima", env=_CONDA_ENV_FILE_NAME, **model_bin_kwargs) flavor_conf = { _MODEL_TYPE_KEY: pmdarima_model.__class__.__name__, **model_bin_kwargs, } mlflow_model.add_flavor(FLAVOR_NAME, pmdarima_version=pmdarima.__version__, **flavor_conf) mlflow_model.save(os.path.join(path, MLMODEL_FILE_NAME)) if conda_env is None: if pip_requirements is None: default_reqs = get_default_pip_requirements() inferred_reqs = mlflow.models.infer_pip_requirements( path, FLAVOR_NAME, fallback=default_reqs) default_reqs = sorted(set(inferred_reqs).union(default_reqs)) else: default_reqs = None conda_env, pip_requirements, pip_constraints = _process_pip_requirements( default_reqs, pip_requirements, extra_pip_requirements) else: conda_env, pip_requirements, pip_constraints = _process_conda_env( conda_env) with open(os.path.join(path, _CONDA_ENV_FILE_NAME), "w") as f: yaml.safe_dump(conda_env, stream=f, default_flow_style=False) if pip_constraints: write_to(os.path.join(path, _CONSTRAINTS_FILE_NAME), "\n".join(pip_constraints)) write_to(os.path.join(path, _REQUIREMENTS_FILE_NAME), "\n".join(pip_requirements))
def save_model(fastai_learner, path, conda_env=None, mlflow_model=None, signature: ModelSignature = None, input_example: ModelInputExample = None, **kwargs): """ Save a fastai Learner to a path on the local file system. :param fastai_learner: fastai Learner to be saved. :param path: Local path where the model is to be saved. :param conda_env: Either a dictionary representation of a Conda environment or the path to a Conda environment yaml file. If provided, this describes the environment this model should be run in. At minimum, it should specify the dependencies contained in :func:`get_default_conda_env()`. If ``None``, the default :func:`get_default_conda_env()` environment is added to the model. The following is an *example* dictionary representation of a Conda environment:: { 'name': 'mlflow-env', 'channels': ['defaults'], 'dependencies': [ 'python=3.7.0', 'fastai=1.0.60', ] } :param mlflow_model: MLflow model config this flavor is being added to. :param signature: (Experimental) :py:class:`ModelSignature <mlflow.models.ModelSignature>` describes model input and output :py:class:`Schema <mlflow.types.Schema>`. The model signature can be :py:func:`inferred <mlflow.models.infer_signature>` from datasets with valid model input (e.g. the training dataset with target column omitted) and valid model output (e.g. model predictions generated on the training dataset), for example: .. code-block:: python from mlflow.models.signature import infer_signature train = df.drop_column("target_label") predictions = ... # compute model predictions signature = infer_signature(train, predictions) :param input_example: (Experimental) Input example provides one or several instances of valid model input. The example can be used as a hint of what data to feed the model. The given example will be converted to a Pandas DataFrame and then serialized to json using the Pandas split-oriented format. Bytes are base64-encoded. :param kwargs: kwargs to pass to ``Learner.save`` method. .. code-block:: python :caption: Example import os import mlflow.fastai # Create a fastai Learner model model = ... # Start MLflow session and save model to current working directory with mlflow.start_run(): model.fit(epochs, learning_rate) mlflow.fastai.save_model(model, 'model') # Load saved model for inference model_uri = "{}/{}".format(os.getcwd(), 'model') loaded_model = mlflow.fastai.load_model(model_uri) results = loaded_model.predict(predict_data) """ import fastai from pathlib import Path path = os.path.abspath(path) if os.path.exists(path): raise MlflowException("Path '{}' already exists".format(path)) model_data_subpath = "model.fastai" model_data_path = os.path.join(path, model_data_subpath) model_data_path = Path(model_data_path) os.makedirs(path) if mlflow_model is None: mlflow_model = Model() if signature is not None: mlflow_model.signature = signature if input_example is not None: _save_example(mlflow_model, input_example, path) # Save an Learner fastai_learner.export(model_data_path, **kwargs) conda_env_subpath = "conda.yaml" if conda_env is None: conda_env = get_default_conda_env() elif not isinstance(conda_env, dict): with open(conda_env, "r") as f: conda_env = yaml.safe_load(f) with open(os.path.join(path, conda_env_subpath), "w") as f: yaml.safe_dump(conda_env, stream=f, default_flow_style=False) pyfunc.add_to_model(mlflow_model, loader_module="mlflow.fastai", data=model_data_subpath, env=conda_env_subpath) mlflow_model.add_flavor(FLAVOR_NAME, fastai_version=fastai.__version__, data=model_data_subpath) mlflow_model.save(os.path.join(path, MLMODEL_FILE_NAME))
def save_model( xgb_model, path, conda_env=None, mlflow_model=None, signature: ModelSignature = None, input_example: ModelInputExample = None, pip_requirements=None, extra_pip_requirements=None, ): """ Save an XGBoost model to a path on the local file system. :param xgb_model: XGBoost model (an instance of `xgboost.Booster`_) to be saved. Note that models that implement the `scikit-learn API`_ are not supported. :param path: Local path where the model is to be saved. :param conda_env: {{ conda_env }} :param mlflow_model: :py:mod:`mlflow.models.Model` this flavor is being added to. :param signature: :py:class:`ModelSignature <mlflow.models.ModelSignature>` describes model input and output :py:class:`Schema <mlflow.types.Schema>`. The model signature can be :py:func:`inferred <mlflow.models.infer_signature>` from datasets with valid model input (e.g. the training dataset with target column omitted) and valid model output (e.g. model predictions generated on the training dataset), for example: .. code-block:: python from mlflow.models.signature import infer_signature train = df.drop_column("target_label") predictions = ... # compute model predictions signature = infer_signature(train, predictions) :param input_example: Input example provides one or several instances of valid model input. The example can be used as a hint of what data to feed the model. The given example will be converted to a Pandas DataFrame and then serialized to json using the Pandas split-oriented format. Bytes are base64-encoded. :param pip_requirements: {{ pip_requirements }} :param extra_pip_requirements: {{ extra_pip_requirements }} """ import xgboost as xgb _validate_env_arguments(conda_env, pip_requirements, extra_pip_requirements) path = os.path.abspath(path) if os.path.exists(path): raise MlflowException("Path '{}' already exists".format(path)) os.makedirs(path) if mlflow_model is None: mlflow_model = Model() if signature is not None: mlflow_model.signature = signature if input_example is not None: _save_example(mlflow_model, input_example, path) model_data_subpath = "model.xgb" model_data_path = os.path.join(path, model_data_subpath) # Save an XGBoost model xgb_model.save_model(model_data_path) pyfunc.add_to_model( mlflow_model, loader_module="mlflow.xgboost", data=model_data_subpath, env=_CONDA_ENV_FILE_NAME, ) mlflow_model.add_flavor(FLAVOR_NAME, xgb_version=xgb.__version__, data=model_data_subpath) mlflow_model.save(os.path.join(path, MLMODEL_FILE_NAME)) if conda_env is None: if pip_requirements is None: default_reqs = get_default_pip_requirements() # To ensure `_load_pyfunc` can successfully load the model during the dependency # inference, `mlflow_model.save` must be called beforehand to save an MLmodel file. inferred_reqs = mlflow.models.infer_pip_requirements( path, FLAVOR_NAME, fallback=default_reqs, ) default_reqs = sorted(set(inferred_reqs).union(default_reqs)) else: default_reqs = None conda_env, pip_requirements, pip_constraints = _process_pip_requirements( default_reqs, pip_requirements, extra_pip_requirements, ) else: conda_env, pip_requirements, pip_constraints = _process_conda_env( conda_env) with open(os.path.join(path, _CONDA_ENV_FILE_NAME), "w") as f: yaml.safe_dump(conda_env, stream=f, default_flow_style=False) # Save `constraints.txt` if necessary if pip_constraints: write_to(os.path.join(path, _CONSTRAINTS_FILE_NAME), "\n".join(pip_constraints)) # Save `requirements.txt` write_to(os.path.join(path, _REQUIREMENTS_FILE_NAME), "\n".join(pip_requirements))
def save_model( statsmodels_model, path, conda_env=None, code_paths=None, mlflow_model=None, remove_data: bool = False, signature: ModelSignature = None, input_example: ModelInputExample = None, pip_requirements=None, extra_pip_requirements=None, ): """ Save a statsmodels model to a path on the local file system. :param statsmodels_model: statsmodels model (an instance of `statsmodels.base.model.Results`_) to be saved. :param path: Local path where the model is to be saved. :param conda_env: {{ conda_env }} :param code_paths: A list of local filesystem paths to Python file dependencies (or directories containing file dependencies). These files are *prepended* to the system path when the model is loaded. :param mlflow_model: :py:mod:`mlflow.models.Model` this flavor is being added to. :param remove_data: bool. If False (default), then the instance is pickled without changes. If True, then all arrays with length nobs are set to None before pickling. See the remove_data method. In some cases not all arrays will be set to None. :param signature: :py:class:`ModelSignature <mlflow.models.ModelSignature>` describes model input and output :py:class:`Schema <mlflow.types.Schema>`. The model signature can be :py:func:`inferred <mlflow.models.infer_signature>` from datasets with valid model input (e.g. the training dataset with target column omitted) and valid model output (e.g. model predictions generated on the training dataset), for example: .. code-block:: python from mlflow.models.signature import infer_signature train = df.drop_column("target_label") predictions = ... # compute model predictions signature = infer_signature(train, predictions) :param input_example: Input example provides one or several instances of valid model input. The example can be used as a hint of what data to feed the model. The given example will be converted to a Pandas DataFrame and then serialized to json using the Pandas split-oriented format. Bytes are base64-encoded. :param pip_requirements: {{ pip_requirements }} :param extra_pip_requirements: {{ extra_pip_requirements }} """ import statsmodels _validate_env_arguments(conda_env, pip_requirements, extra_pip_requirements) path = os.path.abspath(path) _validate_and_prepare_target_save_path(path) model_data_path = os.path.join(path, STATSMODELS_DATA_SUBPATH) code_dir_subpath = _validate_and_copy_code_paths(code_paths, path) if mlflow_model is None: mlflow_model = Model() if signature is not None: mlflow_model.signature = signature if input_example is not None: _save_example(mlflow_model, input_example, path) # Save a statsmodels model statsmodels_model.save(model_data_path, remove_data) if _save_model_called_from_autolog and not remove_data: saved_model_size = os.path.getsize(model_data_path) if saved_model_size >= _model_size_threshold_for_emitting_warning: _logger.warning( "The fitted model is larger than " f"{_model_size_threshold_for_emitting_warning // (1024 * 1024)} MB, " f"saving it as artifacts is time consuming.\n" "To reduce model size, use `mlflow.statsmodels.autolog(log_models=False)` and " "manually log model by " '`mlflow.statsmodels.log_model(model, remove_data=True, artifact_path="model")`' ) pyfunc.add_to_model( mlflow_model, loader_module="mlflow.statsmodels", data=STATSMODELS_DATA_SUBPATH, env=_CONDA_ENV_FILE_NAME, code=code_dir_subpath, ) mlflow_model.add_flavor( FLAVOR_NAME, statsmodels_version=statsmodels.__version__, data=STATSMODELS_DATA_SUBPATH, code=code_dir_subpath, ) mlflow_model.save(os.path.join(path, MLMODEL_FILE_NAME)) if conda_env is None: if pip_requirements is None: default_reqs = get_default_pip_requirements() # To ensure `_load_pyfunc` can successfully load the model during the dependency # inference, `mlflow_model.save` must be called beforehand to save an MLmodel file. inferred_reqs = mlflow.models.infer_pip_requirements( path, FLAVOR_NAME, fallback=default_reqs, ) default_reqs = sorted(set(inferred_reqs).union(default_reqs)) else: default_reqs = None conda_env, pip_requirements, pip_constraints = _process_pip_requirements( default_reqs, pip_requirements, extra_pip_requirements, ) else: conda_env, pip_requirements, pip_constraints = _process_conda_env( conda_env) with open(os.path.join(path, _CONDA_ENV_FILE_NAME), "w") as f: yaml.safe_dump(conda_env, stream=f, default_flow_style=False) # Save `constraints.txt` if necessary if pip_constraints: write_to(os.path.join(path, _CONSTRAINTS_FILE_NAME), "\n".join(pip_constraints)) # Save `requirements.txt` write_to(os.path.join(path, _REQUIREMENTS_FILE_NAME), "\n".join(pip_requirements))