def add_clearbox_flavor_to_model(
    model: Model,
    loader_module: str,
    data: str = None,
    code=None,
    env: str = None,
    preprocessing: str = None,
    data_preparation: str = None,
    **kwargs,
) -> Model:
    """Register the Clearbox flavor on an existing model configuration.

    The flavor entry always records the loader module and the Python
    version. Each optional path is recorded only when a truthy value is
    supplied, so absent components leave no key in the flavor.

    Parameters
    ----------
    model : Model
        Model configuration that receives the flavor.
    loader_module : str
        Name of the module used to load the model
        (e.g. ``clearbox_wrapper.sklearn``).
    data : str, optional
        Path to the model data, by default None.
    code : str, optional
        Path to the code dependencies, by default None.
    env : str, optional
        Path to the Conda environment file, by default None.
    preprocessing : str, optional
        Path to the serialized preprocessing step, by default None.
    data_preparation : str, optional
        Path to the serialized data-preparation step, by default None.
    **kwargs
        Additional flavor entries. They are deep-copied before being
        extended, so the caller's objects are never mutated.

    Returns
    -------
    Model
        Whatever ``model.add_flavor`` returns for the new flavor.
    """
    flavor_settings = deepcopy(kwargs)
    flavor_settings[MAIN] = loader_module
    flavor_settings[PY_VERSION] = PYTHON_VERSION

    # Insertion order mirrors the order in which the keys are written out.
    optional_entries = (
        (CODE, code),
        (DATA, data),
        (ENV, env),
        (PREPROCESSING, preprocessing),
        (DATA_PREPARATION, data_preparation),
    )
    for key, value in optional_entries:
        if value:
            flavor_settings[key] = value

    return model.add_flavor(FLAVOR_NAME, **flavor_settings)
def load_model(model_path: str, suppress_warnings: bool = False) -> PyFuncModel:
    """Load a model directory through the flavor named by ``FLAVOR_NAME``.

    Parameters
    ----------
    model_path : str
        Filepath of the model directory.
    suppress_warnings : bool, optional
        If True, the Python-version compatibility check (and any warning
        it would emit) is skipped, by default False.

    Returns
    -------
    PyFuncModel
        Wrapper around the model metadata and the loaded implementation.

    Raises
    ------
    ClearboxWrapperException
        If the model configuration does not contain the required flavor.
    """
    model_meta = Model.load(os.path.join(model_path, MLMODEL_FILE_NAME))
    flavor_conf = model_meta.flavors.get(FLAVOR_NAME)
    if flavor_conf is None:
        raise ClearboxWrapperException(
            'Model does not have the "{flavor_name}" flavor'.format(
                flavor_name=FLAVOR_NAME
            )
        )

    saved_py_version = flavor_conf.get(PY_VERSION)
    if not suppress_warnings:
        _warn_potentially_incompatible_py_version_if_necessary(
            model_py_version=saved_py_version
        )

    # Bundled code must be put on the system path before the loader
    # module is imported.
    code_subpath = flavor_conf.get(CODE)
    if code_subpath:
        _add_code_to_system_path(code_path=os.path.join(model_path, code_subpath))

    # Without an explicit data entry the loader receives the model directory.
    if DATA in flavor_conf:
        data_path = os.path.join(model_path, flavor_conf[DATA])
    else:
        data_path = model_path

    loader = importlib.import_module(flavor_conf[MAIN])
    model_impl = loader._load_pyfunc(data_path)
    return PyFuncModel(model_meta=model_meta, model_impl=model_impl)
def _get_flavor_configuration(model_path: str, flavor_name: str) -> Dict:
    """Return the configuration stored for one flavor of a saved model.

    Parameters
    ----------
    model_path : str
        Path to the model directory.
    flavor_name : str
        Name of the flavor whose configuration is requested.

    Returns
    -------
    Dict
        The flavor configuration as stored in the model file.

    Raises
    ------
    ClearboxWrapperException
        If the model configuration file is missing from ``model_path``, or
        the model does not declare ``flavor_name``.
    """
    config_file = os.path.join(model_path, MLMODEL_FILE_NAME)
    if not os.path.exists(config_file):
        raise ClearboxWrapperException(
            'Could not find an "{}" configuration file at "{}"'.format(
                MLMODEL_FILE_NAME, model_path
            )
        )

    available_flavors = Model.load(config_file).flavors
    if flavor_name not in available_flavors:
        raise ClearboxWrapperException(
            'Model does not have the "{}" flavor'.format(flavor_name)
        )

    return available_flavors[flavor_name]
# Database engine identifiers; DATABASE_ENGINES lists all of them.
POSTGRES = "postgresql"
MYSQL = "mysql"
SQLITE = "sqlite"
MSSQL = "mssql"
DATABASE_ENGINES = [POSTGRES, MYSQL, SQLITE, MSSQL]

# Message fragments for database URI errors. They are not used by any code
# visible in this section — presumably consumed elsewhere; verify.
_INVALID_DB_URI_MSG = (
    "Please refer to https://mlflow.org/docs/latest/tracking.html#storage for "
    "format specifications.")

# Rendered once at import time with the engine names joined by ", ".
_UNSUPPORTED_DB_TYPE_MSG = "Supported database engines are {%s}" % ", ".join(
    DATABASE_ENGINES)

# Module-level Model instance created at import time.
# NOTE(review): not referenced by any code visible here — confirm it is needed.
new_model = Model()


def get_default_conda_env():
    """
    :return: The default Conda environment for MLflow Models produced by calls to
             :func:`save_model() <mlflow.pyfunc.save_model>`
             and :func:`log_model() <mlflow.pyfunc.log_model>` when a user-defined subclass of
             :class:`PythonModel` is provided.
    """
    # The only extra dependency requested is cloudpickle, pinned to the
    # version installed in the current environment.
    # NOTE(review): the call below has no closing parenthesis in the visible
    # source — the definition appears truncated; confirm against the full file.
    return _get_default_conda_env(
        additional_conda_deps=None,
        additional_pip_deps=[
            "cloudpickle=={}".format(cloudpickle.__version__)
        ],
        additional_conda_channels=None,
def load_model(model_path: str, suppress_warnings: bool = False) -> WrapperModel:
    """Load a saved model, with its optional preprocessing steps, via the Clearbox flavor.

    Parameters
    ----------
    model_path : str
        Filepath of the model directory.
    suppress_warnings : bool, optional
        If True, the Python-version compatibility check (and any warning
        it would emit) is skipped, by default False.

    Returns
    -------
    WrapperModel
        Wrapper holding the model metadata, the loaded implementation and,
        when the flavor declares them, the deserialized preprocessing and
        data-preparation steps (``None`` otherwise).

    Raises
    ------
    ClearboxWrapperException
        If the model configuration does not contain the required flavor.
    """
    model_meta = Model.load(os.path.join(model_path, MLMODEL_FILE_NAME))
    flavor_conf = model_meta.flavors.get(FLAVOR_NAME)
    if flavor_conf is None:
        raise ClearboxWrapperException(
            'Model does not have the "{flavor_name}" flavor'.format(
                flavor_name=FLAVOR_NAME
            )
        )

    saved_py_version = flavor_conf.get(PY_VERSION)
    if not suppress_warnings:
        _warn_potentially_incompatible_py_version_if_necessary(
            model_py_version=saved_py_version
        )

    # Without an explicit data entry the loader receives the model directory.
    if DATA in flavor_conf:
        data_path = os.path.join(model_path, flavor_conf[DATA])
    else:
        data_path = model_path

    loader = importlib.import_module(flavor_conf[MAIN])
    model_impl = loader._load_clearbox(data_path)

    # Both pipeline steps are optional; a missing key leaves the step unset.
    preprocessing = None
    if PREPROCESSING in flavor_conf:
        preprocessing = load_serialized_preprocessing(
            os.path.join(model_path, flavor_conf[PREPROCESSING])
        )

    data_preparation = None
    if DATA_PREPARATION in flavor_conf:
        data_preparation = load_serialized_data_preparation(
            os.path.join(model_path, flavor_conf[DATA_PREPARATION])
        )

    return WrapperModel(
        model_meta=model_meta,
        model_impl=model_impl,
        preprocessing=preprocessing,
        data_preparation=data_preparation,
    )
def save_model(
    path: str,
    model: Any,
    input_data: Optional[WrapperInput] = None,
    preprocessing: Optional[Callable] = None,
    data_preparation: Optional[Callable] = None,
    additional_deps: Optional[List] = None,
    zip: bool = True,
) -> None:
    """Save a model, with optional preprocessing steps, in Clearbox format.

    The concrete saver is chosen from the names of the model's super
    classes (sklearn, xgboost, keras or torch, checked in that order).
    When ``input_data`` is given, signatures are inferred for every stage
    of the pipeline and stored on the model configuration.

    Parameters
    ----------
    path : str
        Directory the model is saved to. When ``zip`` is True the
        existence check is made against ``path + ".zip"`` instead.
    model : Any
        The model to save.
    input_data : Optional[WrapperInput], optional
        Sample input used to infer signatures, by default None. When both
        ``data_preparation`` and ``preprocessing`` are given, DataFrames
        and arrays longer than 50 rows are truncated to the first 50.
    preprocessing : Optional[Callable], optional
        Preprocessing step, by default None.
    data_preparation : Optional[Callable], optional
        Data-preparation step run before ``preprocessing``, by default
        None. It may only be given together with ``preprocessing``.
    additional_deps : Optional[List], optional
        Extra dependencies forwarded to the Conda environment builder,
        by default None.
    zip : bool, optional
        If True, the saved directory is passed to ``zip_directory``,
        by default True.

    Raises
    ------
    ClearboxWrapperException
        If the target path already exists, or if the model does not
        belong to any supported framework.
    ValueError
        If ``data_preparation`` is given without ``preprocessing``.
    """
    path_check = path + ".zip" if zip else path
    if os.path.exists(path_check):
        raise ClearboxWrapperException(
            "Model path '{}' already exists".format(path))

    mlmodel = Model()
    saved_preprocessing_subpath = None
    saved_data_preparation_subpath = None

    if data_preparation is not None and preprocessing is None:
        raise ValueError(
            "Attribute 'preprocessing' is None but attribute "
            "'data_preparation' is not None. If you have a single step "
            "preprocessing, pass it as attribute 'preprocessing'")

    if data_preparation and preprocessing:
        preparation = DataPreparation(data_preparation)
        data_preprocessing = Preprocessing(preprocessing)
        saved_data_preparation_subpath = "data_preparation.pkl"
        saved_preprocessing_subpath = "preprocessing.pkl"
        if input_data is not None:
            # A small sample is enough to infer the signatures.
            if isinstance(input_data, pd.DataFrame) and input_data.shape[0] > 50:
                input_data = input_data.head(50)
            elif isinstance(input_data, np.ndarray) and input_data.shape[0] > 50:
                # Slice the first axis only so 1-D arrays are supported too.
                input_data = input_data[:50]

            data_preparation_output = preparation.prepare_data(input_data)
            preprocessing_output = data_preprocessing.preprocess(
                data_preparation_output)

            mlmodel.preparation_signature = infer_signature(
                input_data, data_preparation_output)
            mlmodel.preprocessing_signature = infer_signature(
                data_preparation_output, preprocessing_output)
            mlmodel.model_signature = infer_signature(preprocessing_output)
    elif preprocessing:
        data_preprocessing = Preprocessing(preprocessing)
        saved_preprocessing_subpath = "preprocessing.pkl"
        if input_data is not None:
            preprocessing_output = data_preprocessing.preprocess(input_data)
            mlmodel.preprocessing_signature = infer_signature(
                input_data, preprocessing_output)
            mlmodel.model_signature = infer_signature(preprocessing_output)
    elif input_data is not None:
        mlmodel.model_signature = infer_signature(input_data)

    conda_env = _check_and_get_conda_env(model, additional_deps)
    model_super_classes = get_super_classes_names(model)

    def _comes_from(library: str) -> bool:
        """Tell whether any super class name mentions ``library``."""
        return any(library in super_class for super_class in model_super_classes)

    # Pick the framework-specific saver; the order of the checks matters
    # because the first match wins.
    if _comes_from("sklearn"):
        save_framework_model = save_sklearn_model
    elif _comes_from("xgboost"):
        save_framework_model = save_xgboost_model
    elif _comes_from("keras"):
        save_framework_model = save_keras_model
    elif _comes_from("torch"):
        save_framework_model = save_pytorch_model
    else:
        # Previously an unsupported model fell through silently: nothing was
        # written and the steps below failed with an unrelated error (or the
        # call returned as if it had succeeded).
        raise ClearboxWrapperException(
            "Unsupported model type '{}'. Supported frameworks are: "
            "sklearn, xgboost, keras, torch".format(type(model).__name__))

    save_framework_model(
        model,
        path,
        conda_env=conda_env,
        mlmodel=mlmodel,
        add_clearbox_flavor=True,
        preprocessing_subpath=saved_preprocessing_subpath,
        data_preparation_subpath=saved_data_preparation_subpath,
    )

    # The pipeline steps are written next to the model, inside the directory
    # the framework saver has just created.
    if preprocessing:
        data_preprocessing.save(os.path.join(path, saved_preprocessing_subpath))
    if data_preparation:
        preparation.save(os.path.join(path, saved_data_preparation_subpath))
    if zip:
        zip_directory(path)
def save_pytorch_model(
    pytorch_model: Any,
    path: str,
    conda_env: Optional[Union[str, Dict]] = None,
    mlmodel: Optional[Model] = None,
    signature: Optional[Signature] = None,
    add_clearbox_flavor: bool = False,
    preprocessing_subpath: str = None,
    data_preparation_subpath: str = None,
    code_paths=None,
    pickle_module=None,
    requirements_file=None,
    extra_files=None,
    **kwargs
):
    """Save a PyTorch model to a new directory on the local filesystem.

    Parameters
    ----------
    pytorch_model : Any
        The model to save; must be a ``torch.nn.Module``.
    path : str
        Directory to create and save the model into. It must not exist.
    conda_env : Optional[Union[str, Dict]], optional
        A Conda environment as a dictionary, or the path to a Conda
        environment YAML file, by default None. If None, the default
        PyTorch environment is used.
    mlmodel : Optional[Model], optional
        Model configuration the flavors are added to, by default None
        (a new one is created).
    signature : Optional[Signature], optional
        Signature stored on the model configuration, by default None.
    add_clearbox_flavor : bool, optional
        Whether to add the Clearbox flavor as well, by default False.
    preprocessing_subpath : str, optional
        Subpath of the serialized preprocessing, recorded in the Clearbox
        flavor, by default None.
    data_preparation_subpath : str, optional
        Subpath of the serialized data preparation, recorded in the
        Clearbox flavor, by default None.
    code_paths : list, optional
        Files or directories copied into the model's ``code`` folder,
        by default None.
    pickle_module : module, optional
        Module used to pickle the model, by default None (the Clearbox
        PyTorch pickle module is used).
    requirements_file : str, optional
        Local path of a requirements file copied into the model
        directory, by default None.
    extra_files : list, optional
        Local paths of extra files copied into the extra-files folder of
        the model directory, by default None.
    **kwargs
        Extra keyword arguments forwarded to ``torch.save``.

    Raises
    ------
    TypeError
        If ``pytorch_model`` is not a ``torch.nn.Module``, or if
        ``code_paths``, ``requirements_file`` or ``extra_files`` has the
        wrong type.
    ClearboxWrapperException
        If ``path`` already exists.
    """
    import torch

    pickle_module = pickle_module or clearbox_pytorch_pickle_module

    if not isinstance(pytorch_model, torch.nn.Module):
        raise TypeError("Argument 'pytorch_model' should be a torch.nn.Module")
    if code_paths is not None and not isinstance(code_paths, list):
        raise TypeError(
            "Argument code_paths should be a list, not {}".format(type(code_paths))
        )
    if os.path.exists(path):
        raise ClearboxWrapperException("Model path '{}' already exists".format(path))

    # Validate the optional artifacts before touching the filesystem, so a
    # bad argument does not leave a half-written model directory behind.
    if requirements_file and not isinstance(requirements_file, str):
        raise TypeError("Path to requirements file should be a string")
    if extra_files and not isinstance(extra_files, list):
        raise TypeError("Extra files argument should be a list")

    os.makedirs(path)
    if mlmodel is None:
        mlmodel = Model()
    if signature is not None:
        mlmodel.signature = signature

    model_data_subpath = "data"
    model_data_path = os.path.join(path, model_data_subpath)
    os.makedirs(model_data_path)

    # Persist the pickle module name as a file in the model's `data`
    # directory. This is necessary because the `data` directory is the only
    # parameter available to the loader, and it does not contain the MLmodel
    # configuration; placing the module name in the MLmodel file alone is
    # therefore not sufficient.
    #
    # TODO: Stop persisting this information to the filesystem once the
    # MLmodel configuration can be supplied to the loader.
    pickle_module_path = os.path.join(model_data_path, _PICKLE_MODULE_INFO_FILE_NAME)
    with open(pickle_module_path, "w") as f:
        f.write(pickle_module.__name__)

    # Save the model itself; scripted modules use their own serializer.
    model_path = os.path.join(model_data_path, _SERIALIZED_TORCH_MODEL_FILE_NAME)
    if isinstance(pytorch_model, torch.jit.ScriptModule):
        torch.jit.ScriptModule.save(pytorch_model, model_path)
    else:
        torch.save(pytorch_model, model_path, pickle_module=pickle_module, **kwargs)

    torchserve_artifacts_config = {}

    # The artifacts are copied straight from their local paths. The previous
    # code moved them out of a freshly created, never populated temporary
    # directory, so the requirements file was never found and the extra
    # files were never copied.
    if requirements_file:
        rel_path = os.path.basename(requirements_file)
        shutil.copy(requirements_file, os.path.join(path, rel_path))
        torchserve_artifacts_config[_REQUIREMENTS_FILE_KEY] = {"path": rel_path}

    if extra_files:
        extra_files_dir = os.path.join(path, _EXTRA_FILES_KEY)
        os.makedirs(extra_files_dir)
        torchserve_artifacts_config[_EXTRA_FILES_KEY] = []
        for extra_file in extra_files:
            file_name = os.path.basename(extra_file)
            shutil.copy(extra_file, os.path.join(extra_files_dir, file_name))
            # Recorded paths are POSIX-style and relative to the model root.
            torchserve_artifacts_config[_EXTRA_FILES_KEY].append(
                {"path": posixpath.join(_EXTRA_FILES_KEY, file_name)}
            )

    conda_env_subpath = "conda.yaml"
    if conda_env is None:
        conda_env = get_default_pytorch_conda_env()
    elif not isinstance(conda_env, dict):
        with open(conda_env, "r") as f:
            conda_env = yaml.safe_load(f)
    with open(os.path.join(path, conda_env_subpath), "w") as f:
        yaml.safe_dump(conda_env, stream=f, default_flow_style=False)

    if code_paths is not None:
        code_dir_subpath = "code"
        for code_path in code_paths:
            _copy_file_or_tree(src=code_path, dst=path, dst_dir=code_dir_subpath)
    else:
        code_dir_subpath = None

    mlmodel.add_flavor(
        FLAVOR_NAME,
        model_data=model_data_subpath,
        pytorch_version=torch.__version__,
        **torchserve_artifacts_config,
    )
    pyfunc.add_pyfunc_flavor_to_model(
        mlmodel,
        loader_module="clearbox_wrapper.pytorch",
        data=model_data_subpath,
        pickle_module_name=pickle_module.__name__,
        code=code_dir_subpath,
        env=conda_env_subpath,
    )
    if add_clearbox_flavor:
        add_clearbox_flavor_to_model(
            mlmodel,
            loader_module="clearbox_wrapper.pytorch",
            data=model_data_subpath,
            pickle_module_name=pickle_module.__name__,
            code=code_dir_subpath,
            env=conda_env_subpath,
            preprocessing=preprocessing_subpath,
            data_preparation=data_preparation_subpath,
        )

    mlmodel.save(os.path.join(path, MLMODEL_FILE_NAME))
def save_sklearn_model(
    sk_model: Any,
    path: str,
    conda_env: Optional[Union[str, Dict]] = None,
    mlmodel: Optional[Model] = None,
    serialization_format: str = SERIALIZATION_FORMAT_CLOUDPICKLE,
    signature: Optional[Signature] = None,
    add_clearbox_flavor: bool = False,
    preprocessing_subpath: str = None,
    data_preparation_subpath: str = None,
):
    """Save a Scikit-Learn model to a new directory.

    The directory receives the serialized model (``model.pkl``), the Conda
    environment (``conda.yaml``) and the model configuration file. The
    scikit-learn flavor is always added. The pyfunc flavor is added only
    for models that define ``predict()``. The Clearbox flavor is added
    when ``add_clearbox_flavor`` is set.

    Parameters
    ----------
    sk_model : Any
        The Scikit-Learn model to save.
    path : str
        Local directory to create and save the model into.
    conda_env : Optional[Union[str, Dict]], optional
        A Conda environment as a dictionary, or the path to a Conda
        environment YAML file, by default None. If None, the default
        scikit-learn environment is used. Example dictionary:
        {
            'name': 'conda-env',
            'channels': ['defaults'],
            'dependencies': [
                'python=3.7.0',
                'scikit-learn=0.19.2'
            ]
        }
    mlmodel : Optional[Model], optional
        Model configuration the flavors are added to, by default None
        (a new one is created).
    serialization_format : str, optional
        One of SUPPORTED_SERIALIZATION_FORMATS, by default
        SERIALIZATION_FORMAT_CLOUDPICKLE. With the cloudpickle format the
        default Conda environment also includes cloudpickle.
    signature : Optional[Signature], optional
        Signature stored on the model configuration, by default None.
    add_clearbox_flavor : bool, optional
        Whether to add the Clearbox flavor as well, by default False.
    preprocessing_subpath : str, optional
        Subpath of the serialized preprocessing, recorded in the Clearbox
        flavor, by default None.
    data_preparation_subpath : str, optional
        Subpath of the serialized data preparation, recorded in the
        Clearbox flavor, by default None.

    Raises
    ------
    ClearboxWrapperException
        If the serialization format is not recognized or ``path`` already
        exists.
    """
    import sklearn

    if serialization_format not in SUPPORTED_SERIALIZATION_FORMATS:
        raise ClearboxWrapperException(
            "Unrecognized serialization format: {serialization_format}. Please specify one"
            " of the following supported formats: {supported_formats}.".format(
                serialization_format=serialization_format,
                supported_formats=SUPPORTED_SERIALIZATION_FORMATS,
            )
        )
    if os.path.exists(path):
        raise ClearboxWrapperException(
            "Model path '{}' already exists".format(path)
        )

    os.makedirs(path)
    if mlmodel is None:
        mlmodel = Model()
    if signature is not None:
        mlmodel.signature = signature

    # Serialize the estimator.
    model_data_subpath = "model.pkl"
    _serialize_and_save_model(
        sk_model=sk_model,
        output_path=os.path.join(path, model_data_subpath),
        serialization_format=serialization_format,
    )

    # Resolve the Conda environment to a dictionary and write it out.
    conda_env_subpath = "conda.yaml"
    if conda_env is None:
        uses_cloudpickle = serialization_format == SERIALIZATION_FORMAT_CLOUDPICKLE
        conda_env = get_default_sklearn_conda_env(
            include_cloudpickle=uses_cloudpickle
        )
    elif not isinstance(conda_env, dict):
        with open(conda_env, "r") as env_file:
            conda_env = yaml.safe_load(env_file)
    with open(os.path.join(path, conda_env_subpath), "w") as env_file:
        yaml.safe_dump(conda_env, stream=env_file, default_flow_style=False)

    # Settings shared by the pyfunc and Clearbox flavors.
    loader_settings = {
        "loader_module": "clearbox_wrapper.sklearn",
        "model_path": model_data_subpath,
        "env": conda_env_subpath,
    }

    # `PyFuncModel` only works for sklearn models that define `predict()`.
    if hasattr(sk_model, "predict"):
        pyfunc.add_pyfunc_flavor_to_model(mlmodel, **loader_settings)

    if add_clearbox_flavor:
        add_clearbox_flavor_to_model(
            mlmodel,
            preprocessing=preprocessing_subpath,
            data_preparation=data_preparation_subpath,
            **loader_settings,
        )

    mlmodel.add_flavor(
        FLAVOR_NAME,
        model_path=model_data_subpath,
        sklearn_version=sklearn.__version__,
        serialization_format=serialization_format,
    )
    mlmodel.save(os.path.join(path, MLMODEL_FILE_NAME))
def save_keras_model(keras_model: Any,
                     path: str,
                     conda_env: Optional[Union[str, Dict]] = None,
                     mlmodel: Optional[Model] = None,
                     signature: Optional[Signature] = None,
                     add_clearbox_flavor: bool = False,
                     preprocessing_subpath: str = None,
                     data_preparation_subpath: str = None,
                     keras_module: str = None,
                     custom_objects=None,
                     **kwargs):
    """Save a Keras model to a new directory on the local filesystem.

    Parameters
    ----------
    keras_model : Any
        The Keras model to save.
    path : str
        Directory to create and save the model into. It must not exist.
    conda_env : Optional[Union[str, Dict]], optional
        A Conda environment as a dictionary, or the path to a Conda
        environment YAML file, by default None. If None, the default
        Keras environment is used.
    mlmodel : Optional[Model], optional
        Model configuration the flavors are added to, by default None
        (a new one is created).
    signature : Optional[Signature], optional
        Signature stored on the model configuration, by default None.
    add_clearbox_flavor : bool, optional
        Whether to add the Clearbox flavor as well, by default False.
    preprocessing_subpath : str, optional
        Subpath of the serialized preprocessing, recorded in the Clearbox
        flavor, by default None.
    data_preparation_subpath : str, optional
        Subpath of the serialized data preparation, recorded in the
        Clearbox flavor, by default None.
    keras_module : str or module, optional
        The Keras module (or its importable name) used to save the model,
        by default None. If None, it is inferred from the model: plain
        ``keras`` is tried first, then ``tensorflow.keras``.
    custom_objects : optional
        Custom objects saved next to the model, by default None. When
        given, the default Conda environment also includes cloudpickle.
    **kwargs
        Extra keyword arguments forwarded to ``keras_model.save``.

    Raises
    ------
    ClearboxWrapperException
        If the Keras module cannot be inferred from the model, or if
        ``path`` already exists.
    """
    if keras_module is None:

        def _is_plain_keras(model):
            try:
                import keras

                if LooseVersion(keras.__version__) < LooseVersion("2.2.0"):
                    import keras.engine

                    return isinstance(model, keras.engine.Model)
                else:
                    # NB: Network is the first parent with save method
                    import keras.engine.network

                    return isinstance(model, keras.engine.network.Network)
            except ImportError:
                return False

        def _is_tf_keras(model):
            try:
                # NB: Network is not exposed in tf.keras, we check for Model instead.
                import tensorflow.keras.models

                return isinstance(model, tensorflow.keras.models.Model)
            except ImportError:
                return False

        if _is_plain_keras(keras_model):
            keras_module = importlib.import_module("keras")
        elif _is_tf_keras(keras_model):
            keras_module = importlib.import_module("tensorflow.keras")
        else:
            raise ClearboxWrapperException(
                "Unable to infer keras module from the model, please specify "
                "which keras module ('keras' or 'tensorflow.keras') is to be "
                "used to save and load the model.")
    elif isinstance(keras_module, str):
        # isinstance also accepts str subclasses, which an exact type
        # comparison would leave unimported and crash on `.__name__` below.
        keras_module = importlib.import_module(keras_module)

    if os.path.exists(path):
        raise ClearboxWrapperException(
            "Model path '{}' already exists".format(path))

    data_subpath = "data"
    data_path = os.path.join(path, data_subpath)
    os.makedirs(data_path)

    if mlmodel is None:
        mlmodel = Model()
    if signature is not None:
        mlmodel.signature = signature

    if custom_objects is not None:
        _save_custom_objects(data_path, custom_objects)

    # save keras module spec to path/data/keras_module.txt
    with open(os.path.join(data_path, _KERAS_MODULE_SPEC_PATH), "w") as f:
        f.write(keras_module.__name__)

    # Use the SavedModel format if `save_format` is unspecified
    # NOTE(review): when the caller omits `save_format`, "tf" is recorded here
    # but no `save_format` is forwarded to `keras_model.save` — confirm the
    # library default matches for every supported Keras version.
    save_format = kwargs.get("save_format", "tf")

    # save keras save_format to path/data/save_format.txt
    with open(os.path.join(data_path, _KERAS_SAVE_FORMAT_PATH), "w") as f:
        f.write(save_format)

    # save keras model
    # To maintain prior behavior, when the format is HDF5, we save
    # with the h5 file extension. Otherwise, model_path is a directory
    # where the saved_model.pb will be stored (for SavedModel format)
    file_extension = ".h5" if save_format == "h5" else ""
    model_subpath = os.path.join(data_subpath, _MODEL_SAVE_PATH)
    model_path = os.path.join(path, model_subpath) + file_extension
    keras_model.save(model_path, **kwargs)

    conda_env_subpath = "conda.yaml"
    if conda_env is None:
        conda_env = get_default_keras_conda_env(
            include_cloudpickle=custom_objects is not None,
            keras_module=keras_module)
    elif not isinstance(conda_env, dict):
        with open(conda_env, "r") as f:
            conda_env = yaml.safe_load(f)
    with open(os.path.join(path, conda_env_subpath), "w") as f:
        yaml.safe_dump(conda_env, stream=f, default_flow_style=False)

    mlmodel.add_flavor(
        FLAVOR_NAME,
        keras_module=keras_module.__name__,
        keras_version=keras_module.__version__,
        save_format=save_format,
        data=data_subpath,
    )
    pyfunc.add_pyfunc_flavor_to_model(
        mlmodel,
        loader_module="clearbox_wrapper.keras",
        data=data_subpath,
        env=conda_env_subpath,
    )
    if add_clearbox_flavor:
        add_clearbox_flavor_to_model(
            mlmodel,
            loader_module="clearbox_wrapper.keras",
            data=data_subpath,
            env=conda_env_subpath,
            preprocessing=preprocessing_subpath,
            data_preparation=data_preparation_subpath,
        )

    mlmodel.save(os.path.join(path, MLMODEL_FILE_NAME))
def save_model(path,
               loader_module=None,
               data_path=None,
               code_path=None,
               conda_env=None,
               mlflow_model=None,
               python_model=None,
               artifacts=None,
               signature: Signature = None,
               **kwargs):
    """
    save_model(path, loader_module=None, data_path=None, code_path=None, conda_env=None,\
               mlflow_model=Model(), python_model=None, artifacts=None)

    Save a Pyfunc model to a new directory on the local filesystem.

    Two mutually exclusive workflows are supported. The first is selected by
    ``loader_module`` / ``data_path``; the second by ``python_model`` /
    ``artifacts``. Parameters from both sets cannot be combined, and at least
    one of ``loader_module`` or ``python_model`` must be given.

    :param path: The path to which to save the model. It must not exist yet;
                 the directory is created by this function.
    :param loader_module: The name of the Python module that is used to load the model
                          from ``data_path`` (first workflow).
    :param data_path: Path to a file or directory containing model data (first workflow).
    :param code_path: A list of local filesystem paths to Python file dependencies (or
                      directories containing file dependencies). Must be a list when given.
    :param conda_env: Either a dictionary representation of a Conda environment or the
                      path to a Conda environment yaml file. Forwarded unchanged to the
                      selected workflow. The following is an *example* dictionary
                      representation of a Conda environment::

                        {
                            'name': 'mlflow-env',
                            'channels': ['defaults'],
                            'dependencies': [
                                'python=3.7.0',
                                'cloudpickle==0.5.8'
                            ]
                        }
    :param mlflow_model: Model configuration to which the flavor is added. If ``None``, a
                         new one is created. May also be passed as the keyword ``model``,
                         which takes precedence.
    :param python_model: The model object to save (second workflow).
    :param artifacts: A dictionary of ``<name, artifact_uri>`` entries made available to
                      ``python_model`` (second workflow). For example::

                        {
                            "my_file": "s3://my-bucket/path/to/my/file"
                        }
    :param signature: Signature stored on the model configuration when not ``None``.
    :param kwargs: Only ``model`` (an alias of ``mlflow_model``) is accepted; any other
                   keyword raises ``TypeError``.
    :raises TypeError: If unexpected keyword arguments are given, or ``code_path`` is not
                       a list.
    :raises ClearboxWrapperException: If parameters from both workflows are given, if
                                      neither ``loader_module`` nor ``python_model`` is
                                      given, or if ``path`` already exists.
    :return: Whatever the selected workflow helper returns.
    """
    mlflow_model = kwargs.pop("model", mlflow_model)
    if kwargs:
        raise TypeError(
            "save_model() got unexpected keyword arguments: {}".format(kwargs)
        )

    if code_path is not None and not isinstance(code_path, list):
        raise TypeError(
            "Argument code_path should be a list, not {}".format(type(code_path))
        )

    # The two groups of parameters select mutually exclusive workflows.
    loader_workflow_args = {
        "loader_module": loader_module,
        "data_path": data_path,
    }
    class_workflow_args = {
        "artifacts": artifacts,
        "python_model": python_model,
    }
    uses_loader_workflow = any(
        value is not None for value in loader_workflow_args.values()
    )
    uses_class_workflow = any(
        value is not None for value in class_workflow_args.values()
    )

    if uses_loader_workflow and uses_class_workflow:
        raise ClearboxWrapperException(
            "The following sets of parameters cannot be specified together: {first_set_keys}"
            " and {second_set_keys}. All parameters in one set must be `None`. Instead, found"
            " the following values: {first_set_entries} and {second_set_entries}".format(
                first_set_keys=loader_workflow_args.keys(),
                second_set_keys=class_workflow_args.keys(),
                first_set_entries=loader_workflow_args,
                second_set_entries=class_workflow_args,
            )
        )
    if loader_module is None and python_model is None:
        raise ClearboxWrapperException(
            "Either `loader_module` or `python_model` must be specified. A `loader_module` "
            "should be a python module. A `python_model` should be a subclass of PythonModel"
        )

    if os.path.exists(path):
        raise ClearboxWrapperException("Path '{}' already exists".format(path))
    os.makedirs(path)

    if mlflow_model is None:
        mlflow_model = Model()
    if signature is not None:
        mlflow_model.signature = signature

    if uses_loader_workflow:
        return _save_model_with_loader_module_and_data_path(
            path=path,
            loader_module=loader_module,
            data_path=data_path,
            code_paths=code_path,
            conda_env=conda_env,
            mlflow_model=mlflow_model,
        )
    elif uses_class_workflow:
        return _save_model_with_class_artifacts_params(
            path=path,
            python_model=python_model,
            artifacts=artifacts,
            conda_env=conda_env,
            code_paths=code_path,
            mlflow_model=mlflow_model,
        )