Example #1
def _load_pyfunc(model_path):
    pyfunc_config = _get_flavor_configuration(
        model_path=model_path, flavor_name=mlflow.pyfunc.FLAVOR_NAME)
    python_model_subpath = pyfunc_config.get(CONFIG_KEY_PYTHON_MODEL, None)
    if python_model_subpath is None:
        raise MlflowException(
            "Python model path was not specified in the model configuration")
    with open(os.path.join(model_path, python_model_subpath), "rb") as f:
        python_model = cloudpickle.load(f)

    # TODO: If the longevity of the temporary directory becomes problematic, consider
    # using an alternative solution.
    tmp_artifacts_dir_path = tempfile.mkdtemp(suffix="artifacts")
    artifacts = {}
    for saved_artifact_name, saved_artifact_info in\
            pyfunc_config.get(CONFIG_KEY_ARTIFACTS, {}).items():
        tmp_artifact_path = os.path.join(
            tmp_artifacts_dir_path,
            _copy_file_or_tree(src=os.path.join(
                model_path,
                saved_artifact_info[CONFIG_KEY_ARTIFACT_RELATIVE_PATH]),
                               dst=tmp_artifacts_dir_path,
                               dst_dir=saved_artifact_name))
        artifacts[saved_artifact_name] = tmp_artifact_path

    context = PythonModelContext(artifacts=artifacts)
    python_model.load_context(context=context)
    return _PythonModelPyfuncWrapper(python_model=python_model,
                                     context=context)
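
This loader is the counterpart of saving a custom python_model: the pickled PythonModel is reloaded with cloudpickle and its artifacts are remapped into a temporary directory. A minimal round-trip sketch, assuming a recent MLflow release (the AddN class and paths are illustrative):

import pandas as pd
import mlflow.pyfunc

class AddN(mlflow.pyfunc.PythonModel):
    def __init__(self, n):
        self.n = n

    def predict(self, context, model_input):
        return model_input.apply(lambda column: column + self.n)

# save_model pickles the PythonModel with cloudpickle and records the
# subpath that _load_pyfunc reads back via CONFIG_KEY_PYTHON_MODEL.
mlflow.pyfunc.save_model(path="add_n_model", python_model=AddN(n=5))

# load_model ultimately calls _load_pyfunc on the saved directory.
loaded_model = mlflow.pyfunc.load_model("add_n_model")
assert loaded_model.predict(pd.DataFrame([range(10)])).equals(pd.DataFrame([range(5, 15)]))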
Example #2
def test_dir_create():
    with TempDir() as tmp:
        file_path = tmp.path("test_file.txt")
        create_dir = tmp.path("test_dir2/")
        with open(file_path, "a") as f:
            f.write("testing")
        name = _copy_file_or_tree(file_path, file_path, create_dir)
        assert filecmp.cmp(file_path, name)
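
This test exercises _copy_file_or_tree, the helper used throughout these examples. A small sketch of its contract as the save_model examples below rely on it, assuming a recent MLflow where the return value is the copied path relative to dst:

import os
import tempfile
from mlflow.utils.file_utils import _copy_file_or_tree

src_dir = tempfile.mkdtemp()
with open(os.path.join(src_dir, "weights.bin"), "wb") as f:
    f.write(b"\x00")

dst_root = tempfile.mkdtemp()
# The tree is copied to dst_root/data/<basename of src_dir>; the returned
# path is relative to dst_root, so callers re-join it with dst_root.
rel_path = _copy_file_or_tree(src=src_dir, dst=dst_root, dst_dir="data")
assert os.path.exists(os.path.join(dst_root, rel_path, "weights.bin"))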
Example #3
def _save_model_with_loader_module_and_data_path(
    path, loader_module, data_path=None, code_paths=None, conda_env=None, mlflow_model=Model()
):
    """
    Export model as a generic Python function model.
    :param path: The path to which to save the Python model.
    :param loader_module: The name of the Python module that is used to load the model
                          from ``data_path``. This module must define a method with the prototype
                          ``_load_pyfunc(data_path)``.
    :param data_path: Path to a file or directory containing model data.
    :param code_paths: A list of local filesystem paths to Python file dependencies (or directories
                      containing file dependencies). These files are *prepended* to the system
                      path before the model is loaded.
    :param conda_env: Either a dictionary representation of a Conda environment or the path to a
                      Conda environment yaml file. If provided, this describes the environment
                      this model should be run in.
    :return: Model configuration containing model info.
    """

    code = None
    data = None

    if data_path is not None:
        model_file = _copy_file_or_tree(src=data_path, dst=path, dst_dir="data")
        data = model_file

    if code_paths is not None:
        for code_path in code_paths:
            _copy_file_or_tree(src=code_path, dst=path, dst_dir="code")
        code = "code"

    conda_env_subpath = "mlflow_env.yml"
    if conda_env is None:
        conda_env = get_default_conda_env()
    elif not isinstance(conda_env, dict):
        with open(conda_env, "r") as f:
            conda_env = yaml.safe_load(f)
    with open(os.path.join(path, conda_env_subpath), "w") as f:
        yaml.safe_dump(conda_env, stream=f, default_flow_style=False)

    mlflow.pyfunc.add_to_model(
        mlflow_model, loader_module=loader_module, code=code, data=data, env=conda_env_subpath
    )
    mlflow_model.save(os.path.join(path, MLMODEL_FILE_NAME))
    return mlflow_model
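
This private helper backs mlflow.pyfunc.save_model when a loader_module/data_path pair is supplied instead of a python_model. A usage sketch through the public API, assuming a matching MLflow release (the module name and file paths are illustrative):

import mlflow.pyfunc

mlflow.pyfunc.save_model(
    path="my_pyfunc_model",
    loader_module="my_package.my_loader",  # must define _load_pyfunc(data_path)
    data_path="model.pkl",                 # copied into the model's data/ subdirectory
    code_path=["my_package/"],             # prepended to sys.path at load time
)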
Example #4
def _save_model_with_loader_module_and_data_path(path, loader_module, data_path=None,
                                                 code_paths=None, conda_env=None,
                                                 mlflow_model=Model()):
    """
    Export model as a generic Python function model.
    :param path: The path to which to save the Python model.
    :param loader_module: The name of the Python module that is used to load the model
                          from ``data_path``. This module must define a method with the prototype
                          ``_load_pyfunc(data_path)``.
    :param data_path: Path to a file or directory containing model data.
    :param code_paths: A list of local filesystem paths to Python file dependencies (or directories
                      containing file dependencies). These files are *prepended* to the system
                      path before the model is loaded.
    :param conda_env: Either a dictionary representation of a Conda environment or the path to a
                      Conda environment yaml file. If provided, this describes the environment
                      this model should be run in.
    :return: Model configuration containing model info.
    """
    if os.path.exists(path):
        raise MlflowException(
            message="Path '{}' already exists".format(path),
            error_code=RESOURCE_ALREADY_EXISTS)
    os.makedirs(path)

    code = None
    data = None
    env = None

    if data_path is not None:
        model_file = _copy_file_or_tree(src=data_path, dst=path, dst_dir="data")
        data = model_file

    if code_paths is not None:
        for code_path in code_paths:
            _copy_file_or_tree(src=code_path, dst=path, dst_dir="code")
        code = "code"

    if conda_env is not None:
        shutil.copy(src=conda_env, dst=os.path.join(path, "mlflow_env.yml"))
        env = "mlflow_env.yml"

    mlflow.pyfunc.add_to_model(
        mlflow_model, loader_module=loader_module, code=code, data=data, env=env)
    mlflow_model.save(os.path.join(path, 'MLmodel'))
    return mlflow_model
Example #5
def save_model(tf_saved_model_dir, tf_meta_graph_tags, tf_signature_def_key, path,
               mlflow_model=Model(), conda_env=None):
    """
    Save a *serialized* collection of TensorFlow graphs and variables as an MLflow model
    to a local path. This method operates on TensorFlow variables and graphs that have been
    serialized in TensorFlow's ``SavedModel`` format. For more information about the
    ``SavedModel`` format, see the TensorFlow documentation:
    https://www.tensorflow.org/guide/saved_model#save_and_restore_models.

    :param tf_saved_model_dir: Path to the directory containing serialized TensorFlow variables
                               and graphs in ``SavedModel`` format.
    :param tf_meta_graph_tags: A list of tags identifying the model's metagraph within the
                               serialized ``SavedModel`` object. For more information, see the
                               ``tags`` parameter of the
                               ``tf.saved_model.builder.SavedModelBuilder`` method:
                               https://www.tensorflow.org/api_docs/python/tf/saved_model/builder/SavedModelBuilder#add_meta_graph
    :param tf_signature_def_key: A string identifying the input/output signature associated with
                                 the model. This is a key within the serialized ``SavedModel``'s
                                 signature definition mapping. For more information, see the
                                 ``signature_def_map`` parameter of the
                                 ``tf.saved_model.builder.SavedModelBuilder`` method.
    :param path: Local path where the MLflow model is to be saved.
    :param mlflow_model: MLflow model configuration to which this flavor will be added.
    :param conda_env: Path to a Conda environment file. If provided, this describes the
                      environment this model should be run in. At minimum, it should specify
                      the dependencies contained in ``mlflow.tensorflow.DEFAULT_CONDA_ENV``. If
                      `None`, the default ``mlflow.tensorflow.DEFAULT_CONDA_ENV`` environment
                      will be added to the model.
    """
    eprint("Validating the specified Tensorflow model by attempting to load it in a new Tensorflow"
           " graph...")
    _validate_saved_model(tf_saved_model_dir=tf_saved_model_dir,
                          tf_meta_graph_tags=tf_meta_graph_tags,
                          tf_signature_def_key=tf_signature_def_key)
    eprint("Validation succeeded!")

    if os.path.exists(path):
        raise MlflowException("Path '{}' already exists".format(path), DIRECTORY_NOT_EMPTY)
    os.makedirs(path)
    root_relative_path = _copy_file_or_tree(src=tf_saved_model_dir, dst=path, dst_dir=None)
    model_dir_subpath = "tfmodel"
    shutil.move(os.path.join(path, root_relative_path), os.path.join(path, model_dir_subpath))

    conda_env_subpath = "conda.yaml"
    if conda_env is not None:
        shutil.copyfile(conda_env, os.path.join(path, conda_env_subpath))
    else:
        with open(os.path.join(path, conda_env_subpath), "w") as f:
            yaml.safe_dump(DEFAULT_CONDA_ENV, stream=f, default_flow_style=False)

    mlflow_model.add_flavor(FLAVOR_NAME, saved_model_dir=model_dir_subpath,
                            meta_graph_tags=tf_meta_graph_tags,
                            signature_def_key=tf_signature_def_key)
    pyfunc.add_to_model(mlflow_model, loader_module="mlflow.tensorflow", env=conda_env_subpath)
    mlflow_model.save(os.path.join(path, "MLmodel"))
Example #6
def save_model(path, loader_module=None, data_path=None, conda_env=None, mlflow_model=None, **kwargs):
    """Save the explanation locally using the MLflow model format.

    This function is necessary for log_explanation to work properly.

    :param path: The destination path for the saved explanation.
    :type path: str
    :param loader_module: The package that will be used to reload a serialized explanation. In this case,
        always interpret_community.mlflow.
    :type loader_module: str
    :param data_path: The path to the serialized explanation files.
    :type data_path: str
    :param conda_env: The path to a YAML file with basic Python environment information.
    :type conda_env: str
    :param mlflow_model: In our case, always None.
    :type mlflow_model: None
    :return: The MLflow model representation of the explanation.
    :rtype: mlflow.models.Model
    """

    try:
        import mlflow
        from mlflow.models import Model
        from mlflow.utils.file_utils import _copy_file_or_tree
        from mlflow.pyfunc.model import get_default_conda_env
    except ImportError as e:
        raise Exception("Could not log_explanation to mlflow. Missing mlflow dependency, "
                        "pip install mlflow to resolve the error: {}.".format(e))

    if os.path.exists(path):
        raise Exception("Path '{}' already exists".format(path))
    os.makedirs(path)

    if mlflow_model is None:
        mlflow_model = Model()

    data = None
    if data_path is not None:
        model_file = _copy_file_or_tree(src=data_path, dst=path, dst_dir="data")
        data = model_file

    conda_env_subpath = "mlflow_env.yml"
    if conda_env is None:
        conda_env = get_default_conda_env()
    elif not isinstance(conda_env, dict):
        with open(conda_env, "r") as f:
            conda_env = yaml.safe_load(f)
    with open(os.path.join(path, conda_env_subpath), "w") as f:
        yaml.safe_dump(conda_env, stream=f, default_flow_style=False)

    mlflow.pyfunc.add_to_model(
        mlflow_model, loader_module=loader_module, data=data, env=conda_env_subpath, **kwargs)
    mlflow_model.save(os.path.join(path, 'MLmodel'))
    return mlflow_model
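
A hypothetical call sketch: per the docstring, loader_module is always "interpret_community.mlflow", and data_path points at whatever the explainer serialized (both paths here are illustrative):

saved = save_model(
    path="explanation_model",
    loader_module="interpret_community.mlflow",
    data_path="serialized_explanation/",  # directory produced by the explainer
)
print(saved.flavors)  # includes the python_function flavor added above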
Example #7
def _validate_and_copy_code_paths(code_paths, path, default_subpath="code"):
    """
    Validates that a code path is a valid list and copies the code paths to a directory. This
    can later be used to log custom code as an artifact.

    :param code_paths: A list of files or directories containing code that should be logged
                       as artifacts.
    :param path: The local model path.
    :param default_subpath: The default directory name used to store code artifacts.
    """
    _validate_code_paths(code_paths)
    if code_paths is not None:
        code_dir_subpath = default_subpath
        for code_path in code_paths:
            _copy_file_or_tree(src=code_path,
                               dst=path,
                               dst_dir=code_dir_subpath)
    else:
        code_dir_subpath = None
    return code_dir_subpath
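
A sketch of how a flavor's save function threads the returned subpath into the pyfunc flavor configuration, mirroring the pattern in the other examples on this page ("my_flavor" is an illustrative loader module; _validate_and_copy_code_paths is assumed importable from mlflow.utils.model_utils):

import os
import tempfile
import mlflow.pyfunc
from mlflow.models import Model

path = tempfile.mkdtemp(suffix="model")
mlflow_model = Model()
code_dir_subpath = _validate_and_copy_code_paths(code_paths=None, path=path)  # -> None
mlflow.pyfunc.add_to_model(
    mlflow_model,
    loader_module="my_flavor",
    code=code_dir_subpath,  # None when no code_paths were given
    env="conda.yaml",
)
mlflow_model.save(os.path.join(path, "MLmodel"))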
Example #8
def save_model(dst_path, loader_module, data_path=None, code_path=(), conda_env=None,
               model=Model()):
    """
    Export model as a generic Python function model.

    :param dst_path: Path where the model is stored.
    :param loader_module: The module to be used to load the model.
    :param data_path: Path to a file or directory containing model data.
    :param code_path: List of paths (files or directories) containing code dependencies not
                      present in the environment. Every path in ``code_path`` is added to the
                      Python path before the model is loaded.
    :param conda_env: Path to the Conda environment definition. This environment is activated
                      prior to running model code.
    :return: Model configuration containing model info.

    """
    if os.path.exists(dst_path):
        raise Exception("Path '{}' already exists".format(dst_path))
    os.makedirs(dst_path)
    code = None
    data = None
    env = None

    if data_path:
        model_file = _copy_file_or_tree(src=data_path, dst=dst_path, dst_dir="data")
        data = model_file

    if code_path:
        for path in code_path:
            _copy_file_or_tree(src=path, dst=dst_path, dst_dir="code")
        code = "code"

    if conda_env:
        shutil.copy(src=conda_env, dst=os.path.join(dst_path, "mlflow_env.yml"))
        env = "mlflow_env.yml"

    add_to_model(model, loader_module=loader_module, code=code, data=data, env=env)
    model.save(os.path.join(dst_path, 'MLmodel'))
    return model
Example #9
File: model.py Project: zge/mlflow
def _load_pyfunc(model_path):
    pyfunc_config = _get_flavor_configuration(
        model_path=model_path, flavor_name=mlflow.pyfunc.FLAVOR_NAME)

    python_model_cloudpickle_version = pyfunc_config.get(
        CONFIG_KEY_CLOUDPICKLE_VERSION, None)
    if python_model_cloudpickle_version is None:
        mlflow.pyfunc._logger.warning(
            "The version of CloudPickle used to save the model could not be found in the MLmodel"
            " configuration")
    elif python_model_cloudpickle_version != cloudpickle.__version__:
        # CloudPickle does not have a well-defined cross-version compatibility policy. Micro version
        # releases have been known to cause incompatibilities. Therefore, we match on the full
        # library version
        mlflow.pyfunc._logger.warning(
            "The version of CloudPickle that was used to save the model, `CloudPickle %s`, differs"
            " from the version of CloudPickle that is currently running, `CloudPickle %s`, and may"
            " be incompatible", python_model_cloudpickle_version,
            cloudpickle.__version__)

    python_model_subpath = pyfunc_config.get(CONFIG_KEY_PYTHON_MODEL, None)
    if python_model_subpath is None:
        raise MlflowException(
            "Python model path was not specified in the model configuration")
    with open(os.path.join(model_path, python_model_subpath), "rb") as f:
        python_model = cloudpickle.load(f)

    # TODO: If the longevity of the temporary directory becomes problematic, consider
    # using an alternative solution.
    tmp_artifacts_dir_path = tempfile.mkdtemp(suffix="artifacts")
    artifacts = {}
    for saved_artifact_name, saved_artifact_info in\
            pyfunc_config.get(CONFIG_KEY_ARTIFACTS, {}).items():
        tmp_artifact_path = os.path.join(
            tmp_artifacts_dir_path,
            _copy_file_or_tree(src=os.path.join(
                model_path,
                saved_artifact_info[CONFIG_KEY_ARTIFACT_RELATIVE_PATH]),
                               dst=tmp_artifacts_dir_path,
                               dst_dir=saved_artifact_name))
        artifacts[saved_artifact_name] = tmp_artifact_path

    context = PythonModelContext(artifacts=artifacts)
    python_model.load_context(context=context)
    return _PythonModelPyfuncWrapper(python_model=python_model,
                                     context=context)
Example #10
def save_model(
    vega_saved_model_dir,
    path,
    mlflow_model=None,
    conda_env=None,
    signature=None,
    input_example=None,
):
    """
    """
    if os.path.exists(path):
        raise MlflowException("Path '{}' already exists".format(path), DIRECTORY_NOT_EMPTY)
    os.makedirs(path)
    if mlflow_model is None:
        mlflow_model = Model()
    if signature is not None:
        # _save_example(mlflow_model, input_example, path)
        pass
    root_relative_path = _copy_file_or_tree(src=vega_saved_model_dir, dst=path, dst_dir=None)
    model_dir_subpath = 'vegamodel'
    shutil.move(os.path.join(path, root_relative_path), os.path.join(path, model_dir_subpath))

    conda_env_subpath = 'conda.yaml'
    if conda_env is None:
        conda_env = get_default_conda_env()
    elif not isinstance(conda_env, dict):
        with open(conda_env, 'r') as f:
            conda_env = yaml.safe_load(f)
    with open(os.path.join(path, conda_env_subpath), "w") as f:
        yaml.safe_dump(conda_env, stream=f, default_flow_style=False)

    mlflow_model.add_flavor(
        FLAVOR_NAME,
        saved_model_dir=model_dir_subpath,
    )
    pyfunc.add_to_model(mlflow_model, loader_module="mlflow_vega", env=conda_env_subpath)
    mlflow_model.save(os.path.join(path, MLMODEL_FILE_NAME))
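
A hypothetical usage sketch for this custom Vega flavor; the directory names follow the function's parameters and are illustrative:

# Copies ./my_vega_spec into the model directory as vegamodel/ and writes
# the MLmodel file with both the custom flavor and a pyfunc entry.
save_model(vega_saved_model_dir="./my_vega_spec", path="vega_mlflow_model")

# The result can then be reloaded through the pyfunc flavor it registers,
# provided the mlflow_vega loader module is importable:
# mlflow.pyfunc.load_model("vega_mlflow_model")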
Example #11
def save_model(pytorch_model,
               path,
               conda_env=None,
               mlflow_model=Model(),
               code_paths=None,
               pickle_module=None,
               **kwargs):
    """
    Save a PyTorch model to a path on the local file system.

    :param pytorch_model: PyTorch model to be saved. Must accept a single ``torch.FloatTensor`` as
                          input and produce a single output tensor. Any code dependencies of the
                          model's class, including the class definition itself, should be
                          included in one of the following locations:

                          - The package(s) listed in the model's Conda environment, specified
                            by the ``conda_env`` parameter.
                          - One or more of the files specified by the ``code_paths`` parameter.

    :param path: Local path where the model is to be saved.
    :param conda_env: Either a dictionary representation of a Conda environment or the path to a
                      Conda environment yaml file. If provided, this describes the environment
                      this model should be run in. At minimum, it should specify the dependencies
                      contained in :func:`get_default_conda_env()`. If `None`, the default
                      :func:`get_default_conda_env()` environment is added to the model. The
                      following is an *example* dictionary representation of a Conda environment::

                        {
                            'name': 'mlflow-env',
                            'channels': ['defaults'],
                            'dependencies': [
                                'python=3.7.0',
                                'pytorch=0.4.1',
                                'torchvision=0.2.1'
                            ]
                        }

    :param mlflow_model: :py:mod:`mlflow.models.Model` this flavor is being added to.
    :param code_paths: A list of local filesystem paths to Python file dependencies (or directories
                       containing file dependencies). These files are *prepended* to the system
                       path when the model is loaded.
    :param pickle_module: The module that PyTorch should use to serialize ("pickle") the specified
                          ``pytorch_model``. This is passed as the ``pickle_module`` parameter
                          to ``torch.save()``. By default, this module is also used to
                          deserialize ("unpickle") the PyTorch model at load time.
    :param kwargs: kwargs to pass to ``torch.save`` method.

    >>> import torch
    >>> import mlflow
    >>> import mlflow.pytorch
    >>> # create model and set values
    >>> pytorch_model = Model()
    >>> pytorch_model_path = ...
    >>> #train our model
    >>> for epoch in range(500):
    >>>     y_pred = model(x_data)
    >>>     ...
    >>> #save the model
    >>> with mlflow.start_run() as run:
    >>>   mlflow.log_param("epochs", 500)
    >>>   mlflow.pytorch.save_model(pytorch_model, pytorch_model_path)
    """
    import torch
    pickle_module = pickle_module or mlflow_pytorch_pickle_module

    if not isinstance(pytorch_model, torch.nn.Module):
        raise TypeError("Argument 'pytorch_model' should be a torch.nn.Module")

    path = os.path.abspath(path)
    if os.path.exists(path):
        raise RuntimeError("Path '{}' already exists".format(path))
    os.makedirs(path)

    model_data_subpath = "data"
    model_data_path = os.path.join(path, model_data_subpath)
    os.makedirs(model_data_path)
    # Persist the pickle module name as a file in the model's `data` directory. This is necessary
    # because the `data` directory is the only available parameter to `_load_pyfunc`, and it
    # does not contain the MLmodel configuration; therefore, it is not sufficient to place
    # the module name in the MLmodel
    #
    # TODO: Stop persisting this information to the filesystem once we have a mechanism for
    # supplying the MLmodel configuration to `mlflow.pytorch._load_pyfunc`
    pickle_module_path = os.path.join(model_data_path,
                                      _PICKLE_MODULE_INFO_FILE_NAME)
    with open(pickle_module_path, "w") as f:
        f.write(pickle_module.__name__)
    # Save pytorch model
    model_path = os.path.join(model_data_path,
                              _SERIALIZED_TORCH_MODEL_FILE_NAME)
    torch.save(pytorch_model,
               model_path,
               pickle_module=pickle_module,
               **kwargs)

    conda_env_subpath = "conda.yaml"
    if conda_env is None:
        conda_env = get_default_conda_env()
    elif not isinstance(conda_env, dict):
        with open(conda_env, "r") as f:
            conda_env = yaml.safe_load(f)
    with open(os.path.join(path, conda_env_subpath), "w") as f:
        yaml.safe_dump(conda_env, stream=f, default_flow_style=False)

    if code_paths is not None:
        code_dir_subpath = "code"
        for code_path in code_paths:
            _copy_file_or_tree(src=code_path,
                               dst=path,
                               dst_dir=code_dir_subpath)
    else:
        code_dir_subpath = None

    mlflow_model.add_flavor(FLAVOR_NAME,
                            model_data=model_data_subpath,
                            pytorch_version=torch.__version__)
    pyfunc.add_to_model(mlflow_model,
                        loader_module="mlflow.pytorch",
                        data=model_data_subpath,
                        pickle_module_name=pickle_module.__name__,
                        code=code_dir_subpath,
                        env=conda_env_subpath)
    mlflow_model.save(os.path.join(path, "MLmodel"))
Example #12
def save_model(tf_saved_model_dir,
               tf_meta_graph_tags,
               tf_signature_def_key,
               path,
               mlflow_model=Model(),
               conda_env=None):
    """
    Save a *serialized* collection of TensorFlow graphs and variables as an MLflow model
    to a local path. This method operates on TensorFlow variables and graphs that have been
    serialized in TensorFlow's ``SavedModel`` format. For more information about ``SavedModel``
    format, see the TensorFlow documentation:
    https://www.tensorflow.org/guide/saved_model#save_and_restore_models.

    :param tf_saved_model_dir: Path to the directory containing serialized TensorFlow variables and
                               graphs in ``SavedModel`` format.
    :param tf_meta_graph_tags: A list of tags identifying the model's metagraph within the
                               serialized ``SavedModel`` object. For more information, see the
                               ``tags`` parameter of the
                               ``tf.saved_model.builder.SavedModelBuilder`` method.
    :param tf_signature_def_key: A string identifying the input/output signature associated with the
                                 model. This is a key within the serialized ``SavedModel``
                                 signature definition mapping. For more information, see the
                                 ``signature_def_map`` parameter of the
                                 ``tf.saved_model.builder.SavedModelBuilder`` method.
    :param path: Local path where the MLflow model is to be saved.
    :param mlflow_model: MLflow model configuration to which to add the ``tensorflow`` flavor.
    :param conda_env: Either a dictionary representation of a Conda environment or the path to a
                      Conda environment yaml file. If provided, this describes the environment
                      this model should be run in. At minimum, it should specify the dependencies
                      contained in :func:`get_default_conda_env()`. If ``None``, the default
                      :func:`get_default_conda_env()` environment is added to the model. The
                      following is an *example* dictionary representation of a Conda environment::

                        {
                            'name': 'mlflow-env',
                            'channels': ['defaults'],
                            'dependencies': [
                                'python=3.7.0',
                                'tensorflow=1.8.0'
                            ]
                        }

    """
    _logger.info(
        "Validating the specified TensorFlow model by attempting to load it in a new TensorFlow"
        " graph...")
    _validate_saved_model(tf_saved_model_dir=tf_saved_model_dir,
                          tf_meta_graph_tags=tf_meta_graph_tags,
                          tf_signature_def_key=tf_signature_def_key)
    _logger.info("Validation succeeded!")

    if os.path.exists(path):
        raise MlflowException("Path '{}' already exists".format(path),
                              DIRECTORY_NOT_EMPTY)
    os.makedirs(path)
    root_relative_path = _copy_file_or_tree(src=tf_saved_model_dir,
                                            dst=path,
                                            dst_dir=None)
    model_dir_subpath = "tfmodel"
    shutil.move(os.path.join(path, root_relative_path),
                os.path.join(path, model_dir_subpath))

    conda_env_subpath = "conda.yaml"
    if conda_env is None:
        conda_env = get_default_conda_env()
    elif not isinstance(conda_env, dict):
        with open(conda_env, "r") as f:
            conda_env = yaml.safe_load(f)
    with open(os.path.join(path, conda_env_subpath), "w") as f:
        yaml.safe_dump(conda_env, stream=f, default_flow_style=False)

    mlflow_model.add_flavor(FLAVOR_NAME,
                            saved_model_dir=model_dir_subpath,
                            meta_graph_tags=tf_meta_graph_tags,
                            signature_def_key=tf_signature_def_key)
    pyfunc.add_to_model(mlflow_model,
                        loader_module="mlflow.tensorflow",
                        env=conda_env_subpath)
    mlflow_model.save(os.path.join(path, "MLmodel"))
Example #13
def save_model(
    fonduer_model: FonduerModel,
    path: str,
    conn_string: str,
    mlflow_model: Model = Model(),
    code_paths: Optional[List[str]] = None,
    parallel: Optional[int] = 1,
    model_type: Optional[str] = "discriminative",
    labeler: Optional[Labeler] = None,
    gen_models: Optional[List[LabelModel]] = None,
    featurizer: Optional[Featurizer] = None,
    disc_model: Optional[Classifier] = None,
) -> None:
    """Save a custom MLflow model to a path on the local file system.

    :param fonduer_model: the model to be saved.
    :param path: the path on the local file system.
    :param conn_string: the connection string.
    :param mlflow_model: model configuration.
    :param code_paths: A list of local filesystem paths to Python file dependencies (or
        directories containing file dependencies). These files are prepended to the system
        path when the model is loaded.
    :param parallel: the degree of parallelism.
    :param model_type: the model type, either "discriminative" or "generative", defaults to
        "discriminative".
    :param labeler: a labeler, defaults to None.
    :param gen_models: a list of generative models, defaults to None.
    :param featurizer: a featurizer, defaults to None.
    :param disc_model: a discriminative model, defaults to None.
    """
    os.makedirs(path)
    model_code_path = os.path.join(path, pyfunc.CODE)
    os.makedirs(model_code_path)

    with open(os.path.join(path, "fonduer_model.pkl"), "wb") as f:
        pickle.dump(fonduer_model, f)
    if model_type == "discriminative":
        key_names = [key.name for key in featurizer.get_keys()]
        with open(os.path.join(path, "feature_keys.pkl"), "wb") as f:
            pickle.dump(key_names, f)
        disc_model.save(model_file="best_model.pt", save_dir=path)
    else:
        for candidate_class, gen_model in zip(labeler.candidate_classes,
                                              gen_models):
            gen_model.save(
                os.path.join(path, candidate_class.__name__ + ".pkl"))

        key_names = [key.name for key in labeler.get_keys()]
        with open(os.path.join(path, "labeler_keys.pkl"), "wb") as f:
            pickle.dump(key_names, f)

    _copy_file_or_tree(src=__file__, dst=model_code_path)
    if code_paths is not None:
        for code_path in code_paths:
            _copy_file_or_tree(src=code_path, dst=model_code_path)

    mlflow_model.add_flavor(
        pyfunc.FLAVOR_NAME,
        code=pyfunc.CODE,
        loader_module=__name__,
        conn_string=conn_string,
        parallel=parallel,
        model_type=model_type,
    )
    mlflow_model.save(os.path.join(path, "MLmodel"))
Example #14
def save_model(pytorch_model, path, conda_env=None, mlflow_model=Model(), code_paths=None,
               **kwargs):
    """
    Save a PyTorch model to a path on the local file system.

    :param pytorch_model: PyTorch model to be saved. Must accept a single ``torch.FloatTensor`` as
                          input and produce a single output tensor. Any code dependencies of the
                          model's class, including the class definition itself, should be
                          included in one of the following locations:

                          - The package(s) listed in the model's Conda environment, specified
                            by the ``conda_env`` parameter.
                          - One or more of the files specified by the ``code_paths`` parameter.

    :param path: Local path where the model is to be saved.
    :param conda_env: Either a dictionary representation of a Conda environment or the path to a
                      Conda environment yaml file. If provided, this describes the environment
                      this model should be run in. At minimum, it should specify the dependencies
                      contained in ``mlflow.pytorch.DEFAULT_CONDA_ENV``. If `None`, the default
                      ``mlflow.pytorch.DEFAULT_CONDA_ENV`` environment will be added to the model.
                      The following is an *example* dictionary representation of a Conda
                      environment::

                        {
                            'name': 'mlflow-env',
                            'channels': ['defaults'],
                            'dependencies': [
                                'python=3.7.0',
                                'pytorch=0.4.1',
                                'torchvision=0.2.1'
                            ]
                        }

    :param mlflow_model: :py:mod:`mlflow.models.Model` this flavor is being added to.
    :param code_paths: A list of local filesystem paths to Python file dependencies (or directories
                       containing file dependencies). These files will be *prepended* to the system
                       path when the model is loaded.
    :param kwargs: kwargs to pass to ``torch.save`` method.

    >>> import torch
    >>> import mlflow
    >>> import mlflow.pytorch
    >>> # create model and set values
    >>> pytorch_model = Model()
    >>> pytorch_model_path = ...
    >>> #train our model
    >>> for epoch in range(500):
    >>>     y_pred = model(x_data)
    >>>     ...
    >>> #save the model
    >>> with mlflow.start_run() as run:
    >>>   mlflow.log_param("epochs", 500)
    >>>   mlflow.pytorch.save_model(pytorch_model, pytorch_model_path)
    """
    if not isinstance(pytorch_model, torch.nn.Module):
        raise TypeError("Argument 'pytorch_model' should be a torch.nn.Module")

    path = os.path.abspath(path)
    if os.path.exists(path):
        raise RuntimeError("Path '{}' already exists".format(path))
    os.makedirs(path)
    model_path = os.path.join(path, "model.pth")

    # Save pytorch model
    torch.save(pytorch_model, model_path, **kwargs)
    model_file = os.path.basename(model_path)

    conda_env_subpath = "conda.yaml"
    if conda_env is None:
        conda_env = DEFAULT_CONDA_ENV
    elif not isinstance(conda_env, dict):
        with open(conda_env, "r") as f:
            conda_env = yaml.safe_load(f)
    with open(os.path.join(path, conda_env_subpath), "w") as f:
        yaml.safe_dump(conda_env, stream=f, default_flow_style=False)

    if code_paths is not None:
        code_dir_subpath = "code"
        for code_path in code_paths:
            _copy_file_or_tree(src=code_path, dst=path, dst_dir=code_dir_subpath)
    else:
        code_dir_subpath = None

    mlflow_model.add_flavor(FLAVOR_NAME, model_data=model_file, pytorch_version=torch.__version__)
    pyfunc.add_to_model(mlflow_model, loader_module="mlflow.pytorch", data=model_file,
                        code=code_dir_subpath, env=conda_env_subpath)
    mlflow_model.save(os.path.join(path, "MLmodel"))
Example #15
def save_model(
    pytorch_model,
    path,
    conda_env=None,
    mlflow_model=None,
    code_paths=None,
    pickle_module=None,
    signature: ModelSignature = None,
    input_example: ModelInputExample = None,
    requirements_file=None,
    extra_files=None,
    pip_requirements=None,
    extra_pip_requirements=None,
    **kwargs,
):
    """
    Save a PyTorch model to a path on the local file system.

    :param pytorch_model: PyTorch model to be saved. Can be either an eager model (subclass of
                          ``torch.nn.Module``) or scripted model prepared via ``torch.jit.script``
                          or ``torch.jit.trace``.

                          The model must accept a single ``torch.FloatTensor`` as
                          input and produce a single output tensor.

                          If saving an eager model, any code dependencies of the
                          model's class, including the class definition itself, should be
                          included in one of the following locations:

                          - The package(s) listed in the model's Conda environment, specified
                            by the ``conda_env`` parameter.
                          - One or more of the files specified by the ``code_paths`` parameter.

    :param path: Local path where the model is to be saved.
    :param conda_env: {{ conda_env }}
    :param mlflow_model: :py:mod:`mlflow.models.Model` this flavor is being added to.
    :param code_paths: A list of local filesystem paths to Python file dependencies (or directories
                       containing file dependencies). These files are *prepended* to the system
                       path when the model is loaded.
    :param pickle_module: The module that PyTorch should use to serialize ("pickle") the specified
                          ``pytorch_model``. This is passed as the ``pickle_module`` parameter
                          to ``torch.save()``. By default, this module is also used to
                          deserialize ("unpickle") the PyTorch model at load time.

    :param signature: :py:class:`ModelSignature <mlflow.models.ModelSignature>`
                      describes model input and output :py:class:`Schema <mlflow.types.Schema>`.
                      The model signature can be :py:func:`inferred <mlflow.models.infer_signature>`
                      from datasets with valid model input (e.g. the training dataset with target
                      column omitted) and valid model output (e.g. model predictions generated on
                      the training dataset), for example:

                      .. code-block:: python

                        from mlflow.models.signature import infer_signature
                        train = df.drop_column("target_label")
                        predictions = ... # compute model predictions
                        signature = infer_signature(train, predictions)
    :param input_example: Input example provides one or several instances of valid
                          model input. The example can be used as a hint of what data to feed the
                          model. The given example can be a Pandas DataFrame where the given
                          example will be serialized to json using the Pandas split-oriented
                          format, or a numpy array where the example will be serialized to json
                          by converting it to a list. Bytes are base64-encoded.

    :param requirements_file:

        .. warning::

            ``requirements_file`` has been deprecated. Please use ``pip_requirements`` instead.

        A string containing the path to requirements file. Remote URIs are resolved to absolute
        filesystem paths. For example, consider the following ``requirements_file`` string:

        .. code-block:: python

            requirements_file = "s3://my-bucket/path/to/my_file"

        In this case, the ``"my_file"`` requirements file is downloaded from S3. If ``None``,
        no requirements file is added to the model.

    :param extra_files: A list containing the paths to corresponding extra files. Remote URIs
                      are resolved to absolute filesystem paths.
                      For example, consider the following ``extra_files`` list::

                          extra_files = ["s3://my-bucket/path/to/my_file1",
                                         "s3://my-bucket/path/to/my_file2"]

                      In this case, the ``my_file1`` and ``my_file2`` extra files are
                      downloaded from S3.

                      If ``None``, no extra files are added to the model.
    :param pip_requirements: {{ pip_requirements }}
    :param extra_pip_requirements: {{ extra_pip_requirements }}
    :param kwargs: kwargs to pass to ``torch.save`` method.

    .. code-block:: python
        :caption: Example

        import os

        import torch
        import mlflow.pytorch

        # Class defined here
        class LinearNNModel(torch.nn.Module):
            ...

        # Initialize our model, criterion and optimizer
        ...

        # Training loop
        ...

        # Save PyTorch models to current working directory
        with mlflow.start_run() as run:
            mlflow.pytorch.save_model(model, "model")

            # Convert to a scripted model and save it
            scripted_pytorch_model = torch.jit.script(model)
            mlflow.pytorch.save_model(scripted_pytorch_model, "scripted_model")

        # Load each saved model for inference
        for model_path in ["model", "scripted_model"]:
            model_uri = "{}/{}".format(os.getcwd(), model_path)
            loaded_model = mlflow.pytorch.load_model(model_uri)
            print("Loaded {}:".format(model_path))
            for x in [6.0, 8.0, 12.0, 30.0]:
                X = torch.Tensor([[x]])
                y_pred = loaded_model(X)
                print("predict X: {}, y_pred: {:.2f}".format(x, y_pred.data.item()))
            print("--")

    .. code-block:: text
        :caption: Output

        Loaded model:
        predict X: 6.0, y_pred: 11.90
        predict X: 8.0, y_pred: 15.92
        predict X: 12.0, y_pred: 23.96
        predict X: 30.0, y_pred: 60.13
        --
        Loaded scripted_model:
        predict X: 6.0, y_pred: 11.90
        predict X: 8.0, y_pred: 15.92
        predict X: 12.0, y_pred: 23.96
        predict X: 30.0, y_pred: 60.13
    """
    import torch

    _validate_env_arguments(conda_env, pip_requirements, extra_pip_requirements)

    pickle_module = pickle_module or mlflow_pytorch_pickle_module

    if not isinstance(pytorch_model, torch.nn.Module):
        raise TypeError("Argument 'pytorch_model' should be a torch.nn.Module")
    if code_paths is not None:
        if not isinstance(code_paths, list):
            raise TypeError("Argument code_paths should be a list, not {}".format(type(code_paths)))
    path = os.path.abspath(path)
    if os.path.exists(path):
        raise RuntimeError("Path '{}' already exists".format(path))

    if mlflow_model is None:
        mlflow_model = Model()

    os.makedirs(path)
    if signature is not None:
        mlflow_model.signature = signature
    if input_example is not None:
        _save_example(mlflow_model, input_example, path)

    model_data_subpath = "data"
    model_data_path = os.path.join(path, model_data_subpath)
    os.makedirs(model_data_path)
    # Persist the pickle module name as a file in the model's `data` directory. This is necessary
    # because the `data` directory is the only available parameter to `_load_pyfunc`, and it
    # does not contain the MLmodel configuration; therefore, it is not sufficient to place
    # the module name in the MLmodel
    #
    # TODO: Stop persisting this information to the filesystem once we have a mechanism for
    # supplying the MLmodel configuration to `mlflow.pytorch._load_pyfunc`
    pickle_module_path = os.path.join(model_data_path, _PICKLE_MODULE_INFO_FILE_NAME)
    with open(pickle_module_path, "w") as f:
        f.write(pickle_module.__name__)
    # Save pytorch model
    model_path = os.path.join(model_data_path, _SERIALIZED_TORCH_MODEL_FILE_NAME)
    if isinstance(pytorch_model, torch.jit.ScriptModule):
        torch.jit.ScriptModule.save(pytorch_model, model_path)
    else:
        torch.save(pytorch_model, model_path, pickle_module=pickle_module, **kwargs)

    torchserve_artifacts_config = {}

    if extra_files:
        torchserve_artifacts_config[_EXTRA_FILES_KEY] = []
        if not isinstance(extra_files, list):
            raise TypeError("Extra files argument should be a list")

        with TempDir() as tmp_extra_files_dir:
            for extra_file in extra_files:
                _download_artifact_from_uri(
                    artifact_uri=extra_file, output_path=tmp_extra_files_dir.path()
                )
                rel_path = posixpath.join(_EXTRA_FILES_KEY, os.path.basename(extra_file))
                torchserve_artifacts_config[_EXTRA_FILES_KEY].append({"path": rel_path})
            shutil.move(
                tmp_extra_files_dir.path(),
                posixpath.join(path, _EXTRA_FILES_KEY),
            )

    if requirements_file:

        warnings.warn(
            "`requirements_file` has been deprecated. Please use `pip_requirements` instead.",
            FutureWarning,
            stacklevel=2,
        )

        if not isinstance(requirements_file, str):
            raise TypeError("Path to requirements file should be a string")

        with TempDir() as tmp_requirements_dir:
            _download_artifact_from_uri(
                artifact_uri=requirements_file, output_path=tmp_requirements_dir.path()
            )
            rel_path = os.path.basename(requirements_file)
            torchserve_artifacts_config[_REQUIREMENTS_FILE_KEY] = {"path": rel_path}
            shutil.move(tmp_requirements_dir.path(rel_path), path)

    if code_paths is not None:
        code_dir_subpath = "code"
        for code_path in code_paths:
            _copy_file_or_tree(src=code_path, dst=path, dst_dir=code_dir_subpath)
    else:
        code_dir_subpath = None

    mlflow_model.add_flavor(
        FLAVOR_NAME,
        model_data=model_data_subpath,
        pytorch_version=str(torch.__version__),
        **torchserve_artifacts_config,
    )
    pyfunc.add_to_model(
        mlflow_model,
        loader_module="mlflow.pytorch",
        data=model_data_subpath,
        pickle_module_name=pickle_module.__name__,
        code=code_dir_subpath,
        env=_CONDA_ENV_FILE_NAME,
    )
    mlflow_model.save(os.path.join(path, MLMODEL_FILE_NAME))

    if conda_env is None:
        if pip_requirements is None:
            default_reqs = get_default_pip_requirements()
            # To ensure `_load_pyfunc` can successfully load the model during the dependency
            # inference, `mlflow_model.save` must be called beforehand to save an MLmodel file.
            inferred_reqs = mlflow.models.infer_pip_requirements(
                model_data_path,
                FLAVOR_NAME,
                fallback=default_reqs,
            )
            default_reqs = sorted(set(inferred_reqs).union(default_reqs))
        else:
            default_reqs = None
        conda_env, pip_requirements, pip_constraints = _process_pip_requirements(
            default_reqs,
            pip_requirements,
            extra_pip_requirements,
        )
    else:
        conda_env, pip_requirements, pip_constraints = _process_conda_env(conda_env)

    with open(os.path.join(path, _CONDA_ENV_FILE_NAME), "w") as f:
        yaml.safe_dump(conda_env, stream=f, default_flow_style=False)

    # Save `constraints.txt` if necessary
    if pip_constraints:
        write_to(os.path.join(path, _CONSTRAINTS_FILE_NAME), "\n".join(pip_constraints))

    if not requirements_file:
        # Save `requirements.txt`
        write_to(os.path.join(path, _REQUIREMENTS_FILE_NAME), "\n".join(pip_requirements))
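
A usage sketch for the newer signature/requirements parameters, assuming a matching MLflow release (the model and data are illustrative):

import numpy as np
import torch
import mlflow.pytorch
from mlflow.models.signature import infer_signature

net = torch.nn.Linear(4, 1)
sample_input = np.random.uniform(size=(2, 4)).astype(np.float32)
sample_output = net(torch.from_numpy(sample_input)).detach().numpy()

mlflow.pytorch.save_model(
    net,
    "pytorch_sig_model",  # must not already exist
    signature=infer_signature(sample_input, sample_output),
    input_example=sample_input,
    extra_pip_requirements=["torchvision"],  # appended to the inferred requirements
)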
Example #16
def build_image(model_uri,
                workspace,
                image_name=None,
                model_name=None,
                mlflow_home=None,
                description=None,
                tags=None,
                synchronous=True):
    """
    Register an MLflow model with Azure ML and build an Azure ML ContainerImage for deployment.
    The resulting image can be deployed as a web service to Azure Container Instances (ACI) or
    Azure Kubernetes Service (AKS).

    The resulting Azure ML ContainerImage will contain a webserver that processes model queries.
    For information about the input data formats accepted by this webserver, see the
    :ref:`MLflow deployment tools documentation <azureml_deployment>`.

    :param model_uri: The location, in URI format, of the MLflow model for which to build an Azure
                      ML deployment image, for example:

                      - ``/Users/me/path/to/local/model``
                      - ``relative/path/to/local/model``
                      - ``s3://my_bucket/path/to/model``
                      - ``runs:/<mlflow_run_id>/run-relative/path/to/model``

                      For more information about supported URI schemes, see the
                      `Artifacts Documentation <https://www.mlflow.org/docs/latest/tracking.html#
                      supported-artifact-stores>`_.

    :param image_name: The name to assign the Azure Container Image that will be created. If
                       unspecified, a unique image name will be generated.
    :param model_name: The name to assign the Azure Model that will be created. If
                       unspecified, a unique model name will be generated.
    :param workspace: The AzureML workspace in which to build the image. This is a
                      `azureml.core.Workspace` object.
    :param mlflow_home: Path to a local copy of the MLflow GitHub repository. If specified, the
                        image will install MLflow from this directory. Otherwise, it will install
                        MLflow from pip.
    :param description: A string description to associate with the Azure Container Image and the
                        Azure Model that will be created. For more information, see
                        `<https://docs.microsoft.com/en-us/python/api/azureml-core/
                        azureml.core.image.container.containerimageconfig>`_ and
                        `<https://docs.microsoft.com/en-us/python/api/azureml-core/
                        azureml.core.model.model?view=azure-ml-py#register>`_.
    :param tags: A collection of tags, represented as a dictionary of string key-value pairs, to
                 associate with the Azure Container Image and the Azure Model that will be created.
                 These tags will be added to a set of default tags that include the model path,
                 the model run id (if specified), and more. For more information, see
                 `<https://docs.microsoft.com/en-us/python/api/azureml-core/
                 azureml.core.image.container.containerimageconfig>`_ and
                 `<https://docs.microsoft.com/en-us/python/api/azureml-core/
                 azureml.core.model.model?view=azure-ml-py#register>`_.
    :param synchronous: If `True`, this method will block until the image creation procedure
                        terminates before returning. If `False`, the method will return immediately,
                        but the returned image will not be available until the asynchronous
                        creation process completes. The `azureml.core.Image.wait_for_creation()`
                        function can be used to wait for the creation process to complete.
    :return: A tuple containing the following elements in order:
             - An `azureml.core.image.ContainerImage` object containing metadata for the new image.
             - An `azureml.core.model.Model` object containing metadata for the new model.

    >>> import mlflow.azureml
    >>> from azureml.core import Workspace
    >>> from azureml.core.webservice import AciWebservice, Webservice
    >>>
    >>> # Load or create an Azure ML Workspace
    >>> workspace_name = "<Name of your Azure ML workspace>"
    >>> subscription_id = "<Your Azure subscription ID>"
    >>> resource_group = "<Name of the Azure resource group in which to create Azure ML resources>"
    >>> location = "<Name of the Azure location (region) in which to create Azure ML resources>"
    >>> azure_workspace = Workspace.create(name=workspace_name,
    >>>                                    subscription_id=subscription_id,
    >>>                                    resource_group=resource_group,
    >>>                                    location=location,
    >>>                                    create_resource_group=True,
    >>>                                    exist_okay=True)
    >>>
    >>> # Build an Azure ML Container Image for an MLflow model
    >>> azure_image, azure_model = mlflow.azureml.build_image(
    >>>                                 model_path="<model_path>",
    >>>                                 workspace=azure_workspace,
    >>>                                 synchronous=True)
    >>> # If your image build failed, you can access build logs at the following URI:
    >>> print("Access the following URI for build logs: {}".format(azure_image.image_build_log_uri))
    >>>
    >>> # Deploy the image to Azure Container Instances (ACI) for real-time serving
    >>> webservice_deployment_config = AciWebservice.deploy_configuration()
    >>> webservice = Webservice.deploy_from_image(
    >>>                    image=azure_image, workspace=azure_workspace, name="<deployment-name>")
    >>> webservice.wait_for_deployment()
    """
    # The Azure ML SDK is only compatible with Python 3. However, the `mlflow.azureml` module should
    # still be accessible for import from Python 2. Therefore, we will only import from the SDK
    # upon method invocation.
    # pylint: disable=import-error
    from azureml.core.image import ContainerImage
    from azureml.core.model import Model as AzureModel

    absolute_model_path = _download_artifact_from_uri(model_uri)

    model_pyfunc_conf = _load_pyfunc_conf(model_path=absolute_model_path)
    model_python_version = model_pyfunc_conf.get(pyfunc.PY_VERSION, None)
    if model_python_version is not None and\
            StrictVersion(model_python_version) < StrictVersion("3.0.0"):
        raise MlflowException(message=(
            "Azure ML can only deploy models trained in Python 3 or above! Please see"
            " the following MLflow GitHub issue for a thorough explanation of this"
            " limitation and a workaround to enable support for deploying models"
            " trained in Python 2: https://github.com/mlflow/mlflow/issues/668"
        ),
                              error_code=INVALID_PARAMETER_VALUE)

    tags = _build_tags(model_uri=model_uri,
                       model_python_version=model_python_version,
                       user_tags=tags)

    if image_name is None:
        image_name = _get_mlflow_azure_resource_name()
    if model_name is None:
        model_name = _get_mlflow_azure_resource_name()

    with TempDir(chdr=True) as tmp:
        model_directory_path = tmp.path("model")
        tmp_model_path = os.path.join(
            model_directory_path,
            _copy_file_or_tree(src=absolute_model_path,
                               dst=model_directory_path))

        registered_model = AzureModel.register(workspace=workspace,
                                               model_path=tmp_model_path,
                                               model_name=model_name,
                                               tags=tags,
                                               description=description)
        _logger.info(
            "Registered an Azure Model with name: `%s` and version: `%s`",
            registered_model.name, registered_model.version)

        # Create an execution script (entry point) for the image's model server. Azure ML requires
        # the container's execution script to be located in the current working directory during
        # image creation, so we create the execution script as a temporary file in the current
        # working directory.
        execution_script_path = tmp.path("execution_script.py")
        _create_execution_script(output_path=execution_script_path,
                                 azure_model=registered_model)
        # Azure ML copies the execution script into the image's application root directory by
        # prepending "/var/azureml-app" to the specified script path. The script is then executed
        # by referencing its path relative to the "/var/azureml-app" directory. Unfortunately,
        # if the script path is an absolute path, Azure ML attempts to reference it directly,
        # resulting in a failure. To circumvent this problem, we provide Azure ML with the relative
        # script path. Because the execution script was created in the current working directory,
        # this relative path is the script path's base name.
        execution_script_path = os.path.basename(execution_script_path)

        if mlflow_home is not None:
            _logger.info(
                "Copying the specified mlflow_home directory: `%s` to a temporary location for"
                " container creation", mlflow_home)
            mlflow_home = os.path.join(
                tmp.path(),
                _copy_project(src_path=mlflow_home, dst_path=tmp.path()))
            image_file_dependencies = [mlflow_home]
        else:
            image_file_dependencies = None
        dockerfile_path = tmp.path("Dockerfile")
        _create_dockerfile(output_path=dockerfile_path,
                           mlflow_path=mlflow_home)

        conda_env_path = None
        if pyfunc.ENV in model_pyfunc_conf:
            conda_env_path = os.path.join(tmp_model_path,
                                          model_pyfunc_conf[pyfunc.ENV])

        image_configuration = ContainerImage.image_configuration(
            execution_script=execution_script_path,
            runtime="python",
            docker_file=dockerfile_path,
            dependencies=image_file_dependencies,
            conda_file=conda_env_path,
            description=description,
            tags=tags,
        )
        image = ContainerImage.create(workspace=workspace,
                                      name=image_name,
                                      image_config=image_configuration,
                                      models=[registered_model])
        _logger.info(
            "Building an Azure Container Image with name: `%s` and version: `%s`",
            image.name, image.version)
        if synchronous:
            image.wait_for_creation(show_output=True)
        return image, registered_model
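The excerpt above appears to be the tail of mlflow.azureml.build_image. A minimal usage sketch, assuming an existing Azure ML workspace and a previously logged model (the workspace name, run ID, and paths are placeholders):

import mlflow.azureml
from azureml.core import Workspace

# Hypothetical workspace lookup; requires Azure credentials to be configured
workspace = Workspace.get(name="<workspace-name>")

# Build a container image for a logged model and block until it is ready
image, azure_model = mlflow.azureml.build_image(
    model_uri="runs:/<run-id>/model",
    workspace=workspace,
    synchronous=True,
)
print(image.image_location)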
Example No. 17
def create_inference_config(tmp_dir, model_name, model_version, service_name):
    """
    Create the InferenceConfig object which will be used to deploy.

    :param tmp_dir:
    :type tmp_dir:
    :param model_name:
    :type model_name:
    :param model_version:
    :type model_version:
    :param service_name:
    :type service_name:
    :return:
    :rtype:
    """
    absolute_model_path = _download_artifact_from_uri('models:/{}/{}'.format(model_name, model_version))
    model_folder = absolute_model_path.split(os.path.sep)[-1]
    model_directory_path = tmp_dir.path("model")
    tmp_model_path = os.path.join(
        model_directory_path,
        _copy_file_or_tree(src=absolute_model_path, dst=model_directory_path),
    )

    # Create environment
    env_name = service_name + "-env"
    env_name = env_name[:32]
    mlflow_model = Model.load(os.path.join(absolute_model_path, MLMODEL_FILE_NAME))

    model_pyfunc_conf = load_pyfunc_conf(mlflow_model)
    if pyfunc.ENV in model_pyfunc_conf:
        environment = AzureEnvironment.from_conda_specification(
            env_name,
            os.path.join(tmp_model_path, model_pyfunc_conf[pyfunc.ENV])
        )
    else:
        raise MlflowException('No environment information was provided with the model')

    sample_input_df = None
    sample_output_df = None

    # Leaving this here, commented out for now. The issue is that our swagger handling doesn't work with OpenAPI 3.
    # This runs into issues because a pandas dataframe in a split orient (the default) can have arrays of mixed
    # types, which isn't supported in OpenAPI 2. So for now, we will only use the empty signature to generate
    # swagger, and when we've updated our swagger handling to support OpenAPI 3 we can add this back in.
    """
    if mlflow_model.saved_input_example_info:
        sample_input_file_path = os.path.join(absolute_model_path,
                                              mlflow_model.saved_input_example_info['artifact_path'])
        with open(sample_input_file_path, 'r') as sample_input_file:
            if mlflow_model.saved_input_example_info['type'] == 'dataframe':
                sample_input_df = pandas.read_json(sample_input_file,
                                                   orient=mlflow_model.saved_input_example_info['pandas_orient'])
            else:
                raise MlflowException('Sample model input must be of type "dataframe"')
    """

    if mlflow_model.signature:
        if mlflow_model.signature.inputs and sample_input_df is None:
            # 'is None' check is necessary because dataframes don't like being used as truth values
            columns = mlflow_model.signature.inputs.column_names()
            types = mlflow_model.signature.inputs.pandas_types()
            schema = dict(zip(columns, types))
            sample_input_df = pandas.DataFrame(columns=columns).astype(dtype=schema)
        if mlflow_model.signature.outputs and sample_output_df is None:
            columns = mlflow_model.signature.outputs.column_names()
            types = mlflow_model.signature.outputs.pandas_types()
            schema = dict(zip(columns, types))
            sample_output_df = pandas.DataFrame(columns=columns).astype(dtype=schema)

    # Create execution script
    execution_script_path = tmp_dir.path("execution_script.py")
    create_execution_script(execution_script_path, model_folder, sample_input_df, sample_output_df)

    # Add inference dependencies
    environment.python.conda_dependencies.add_pip_package("mlflow=={}".format(mlflow_version))
    environment.python.conda_dependencies.add_pip_package("inference-schema>=1.2.0")
    environment.python.conda_dependencies.add_pip_package("azureml-model-management-sdk==1.0.1b6.post1")
    environment.python.conda_dependencies.add_pip_package("flask==1.0.3")
    environment.python.conda_dependencies.add_pip_package("gunicorn==19.9.0")
    environment.python.conda_dependencies.add_pip_package("applicationinsights>=0.11.7")
    environment.python.conda_dependencies.add_pip_package("werkzeug>=0.16.1,<=1.0.1")

    # Create InferenceConfig
    inference_config = InferenceConfig(entry_script=execution_script_path, environment=environment)

    return inference_config
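A hedged usage sketch for the helper above, assuming the surrounding module's imports and a model already registered in the MLflow model registry (the model and service names are placeholders):

from mlflow.utils.file_utils import TempDir

# The helper expects a TempDir so that the staged model and the execution
# script share a single temporary working directory
with TempDir(chdr=True) as tmp:
    inference_config = create_inference_config(
        tmp_dir=tmp,
        model_name="my-registered-model",
        model_version=1,
        service_name="my-service",
    )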
Example No. 18
def save_model(
    tf_saved_model_dir,
    tf_meta_graph_tags,
    tf_signature_def_key,
    path,
    mlflow_model=None,
    conda_env=None,
    signature: ModelSignature = None,
    input_example: ModelInputExample = None,
):
    """
    Save a *serialized* collection of TensorFlow graphs and variables as an MLflow model
    to a local path. This method operates on TensorFlow variables and graphs that have been
    serialized in TensorFlow's ``SavedModel`` format. For more information about ``SavedModel``
    format, see the TensorFlow documentation:
    https://www.tensorflow.org/guide/saved_model#save_and_restore_models.

    :param tf_saved_model_dir: Path to the directory containing serialized TensorFlow variables and
                               graphs in ``SavedModel`` format.
    :param tf_meta_graph_tags: A list of tags identifying the model's metagraph within the
                               serialized ``SavedModel`` object. For more information, see the
                               ``tags`` parameter of the
                               ``tf.saved_model.builder.savedmodelbuilder`` method.
    :param tf_signature_def_key: A string identifying the input/output signature associated with the
                                 model. This is a key within the serialized ``savedmodel``
                                 signature definition mapping. For more information, see the
                                 ``signature_def_map`` parameter of the
                                 ``tf.saved_model.builder.savedmodelbuilder`` method.
    :param path: Local path where the MLflow model is to be saved.
    :param mlflow_model: MLflow model configuration to which to add the ``tensorflow`` flavor.
    :param conda_env: Either a dictionary representation of a Conda environment or the path to a
                      Conda environment yaml file. If provided, this describes the environment
                      this model should be run in. At minimum, it should specify the dependencies
                      contained in :func:`get_default_conda_env()`. If ``None``, the default
                      :func:`get_default_conda_env()` environment is added to the model. The
                      following is an *example* dictionary representation of a Conda environment::

                        {
                            'name': 'mlflow-env',
                            'channels': ['defaults'],
                            'dependencies': [
                                'python=3.7.0',
                                'tensorflow=1.8.0'
                            ]
                        }
    :param signature: (Experimental) :py:class:`ModelSignature <mlflow.models.ModelSignature>`
                      describes model input and output :py:class:`Schema <mlflow.types.Schema>`.
                      The model signature can be :py:func:`inferred <mlflow.models.infer_signature>`
                      from datasets with valid model input (e.g. the training dataset with target
                      column omitted) and valid model output (e.g. model predictions generated on
                      the training dataset), for example:

                      .. code-block:: python

                        from mlflow.models.signature import infer_signature
                        train = df.drop_column("target_label")
                        predictions = ... # compute model predictions
                        signature = infer_signature(train, predictions)
    :param input_example: (Experimental) Input example provides one or several instances of valid
                          model input. The example can be used as a hint of what data to feed the
                          model. The given example will be converted to a Pandas DataFrame and then
                          serialized to json using the Pandas split-oriented format. Bytes are
                          base64-encoded.

    """
    _logger.info(
        "Validating the specified TensorFlow model by attempting to load it in a new TensorFlow"
        " graph...")
    _validate_saved_model(
        tf_saved_model_dir=tf_saved_model_dir,
        tf_meta_graph_tags=tf_meta_graph_tags,
        tf_signature_def_key=tf_signature_def_key,
    )
    _logger.info("Validation succeeded!")

    if os.path.exists(path):
        raise MlflowException("Path '{}' already exists".format(path),
                              DIRECTORY_NOT_EMPTY)
    os.makedirs(path)
    if mlflow_model is None:
        mlflow_model = Model()
    if signature is not None:
        mlflow_model.signature = signature
    if input_example is not None:
        _save_example(mlflow_model, input_example, path)
    root_relative_path = _copy_file_or_tree(src=tf_saved_model_dir,
                                            dst=path,
                                            dst_dir=None)
    model_dir_subpath = "tfmodel"
    shutil.move(os.path.join(path, root_relative_path),
                os.path.join(path, model_dir_subpath))

    conda_env_subpath = "conda.yaml"
    if conda_env is None:
        conda_env = get_default_conda_env()
    elif not isinstance(conda_env, dict):
        with open(conda_env, "r") as f:
            conda_env = yaml.safe_load(f)
    with open(os.path.join(path, conda_env_subpath), "w") as f:
        yaml.safe_dump(conda_env, stream=f, default_flow_style=False)

    mlflow_model.add_flavor(
        FLAVOR_NAME,
        saved_model_dir=model_dir_subpath,
        meta_graph_tags=tf_meta_graph_tags,
        signature_def_key=tf_signature_def_key,
    )
    pyfunc.add_to_model(mlflow_model,
                        loader_module="mlflow.tensorflow",
                        env=conda_env_subpath)
    mlflow_model.save(os.path.join(path, MLMODEL_FILE_NAME))
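A minimal usage sketch for the function above, assuming ./my_saved_model contains a TensorFlow SavedModel exported with the standard serving tag and signature key:

import mlflow.tensorflow

# Save the SavedModel as an MLflow model at ./mlflow_model
mlflow.tensorflow.save_model(
    tf_saved_model_dir="./my_saved_model",
    tf_meta_graph_tags=["serve"],
    tf_signature_def_key="serving_default",
    path="./mlflow_model",
)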
Example No. 19
def deploy(
    model_uri,
    workspace,
    deployment_config=None,
    service_name=None,
    model_name=None,
    tags=None,
    mlflow_home=None,
    synchronous=True,
):
    """
    Register an MLflow model with Azure ML and deploy a webservice to Azure Container Instances (ACI)
    or Azure Kubernetes Service (AKS).

    The deployed service will contain a webserver that processes model queries.
    For information about the input data formats accepted by this webserver, see the
    :ref:`MLflow deployment tools documentation <azureml_deployment>`.

    :param model_uri: The location, in URI format, of the MLflow model used to build the Azure
                      ML deployment image. For example:

                      - ``/Users/me/path/to/local/model``
                      - ``relative/path/to/local/model``
                      - ``s3://my_bucket/path/to/model``
                      - ``runs:/<mlflow_run_id>/run-relative/path/to/model``
                      - ``models:/<model_name>/<model_version>``
                      - ``models:/<model_name>/<stage>``

                      For more information about supported URI schemes, see
                      `Referencing Artifacts <https://www.mlflow.org/docs/latest/concepts.html#
                      artifact-locations>`_.
    :param workspace: The AzureML workspace in which to deploy the service. This is an
                      `azureml.core.Workspace` object.
    :param deployment_config: The configuration for the Azure web service. This configuration
                              allows you to specify the resources the webservice will use and
                              the compute cluster it will be deployed in. If unspecified, the web
                              service will be deployed into an Azure Container Instance. This is an
                              `azureml.core.DeploymentConfig` object. For more information, see
                              `<https://docs.microsoft.com/python/api/azureml-core/
                              azureml.core.webservice.aks.aksservicedeploymentconfiguration>`_ and
                              `<https://docs.microsoft.com/en-us/python/api/azureml-core/azureml
                              .core.webservice.aci.aciservicedeploymentconfiguration>`_
    :param service_name: The name to assign to the Azure Machine Learning webservice that will be
                         created. If unspecified, a unique name will be generated.
    :param model_name: The name to assign to the Azure Model that will be created. If unspecified,
                       a unique model name will be generated. Only used if the model is not
                       already registered with Azure.
    :param tags: A collection of tags, represented as a dictionary of string key-value pairs, to
                 associate with the Azure Model and Deployment that will be created.
                 These tags are added to a set of default tags that include the model URI,
                 and more. For more information, see
                 `<https://docs.microsoft.com/en-us/python/api/azureml-core/azureml.core.model(class)?view=azure-ml-py>`_.
    :param mlflow_home: Path to a local copy of the MLflow GitHub repository. If specified, the
                        image will install MLflow from this directory. Otherwise, it will install
                        MLflow from pip.
    :param synchronous: If ``True``, this method blocks until the deployment process
                        terminates before returning. If ``False``, the method returns immediately,
                        but the returned service will not be available until the asynchronous
                        creation process completes. Use the
                        ``azureml.core.Webservice.wait_for_deployment()`` function to wait
                        for the deployment process to complete.
    :return: A tuple containing the following elements in order:
            - An ``azureml.core.webservice.Webservice`` object containing metadata for the
            new service.
            - An ``azureml.core.model.Model`` object containing metadata for the new model.

    .. code-block:: python
        :caption: Example

        import mlflow.azureml
        from azureml.core import Workspace
        from azureml.core.webservice import AciWebservice, Webservice

        # Load or create an Azure ML Workspace
        workspace_name = "<Name of your Azure ML workspace>"
        subscription_id = "<Your Azure subscription ID>"
        resource_group = "<Name of the Azure resource group in which to create Azure ML resources>"
        location = "<Name of the Azure location (region) in which to create Azure ML resources>"
        azure_workspace = Workspace.create(name=workspace_name,
                                           subscription_id=subscription_id,
                                           resource_group=resource_group,
                                           location=location,
                                           create_resource_group=True,
                                           exist_ok=True)

        # Create an Azure Container Instance webservice for an MLflow model
        azure_service, azure_model = mlflow.azureml.deploy(model_uri="<model_uri>",
                                                           service_name="<deployment-name>",
                                                           workspace=azure_workspace,
                                                           synchronous=True)
    """
    # The Azure ML SDK is only compatible with Python 3. However, the `mlflow.azureml` module should
    # still be accessible for import from Python 2. Therefore, we will only import from the SDK
    # upon method invocation.
    # pylint: disable=import-error
    from azureml.core.model import Model as AzureModel, InferenceConfig
    from azureml.core import Environment as AzureEnvironment
    from azureml.core import VERSION as AZUREML_VERSION
    from azureml.core.webservice import AciWebservice

    absolute_model_path = _download_artifact_from_uri(model_uri)

    model_pyfunc_conf, model = _load_pyfunc_conf_with_model(
        model_path=absolute_model_path)
    model_python_version = model_pyfunc_conf.get(pyfunc.PY_VERSION, None)
    run_id = None
    run_id_tag = None
    try:
        run_id = model.run_id
        run_id_tag = run_id
    except AttributeError:
        run_id = str(uuid.uuid4())
    if model_python_version is not None and StrictVersion(
            model_python_version) < StrictVersion("3.0.0"):
        raise MlflowException(
            message=
            ("Azure ML can only deploy models trained in Python 3 and above. See"
             " the following MLflow GitHub issue for a thorough explanation of this"
             " limitation and a workaround to enable support for deploying models"
             " trained in Python 2: https://github.com/mlflow/mlflow/issues/668"
             ),
            error_code=INVALID_PARAMETER_VALUE,
        )

    tags = _build_tags(
        model_uri=model_uri,
        model_python_version=model_python_version,
        user_tags=tags,
        run_id=run_id_tag,
    )

    if service_name is None:
        service_name = _get_mlflow_azure_name(run_id)
    if model_name is None:
        model_name = _get_mlflow_azure_name(run_id)

    with TempDir(chdr=True) as tmp:
        model_directory_path = tmp.path("model")
        tmp_model_path = os.path.join(
            model_directory_path,
            _copy_file_or_tree(src=absolute_model_path,
                               dst=model_directory_path),
        )

        registered_model = None
        azure_model_id = None

        # If we are passed a 'models' uri, we will attempt to extract a name and version which
        # can be used to retrieve an AzureML Model. This will ignore stage-based model URIs,
        # which is alright until we have full deployment plugin support.
        #
        # If instead we are passed a 'runs' uri while the user is using the AzureML tracking
        # and registry stores, we will be able to register the model on their behalf using
        # the AzureML plugin, which will maintain lineage between the model and the run that
        # produced it. This returns an MLflow Model object, however, so we'll still need the
        # name and ID in order to retrieve the AzureML Model object which is currently
        # needed to deploy.
        if model_uri.startswith("models:/"):
            m_name = model_uri.split("/")[-2]
            m_version = int(model_uri.split("/")[-1])
            azure_model_id = "{}:{}".format(m_name, m_version)
        elif (model_uri.startswith("runs:/")
              and get_tracking_uri().startswith("azureml")
              and get_registry_uri().startswith("azureml")):
            mlflow_model = mlflow_register_model(model_uri, model_name)
            azure_model_id = "{}:{}".format(mlflow_model.name,
                                            mlflow_model.version)

            _logger.info(
                "Registered an Azure Model with name: `%s` and version: `%s`",
                mlflow_model.name,
                mlflow_model.version,
            )

        # Attempt to retrieve an AzureML Model object which we intend to deploy
        if azure_model_id:
            try:
                registered_model = AzureModel(workspace, id=azure_model_id)
                _logger.info("Found registered model in AzureML with ID '%s'",
                             azure_model_id)
            except Exception as e:  # pylint: disable=broad-except
                _logger.info(
                    "Unable to find model in AzureML with ID '%s', will register the model.\n"
                    "Exception was: %s",
                    azure_model_id,
                    e,
                )

        # If we have not found a registered model by this point, we will register it on the user's
        # behalf. It is required for a Model to be registered in some way with Azure in order to
        # deploy to Azure, so this is expected for Azure users.
        if not registered_model:
            registered_model = AzureModel.register(workspace=workspace,
                                                   model_path=tmp_model_path,
                                                   model_name=model_name,
                                                   tags=tags)

            _logger.info(
                "Registered an Azure Model with name: `%s` and version: `%s`",
                registered_model.name,
                registered_model.version,
            )

        # Create an execution script (entry point) for the image's model server. Azure ML requires
        # the container's execution script to be located in the current working directory during
        # image creation, so we create the execution script as a temporary file in the current
        # working directory.
        execution_script_path = tmp.path("execution_script.py")
        _create_execution_script(output_path=execution_script_path,
                                 azure_model=registered_model)

        environment = None
        if pyfunc.ENV in model_pyfunc_conf:
            environment = AzureEnvironment.from_conda_specification(
                _get_mlflow_azure_name(run_id),
                os.path.join(tmp_model_path, model_pyfunc_conf[pyfunc.ENV]),
            )
        else:
            environment = AzureEnvironment(_get_mlflow_azure_name(run_id))

        if mlflow_home is not None:
            path = tmp.path("dist")
            _logger.info("Bulding temporary MLFlow wheel in %s", path)
            wheel = _create_mlflow_wheel(mlflow_home, path)
            whl_url = AzureEnvironment.add_private_pip_wheel(
                workspace=workspace, file_path=wheel, exist_ok=True)
            environment.python.conda_dependencies.add_pip_package(whl_url)
        else:
            environment.python.conda_dependencies.add_pip_package(
                "mlflow=={}".format(mlflow_version))

        # AzureML requires azureml-defaults to be installed to include
        # flask for the inference server.
        environment.python.conda_dependencies.add_pip_package(
            "azureml-defaults=={}".format(AZUREML_VERSION))

        inference_config = InferenceConfig(entry_script=execution_script_path,
                                           environment=environment)

        if deployment_config is not None:
            if deployment_config.tags is not None:
                # We want more narrowly-scoped tags to win on merge
                tags.update(deployment_config.tags)
            deployment_config.tags = tags
        else:
            deployment_config = AciWebservice.deploy_configuration(tags=tags)

        # Finally, deploy the AzureML Model object to a webservice and return it
        webservice = AzureModel.deploy(
            workspace=workspace,
            name=service_name,
            models=[registered_model],
            inference_config=inference_config,
            deployment_config=deployment_config,
        )
        _logger.info("Deploying an Azure Webservice with name: `%s`",
                     webservice.name)
        if synchronous:
            webservice.wait_for_deployment(show_output=True)
        return webservice, registered_model
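Beyond the docstring example above, the deployment_config parameter can carry an explicit ACI configuration; a sketch, reusing the azure_workspace from that example:

from azureml.core.webservice import AciWebservice

# Request explicit compute resources instead of the default ACI configuration
aci_config = AciWebservice.deploy_configuration(cpu_cores=1, memory_gb=2)
webservice, azure_model = deploy(
    model_uri="models:/<model-name>/1",
    workspace=azure_workspace,
    deployment_config=aci_config,
    synchronous=True,
)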
Example No. 20
def save_model(
    fonduer_model: FonduerModel,
    path: str,
    preprocessor: DocPreprocessor,
    parser: Parser,
    mention_extractor: MentionExtractor,
    candidate_extractor: CandidateExtractor,
    mlflow_model: Model = Model(),
    conda_env: Optional[Union[Dict, str]] = None,
    code_paths: Optional[List[str]] = None,
    model_type: Optional[str] = "emmental",
    labeler: Optional[Labeler] = None,
    lfs: Optional[List[List[Callable]]] = None,
    label_models: Optional[List[LabelModel]] = None,
    featurizer: Optional[Featurizer] = None,
    emmental_model: Optional[EmmentalModel] = None,
    word2id: Optional[Dict] = None,
) -> None:
    """Save a Fonduer model to a path on the local file system.

    :param fonduer_model: Fonduer model to be saved.
    :param path: the path on the local file system.
    :param preprocessor: the doc preprocessor.
    :param parser: the parser.
    :param mention_extractor: the mention extractor.
    :param candidate_extractor: the candidate extractor.
    :param mlflow_model: model configuration.
    :param conda_env: Either a dictionary representation of a Conda environment
        or the path to a Conda environment yaml file.
    :param code_paths: A list of local filesystem paths to Python file dependencies,
        or directories containing file dependencies. These files are prepended to the
        system path when the model is loaded.
    :param model_type: the model type, either "emmental" or "label",
        defaults to "emmental".
    :param labeler: a labeler, defaults to None.
    :param lfs: a list of list of labeling functions.
    :param label_models: a list of label models, defaults to None.
    :param featurizer: a featurizer, defaults to None.
    :param emmental_model: an Emmental model, defaults to None.
    :param word2id: a word embedding map.
    """
    os.makedirs(path)
    model_code_path = os.path.join(path, pyfunc.CODE)
    os.makedirs(model_code_path)

    # Save mention_classes and candidate_classes
    _save_mention_classes(mention_extractor.udf_init_kwargs["mention_classes"],
                          path)
    _save_candidate_classes(
        candidate_extractor.udf_init_kwargs["candidate_classes"], path)
    # Setting __module__ to "__main__" keeps lfs picklable even without their
    # defining module (e.g. fonduer_lfs.py).
    # https://github.com/cloudpipe/cloudpickle/issues/206#issuecomment-555939172
    modules = []
    if model_type == "label":
        for _ in lfs:
            for lf in _:
                modules.append(lf.__module__)
                lf.__module__ = "__main__"

    # Note that instances of ParserUDF and other UDFs are themselves not picklable.
    # https://stackoverflow.com/a/52026025
    model = {
        "fonduer_model": fonduer_model,
        "preprosessor": preprocessor,
        "parser": parser.udf_init_kwargs,
        "mention_extractor": mention_extractor.udf_init_kwargs,
        "candidate_extractor": candidate_extractor.udf_init_kwargs,
    }
    if model_type == "emmental":
        key_names = [key.name for key in featurizer.get_keys()]
        model["feature_keys"] = key_names
        model["word2id"] = word2id
        model["emmental_model"] = _save_emmental_model(emmental_model)
    else:
        key_names = [key.name for key in labeler.get_keys()]
        model["labeler_keys"] = key_names
        model["lfs"] = lfs
        model["label_models_state_dict"] = [
            label_model.__dict__ for label_model in label_models
        ]
    with open(os.path.join(path, "model.pkl"), "wb") as f:
        pickle.dump(model, f)

    # Restore __module__ back to the original
    if model_type == "label":
        for _ in lfs:
            for lf in _:
                lf.__module__ = modules.pop()

    # Create a conda yaml file.
    conda_env_subpath = "conda.yaml"
    if conda_env is None:
        conda_env = _get_default_conda_env()
    elif not isinstance(conda_env, dict):
        with open(conda_env, "r") as f:
            conda_env = yaml.safe_load(f)
    with open(os.path.join(path, conda_env_subpath), "w") as f:
        yaml.safe_dump(conda_env, stream=f, default_flow_style=False)

    # Copy code_paths.
    if code_paths is not None:
        for code_path in code_paths:
            _copy_file_or_tree(src=code_path, dst=model_code_path)

    mlflow_model.add_flavor(
        pyfunc.FLAVOR_NAME,
        code=pyfunc.CODE,
        loader_module=__name__,
        model_type=model_type,
        env=conda_env_subpath,
    )
    mlflow_model.save(os.path.join(path, "MLmodel"))
Example No. 21
def _save_model_with_class_artifacts_params(
    path,
    python_model,
    artifacts=None,
    conda_env=None,
    code_paths=None,
    mlflow_model=None,
    pip_requirements=None,
    extra_pip_requirements=None,
):
    """
    :param path: The path to which to save the Python model.
    :param python_model: An instance of a subclass of :class:`~PythonModel`. ``python_model``
                        defines how the model loads artifacts and how it performs inference.
    :param artifacts: A dictionary containing ``<name, artifact_uri>`` entries.
                      Remote artifact URIs
                      are resolved to absolute filesystem paths, producing a dictionary of
                      ``<name, absolute_path>`` entries. ``python_model`` can reference these
                      resolved entries as the ``artifacts`` property of the ``context``
                      attribute. If ``None``, no artifacts are added to the model.
    :param conda_env: Either a dictionary representation of a Conda environment or the
                      path to a Conda environment yaml file. If provided, this describes the
                      environment this model should be run in. At minimum, it should specify
                      the dependencies
                      contained in :func:`get_default_conda_env()`. If ``None``, the default
                      :func:`get_default_conda_env()` environment is added to the model.
    :param code_paths: A list of local filesystem paths to Python file dependencies (or directories
                       containing file dependencies). These files are *prepended* to the system
                       path before the model is loaded.
    :param mlflow_model: The model configuration to which to add the ``mlflow.pyfunc`` flavor.
    """
    if mlflow_model is None:
        mlflow_model = Model()

    custom_model_config_kwargs = {
        CONFIG_KEY_CLOUDPICKLE_VERSION: cloudpickle.__version__,
    }
    if isinstance(python_model, PythonModel):
        saved_python_model_subpath = "python_model.pkl"
        with open(os.path.join(path, saved_python_model_subpath), "wb") as out:
            cloudpickle.dump(python_model, out)
        custom_model_config_kwargs[
            CONFIG_KEY_PYTHON_MODEL] = saved_python_model_subpath
    else:
        raise MlflowException(
            message=
            ("`python_model` must be a subclass of `PythonModel`. Instead, found an"
             " object of type: {python_model_type}".format(
                 python_model_type=type(python_model))),
            error_code=INVALID_PARAMETER_VALUE,
        )

    if artifacts:
        saved_artifacts_config = {}
        with TempDir() as tmp_artifacts_dir:
            tmp_artifacts_config = {}
            saved_artifacts_dir_subpath = "artifacts"
            for artifact_name, artifact_uri in artifacts.items():
                tmp_artifact_path = _download_artifact_from_uri(
                    artifact_uri=artifact_uri,
                    output_path=tmp_artifacts_dir.path())
                tmp_artifacts_config[artifact_name] = tmp_artifact_path
                saved_artifact_subpath = posixpath.join(
                    saved_artifacts_dir_subpath,
                    os.path.relpath(path=tmp_artifact_path,
                                    start=tmp_artifacts_dir.path()),
                )
                saved_artifacts_config[artifact_name] = {
                    CONFIG_KEY_ARTIFACT_RELATIVE_PATH: saved_artifact_subpath,
                    CONFIG_KEY_ARTIFACT_URI: artifact_uri,
                }

            shutil.move(tmp_artifacts_dir.path(),
                        os.path.join(path, saved_artifacts_dir_subpath))
        custom_model_config_kwargs[
            CONFIG_KEY_ARTIFACTS] = saved_artifacts_config

    saved_code_subpath = None
    if code_paths is not None:
        saved_code_subpath = "code"
        for code_path in code_paths:
            _copy_file_or_tree(src=code_path,
                               dst=path,
                               dst_dir=saved_code_subpath)

    mlflow.pyfunc.add_to_model(
        model=mlflow_model,
        loader_module=__name__,
        code=saved_code_subpath,
        env=_CONDA_ENV_FILE_NAME,
        **custom_model_config_kwargs,
    )
    mlflow_model.save(os.path.join(path, MLMODEL_FILE_NAME))

    if conda_env is None:
        if pip_requirements is None:
            default_reqs = get_default_pip_requirements()
            # To ensure `_load_pyfunc` can successfully load the model during the dependency
            # inference, `mlflow_model.save` must be called beforehand to save an MLmodel file.
            inferred_reqs = mlflow.models.infer_pip_requirements(
                path,
                mlflow.pyfunc.FLAVOR_NAME,
                fallback=default_reqs,
            )
            default_reqs = sorted(set(inferred_reqs).union(default_reqs))
        else:
            default_reqs = None
        conda_env, pip_requirements, pip_constraints = _process_pip_requirements(
            default_reqs,
            pip_requirements,
            extra_pip_requirements,
        )
    else:
        conda_env, pip_requirements, pip_constraints = _process_conda_env(
            conda_env)

    with open(os.path.join(path, _CONDA_ENV_FILE_NAME), "w") as f:
        yaml.safe_dump(conda_env, stream=f, default_flow_style=False)

    # Save `constraints.txt` if necessary
    if pip_constraints:
        write_to(os.path.join(path, _CONSTRAINTS_FILE_NAME),
                 "\n".join(pip_constraints))

    # Save `requirements.txt`
    write_to(os.path.join(path, _REQUIREMENTS_FILE_NAME),
             "\n".join(pip_requirements))

    _PythonEnv.current().to_yaml(os.path.join(path, _PYTHON_ENV_FILE_NAME))
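This private helper appears to be what the public mlflow.pyfunc.save_model entry point delegates to for class-based models; a minimal sketch using that public API:

import mlflow.pyfunc

class AddN(mlflow.pyfunc.PythonModel):
    """Toy model that adds ``n`` to every column of its input DataFrame."""

    def __init__(self, n):
        self.n = n

    def predict(self, context, model_input):
        return model_input.apply(lambda column: column + self.n)

# Persists python_model.pkl, the MLmodel file, and the environment files
mlflow.pyfunc.save_model(path="add_n_model", python_model=AddN(n=5))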
Example No. 22
def save_model(
    tf_saved_model_dir,
    tf_meta_graph_tags,
    tf_signature_def_key,
    path,
    mlflow_model=None,
    conda_env=None,
    signature: ModelSignature = None,
    input_example: ModelInputExample = None,
    pip_requirements=None,
    extra_pip_requirements=None,
):
    """
    Save a *serialized* collection of TensorFlow graphs and variables as an MLflow model
    to a local path. This method operates on TensorFlow variables and graphs that have been
    serialized in TensorFlow's ``SavedModel`` format. For more information about ``SavedModel``
    format, see the TensorFlow documentation:
    https://www.tensorflow.org/guide/saved_model#save_and_restore_models.

    :param tf_saved_model_dir: Path to the directory containing serialized TensorFlow variables and
                               graphs in ``SavedModel`` format.
    :param tf_meta_graph_tags: A list of tags identifying the model's metagraph within the
                               serialized ``SavedModel`` object. For more information, see the
                               ``tags`` parameter of the
                               ``tf.saved_model.builder.savedmodelbuilder`` method.
    :param tf_signature_def_key: A string identifying the input/output signature associated with the
                                 model. This is a key within the serialized ``savedmodel``
                                 signature definition mapping. For more information, see the
                                 ``signature_def_map`` parameter of the
                                 ``tf.saved_model.builder.savedmodelbuilder`` method.
    :param path: Local path where the MLflow model is to be saved.
    :param mlflow_model: MLflow model configuration to which to add the ``tensorflow`` flavor.
    :param conda_env: {{ conda_env }}
    :param signature: :py:class:`ModelSignature <mlflow.models.ModelSignature>`
                      describes model input and output :py:class:`Schema <mlflow.types.Schema>`.
                      The model signature can be :py:func:`inferred <mlflow.models.infer_signature>`
                      from datasets with valid model input (e.g. the training dataset with target
                      column omitted) and valid model output (e.g. model predictions generated on
                      the training dataset), for example:

                      .. code-block:: python

                        from mlflow.models.signature import infer_signature
                        train = df.drop_column("target_label")
                        predictions = ... # compute model predictions
                        signature = infer_signature(train, predictions)
    :param input_example: Input example provides one or several instances of valid
                          model input. The example can be used as a hint of what data to feed the
                          model. The given example can be a Pandas DataFrame where the given
                          example will be serialized to json using the Pandas split-oriented
                          format, or a numpy array where the example will be serialized to json
                          by converting it to a list. Bytes are base64-encoded.
    :param pip_requirements: {{ pip_requirements }}
    :param extra_pip_requirements: {{ extra_pip_requirements }}
    """
    _validate_env_arguments(conda_env, pip_requirements, extra_pip_requirements)

    _logger.info(
        "Validating the specified TensorFlow model by attempting to load it in a new TensorFlow"
        " graph..."
    )
    _validate_saved_model(
        tf_saved_model_dir=tf_saved_model_dir,
        tf_meta_graph_tags=tf_meta_graph_tags,
        tf_signature_def_key=tf_signature_def_key,
    )
    _logger.info("Validation succeeded!")

    if os.path.exists(path):
        raise MlflowException("Path '{}' already exists".format(path), DIRECTORY_NOT_EMPTY)
    os.makedirs(path)
    if mlflow_model is None:
        mlflow_model = Model()
    if signature is not None:
        mlflow_model.signature = signature
    if input_example is not None:
        _save_example(mlflow_model, input_example, path)
    root_relative_path = _copy_file_or_tree(src=tf_saved_model_dir, dst=path, dst_dir=None)
    model_dir_subpath = "tfmodel"
    model_dir_path = os.path.join(path, model_dir_subpath)
    shutil.move(os.path.join(path, root_relative_path), model_dir_path)

    flavor_conf = dict(
        saved_model_dir=model_dir_subpath,
        meta_graph_tags=tf_meta_graph_tags,
        signature_def_key=tf_signature_def_key,
    )
    mlflow_model.add_flavor(FLAVOR_NAME, **flavor_conf)
    pyfunc.add_to_model(mlflow_model, loader_module="mlflow.tensorflow", env=_CONDA_ENV_FILE_NAME)
    mlflow_model.save(os.path.join(path, MLMODEL_FILE_NAME))

    if conda_env is None:
        if pip_requirements is None:
            default_reqs = get_default_pip_requirements()
            # To ensure `_load_pyfunc` can successfully load the model during the dependency
            # inference, `mlflow_model.save` must be called beforehand to save an MLmodel file.
            inferred_reqs = mlflow.models.infer_pip_requirements(
                path,
                FLAVOR_NAME,
                fallback=default_reqs,
            )
            default_reqs = sorted(set(inferred_reqs).union(default_reqs))
        else:
            default_reqs = None
        conda_env, pip_requirements, pip_constraints = _process_pip_requirements(
            default_reqs,
            pip_requirements,
            extra_pip_requirements,
        )
    else:
        conda_env, pip_requirements, pip_constraints = _process_conda_env(conda_env)

    with open(os.path.join(path, _CONDA_ENV_FILE_NAME), "w") as f:
        yaml.safe_dump(conda_env, stream=f, default_flow_style=False)

    # Save `constraints.txt` if necessary
    if pip_constraints:
        write_to(os.path.join(path, _CONSTRAINTS_FILE_NAME), "\n".join(pip_constraints))

    # Save `requirements.txt`
    write_to(os.path.join(path, _REQUIREMENTS_FILE_NAME), "\n".join(pip_requirements))
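Compared with the earlier variant, this version accepts requirement overrides; a sketch layering extra_pip_requirements on top of the inferred defaults (the paths and pin are placeholders):

import mlflow.tensorflow

mlflow.tensorflow.save_model(
    tf_saved_model_dir="./my_saved_model",
    tf_meta_graph_tags=["serve"],
    tf_signature_def_key="serving_default",
    path="./mlflow_model_pinned",
    extra_pip_requirements=["pandas==1.3.5"],
)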
Example No. 23
def _save_model_with_class_artifacts_params(path,
                                            python_model,
                                            artifacts=None,
                                            conda_env=None,
                                            code_paths=None,
                                            mlflow_model=Model()):
    """
    :param path: The path to which to save the Python model.
    :param python_model: An instance of a subclass of :class:`~PythonModel`. ``python_model``
                        defines how the model loads artifacts and how it performs inference.
    :param artifacts: A dictionary containing ``<name, artifact_uri>`` entries.
                      Remote artifact URIs
                      are resolved to absolute filesystem paths, producing a dictionary of
                      ``<name, absolute_path>`` entries. ``python_model`` can reference these
                      resolved entries as the ``artifacts`` property of the ``context``
                      attribute. If ``None``, no artifacts are added to the model.
    :param conda_env: Either a dictionary representation of a Conda environment or the
                      path to a Conda environment yaml file. If provided, this describes the
                      environment this model should be run in. At minimum, it should specify
                      the dependencies
                      contained in :func:`get_default_conda_env()`. If ``None``, the default
                      :func:`get_default_conda_env()` environment is added to the model.
    :param code_paths: A list of local filesystem paths to Python file dependencies (or directories
                       containing file dependencies). These files are *prepended* to the system
                       path before the model is loaded.
    :param mlflow_model: The model configuration to which to add the ``mlflow.pyfunc`` flavor.
    """
    custom_model_config_kwargs = {
        CONFIG_KEY_CLOUDPICKLE_VERSION: cloudpickle.__version__,
    }
    if isinstance(python_model, PythonModel):
        saved_python_model_subpath = "python_model.pkl"
        with open(os.path.join(path, saved_python_model_subpath), "wb") as out:
            cloudpickle.dump(python_model, out)
        custom_model_config_kwargs[
            CONFIG_KEY_PYTHON_MODEL] = saved_python_model_subpath
    else:
        raise MlflowException(
            message=
            ("`python_model` must be a subclass of `PythonModel`. Instead, found an"
             " object of type: {python_model_type}".format(
                 python_model_type=type(python_model))),
            error_code=INVALID_PARAMETER_VALUE,
        )

    if artifacts:
        saved_artifacts_config = {}
        with TempDir() as tmp_artifacts_dir:
            tmp_artifacts_config = {}
            saved_artifacts_dir_subpath = "artifacts"
            for artifact_name, artifact_uri in artifacts.items():
                tmp_artifact_path = _download_artifact_from_uri(
                    artifact_uri=artifact_uri,
                    output_path=tmp_artifacts_dir.path())
                tmp_artifacts_config[artifact_name] = tmp_artifact_path
                saved_artifact_subpath = posixpath.join(
                    saved_artifacts_dir_subpath,
                    os.path.relpath(path=tmp_artifact_path,
                                    start=tmp_artifacts_dir.path()),
                )
                saved_artifacts_config[artifact_name] = {
                    CONFIG_KEY_ARTIFACT_RELATIVE_PATH: saved_artifact_subpath,
                    CONFIG_KEY_ARTIFACT_URI: artifact_uri,
                }

            shutil.move(tmp_artifacts_dir.path(),
                        os.path.join(path, saved_artifacts_dir_subpath))
        custom_model_config_kwargs[
            CONFIG_KEY_ARTIFACTS] = saved_artifacts_config

    conda_env_subpath = "conda.yaml"
    if conda_env is None:
        conda_env = get_default_conda_env()
    elif not isinstance(conda_env, dict):
        with open(conda_env, "r") as f:
            conda_env = yaml.safe_load(f)
    with open(os.path.join(path, conda_env_subpath), "w") as f:
        yaml.safe_dump(conda_env, stream=f, default_flow_style=False)

    saved_code_subpath = None
    if code_paths is not None:
        saved_code_subpath = "code"
        for code_path in code_paths:
            _copy_file_or_tree(src=code_path,
                               dst=path,
                               dst_dir=saved_code_subpath)

    mlflow.pyfunc.add_to_model(model=mlflow_model,
                               loader_module=__name__,
                               code=saved_code_subpath,
                               env=conda_env_subpath,
                               **custom_model_config_kwargs)
    mlflow_model.save(os.path.join(path, MLMODEL_FILE_NAME))
Example No. 24
def save_model(
    pytorch_model,
    path,
    conda_env=None,
    mlflow_model=None,
    code_paths=None,
    pickle_module=None,
    signature: ModelSignature = None,
    input_example: ModelInputExample = None,
    **kwargs
):
    """
    Save a PyTorch model to a path on the local file system.

    :param pytorch_model: PyTorch model to be saved. Must accept a single ``torch.FloatTensor`` as
                          input and produce a single output tensor. Any code dependencies of the
                          model's class, including the class definition itself, should be
                          included in one of the following locations:

                          - The package(s) listed in the model's Conda environment, specified
                            by the ``conda_env`` parameter.
                          - One or more of the files specified by the ``code_paths`` parameter.

    :param path: Local path where the model is to be saved.
    :param conda_env: Either a dictionary representation of a Conda environment or the path to a
                      Conda environment yaml file. If provided, this describes the environment
                      this model should be run in. At minimum, it should specify the dependencies
                      contained in :func:`get_default_conda_env()`. If ``None``, the default
                      :func:`get_default_conda_env()` environment is added to the model. The
                      following is an *example* dictionary representation of a Conda environment::

                        {
                            'name': 'mlflow-env',
                            'channels': ['defaults'],
                            'dependencies': [
                                'python=3.7.0',
                                'pytorch=0.4.1',
                                'torchvision=0.2.1'
                            ]
                        }

    :param mlflow_model: :py:mod:`mlflow.models.Model` this flavor is being added to.
    :param code_paths: A list of local filesystem paths to Python file dependencies (or directories
                       containing file dependencies). These files are *prepended* to the system
                       path when the model is loaded.
    :param pickle_module: The module that PyTorch should use to serialize ("pickle") the specified
                          ``pytorch_model``. This is passed as the ``pickle_module`` parameter
                          to ``torch.save()``. By default, this module is also used to
                          deserialize ("unpickle") the PyTorch model at load time.

    :param signature: (Experimental) :py:class:`ModelSignature <mlflow.models.ModelSignature>`
                      describes model input and output :py:class:`Schema <mlflow.types.Schema>`.
                      The model signature can be :py:func:`inferred <mlflow.models.infer_signature>`
                      from datasets with valid model input (e.g. the training dataset with target
                      column omitted) and valid model output (e.g. model predictions generated on
                      the training dataset), for example:

                      .. code-block:: python

                        from mlflow.models.signature import infer_signature
                        train = df.drop_column("target_label")
                        predictions = ... # compute model predictions
                        signature = infer_signature(train, predictions)
    :param input_example: (Experimental) Input example provides one or several instances of valid
                          model input. The example can be used as a hint of what data to feed the
                          model. The given example will be converted to a Pandas DataFrame and then
                          serialized to json using the Pandas split-oriented format. Bytes are
                          base64-encoded.

    :param kwargs: kwargs to pass to ``torch.save`` method.

    .. code-block:: python
        :caption: Example

        import torch
        import mlflow
        import mlflow.pytorch

        # Define a trivial model and some synthetic training data
        pytorch_model = torch.nn.Linear(1, 1)
        x_data = torch.tensor([[1.0], [2.0], [3.0]])
        y_data = torch.tensor([[2.0], [4.0], [6.0]])
        loss_fn = torch.nn.MSELoss()
        optimizer = torch.optim.SGD(pytorch_model.parameters(), lr=0.01)
        pytorch_model_path = "pytorch_model"

        # Train the model
        for epoch in range(500):
            optimizer.zero_grad()
            loss = loss_fn(pytorch_model(x_data), y_data)
            loss.backward()
            optimizer.step()

        # Save the model
        with mlflow.start_run():
            mlflow.log_param("epochs", 500)
            mlflow.pytorch.save_model(pytorch_model, pytorch_model_path)
    """
    import torch

    pickle_module = pickle_module or mlflow_pytorch_pickle_module

    if not isinstance(pytorch_model, torch.nn.Module):
        raise TypeError("Argument 'pytorch_model' should be a torch.nn.Module")
    if code_paths is not None:
        if not isinstance(code_paths, list):
            raise TypeError("Argument code_paths should be a list, not {}".format(type(code_paths)))
    path = os.path.abspath(path)
    if os.path.exists(path):
        raise RuntimeError("Path '{}' already exists".format(path))

    if mlflow_model is None:
        mlflow_model = Model()

    os.makedirs(path)
    if signature is not None:
        mlflow_model.signature = signature
    if input_example is not None:
        _save_example(mlflow_model, input_example, path)

    model_data_subpath = "data"
    model_data_path = os.path.join(path, model_data_subpath)
    os.makedirs(model_data_path)
    # Persist the pickle module name as a file in the model's `data` directory. This is necessary
    # because the `data` directory is the only available parameter to `_load_pyfunc`, and it
    # does not contain the MLmodel configuration; therefore, it is not sufficient to place
    # the module name in the MLmodel
    #
    # TODO: Stop persisting this information to the filesystem once we have a mechanism for
    # supplying the MLmodel configuration to `mlflow.pytorch._load_pyfunc`
    pickle_module_path = os.path.join(model_data_path, _PICKLE_MODULE_INFO_FILE_NAME)
    with open(pickle_module_path, "w") as f:
        f.write(pickle_module.__name__)
    # Save pytorch model
    model_path = os.path.join(model_data_path, _SERIALIZED_TORCH_MODEL_FILE_NAME)
    torch.save(pytorch_model, model_path, pickle_module=pickle_module, **kwargs)

    conda_env_subpath = "conda.yaml"
    if conda_env is None:
        conda_env = get_default_conda_env()
    elif not isinstance(conda_env, dict):
        with open(conda_env, "r") as f:
            conda_env = yaml.safe_load(f)
    with open(os.path.join(path, conda_env_subpath), "w") as f:
        yaml.safe_dump(conda_env, stream=f, default_flow_style=False)

    if code_paths is not None:
        code_dir_subpath = "code"
        for code_path in code_paths:
            _copy_file_or_tree(src=code_path, dst=path, dst_dir=code_dir_subpath)
    else:
        code_dir_subpath = None

    mlflow_model.add_flavor(
        FLAVOR_NAME, model_data=model_data_subpath, pytorch_version=torch.__version__
    )
    pyfunc.add_to_model(
        mlflow_model,
        loader_module="mlflow.pytorch",
        data=model_data_subpath,
        pickle_module_name=pickle_module.__name__,
        code=code_dir_subpath,
        env=conda_env_subpath,
    )
    mlflow_model.save(os.path.join(path, MLMODEL_FILE_NAME))