Example #1
0
def load_model(model_uri, dfs_tmpdir=None):
    """
    Load the Spark MLlib model from the path.

    :param model_uri: The location, in URI format, of the MLflow model, for example:

                      - ``/Users/me/path/to/local/model``
                      - ``relative/path/to/local/model``
                      - ``s3://my_bucket/path/to/model``
                      - ``runs:/<mlflow_run_id>/run-relative/path/to/model``
                      - ``models:/<model_name>/<model_version>``
                      - ``models:/<model_name>/<stage>``

                      For more information about supported URI schemes, see
                      `Referencing Artifacts <https://www.mlflow.org/docs/latest/concepts.html#
                      artifact-locations>`_.
    :param dfs_tmpdir: Temporary directory path on Distributed (Hadoop) File System (DFS) or local
                       filesystem if running in local mode. The model is loaded from this
                       destination. Defaults to ``/tmp/mlflow``.
    :return: pyspark.ml.pipeline.PipelineModel

    .. code-block:: python
        :caption: Example

        from mlflow import spark
        model = mlflow.spark.load_model("spark-model")
        # Prepare test documents, which are unlabeled (id, text) tuples.
        test = spark.createDataFrame([
            (4, "spark i j k"),
            (5, "l m n"),
            (6, "spark hadoop spark"),
            (7, "apache hadoop")], ["id", "text"])
        # Make predictions on test documents
        prediction = model.transform(test)
    """
    if RunsArtifactRepository.is_runs_uri(model_uri):
        runs_uri = model_uri
        model_uri = RunsArtifactRepository.get_underlying_uri(model_uri)
        _logger.info("'%s' resolved as '%s'", runs_uri, model_uri)
    elif ModelsArtifactRepository.is_models_uri(model_uri):
        runs_uri = model_uri
        model_uri = ModelsArtifactRepository.get_underlying_uri(model_uri)
        _logger.info("'%s' resolved as '%s'", runs_uri, model_uri)
    flavor_conf = _get_flavor_configuration_from_uri(model_uri, FLAVOR_NAME)
    model_uri = append_to_uri_path(model_uri, flavor_conf["model_data"])
    local_model_path = _download_artifact_from_uri(model_uri)
    _add_code_from_conf_to_system_path(local_model_path, flavor_conf)

    return _load_model(model_uri=model_uri, dfs_tmpdir_base=dfs_tmpdir)
Example #2
0
def load_model(model_uri, dfs_tmpdir=None):
    """
    Load the Spark MLlib model from the path.

    :param model_uri: The location, in URI format, of the MLflow model, for example:

                      - ``/Users/me/path/to/local/model``
                      - ``relative/path/to/local/model``
                      - ``s3://my_bucket/path/to/model``
                      - ``runs:/<mlflow_run_id>/run-relative/path/to/model``

                      For more information about supported URI schemes, see
                      `Referencing Artifacts <https://www.mlflow.org/docs/latest/tracking.html#
                      artifact-locations>`_.
    :param dfs_tmpdir: Temporary directory path on Distributed (Hadoop) File System (DFS) or local
                       filesystem if running in local mode. The model is loaded from this
                       destination. Defaults to ``/tmp/mlflow``.
    :return: pyspark.ml.pipeline.PipelineModel

    >>> from mlflow import spark
    >>> model = mlflow.spark.load_model("spark-model")
    >>> # Prepare test documents, which are unlabeled (id, text) tuples.
    >>> test = spark.createDataFrame([
    ...   (4, "spark i j k"),
    ...   (5, "l m n"),
    ...   (6, "spark hadoop spark"),
    ...   (7, "apache hadoop")], ["id", "text"])
    >>>  # Make predictions on test documents.
    >>> prediction = model.transform(test)
    """
    if RunsArtifactRepository.is_runs_uri(model_uri):
        runs_uri = model_uri
        model_uri = RunsArtifactRepository.get_underlying_uri(model_uri)
        _logger.info("'%s' resolved as '%s'", runs_uri, model_uri)
    flavor_conf = _get_flavor_configuration_from_uri(model_uri, FLAVOR_NAME)
    model_uri = posixpath.join(model_uri, flavor_conf["model_data"])
    return _load_model(model_uri=model_uri, dfs_tmpdir=dfs_tmpdir)