Esempio n. 1
0
def test_register_model_with_unexpected_exception_in_create_registered_model():
    """
    Check that an unexpected exception raised by ``MlflowClient.create_registered_model``
    propagates out of ``register_model`` and that the client method was called exactly once
    with the requested model name.
    """
    create_model_patch = mock.patch.object(MlflowClient,
                                           "create_registered_model",
                                           side_effect=Exception("Dunno"))
    with create_model_patch:
        # Only the raising call belongs inside ``pytest.raises``: any statement placed after it
        # in the same block is skipped once the exception propagates.
        with pytest.raises(Exception):
            register_model("s3:/some/path/to/model", "Model 1")
        # Verified while the patch is still active so the mock is the object being inspected.
        MlflowClient.create_registered_model.assert_called_once_with("Model 1")
Esempio n. 2
0
    def log(cls, artifact_path, flavor, registered_model_name=None, **kwargs):
        """
        Save a model with the given flavor module and log it as an artifact of a run.

        The model is first written to a temporary local directory through
        ``flavor.save_model`` and then uploaded under ``artifact_path``. The run is obtained
        from the fluent tracking API; if none is active, a new active run is created.

        :param artifact_path: Run relative path identifying the model.
        :param flavor: Flavor module used to persist the model. It must expose a ``save_model``
                       function that writes the model as a valid MLflow model.
        :param registered_model_name: (Experimental) When not ``None``, a model version is
                                      created under this name, also creating a registered model
                                      if one with the given name does not exist.
        :param kwargs: Extra arguments forwarded unchanged to ``flavor.save_model``. Flavors
                       commonly accept the following through this mechanism:

                       - ``signature``: (Experimental) :py:class:`ModelSignature` describing
                         model input and output :py:class:`Schema <mlflow.types.Schema>`. It can
                         be :py:func:`inferred <infer_signature>` from datasets representing
                         valid model input (e.g. the training dataset) and valid model output
                         (e.g. predictions generated on the training dataset), for example:

                         .. code-block:: python

                           from mlflow.models.signature import infer_signature
                           train = df.drop_column("target_label")
                           signature = infer_signature(train, model.predict(train))

                       - ``input_example``: (Experimental) One or several examples of valid
                         model input, usable as a hint of what data to feed the model. The
                         example is converted to a Pandas DataFrame and serialized to json
                         using the Pandas split-oriented format. Bytes are base64-encoded.
        """
        with TempDir() as scratch:
            staging_dir = scratch.path("model")
            tracked_run_id = kiwi.tracking.fluent._get_or_start_run().info.run_id
            model = cls(artifact_path=artifact_path, run_id=tracked_run_id)

            # Persist locally first, then upload the whole directory as run artifacts.
            flavor.save_model(path=staging_dir, mlflow_model=model, **kwargs)
            kiwi.tracking.fluent.log_artifacts(staging_dir, artifact_path)

            try:
                kiwi.tracking.fluent._record_logged_model(model)
            except MlflowException:
                # Older tracking servers may reject the metadata call. Stay backwards
                # compatible by downgrading every MLflow exception to a warning.
                artifact_root = kiwi.get_artifact_uri()
                _logger.warning(
                    "Logging model metadata to the tracking server has failed, possibly due older "
                    "server version. The model artifacts have been logged successfully under %s. "
                    "In addition to exporting model artifacts, MLflow clients 1.7.0 and above "
                    "attempt to record model metadata to the  tracking store. If logging to a "
                    "mlflow server via REST, consider  upgrading the server version to MLflow "
                    "1.7.0 or above.", artifact_root)

            if registered_model_name is None:
                return

            # The run id is looked up again from the currently active run before registering.
            current_run_id = kiwi.tracking.fluent.active_run().info.run_id
            runs_uri = "runs:/%s/%s" % (current_run_id, artifact_path)
            kiwi.register_model(runs_uri, registered_model_name)
Esempio n. 3
0
def test_register_model_with_non_runs_uri():
    """
    Registering a model from a plain (non ``runs:/``) URI should create the registered model
    and then a model version whose source is that URI and whose run id is ``None``.
    """
    name = "Model 1"
    source_uri = "s3:/some/path/to/model"

    registered = RegisteredModel(name)
    patched_create_model = mock.patch.object(
        MlflowClient, "create_registered_model", return_value=registered)

    version = ModelVersion(name, "1", creation_timestamp=123)
    patched_create_version = mock.patch.object(
        MlflowClient, "create_model_version", return_value=version)

    with patched_create_model, patched_create_version:
        register_model(source_uri, name)

        MlflowClient.create_registered_model.assert_called_once_with(name)
        MlflowClient.create_model_version.assert_called_once_with(
            name, run_id=None, source=source_uri)
Esempio n. 4
0
def test_register_model_raises_exception_with_unsupported_registry_store():
    """
    This test case ensures that the `register_model` operation fails with an informative error
    message when the registry store URI refers to a store that does not support Model Registry
    features (e.g., FileStore).
    """
    with TempDir() as tmp:
        # Remember the current registry URI so it can be restored regardless of the outcome.
        old_registry_uri = get_registry_uri()
        try:
            set_registry_uri(tmp.path())
            with pytest.raises(MlflowException) as exc:
                register_model(model_uri="runs:/1234/some_model",
                               name="testmodel")
            # The error code must be checked after the ``pytest.raises`` block has exited:
            # inside the block, nothing following the raising call is ever executed.
            assert exc.value.error_code == ErrorCode.Name(FEATURE_DISABLED)
        finally:
            set_registry_uri(old_registry_uri)
Esempio n. 5
0
def test_register_model_with_existing_registered_model():
    """
    When creating the registered model fails with ``RESOURCE_ALREADY_EXISTS``,
    ``register_model`` should still go on to create a model version under that name.
    """
    name = "Model 1"
    source_uri = "s3:/some/path/to/model"

    already_exists = MlflowException("Some Message", RESOURCE_ALREADY_EXISTS)
    patched_create_model = mock.patch.object(
        MlflowClient, "create_registered_model", side_effect=already_exists)

    version = ModelVersion(name, "1", creation_timestamp=123)
    patched_create_version = mock.patch.object(
        MlflowClient, "create_model_version", return_value=version)

    with patched_create_model, patched_create_version:
        register_model(source_uri, name)

        MlflowClient.create_registered_model.assert_called_once_with(name)
        MlflowClient.create_model_version.assert_called_once_with(
            name, run_id=None, source=source_uri)
Esempio n. 6
0
def test_register_model_with_runs_uri():
    """
    Registering a model from a ``runs:/`` URI should create the model version with the
    underlying artifact location as its source and the run id parsed from the URI.
    """
    name = "Model 1"
    resolved_source = "s3:/path/to/source"

    registered = RegisteredModel(name)
    patched_create_model = mock.patch.object(
        MlflowClient, "create_registered_model", return_value=registered)

    # Stub the runs-URI resolution so no artifact repository is consulted.
    patched_get_uri = mock.patch(
        "mlflow.store.artifact.runs_artifact_repo.RunsArtifactRepository.get_underlying_uri",
        return_value=resolved_source)

    version = ModelVersion(name, "1", creation_timestamp=123)
    patched_create_version = mock.patch.object(
        MlflowClient, "create_model_version", return_value=version)

    with patched_get_uri, patched_create_model, patched_create_version:
        register_model("runs:/run12345/path/to/model", name)

        MlflowClient.create_registered_model.assert_called_once_with(name)
        MlflowClient.create_model_version.assert_called_once_with(
            name, resolved_source, "run12345")
Esempio n. 7
0
def log_model(spark_model,
              artifact_path,
              conda_env=None,
              dfs_tmpdir=None,
              sample_input=None,
              registered_model_name=None,
              signature: ModelSignature = None,
              input_example: ModelInputExample = None):
    """
    Log a Spark MLlib model as an MLflow artifact for the current run. This uses the
    MLlib persistence format and produces an MLflow Model with the Spark flavor.

    Note: If no run is active, it will instantiate a run to obtain a run_id.

    Three paths are possible:

    - the run's artifact URI is local: the work is delegated to ``Model.log()``;
    - Spark fails to write to the artifact location (``Py4JJavaError``): the work is delegated
      to ``Model.log()`` as a fallback;
    - otherwise the Spark model is saved directly to the artifact location and only the model
      metadata is logged through the tracking API.

    ``signature`` and ``input_example`` are forwarded on every path.

    :param spark_model: Spark model to be saved - MLflow can only save descendants of
                        pyspark.ml.Model which implement MLReadable and MLWritable.
    :param artifact_path: Run relative artifact path.
    :param conda_env: Either a dictionary representation of a Conda environment or the path to a
                      Conda environment yaml file. If provided, this describes the environment
                      this model should be run in. At minimum, it should specify the dependencies
                      contained in :func:`get_default_conda_env()`. If `None`, the default
                      :func:`get_default_conda_env()` environment is added to the model.
                      The following is an *example* dictionary representation of a Conda
                      environment::

                        {
                            'name': 'mlflow-env',
                            'channels': ['defaults'],
                            'dependencies': [
                                'python=3.7.0',
                                'pyspark=2.3.0'
                            ]
                        }
    :param dfs_tmpdir: Temporary directory path on Distributed (Hadoop) File System (DFS) or local
                       filesystem if running in local mode. The model is written in this
                       destination and then copied into the model's artifact directory. This is
                       necessary as Spark ML models read from and write to DFS if running on a
                       cluster. If this operation completes successfully, all temporary files
                       created on the DFS are removed. Defaults to ``/tmp/mlflow``.
    :param sample_input: A sample input used to add the MLeap flavor to the model.
                         This must be a PySpark DataFrame that the model can evaluate. If
                         ``sample_input`` is ``None``, the MLeap flavor is not added.
    :param registered_model_name: (Experimental) If given, create a model version under
                                  ``registered_model_name``, also creating a registered model if one
                                  with the given name does not exist.

    :param signature: (Experimental) :py:class:`ModelSignature <mlflow.models.ModelSignature>`
                      describes model input and output :py:class:`Schema <mlflow.types.Schema>`.
                      The model signature can be :py:func:`inferred <mlflow.models.infer_signature>`
                      from datasets with valid model input (e.g. the training dataset with target
                      column omitted) and valid model output (e.g. model predictions generated on
                      the training dataset), for example:

                      .. code-block:: python

                        from mlflow.models.signature import infer_signature
                        train = df.drop_column("target_label")
                        predictions = ... # compute model predictions
                        signature = infer_signature(train, predictions)
    :param input_example: (Experimental) Input example provides one or several instances of valid
                          model input. The example can be used as a hint of what data to feed the
                          model. The given example will be converted to a Pandas DataFrame and then
                          serialized to json using the Pandas split-oriented format. Bytes are
                          base64-encoded.
    :return: The result of ``Model.log()`` when the work is delegated to it; ``None`` when the
             model is saved directly to the artifact location.

    .. code-block:: python
        :caption: Example

        from pyspark.ml import Pipeline
        from pyspark.ml.classification import LogisticRegression
        from pyspark.ml.feature import HashingTF, Tokenizer
        training = spark.createDataFrame([
            (0, "a b c d e spark", 1.0),
            (1, "b d", 0.0),
            (2, "spark f g h", 1.0),
            (3, "hadoop mapreduce", 0.0) ], ["id", "text", "label"])
        tokenizer = Tokenizer(inputCol="text", outputCol="words")
        hashingTF = HashingTF(inputCol=tokenizer.getOutputCol(), outputCol="features")
        lr = LogisticRegression(maxIter=10, regParam=0.001)
        pipeline = Pipeline(stages=[tokenizer, hashingTF, lr])
        model = pipeline.fit(training)
        mlflow.spark.log_model(model, "spark-model")
    """
    from py4j.protocol import Py4JJavaError

    _validate_model(spark_model)
    from pyspark.ml import PipelineModel
    # Wrap a bare model in a single-stage pipeline so everything downstream handles one type.
    if not isinstance(spark_model, PipelineModel):
        spark_model = PipelineModel([spark_model])
    run_id = kiwi.tracking.fluent._get_or_start_run().info.run_id
    run_root_artifact_uri = kiwi.get_artifact_uri()
    # If the artifact URI is a local filesystem path, defer to Model.log() to persist the model,
    # since Spark may not be able to write directly to the driver's filesystem. For example,
    # writing to `file:/uri` will write to the local filesystem from each executor, which will
    # be incorrect on multi-node clusters - to avoid such issues we just use the Model.log() path
    # here.
    if is_local_uri(run_root_artifact_uri):
        # The signature and input example must be forwarded here as well; otherwise models
        # logged to a local artifact store would silently lose them.
        return Model.log(artifact_path=artifact_path,
                         flavor=kiwi.spark,
                         spark_model=spark_model,
                         conda_env=conda_env,
                         dfs_tmpdir=dfs_tmpdir,
                         sample_input=sample_input,
                         registered_model_name=registered_model_name,
                         signature=signature,
                         input_example=input_example)
    # If Spark cannot write directly to the artifact repo, defer to Model.log() to persist the
    # model
    model_dir = os.path.join(run_root_artifact_uri, artifact_path)
    try:
        spark_model.save(os.path.join(model_dir, _SPARK_MODEL_PATH_SUB))
    except Py4JJavaError:
        return Model.log(artifact_path=artifact_path,
                         flavor=kiwi.spark,
                         spark_model=spark_model,
                         conda_env=conda_env,
                         dfs_tmpdir=dfs_tmpdir,
                         sample_input=sample_input,
                         registered_model_name=registered_model_name,
                         signature=signature,
                         input_example=input_example)

    # Otherwise, override the default model log behavior and save model directly to artifact repo
    mlflow_model = Model(artifact_path=artifact_path, run_id=run_id)
    with TempDir() as tmp:
        # Only the metadata files are staged locally; the Spark model itself was saved above.
        tmp_model_metadata_dir = tmp.path()
        _save_model_metadata(tmp_model_metadata_dir,
                             spark_model,
                             mlflow_model,
                             sample_input,
                             conda_env,
                             signature=signature,
                             input_example=input_example)
        kiwi.tracking.fluent.log_artifacts(tmp_model_metadata_dir,
                                           artifact_path)
        if registered_model_name is not None:
            kiwi.register_model("runs:/%s/%s" % (run_id, artifact_path),
                                registered_model_name)