예제 #1
0
파일: __init__.py 프로젝트: zheya08/mlflow
    def log(cls, artifact_path, flavor, registered_model_name=None, **kwargs):
        """
        Persist a model with the given flavor module and log it as a run artifact.

        The model is saved into a temporary directory through ``flavor.save_model``, the
        directory is uploaded under ``artifact_path`` of the run returned by
        ``_get_or_start_run``, and the logged model is optionally registered.

        :param artifact_path: Run relative path identifying the model.
        :param flavor: Flavor module to save the model with. The module must have
                       the ``save_model`` function that will persist the model as a valid
                       MLflow model.
        :param registered_model_name: If given, create a model version under
                                      ``registered_model_name``, also creating a registered model
                                      if one with the given name does not exist.
        :param kwargs: Extra args passed to the model flavor.
        """
        fluent = mlflow.tracking.fluent
        with TempDir() as scratch:
            staging_dir = scratch.path("model")
            model_meta = cls(artifact_path=artifact_path,
                             run_id=fluent._get_or_start_run().info.run_id)
            flavor.save_model(path=staging_dir, mlflow_model=model_meta, **kwargs)
            fluent.log_artifacts(staging_dir, artifact_path)
            if registered_model_name is None:
                return
            # The run id used for the ``runs:/`` URI is read from the active run.
            active_run_id = fluent.active_run().info.run_id
            model_uri = "runs:/%s/%s" % (active_run_id, artifact_path)
            mlflow.register_model(model_uri, registered_model_name)
def test_register_model_with_unexpected_exception_in_create_registered_model():
    """
    An unexpected exception raised by ``MlflowClient.create_registered_model`` must propagate
    out of ``register_model``, and the client method must have been called with the model name.
    """
    create_model_patch = mock.patch.object(MlflowClient,
                                           "create_registered_model",
                                           side_effect=Exception("Dunno"))
    with create_model_patch:
        with pytest.raises(Exception):
            register_model("s3:/some/path/to/model", "Model 1")
        # Verified after ``pytest.raises`` exits (but while the patch is still active):
        # statements placed after the raising call inside the ``raises`` block never execute.
        MlflowClient.create_registered_model.assert_called_once_with("Model 1")
예제 #3
0
    def log(cls, artifact_path, flavor, registered_model_name=None, **kwargs):
        """
        Persist a model with the given flavor module and log it to the current run. If no run
        is active, this method will create a new active run.

        :param artifact_path: Run relative path identifying the model.
        :param flavor: Flavor module to save the model with. The module must have
                       the ``save_model`` function that will persist the model as a valid
                       MLflow model.
        :param registered_model_name: (Experimental) If given, create a model version under
                                      ``registered_model_name``, also creating a registered model if
                                      one with the given name does not exist.
        :param signature: (Experimental) :py:class:`ModelSignature` describing the model input
                          and output :py:class:`Schema <mlflow.types.Schema>`. It is forwarded to
                          the flavor through ``kwargs``. The signature can be
                          :py:func:`inferred <infer_signature>` from datasets representing
                          valid model input (e.g. the training dataset) and valid model output
                          (e.g. model predictions generated on the training dataset), for example:

                          .. code-block:: python

                            from mlflow.models.signature import infer_signature
                            train = df.drop_column("target_label")
                            signature = infer_signature(train, model.predict(train))

        :param input_example: (Experimental) One or several examples of valid model input,
                              forwarded to the flavor through ``kwargs``. The example can be used
                              as a hint of what data to feed the model. The given example will be
                              converted to a Pandas DataFrame and then serialized to json using
                              the Pandas split-oriented format. Bytes are base64-encoded.

        :param kwargs: Extra args passed to the model flavor.
        """
        fluent = mlflow.tracking.fluent
        with TempDir() as scratch:
            staging_dir = scratch.path("model")
            model_meta = cls(artifact_path=artifact_path,
                             run_id=fluent._get_or_start_run().info.run_id)
            flavor.save_model(path=staging_dir, mlflow_model=model_meta, **kwargs)
            fluent.log_artifacts(staging_dir, artifact_path)

            try:
                fluent._record_logged_model(model_meta)
            except MlflowException:
                # Older tracking servers reject this call; for backwards compatibility the
                # failure is only reported as a warning and never propagated.
                artifact_root = mlflow.get_artifact_uri()
                _logger.warning(
                    "Logging model metadata to the tracking server has failed, possibly due older "
                    "server version. The model artifacts have been logged successfully under %s. "
                    "In addition to exporting model artifacts, MLflow clients 1.7.0 and above "
                    "attempt to record model metadata to the  tracking store. If logging to a "
                    "mlflow server via REST, consider  upgrading the server version to MLflow "
                    "1.7.0 or above.",
                    artifact_root,
                )

            if registered_model_name is None:
                return
            # The run id used for the ``runs:/`` URI is read from the active run.
            active_run_id = fluent.active_run().info.run_id
            model_uri = "runs:/%s/%s" % (active_run_id, artifact_path)
            mlflow.register_model(model_uri, registered_model_name)
예제 #4
0
def workflow():
    """
    Run the training and evaluation steps inside one parent run and register the trained model.

    Starts a run named ``pystock-training``, executes the ``train_model`` and ``evaluate_model``
    steps through ``_run``, registers the ``model`` artifact of the training run under
    ``training-model-psystock`` and prints the model URI.
    """
    # Imported locally so the block does not depend on the file's import section.
    import posixpath

    with mlflow.start_run(run_name="pystock-training"):
        mlflow.set_tag("mlflow.runName", "pystock-training")
        train_run = _run("train_model")
        # The evaluation step is executed for its side effects; its run object is not needed.
        _run("evaluate_model")

        # Artifact locations are URIs, so they are always joined with "/" — ``os.path.join``
        # would insert a backslash on Windows and produce an invalid URI.
        model_uri = posixpath.join(train_run.info.artifact_uri, "model")
        mlflow.register_model(model_uri, "training-model-psystock")

        print(model_uri)
def test_register_model_with_non_runs_uri():
    """
    Registering a model from a non-``runs:/`` URI creates the registered model and a model
    version whose ``source`` is the URI itself and whose ``run_id`` is ``None``.
    """
    model_name = "Model 1"
    source_uri = "s3:/some/path/to/model"
    patch_create_model = mock.patch.object(
        MlflowClient, "create_registered_model", return_value=RegisteredModel(model_name))
    patch_create_version = mock.patch.object(
        MlflowClient, "create_model_version",
        return_value=ModelVersion(model_name, "1", creation_timestamp=123))

    with patch_create_model:
        with patch_create_version:
            register_model(source_uri, model_name)
            MlflowClient.create_registered_model.assert_called_once_with(model_name)
            MlflowClient.create_model_version.assert_called_once_with(
                model_name, run_id=None, source=source_uri)
def test_register_model_with_existing_registered_model():
    """
    When ``create_registered_model`` fails with ``RESOURCE_ALREADY_EXISTS``, ``register_model``
    still goes on to create a model version for the existing registered model.
    """
    model_name = "Model 1"
    source_uri = "s3:/some/path/to/model"
    already_exists = MlflowException("Some Message", RESOURCE_ALREADY_EXISTS)
    patch_create_model = mock.patch.object(
        MlflowClient, "create_registered_model", side_effect=already_exists)
    patch_create_version = mock.patch.object(
        MlflowClient, "create_model_version",
        return_value=ModelVersion(model_name, "1", creation_timestamp=123))

    with patch_create_model:
        with patch_create_version:
            register_model(source_uri, model_name)
            MlflowClient.create_registered_model.assert_called_once_with(model_name)
            MlflowClient.create_model_version.assert_called_once_with(
                model_name, run_id=None, source=source_uri)
def test_register_model_with_runs_uri():
    """
    Registering a model from a ``runs:/`` URI creates the model version from the underlying
    artifact URI (as reported by the patched ``get_underlying_uri``) and the run id.
    """
    model_name = "Model 1"
    underlying_uri = "s3:/path/to/source"
    patch_create_model = mock.patch.object(
        MlflowClient, "create_registered_model", return_value=RegisteredModel(model_name))
    patch_underlying_uri = mock.patch(
        "mlflow.store.artifact.runs_artifact_repo.RunsArtifactRepository.get_underlying_uri",
        return_value=underlying_uri)
    patch_create_version = mock.patch.object(
        MlflowClient, "create_model_version",
        return_value=ModelVersion(model_name, "1", creation_timestamp=123))

    with patch_underlying_uri:
        with patch_create_model:
            with patch_create_version:
                register_model("runs:/run12345/path/to/model", model_name)
                MlflowClient.create_registered_model.assert_called_once_with(model_name)
                MlflowClient.create_model_version.assert_called_once_with(
                    model_name, underlying_uri, "run12345")
def test_register_model_raises_exception_with_unsupported_registry_store():
    """
    This test case ensures that the `register_model` operation fails with an informative error
    message when the registry store URI refers to a store that does not support Model Registry
    features (e.g., FileStore).
    """
    with TempDir() as tmp:
        # Remember the current registry URI so it can be restored whatever the outcome.
        old_registry_uri = get_registry_uri()
        try:
            set_registry_uri(tmp.path())
            with pytest.raises(MlflowException) as exc:
                register_model(model_uri="runs:/1234/some_model", name="testmodel")
            # The error code is checked after the ``raises`` block: inside it, anything
            # following the raising call is never executed, and ``exc.value`` is only
            # populated once the context manager has exited.
            assert exc.value.error_code == ErrorCode.Name(FEATURE_DISABLED)
        finally:
            set_registry_uri(old_registry_uri)
예제 #9
0
    def run(self):
        """
        Compare the best candidate model with the production models and deploy it if it wins.

        The candidate and production scores are computed with ``self.get_best_model`` on the
        test split returned by ``self.data_provider.run()``. If looking up or scoring the
        production versions raises ``RestException``, the production score falls back to ``-1``.
        When the candidate score is at least the production score, the candidate is registered
        under ``self.model_name`` and transitioned to the ``Production`` stage (retried once
        after a pause on ``RestException``). Finally every candidate run gets its ``candidate``
        tag set to ``'false'``.
        """
        client = MlflowClient()

        _, X_test, _, Y_test = self.data_provider.run()
        candidate_ids = self.get_candidate_models()
        best_cand_roc, best_cand_run_id = self.get_best_model(candidate_ids, X_test, Y_test)
        print('Best ROC (candidate models): ', best_cand_roc)

        try:
            prod_versions = client.get_latest_versions(self.model_name, stages=['Production'])
            best_prod_roc, _ = self.get_best_model(
                [version.run_id for version in prod_versions], X_test, Y_test)
        except RestException:
            best_prod_roc = -1
        print('ROC (production models): ', best_prod_roc)

        if best_cand_roc >= best_prod_roc:
            # Deploy the candidate as a new registered model version.
            new_version = mlflow.register_model(
                "runs:/" + best_cand_run_id + "/model", self.model_name)
            time.sleep(10)

            def promote():
                client.transition_model_version_stage(
                    name=self.model_name, version=new_version.version, stage="Production")
                print('Deployed version: ', new_version.version)

            try:
                promote()
            except RestException:
                # Single retry after a longer pause; a second failure propagates.
                time.sleep(15)
                promote()

        # Mark every candidate run as no longer being a candidate.
        for candidate_id in candidate_ids:
            client.set_tag(candidate_id, 'candidate', 'false')
예제 #10
0
def train_model(features: np.ndarray, labels: np.ndarray) -> None:
    """Train a decision tree classifier, log it and promote it to Production.

    Inside a single MLflow run the data is split 80/20 (stratified on ``labels``), a
    class-balanced ``DecisionTreeClassifier`` is fitted, test metrics are logged through
    ``log_model_metrics``, and the model is logged, registered under ``MLFLOW_MODEL_NAME``
    and transitioned to the ``Production`` stage.

    :param features: Feature matrix passed to ``train_test_split``.
    :param labels: Target labels, also used for stratification.
    """
    seed = np.random.randint(0, 100)
    run_name = f"pipeline-{get_pipeline_git_commit_hash()}"
    split_options = dict(test_size=0.2, stratify=labels, random_state=seed)

    with mlflow.start_run(run_name=run_name) as training_run:
        X_train, X_test, y_train, y_test = train_test_split(
            features, labels, **split_options)

        log.info("Training iris decision tree classifier.")
        # The seed is logged so the split and the tree can be reproduced.
        mlflow.log_param("random_state", seed)
        classifier = DecisionTreeClassifier(class_weight="balanced", random_state=seed)
        classifier.fit(X_train, y_train)
        log_model_metrics(y_test, classifier.predict(X_test))

        log.info("Registering new model with MLflow.")
        mlflow.sklearn.log_model(sk_model=classifier, artifact_path=MLFLOW_MODEL_NAME)
        model_uri = f"runs:/{training_run.info.run_id}/{MLFLOW_MODEL_NAME}"
        registered_version = mlflow.register_model(model_uri=model_uri, name=MLFLOW_MODEL_NAME)

        log.info("Transitioning new model to production.")
        registry_client = mlflow.tracking.MlflowClient()
        registry_client.transition_model_version_stage(
            name=MLFLOW_MODEL_NAME,
            version=int(registered_version.version),
            stage="Production",
        )
예제 #11
0
def evaluate_all_candidate_models():
    """
    Score candidate and production models and deploy the best candidate if it is not worse.

    Scores come from ``get_best_model`` on the module-level ``x_test`` / ``y_test``. If looking
    up or scoring the production versions of ``model_name`` raises ``RestException``, the
    production score falls back to ``-1``. A winning candidate is registered under
    ``model_name`` and, after a 15 second pause, transitioned to the ``Production`` stage.
    """
    client = MlflowClient()

    candidate_ids = get_candidate_models()
    best_cand_metric, best_cand_run_id = get_best_model(candidate_ids, x_test, y_test)
    print('Best ROC AUC (candidate models): ', best_cand_metric)

    try:
        prod_versions = client.get_latest_versions(model_name, stages=['Production'])
        best_prod_metric, _ = get_best_model(
            [version.run_id for version in prod_versions], x_test, y_test)
    except RestException:
        best_prod_metric = -1
    print('ROC AUC (production models): ', best_prod_metric)

    if best_cand_metric < best_prod_metric:
        return

    # Deploy the candidate as a new registered model version.
    candidate_uri = "runs:/" + best_cand_run_id + "/model"
    new_version = mlflow.register_model(candidate_uri, model_name)
    time.sleep(15)
    client.transition_model_version_stage(
        name=model_name, version=new_version.version, stage="Production")
    print('Deployed version: ', new_version.version)
def get_model_details(run_id, model_name, artifact_path):
    """
    Register the model stored at ``artifact_path`` of run ``run_id`` under ``model_name``.

    :param run_id: Id of the run that logged the model.
    :param model_name: Name passed to ``mlflow.register_model``.
    :param artifact_path: Run relative path of the logged model.
    :return: Whatever ``mlflow.register_model`` returns for the ``runs:/`` URI.
    """
    uri_template = "runs:/{run_id}/{artifact_path}"
    return mlflow.register_model(
        model_uri=uri_template.format(run_id=run_id, artifact_path=artifact_path),
        name=model_name,
    )
예제 #13
0
def register_best_model(**kwargs):
    """Register the best run's model as ``BestModel`` and transition it to Production.

    The run id is pulled from XCom (task ``get_best_model``, key ``best_model_run_id``)
    through the task instance found under ``kwargs["ti"]``.
    """
    task_instance = kwargs["ti"]
    run_id = task_instance.xcom_pull(task_ids="get_best_model", key="best_model_run_id")

    # Registration only adds a new version to the model registry; it does not change stages.
    model_details = mlflow.register_model(f"runs:/{run_id}/model", "BestModel")

    # The transition below is what actually ships the version to Production. Automating it
    # without prior testing and a human review is probably not advisable.
    MlflowClient().transition_model_version_stage(
        name=model_details.name,
        version=model_details.version,
        stage='Production',
    )
def upload(client, PATH, NAME, STAGE):
    """
    Log a TensorFlow SavedModel in a new run, register it and move it to a stage.

    :param client: Client used for ``wait_until_ready`` and the stage transition.
    :param PATH: SavedModel directory passed as ``tf_saved_model_dir``.
    :param NAME: Registered model name.
    :param STAGE: Target stage for the newly created model version.
    """
    with mlflow.start_run():
        mlflow.tensorflow.log_model(
            tf_saved_model_dir=PATH,
            tf_meta_graph_tags=None,
            artifact_path='model',
            tf_signature_def_key="serving_default",
        )
        logged_model_uri = mlflow.get_artifact_uri('model')
        registered = mlflow.register_model(logged_model_uri, NAME)
        # The transition is only attempted after ``wait_until_ready`` has returned.
        wait_until_ready(client, NAME, registered.version)
        client.transition_model_version_stage(
            name=NAME, version=registered.version, stage=STAGE)
예제 #15
0
def register_best_model(model_name,
                        experiment_name,
                        parent_run_name,
                        metric,
                        order_by="ASC",
                        model_artifact_name="model"):
    """
    Register the best child run's model of parent run `parent_run_name` in experiment `experiment_name`.

    The parent run is the first run named `parent_run_name` when sorted by ``metrics.loss``;
    the best child is the first run tagged with that parent's id when sorted by
    ``metrics.<metric>``. Both searches use the same `order_by` direction.

    :param model_name: model name in the Model registry
    :type model_name: str

    :param experiment_name: name of the experiment
    :type experiment_name: str

    :param parent_run_name: name of the parent run used when running hyperparameter optimization via Hyperopt
    :type parent_run_name: str

    :param metric: name of the metric used to rank the child runs
    :type metric: str

    :param order_by: "ASC" to order metric values by ascending order, "DESC" for descending
    :type order_by: str

    :param model_artifact_name: name of the model when saved as an artifact in the Tracking Server
    :type model_artifact_name: str

    :return: ModelVersion object returned by ``mlflow.register_model``
    :rtype: mlflow.entities.model_registry.ModelVersion
    """
    client = MlflowClient()
    experiment = client.get_experiment_by_name(experiment_name)
    experiment_id = experiment.experiment_id

    # Step 1: locate the parent run by its run-name tag.
    parent_candidates = client.search_runs(
        experiment_id,
        filter_string=f"tags.mlflow.runName = '{parent_run_name}'",
        order_by=[f"metrics.loss {order_by}"])
    parent_run_id = parent_candidates[0].info.run_id

    # Step 2: pick the top-ranked child of that parent for the requested metric.
    child_candidates = client.search_runs(
        experiment_id,
        filter_string=f"tags.mlflow.parentRunId = '{parent_run_id}'",
        order_by=[f"metrics.{metric} {order_by}"])
    best_run_from_parent_run = child_candidates[0]

    best_model_uri = f"runs:/{best_run_from_parent_run.info.run_id}/{model_artifact_name}"
    return mlflow.register_model(model_uri=best_model_uri, name=model_name)
예제 #16
0
파일: model.py 프로젝트: yarenty/ludwig
def export_model(model_path, output_path, registered_model_name=None):
    """
    Save a model locally, or log/register it with MLflow when a registry name is given.

    :param model_path: Location of the model; a ``runs:/`` prefix marks an artifact of an
                       existing run.
    :param output_path: Local save path, or the artifact path when logging to a new run.
    :param registered_model_name: Registry name; when falsy the model is only saved locally.
    """
    if not registered_model_name:
        # No model name means we only want to save the model locally.
        save_model(_CopyModel(model_path), path=output_path)
        return

    from_existing_run = model_path.startswith("runs:/") and output_path is None
    if from_existing_run:
        # Registering a model from an artifact of an existing run.
        mlflow.register_model(model_path, registered_model_name)
        return

    # Either no run was specified or an explicit output path was requested: create a new
    # run and upload the model as an artifact, registering it in the same call.
    log_model(
        _CopyModel(model_path),
        artifact_path=output_path or "model",
        registered_model_name=registered_model_name,
    )
예제 #17
0
    def log(cls, artifact_path, flavor, registered_model_name=None, **kwargs):
        """
        Persist a model with the given flavor module and log it to the current run. If no run
        is active, this method will create a new active run.

        :param artifact_path: Run relative path identifying the model.
        :param flavor: Flavor module to save the model with. The module must have
                       the ``save_model`` function that will persist the model as a valid
                       MLflow model.
        :param registered_model_name: Note:: Experimental: This argument may change or be removed
                                      in a future release without warning. If given, create a model
                                      version under ``registered_model_name``, also creating a
                                      registered model if one with the given name does not exist.
        :param kwargs: Extra args passed to the model flavor.
        """
        fluent = mlflow.tracking.fluent
        with TempDir() as scratch:
            staging_dir = scratch.path("model")
            model_meta = cls(artifact_path=artifact_path,
                             run_id=fluent._get_or_start_run().info.run_id)
            flavor.save_model(path=staging_dir, mlflow_model=model_meta, **kwargs)
            fluent.log_artifacts(staging_dir, artifact_path)

            try:
                fluent._record_logged_model(model_meta)
            except MlflowException:
                # Older tracking servers reject this call; for backwards compatibility the
                # failure is only reported as a warning and never propagated.
                artifact_root = mlflow.get_artifact_uri()
                _logger.warning(
                    "Logging model metadata to the tracking server has failed, possibly due older "
                    "server version. The model artifacts have been logged successfully under %s. "
                    "In addition to exporting model artifacts, MLflow clients 1.7.0 and above "
                    "attempt to record model metadata to the  tracking store. If logging to a "
                    "mlflow server via REST, consider  upgrading the server version to MLflow "
                    "1.7.0 or above.", artifact_root)

            if registered_model_name is None:
                return
            # The run id used for the ``runs:/`` URI is read from the active run.
            active_run_id = fluent.active_run().info.run_id
            model_uri = "runs:/%s/%s" % (active_run_id, artifact_path)
            mlflow.register_model(model_uri, registered_model_name)
예제 #18
0
def register_model(run, model, model_name):
    """
    Register a run's model and describe the new version with the run's params and metrics.

    The description lists every run parameter followed by the first recorded ``accuracy``
    and ``loss`` values, formatted as Markdown bold labels.

    :param run: Run whose ``info.run_id`` and ``data.params`` are used.
    :param model: Artifact name appended to ``runs:/<run_id>/artifacts/``.
    :param model_name: Registered model name.
    """
    run_id = run.info.run_id
    result = mlflow.register_model("runs:/" + run_id + "/artifacts/" + model, model_name)

    params = run.data.params
    description = ["**{}:** {}\n".format(key, params[key]) for key in params]

    # NOTE(review): ``client`` is not defined in this function — presumably a module-level
    # MlflowClient; confirm against the rest of the file.
    first_accuracy = client.get_metric_history(run_id, "accuracy")[0].value
    description.append("**Accuracy:** {}".format(first_accuracy))

    first_loss = client.get_metric_history(run_id, "loss")[0].value
    description.append("**Loss:** {}".format(first_loss))

    MlflowClient().update_model_version(
        name=model_name,
        version=result.version,
        description="".join(description),
    )
예제 #19
0
def promote_to_prod(model_name, run_id):
    """
    Register the model of run ``run_id`` and make it the Production version of ``model_name``.

    The version that was in Production beforehand (if any) is moved to ``Archived``, and an
    HTML link to the new version is displayed.

    :param model_name: Registered model name.
    :param run_id: Id of the run whose ``model`` artifact is registered.
    """
    client = MlflowClient()
    # Capture the current Production version before registering the new one. On the very
    # first promotion there is none, so an empty result must not fail the whole promotion.
    current_prod = client.get_latest_versions(model_name, ['Production'])
    old_model = current_prod[0] if current_prod else None

    # Step 1: Register to prod
    new_model = mlflow.register_model(f'runs:/{run_id}/model', model_name)
    client.transition_model_version_stage(name=model_name,
                                          version=new_model.version,
                                          stage='Production')
    print('Moved new model to Production')

    # Step 2: Move last prod to archive
    if old_model is not None:
        client.transition_model_version_stage(name=model_name,
                                              version=old_model.version,
                                              stage='Archived')
        print('Moved old model to Archived')

    # The link targets the model that was actually promoted rather than a fixed model name.
    displayHTML(
        f"<h2>Check your new model <a href='#mlflow/models/{model_name}/versions/{new_model.version}'>here</a></h2>"
    )
예제 #20
0
 def on_train_end(self, *args, **kwargs):
     """
     Restore the best weights, log train/validation metrics for them, then log and register
     the model, so that the metrics stored with MLflow reflect the logged model.
     """
     model = self._model
     model.set_weights(self._best_weights)

     train_scores = model.evaluate(x=self._train)
     for metric_name, score in zip(model.metrics_names, train_scores):
         mlflow.log_metric("train_{}".format(metric_name), score)

     valid_scores = model.evaluate(x=self._valid)
     for metric_name, score in zip(model.metrics_names, valid_scores):
         mlflow.log_metric("valid_{}".format(metric_name), score)

     mlflow.keras.log_model(model, **self._pyfunc_params)

     # Register the model from the run that is currently active.
     active = mlflow.active_run()
     model_uri = "runs:/{}/model".format(active.info.run_id)
     print("model_uri: {}".format(model_uri))
     registered = mlflow.register_model(model_uri, "KerasFlowerClassifierModel")
     print("Name: {}".format(registered.name))
     print("Version: {}".format(registered.version))
예제 #21
0
    def log(
        cls,
        artifact_path,
        flavor,
        registered_model_name=None,
        await_registration_for=DEFAULT_AWAIT_MAX_SLEEP_SECONDS,
        **kwargs,
    ):
        """
        Persist a model with the given flavor module and log it to the current run. If no run
        is active, this method will create a new active run.

        :param artifact_path: Run relative path identifying the model.
        :param flavor: Flavor module to save the model with. The module must have
                       the ``save_model`` function that will persist the model as a valid
                       MLflow model.
        :param registered_model_name: If given, create a model version under
                                      ``registered_model_name``, also creating a registered model if
                                      one with the given name does not exist.
        :param signature: :py:class:`ModelSignature` describing the model input
                          and output :py:class:`Schema <mlflow.types.Schema>`. It is forwarded to
                          the flavor through ``kwargs``. The signature can be
                          :py:func:`inferred <infer_signature>` from datasets representing
                          valid model input (e.g. the training dataset) and valid model output
                          (e.g. model predictions generated on the training dataset), for example:

                          .. code-block:: python

                            from mlflow.models.signature import infer_signature
                            train = df.drop_column("target_label")
                            signature = infer_signature(train, model.predict(train))

        :param input_example: One or several examples of valid model input, forwarded to the
                              flavor through ``kwargs``. The example can be used as a hint of
                              what data to feed the model. The given example will be converted
                              to a Pandas DataFrame and then serialized to json using the Pandas
                              split-oriented format. Bytes are base64-encoded.

        :param await_registration_for: Number of seconds to wait for the model version to finish
                            being created and is in ``READY`` status. By default, the function
                            waits for five minutes. Specify 0 or None to skip waiting.

        :param kwargs: Extra args passed to the model flavor.
        """
        fluent = mlflow.tracking.fluent
        with TempDir() as scratch:
            staging_dir = scratch.path("model")
            model_meta = cls(artifact_path=artifact_path,
                             run_id=fluent._get_or_start_run().info.run_id)
            flavor.save_model(path=staging_dir, mlflow_model=model_meta, **kwargs)
            fluent.log_artifacts(staging_dir, artifact_path)

            try:
                fluent._record_logged_model(model_meta)
            except MlflowException:
                # Older tracking servers reject this call; for backwards compatibility the
                # failure is only reported as a warning and never propagated.
                artifact_root = mlflow.get_artifact_uri()
                _logger.warning(_LOG_MODEL_METADATA_WARNING_TEMPLATE, artifact_root)

            if registered_model_name is None:
                return
            # The run id used for the ``runs:/`` URI is read from the active run.
            active_run_id = fluent.active_run().info.run_id
            model_uri = "runs:/%s/%s" % (active_run_id, artifact_path)
            mlflow.register_model(
                model_uri,
                registered_model_name,
                await_registration_for=await_registration_for,
            )
예제 #22
0
# COMMAND ----------

# Apply the pipeline to the test DataFrame and render the result in the notebook.
# NOTE(review): `deserializedPipeline`, `test_df` and `display` are not defined in this
# excerpt — presumably they come from earlier cells / the notebook runtime.
exampleResults = deserializedPipeline.transform(test_df)
display(exampleResults)

# COMMAND ----------

# MAGIC %md Register model
# MAGIC
# MAGIC https://www.mlflow.org/docs/latest/model-registry.html#registering-a-model
# MAGIC https://docs.microsoft.com/fr-fr/azure/databricks/applications/machine-learning/manage-model-lifecycle/

# COMMAND ----------

# Register a logged model through the fluent API.
# NOTE(review): both arguments are template placeholders and must be replaced with a real
# model URI and registered model name before this cell can succeed.
result = mlflow.register_model("runs:<model-path>", "<model-name>")

# COMMAND ----------

# Alternative: register through the client API by creating the registered model first and
# then adding a version to it explicitly.
from mlflow.tracking import MlflowClient

client = MlflowClient()
name = "spark-lr-registered-model"
client.create_registered_model(name)

desc = "A new version of the model"
# NOTE(review): `run` is not defined in this excerpt — presumably the run that logged the
# model in an earlier cell; verify that "sklearn-model" matches the artifact path used there.
model_uri = "runs:/{}/sklearn-model".format(run.info.run_id)
# The model URI is passed as the version's source, together with the id of the same run.
mv = client.create_model_version(name,
                                 model_uri,
                                 run.info.run_id,
                                 description=desc)
예제 #23
0
def log_model(
    spark_model,
    artifact_path,
    conda_env=None,
    dfs_tmpdir=None,
    sample_input=None,
    registered_model_name=None,
    signature: ModelSignature = None,
    input_example: ModelInputExample = None,
    await_registration_for=DEFAULT_AWAIT_MAX_SLEEP_SECONDS,
    pip_requirements=None,
    extra_pip_requirements=None,
):
    """
    Log a Spark MLlib model as an MLflow artifact for the current run. This uses the
    MLlib persistence format and produces an MLflow Model with the Spark flavor.

    Note: If no run is active, it will instantiate a run to obtain a run_id.

    :param spark_model: Spark model to be saved - MLflow can only save descendants of
                        pyspark.ml.Model which implement MLReadable and MLWritable.
    :param artifact_path: Run relative artifact path.
    :param conda_env: Either a dictionary representation of a Conda environment or the path to a
                      Conda environment yaml file. If provided, this describes the environment
                      this model should be run in. At minimum, it should specify the dependencies
                      contained in :func:`get_default_conda_env()`. If `None`, the default
                      :func:`get_default_conda_env()` environment is added to the model.
                      The following is an *example* dictionary representation of a Conda
                      environment::

                        {
                            'name': 'mlflow-env',
                            'channels': ['defaults'],
                            'dependencies': [
                                'python=3.7.0',
                                'pyspark=2.3.0'
                            ]
                        }
    :param dfs_tmpdir: Temporary directory path on Distributed (Hadoop) File System (DFS) or local
                       filesystem if running in local mode. The model is written in this
                       destination and then copied into the model's artifact directory. This is
                       necessary as Spark ML models read from and write to DFS if running on a
                       cluster. If this operation completes successfully, all temporary files
                       created on the DFS are removed. Defaults to ``/tmp/mlflow``.
    :param sample_input: A sample input used to add the MLeap flavor to the model.
                         This must be a PySpark DataFrame that the model can evaluate. If
                         ``sample_input`` is ``None``, the MLeap flavor is not added.
    :param registered_model_name: If given, create a model version under
                                  ``registered_model_name``, also creating a registered model if one
                                  with the given name does not exist.

    :param signature: :py:class:`ModelSignature <mlflow.models.ModelSignature>`
                      describes model input and output :py:class:`Schema <mlflow.types.Schema>`.
                      The model signature can be :py:func:`inferred <mlflow.models.infer_signature>`
                      from datasets with valid model input (e.g. the training dataset with target
                      column omitted) and valid model output (e.g. model predictions generated on
                      the training dataset), for example:

                      .. code-block:: python

                        from mlflow.models.signature import infer_signature
                        train = df.drop_column("target_label")
                        predictions = ... # compute model predictions
                        signature = infer_signature(train, predictions)
    :param input_example: Input example provides one or several instances of valid
                          model input. The example can be used as a hint of what data to feed the
                          model. The given example will be converted to a Pandas DataFrame and then
                          serialized to json using the Pandas split-oriented format. Bytes are
                          base64-encoded.
    :param await_registration_for: Number of seconds to wait for the model version to finish
                            being created and is in ``READY`` status. By default, the function
                            waits for five minutes. Specify 0 or None to skip waiting.
    :param pip_requirements: {{ pip_requirements }}
    :param extra_pip_requirements: {{ extra_pip_requirements }}
    :return: A :py:class:`ModelInfo <mlflow.models.model.ModelInfo>` instance that contains the
             metadata of the logged model.

    .. code-block:: python
        :caption: Example

        from pyspark.ml import Pipeline
        from pyspark.ml.classification import LogisticRegression
        from pyspark.ml.feature import HashingTF, Tokenizer
        training = spark.createDataFrame([
            (0, "a b c d e spark", 1.0),
            (1, "b d", 0.0),
            (2, "spark f g h", 1.0),
            (3, "hadoop mapreduce", 0.0) ], ["id", "text", "label"])
        tokenizer = Tokenizer(inputCol="text", outputCol="words")
        hashingTF = HashingTF(inputCol=tokenizer.getOutputCol(), outputCol="features")
        lr = LogisticRegression(maxIter=10, regParam=0.001)
        pipeline = Pipeline(stages=[tokenizer, hashingTF, lr])
        model = pipeline.fit(training)
        mlflow.spark.log_model(model, "spark-model")
    """
    from py4j.protocol import Py4JError

    _validate_model(spark_model)
    from pyspark.ml import PipelineModel

    # Everything below works on a PipelineModel, so wrap a bare model in a one-stage pipeline.
    if not isinstance(spark_model, PipelineModel):
        spark_model = PipelineModel([spark_model])
    run_id = mlflow.tracking.fluent._get_or_start_run().info.run_id
    run_root_artifact_uri = mlflow.get_artifact_uri()

    def _log_via_model_log():
        # Generic path shared by both fallbacks below: let Model.log() save the model through
        # the ``mlflow.spark`` flavor and upload it as run artifacts.
        return Model.log(
            artifact_path=artifact_path,
            flavor=mlflow.spark,
            spark_model=spark_model,
            conda_env=conda_env,
            dfs_tmpdir=dfs_tmpdir,
            sample_input=sample_input,
            registered_model_name=registered_model_name,
            signature=signature,
            input_example=input_example,
            await_registration_for=await_registration_for,
            pip_requirements=pip_requirements,
            extra_pip_requirements=extra_pip_requirements,
        )

    # If the artifact URI is a local filesystem path, defer to Model.log() to persist the model,
    # since Spark may not be able to write directly to the driver's filesystem. For example,
    # writing to `file:/uri` will write to the local filesystem from each executor, which will
    # be incorrect on multi-node clusters - to avoid such issues we just use the Model.log() path
    # here.
    if is_local_uri(run_root_artifact_uri):
        return _log_via_model_log()

    # The artifact root is a URI at this point, so join with forward slashes regardless of the
    # host OS (os.path.join would insert a backslash on Windows).
    model_dir = posixpath.join(run_root_artifact_uri, artifact_path)
    # Try to write directly to the artifact repo via Spark. If this fails, defer to Model.log()
    # to persist the model
    try:
        spark_model.save(posixpath.join(model_dir, _SPARK_MODEL_PATH_SUB))
    except Py4JError:
        return _log_via_model_log()

    # Otherwise, override the default model log behavior and save model directly to artifact repo
    mlflow_model = Model(artifact_path=artifact_path, run_id=run_id)
    with TempDir() as tmp:
        tmp_model_metadata_dir = tmp.path()
        # NOTE(review): pip_requirements / extra_pip_requirements are not forwarded on this
        # path (they are on the Model.log() path) - confirm whether _save_model_metadata
        # accepts them and should receive them here.
        _save_model_metadata(
            tmp_model_metadata_dir,
            spark_model,
            mlflow_model,
            sample_input,
            conda_env,
            signature=signature,
            input_example=input_example,
        )
        mlflow.tracking.fluent.log_artifacts(tmp_model_metadata_dir,
                                             artifact_path)
        if registered_model_name is not None:
            mlflow.register_model(
                "runs:/%s/%s" % (run_id, artifact_path),
                registered_model_name,
                await_registration_for=await_registration_for,
            )
        return mlflow_model.get_model_info()
예제 #24
0
                     index=["validation", "test"]))

# COMMAND ----------

# MAGIC %md
# MAGIC ## Automatically push model to registry

# COMMAND ----------

import time
from mlflow.entities.model_registry.model_version_status import ModelVersionStatus

# Build the runs:/<run id>/<artifact name> URI of the artifact logged by `mlflow_run`.
artifact_name = "model"
artifact_uri = f"runs:/{mlflow_run.info.run_id}/{artifact_name}"
# Point MLflow at the registry identified by `cmr_uri` before registering the model there.
mlflow.set_registry_uri(cmr_uri)
registered_mdl = mlflow.register_model(artifact_uri, model_name)


# Wait until the model is ready
def wait_until_ready(model_name, model_version, max_attempts=20, poll_interval=5):
    """
    Poll the registry at ``cmr_uri`` until a model version reports ``READY``.

    The current status is printed on every poll.

    :param model_name: Name of the registered model.
    :param model_version: Version of the registered model to check.
    :param max_attempts: Maximum number of status checks before giving up.
    :param poll_interval: Seconds to sleep between two consecutive checks.
    :return: ``True`` if the version reached ``READY`` within ``max_attempts`` checks,
             ``False`` otherwise.
    """
    client = MlflowClient(registry_uri=cmr_uri)
    for attempt in range(max_attempts):
        model_version_details = client.get_model_version(
            name=model_name,
            version=model_version,
        )
        status = ModelVersionStatus.from_string(model_version_details.status)
        print("Model status: %s" % ModelVersionStatus.to_string(status))
        if status == ModelVersionStatus.READY:
            return True
        # No point in sleeping after the last check; nothing is polled afterwards.
        if attempt < max_attempts - 1:
            time.sleep(poll_interval)
    return False
예제 #25
0
#model_details = mlflow.register_model(model_uri=model_uri, name=model_registry_name)

# COMMAND ----------

## Create ModelRegistry (only have to do this once) TODO: Can probably delete this code
client = MlflowClient()
client.create_registered_model(modelRegistryName)

# COMMAND ----------

# MAGIC %md
# MAGIC ### Register the model associated to the runID to our model Registry

# COMMAND ----------

mlflow.register_model("runs:/" + runId + "/model", modelRegistryName)

# COMMAND ----------

# DBTITLE 1,Push old model(s) to archive stage if it exists
# Fetch every version registered under this model name.
model_list = client.search_model_versions(f"name='{modelRegistryName}'")

# Versions that are currently in the Production stage.
version_prod_list = [
    mv.version for mv in model_list if mv.current_stage == "Production"
]

# Move each of those versions to the Archived stage.
for version in version_prod_list:
    client.transition_model_version_stage(
        name=modelRegistryName, version=version, stage="Archived"
    )
예제 #26
0
    mlflow.pyfunc.log_model('pipeline',
                            python_model=ESGBenchmarkAPI(pipeline),
                            conda_env=conda_env,
                            artifacts=artifacts)

    api_run_id = mlflow.active_run().info.run_id
    print(api_run_id)

# COMMAND ----------

# DBTITLE 1,Register model
# Register the "pipeline" artifact of run `api_run_id` and print the version it was given.
model_name = "esg_lda_benchmark"
model_uri = f"runs:/{api_run_id}/pipeline"
client = mlflow.tracking.MlflowClient()
result = mlflow.register_model(model_uri, model_name)
version = result.version
print(version)

# COMMAND ----------

# MAGIC %md
# MAGIC ---
# MAGIC + <a href="$./00_esg_context">STAGE0</a>: Home page
# MAGIC + <a href="$./01_esg_report">STAGE1</a>: Using NLP to extract key ESG initiatives PDF reports
# MAGIC + <a href="$./02_esg_scoring">STAGE2</a>: Introducing a novel approach to ESG scoring using graph analytics
# MAGIC + <a href="$./03_esg_market">STAGE3</a>: Applying ESG to market risk calculations
# MAGIC ---

# COMMAND ----------
# MAGIC %md ###2. Registering Model with Model Registry, a central model repository

# COMMAND ----------

run_id.info

# COMMAND ----------

import mlflow
from mlflow.tracking.client import MlflowClient

client = MlflowClient()
artifact_path = "best_model"
model_name = "linear-regression-model"
# NOTE(review): if `run_id` is a Run object rather than an id string, this URI embeds the
# object's string form - confirm, and use `run_id.info.run_id` if that is the case.
model_uri = f"runs:/{run_id}/{artifact_path}"
# Register the artifact; wait up to 120 seconds for the registration to complete.
registered_model = mlflow.register_model(
    model_uri=model_uri,
    name=model_name,
    await_registration_for=120,
)

# Attach a description to the model version that was just registered.
client.update_model_version(
    name=registered_model.name,
    version=registered_model.version,
    description="This predicts the age of a customer using transaction history.",
)

# Move that model version to the Staging stage.
client.transition_model_version_stage(
    name=registered_model.name,
    version=registered_model.version,
    stage='Staging',
)
예제 #28
0
def train_model(df_orig, maxDepth, numTrees):
    """
    Train a random-forest regressor for one destination airport, log and register it
    with MLflow, and return its predictions for the inference date.

    Besides its arguments, the function reads ``airport_code``, ``training_start_date``,
    ``training_end_date``, ``inference_date`` and ``load_assemblers`` from the enclosing
    scope.

    :param df_orig: DataFrame that is filtered on its ``DEST`` and ``SCHEDULED_DEP_TIME``
                    columns; ``ARR_DELAY`` is used as the label column.
    :param maxDepth: ``maxDepth`` passed to ``RandomForestRegressor``.
    :param numTrees: ``numTrees`` passed to ``RandomForestRegressor``.
    :return: The result of applying the fitted pipeline to the rows of ``df_orig`` for
             ``airport_code`` on ``inference_date``.
    """
    from pyspark.sql.functions import col, to_date
    import mlflow
    import mlflow.spark
    import pandas as pd
    from pyspark.ml import Pipeline
    from pyspark.ml.regression import RandomForestRegressor
    from pyspark.ml.evaluation import RegressionEvaluator
    from mlflow.tracking import MlflowClient

    dest = airport_code

    # Rows for the destination airport within the training date range. They are split into
    # the train and test sets that the logged metrics are computed from.
    df = (df_orig.filter(df_orig.DEST == dest).filter(
        col("SCHEDULED_DEP_TIME").between(pd.to_datetime(training_start_date),
                                          pd.to_datetime(training_end_date))))
    # Rows for the destination airport on the inference date only; the returned predictions
    # are computed on these.
    df_inference = (df_orig.filter(df_orig.DEST == dest).filter(
        to_date(col("SCHEDULED_DEP_TIME")) == inference_date))
    (trainDF, testDF) = df.randomSplit([0.8, 0.2], seed=42)

    # Artifact path shared by the log_model call and the registration URI below.
    artifact_path = "{0}_rfr".format(dest)

    stringIndexer, vecAssembler = load_assemblers(df_orig)
    with mlflow.start_run(run_name="flights-randomforest-with-regressors-{0}".
                          format(dest)) as run:
        rf = RandomForestRegressor(featuresCol="features",
                                   labelCol="ARR_DELAY",
                                   maxDepth=maxDepth,
                                   numTrees=numTrees)
        pipeline = Pipeline(stages=[stringIndexer, vecAssembler, rf])
        mlflow.log_param("num_trees", rf.getNumTrees())
        mlflow.log_param("max_depth", rf.getMaxDepth())
        # Fit the whole pipeline (indexing, vector assembly, regressor) and log the result.
        pipelineModel = pipeline.fit(trainDF)
        mlflow.spark.log_model(pipelineModel, artifact_path)

        tags = {
            "training_start_date": training_start_date,
            "training_end_date": training_end_date
        }
        mlflow.set_tags(tags)

        # Log metrics: RMSE and R2
        predDF = pipelineModel.transform(testDF)
        regressionEvaluator = RegressionEvaluator(predictionCol="prediction",
                                                  labelCol="ARR_DELAY")
        rmse = regressionEvaluator.setMetricName("rmse").evaluate(predDF)
        r2 = regressionEvaluator.setMetricName("r2").evaluate(predDF)
        mlflow.log_metrics({"rmse": rmse, "r2": r2})

    # The model was logged inside `run`, so use that run's id directly. Looking up "the most
    # recent run of the experiment" instead could pick up a different run started concurrently.
    model_name = "rfr_{0}_{1}_{2}_{3}".format(airport_code,
                                              training_start_date,
                                              training_end_date,
                                              inference_date)
    model_uri = "runs:/{run_id}/{path}".format(run_id=run.info.run_id,
                                               path=artifact_path)
    model_details = mlflow.register_model(model_uri=model_uri, name=model_name)

    # Decide the stage for the version just registered: Production when the model has no
    # Staging or Production version yet, otherwise Staging. Any version currently in Staging
    # is archived first.
    client = MlflowClient()
    model_stage = "Production"
    for mv in client.search_model_versions(f"name='{model_name}'"):
        if mv.current_stage == "Staging":
            # Archive the currently staged model
            client.transition_model_version_stage(name=mv.name,
                                                  version=mv.version,
                                                  stage="Archived")
            model_stage = "Staging"
        elif mv.current_stage == "Production":
            model_stage = "Staging"
    # Transition exactly the version returned by register_model, not whichever version a
    # search happens to list first.
    client.transition_model_version_stage(name=model_name,
                                          version=model_details.version,
                                          stage=model_stage)

    predicted_inference_DF = pipelineModel.transform(df_inference)
    return predicted_inference_DF
예제 #29
0
client = MlflowClient()
runs = client.search_runs(run.info.experiment_id,
                          order_by=["attributes.start_time desc"],
                          max_results=1)
run_id = runs[0].info.run_id
runs[0].data.metrics

# COMMAND ----------

import uuid

# Register the "model" artifact of the first run in `runs` under a randomly suffixed name.
model_name = f"flight_delay_{uuid.uuid4().hex[:10]}"
runID = runs[0].info.run_uuid
model_uri = f"runs:/{runID}/model"

model_details = mlflow.register_model(model_uri=model_uri, name=model_name)
model_details

# COMMAND ----------

# MAGIC %md
# MAGIC At this point look at the models tab and you can see the model registered.

# COMMAND ----------

# MAGIC %md
# MAGIC # 4. Load Model and Predict Arrival Delay

# COMMAND ----------

# Load saved model with MLflow
예제 #30
0
        mlflow.sklearn.log_model(lr, "model")
        
        if is_test:
            import json

            # Create some files to preserve as artifacts
            features = "rooms, zipcode, median_price, school_rating, transport"
            data = {"state": "TX", "Available": 25, "Type": "Detached"}

            # Create couple of artifact files under the directory "data"
            os.makedirs("data", exist_ok=True)
            with open("data/data.json", 'w', encoding='utf-8') as f:
                json.dump(data, f, indent=2)
            with open("data/features.txt", 'w') as f:
                f.write(features)
            
            mlflow.log_artifacts("data", artifact_path="states")
            
            mlflow.log_artifact(wine_path)
            
            mlflow.log_params({"alpha":alpha, "l1_ratio":l1_ratio})
            mlflow.log_metrics({"rmse":rmse, "r2":r2, "mae":mae})
            
            mlflow.log_text("text test", "testtext.txt")
            
    if is_test:
        model_uri = "runs:/{}/model".format(run.info.run_id)
        mv = mlflow.register_model(model_uri, "ElasticNetRegressionModel")
        print("Name: {}".format(mv.name))
        print("Version: {}".format(mv.version))