def log(cls, artifact_path, flavor, registered_model_name=None, **kwargs):
    """
    Save a model with the given flavor module and log it as run artifacts.

    The model is first written to a temporary directory and then uploaded under
    ``artifact_path`` of the current run (a run is obtained via
    ``_get_or_start_run``).

    :param artifact_path: Run relative path identifying the model.
    :param flavor: Flavor module to save the model with. The module must have
                   the ``save_model`` function that will persist the model as a valid
                   MLflow model.
    :param registered_model_name: If given, create a model version under
                                  ``registered_model_name``, also creating a registered
                                  model if one with the given name does not exist.
    :param kwargs: Extra args passed to the model flavor.
    """
    with TempDir() as staging:
        staged_model_path = staging.path("model")
        current_run_id = mlflow.tracking.fluent._get_or_start_run().info.run_id
        model_meta = cls(artifact_path=artifact_path, run_id=current_run_id)
        flavor.save_model(path=staged_model_path, mlflow_model=model_meta, **kwargs)
        mlflow.tracking.fluent.log_artifacts(staged_model_path, artifact_path)

        # Nothing left to do unless the caller also asked for registration.
        if registered_model_name is None:
            return

        active_run_id = mlflow.tracking.fluent.active_run().info.run_id
        mlflow.register_model(
            "runs:/%s/%s" % (active_run_id, artifact_path), registered_model_name
        )
def test_register_model_with_unexpected_exception_in_create_registered_model():
    """
    An unexpected (non-MLflow) error raised while creating the registered model
    must propagate out of ``register_model``.
    """
    create_model_patch = mock.patch.object(
        MlflowClient, "create_registered_model", side_effect=Exception("Dunno")
    )
    with create_model_patch:
        # Only the call expected to raise lives inside ``pytest.raises``; any
        # statement placed after it in the same block would never execute.
        with pytest.raises(Exception):
            register_model("s3:/some/path/to/model", "Model 1")
        # Checked while the patch is still active, so the attribute is the mock.
        MlflowClient.create_registered_model.assert_called_once_with("Model 1")
def log(cls, artifact_path, flavor, registered_model_name=None, **kwargs):
    """
    Save a model with the given flavor module and log it to the current run.
    If no run is active, this method will create a new active run.

    :param artifact_path: Run relative path identifying the model.
    :param flavor: Flavor module to save the model with. The module must have
                   the ``save_model`` function that will persist the model as a valid
                   MLflow model.
    :param registered_model_name: (Experimental) If given, create a model version under
                                  ``registered_model_name``, also creating a registered
                                  model if one with the given name does not exist.
    :param signature: (Experimental) :py:class:`ModelSignature` describes model input
                      and output :py:class:`Schema <mlflow.types.Schema>`. The model
                      signature can be :py:func:`inferred <infer_signature>` from
                      datasets representing valid model input (e.g. the training
                      dataset) and valid model output (e.g. model predictions generated
                      on the training dataset), for example:

                      .. code-block:: python

                        from mlflow.models.signature import infer_signature
                        train = df.drop_column("target_label")
                        signature = infer_signature(train, model.predict(train))

    :param input_example: (Experimental) Input example provides one or several examples
                          of valid model input. The example can be used as a hint of
                          what data to feed the model. The given example will be
                          converted to a Pandas DataFrame and then serialized to json
                          using the Pandas split-oriented format. Bytes are
                          base64-encoded.
    :param kwargs: Extra args passed to the model flavor.
    """
    with TempDir() as scratch:
        staged_path = scratch.path("model")
        run_id = mlflow.tracking.fluent._get_or_start_run().info.run_id
        model_meta = cls(artifact_path=artifact_path, run_id=run_id)
        flavor.save_model(path=staged_path, mlflow_model=model_meta, **kwargs)
        mlflow.tracking.fluent.log_artifacts(staged_path, artifact_path)

        try:
            mlflow.tracking.fluent._record_logged_model(model_meta)
        except MlflowException:
            # Deliberately best-effort: the artifacts are already uploaded at
            # this point, so a failure to record the metadata only warns.
            _logger.warning(
                "Logging model metadata to the tracking server has failed, possibly due older "
                "server version. The model artifacts have been logged successfully under %s. "
                "In addition to exporting model artifacts, MLflow clients 1.7.0 and above "
                "attempt to record model metadata to the tracking store. If logging to a "
                "mlflow server via REST, consider upgrading the server version to MLflow "
                "1.7.0 or above.",
                mlflow.get_artifact_uri(),
            )

        if registered_model_name is None:
            return

        active_run_id = mlflow.tracking.fluent.active_run().info.run_id
        mlflow.register_model(
            "runs:/%s/%s" % (active_run_id, artifact_path), registered_model_name
        )
def workflow():
    """
    Run the training and evaluation steps inside a parent run named
    ``pystock-training``, then register the trained model.

    The model is registered from the ``model`` directory under the training
    run's artifact URI, and that URI is printed.
    """
    # Artifact locations are URIs, so they are always joined with forward
    # slashes; ``os.path.join`` would insert a backslash on Windows.
    import posixpath

    with mlflow.start_run(run_name="pystock-training"):
        mlflow.set_tag("mlflow.runName", "pystock-training")
        train_run = _run("train_model")
        # The evaluation step is run for its side effects; its result is unused.
        _run("evaluate_model")
        model_uri = posixpath.join(train_run.info.artifact_uri, "model")
        mlflow.register_model(model_uri, "training-model-psystock")
        print(model_uri)
def test_register_model_with_non_runs_uri():
    """
    Registering from a plain (non ``runs:/``) URI creates the registered model
    and a model version whose source is that URI and whose run ID is ``None``.
    """
    patch_create_model = mock.patch.object(
        MlflowClient,
        "create_registered_model",
        return_value=RegisteredModel("Model 1"),
    )
    patch_create_version = mock.patch.object(
        MlflowClient,
        "create_model_version",
        return_value=ModelVersion("Model 1", "1", creation_timestamp=123),
    )
    with patch_create_model:
        with patch_create_version:
            register_model("s3:/some/path/to/model", "Model 1")
            # Assertions run while both patches are active so the attributes
            # looked up on MlflowClient are the mocks.
            MlflowClient.create_registered_model.assert_called_once_with("Model 1")
            MlflowClient.create_model_version.assert_called_once_with(
                "Model 1", run_id=None, source="s3:/some/path/to/model"
            )
def test_register_model_with_existing_registered_model():
    """
    When creating the registered model fails with ``RESOURCE_ALREADY_EXISTS``,
    ``register_model`` still goes on to create the model version.
    """
    patch_create_model = mock.patch.object(
        MlflowClient,
        "create_registered_model",
        side_effect=MlflowException("Some Message", RESOURCE_ALREADY_EXISTS),
    )
    patch_create_version = mock.patch.object(
        MlflowClient,
        "create_model_version",
        return_value=ModelVersion("Model 1", "1", creation_timestamp=123),
    )
    with patch_create_model:
        with patch_create_version:
            register_model("s3:/some/path/to/model", "Model 1")
            # Assertions run while both patches are active so the attributes
            # looked up on MlflowClient are the mocks.
            MlflowClient.create_registered_model.assert_called_once_with("Model 1")
            MlflowClient.create_model_version.assert_called_once_with(
                "Model 1", run_id=None, source="s3:/some/path/to/model"
            )
def test_register_model_with_runs_uri():
    """
    Registering from a ``runs:/`` URI creates the model version from the
    underlying artifact location and the run ID taken from the URI.
    """
    patch_create_model = mock.patch.object(
        MlflowClient,
        "create_registered_model",
        return_value=RegisteredModel("Model 1"),
    )
    # The repository lookup is stubbed so no real run has to exist.
    patch_underlying_uri = mock.patch(
        "mlflow.store.artifact.runs_artifact_repo.RunsArtifactRepository.get_underlying_uri",
        return_value="s3:/path/to/source",
    )
    patch_create_version = mock.patch.object(
        MlflowClient,
        "create_model_version",
        return_value=ModelVersion("Model 1", "1", creation_timestamp=123),
    )
    with patch_underlying_uri:
        with patch_create_model:
            with patch_create_version:
                register_model("runs:/run12345/path/to/model", "Model 1")
                MlflowClient.create_registered_model.assert_called_once_with("Model 1")
                MlflowClient.create_model_version.assert_called_once_with(
                    "Model 1", "s3:/path/to/source", "run12345"
                )
def test_register_model_raises_exception_with_unsupported_registry_store():
    """
    This test case ensures that the `register_model` operation fails with an
    informative error message when the registry store URI refers to a store
    that does not support Model Registry features (e.g., FileStore).
    """
    with TempDir() as tmp:
        old_registry_uri = get_registry_uri()
        try:
            set_registry_uri(tmp.path())
            with pytest.raises(MlflowException) as exc:
                register_model(model_uri="runs:/1234/some_model", name="testmodel")
            # Must sit after the ``pytest.raises`` block: statements following
            # the raising call inside that block are never executed.
            assert exc.value.error_code == ErrorCode.Name(FEATURE_DISABLED)
        finally:
            # Always restore the registry URI so other tests are unaffected.
            set_registry_uri(old_registry_uri)
def run(self):
    """
    Compare the best candidate model against the best Production model on the
    test split and promote the candidate when it scores at least as well.

    Afterwards every candidate run has its ``candidate`` tag set to ``false``.
    """
    registry = MlflowClient()
    _, X_test, _, Y_test = self.data_provider.run()

    candidate_ids = self.get_candidate_models()
    best_cand_roc, best_cand_run_id = self.get_best_model(candidate_ids, X_test, Y_test)
    print('Best ROC (candidate models): ', best_cand_roc)

    try:
        prod_versions = registry.get_latest_versions(self.model_name, stages=['Production'])
        prod_ids = [version.run_id for version in prod_versions]
        best_prod_roc, _ = self.get_best_model(prod_ids, X_test, Y_test)
    except RestException:
        # Treated as "no production model": any candidate score beats -1.
        best_prod_roc = -1
    print('ROC (production models): ', best_prod_roc)

    if best_cand_roc >= best_prod_roc:
        # Deploy the new model.
        model_version = mlflow.register_model(
            "runs:/" + best_cand_run_id + "/model", self.model_name
        )

        def promote():
            registry.transition_model_version_stage(
                name=self.model_name,
                version=model_version.version,
                stage="Production",
            )

        time.sleep(10)
        try:
            promote()
        except RestException:
            # One retry after a longer pause; a second failure propagates.
            time.sleep(15)
            promote()
        print('Deployed version: ', model_version.version)

    # Remove candidate tags.
    for candidate_id in candidate_ids:
        registry.set_tag(candidate_id, 'candidate', 'false')
def train_model(features: np.ndarray, labels: np.ndarray) -> None:
    """
    Fit a decision tree classifier, log it to MLflow, register it and move the
    new model version to the Production stage.

    A random seed in ``[0, 100)`` is drawn for both the train/test split and
    the classifier, and is logged as the ``random_state`` parameter.

    :param features: Feature matrix passed to ``train_test_split``.
    :param labels: Target labels; also used to stratify the split.
    """
    seed = np.random.randint(0, 100)
    pipeline_run_name = f"pipeline-{get_pipeline_git_commit_hash()}"

    with mlflow.start_run(run_name=pipeline_run_name) as training_run:
        train_x, test_x, train_y, test_y = train_test_split(
            features,
            labels,
            test_size=0.2,
            stratify=labels,
            random_state=seed,
        )

        log.info("Training iris decision tree classifier.")
        mlflow.log_param("random_state", seed)
        classifier = DecisionTreeClassifier(class_weight="balanced", random_state=seed)
        classifier.fit(train_x, train_y)
        log_model_metrics(test_y, classifier.predict(test_x))

        log.info("Registering new model with MLflow.")
        mlflow.sklearn.log_model(sk_model=classifier, artifact_path=MLFLOW_MODEL_NAME)
        registered_version = mlflow.register_model(
            model_uri=f"runs:/{training_run.info.run_id}/{MLFLOW_MODEL_NAME}",
            name=MLFLOW_MODEL_NAME,
        )

        log.info("Transitioning new model to production.")
        registry = mlflow.tracking.MlflowClient()
        registry.transition_model_version_stage(
            name=MLFLOW_MODEL_NAME,
            version=int(registered_version.version),
            stage="Production",
        )
def evaluate_all_candidate_models():
    """
    Score the candidate models against the current Production models and
    promote the best candidate when it scores at least as well.

    Reads ``x_test``, ``y_test`` and ``model_name`` from the enclosing module.
    """
    registry = MlflowClient()

    candidate_ids = get_candidate_models()
    best_cand_metric, best_cand_run_id = get_best_model(candidate_ids, x_test, y_test)
    print('Best ROC AUC (candidate models): ', best_cand_metric)

    try:
        prod_versions = registry.get_latest_versions(model_name, stages=['Production'])
        prod_ids = [version.run_id for version in prod_versions]
        best_prod_metric, _ = get_best_model(prod_ids, x_test, y_test)
    except RestException:
        # Treated as "no production model": any candidate score beats -1.
        best_prod_metric = -1
    print('ROC AUC (production models): ', best_prod_metric)

    if best_cand_metric < best_prod_metric:
        return

    # Deploy the new model.
    model_version = mlflow.register_model("runs:/" + best_cand_run_id + "/model", model_name)
    # Fixed pause before requesting the stage transition.
    time.sleep(15)
    registry.transition_model_version_stage(
        name=model_name,
        version=model_version.version,
        stage="Production",
    )
    print('Deployed version: ', model_version.version)
def get_model_details(run_id, model_name, artifact_path):
    """
    Register the model logged under ``artifact_path`` of run ``run_id``.

    :param run_id: ID of the run that logged the model.
    :param model_name: Name to register the model under.
    :param artifact_path: Run-relative artifact path of the logged model.
    :return: The value returned by ``mlflow.register_model``.
    """
    model_uri = f"runs:/{run_id}/{artifact_path}"
    return mlflow.register_model(model_uri=model_uri, name=model_name)
def register_best_model(**kwargs):
    """
    Register the best performing model under the ``BestModel`` name and move
    the new version to the Production stage.

    The run ID is pulled from XCom (task ``get_best_model``, key
    ``best_model_run_id``) through the task instance passed as ``kwargs["ti"]``.
    """
    task_instance = kwargs["ti"]
    best_run_id = task_instance.xcom_pull(
        task_ids="get_best_model", key="best_model_run_id"
    )

    # Registering alone only adds a new version to the registered model; it
    # does not put that version in production.
    model_details = mlflow.register_model(f"runs:/{best_run_id}/model", "BestModel")

    # The stage transition is what actually promotes it. Automating this
    # without testing and a manual review is probably not advisable.
    registry = MlflowClient()
    registry.transition_model_version_stage(
        name=model_details.name,
        version=model_details.version,
        stage='Production',
    )
def upload(client, PATH, NAME, STAGE):
    """
    Log a TensorFlow SavedModel in a new run, register it and move the new
    version to the requested stage.

    :param client: Registry client used for the readiness wait and the stage
                   transition.
    :param PATH: Directory of the TensorFlow SavedModel.
    :param NAME: Registered model name.
    :param STAGE: Target stage for the new model version.
    """
    with mlflow.start_run():
        mlflow.tensorflow.log_model(
            tf_saved_model_dir=PATH,
            tf_meta_graph_tags=None,
            artifact_path='model',
            tf_signature_def_key="serving_default",
        )
        logged_model_uri = mlflow.get_artifact_uri('model')
        registered = mlflow.register_model(logged_model_uri, NAME)
        new_version = registered.version
        wait_until_ready(client, NAME, new_version)
        client.transition_model_version_stage(
            name=NAME, version=new_version, stage=STAGE
        )
def register_best_model(model_name, experiment_name, parent_run_name, metric,
                        order_by="ASC", model_artifact_name="model"):
    """
    Register best model obtained for model `model_name`, experiment
    `experiment_name` and parent run `parent_run_name`.

    :param model_name: model name in the Model registry
    :type model_name: str
    :param experiment_name: name of the experiment
    :type experiment_name: str
    :param parent_run_name: name of the parent run used when running
                            hyperparameter optimization via Hyperopt
    :type parent_run_name: str
    :param metric: name of the metric used to optimize our models
    :type metric: str
    :param order_by: "ASC" to order metric values by ascending order, "DESC"
                     for descending
    :type order_by: str
    :param model_artifact_name: name of the model when saved as an artifact in
                                the Tracking Server
    :type model_artifact_name: str
    :return: ModelVersion object created for the registered model (no stage
             transition is performed here)
    :rtype: mlflow.entities.model_registry.ModelVersion
    :raises ValueError: if no experiment named `experiment_name` exists
    :raises IndexError: if no parent run, or no child run of it, is found
    """
    client = MlflowClient()

    experiment = client.get_experiment_by_name(experiment_name)
    if experiment is None:
        raise ValueError(f"Experiment '{experiment_name}' does not exist.")
    experiment_id = experiment.experiment_id

    # NOTE(review): parent runs are ordered by the hard-coded ``loss`` metric
    # rather than ``metric`` - confirm this is intentional.
    parent_runs = client.search_runs(
        experiment_id,
        filter_string=f"tags.mlflow.runName = '{parent_run_name}'",
        order_by=[f"metrics.loss {order_by}"],
    )
    if not parent_runs:
        raise IndexError(
            f"No run named '{parent_run_name}' found in experiment '{experiment_name}'."
        )
    parent_run_id = parent_runs[0].info.run_id

    child_runs = client.search_runs(
        experiment_id,
        filter_string=f"tags.mlflow.parentRunId = '{parent_run_id}'",
        order_by=[f"metrics.{metric} {order_by}"],
    )
    if not child_runs:
        raise IndexError(f"Parent run '{parent_run_id}' has no child runs.")
    best_run_id = child_runs[0].info.run_id

    best_model_uri = f"runs:/{best_run_id}/{model_artifact_name}"
    return mlflow.register_model(model_uri=best_model_uri, name=model_name)
def export_model(model_path, output_path, registered_model_name=None):
    """
    Save a model locally, or register it in MLflow when a name is given.

    :param model_path: Location of the model to export.
    :param output_path: Local save path when not registering; artifact path of
                        the logged copy when registering (defaults to
                        ``"model"`` in that case).
    :param registered_model_name: If set, the model is registered under this
                                  name instead of being saved locally.
    """
    # No model name means we only want to save the model locally.
    if not registered_model_name:
        save_model(_CopyModel(model_path), path=output_path)
        return

    # An artifact of an existing run can be registered directly, as long as
    # the caller did not ask for a copy under a specific artifact path.
    is_existing_run_artifact = model_path.startswith("runs:/") and output_path is None
    if is_existing_run_artifact:
        mlflow.register_model(model_path, registered_model_name)
        return

    # Otherwise the model has to be uploaded as an artifact of a run first,
    # which ``log_model`` does before registering it.
    artifact_path = output_path or "model"
    log_model(
        _CopyModel(model_path),
        artifact_path=artifact_path,
        registered_model_name=registered_model_name,
    )
def log(cls, artifact_path, flavor, registered_model_name=None, **kwargs):
    """
    Save a model with the given flavor module and log it to the current run.
    If no run is active, this method will create a new active run.

    :param artifact_path: Run relative path identifying the model.
    :param flavor: Flavor module to save the model with. The module must have
                   the ``save_model`` function that will persist the model as a valid
                   MLflow model.
    :param registered_model_name: Note:: Experimental: This argument may change or be
                                  removed in a future release without warning. If given,
                                  create a model version under ``registered_model_name``,
                                  also creating a registered model if one with the given
                                  name does not exist.
    :param kwargs: Extra args passed to the model flavor.
    """
    fluent = mlflow.tracking.fluent
    with TempDir() as workspace:
        model_dir = workspace.path("model")
        run_id = fluent._get_or_start_run().info.run_id
        model_record = cls(artifact_path=artifact_path, run_id=run_id)
        flavor.save_model(path=model_dir, mlflow_model=model_record, **kwargs)
        fluent.log_artifacts(model_dir, artifact_path)

        try:
            fluent._record_logged_model(model_record)
        except MlflowException:
            # Older tracking servers may not support recording model metadata;
            # the artifacts are already uploaded, so this only warns.
            _logger.warning(
                "Logging model metadata to the tracking server has failed, possibly due older "
                "server version. The model artifacts have been logged successfully under %s. "
                "In addition to exporting model artifacts, MLflow clients 1.7.0 and above "
                "attempt to record model metadata to the tracking store. If logging to a "
                "mlflow server via REST, consider upgrading the server version to MLflow "
                "1.7.0 or above.",
                mlflow.get_artifact_uri(),
            )

        if registered_model_name is not None:
            registration_run_id = fluent.active_run().info.run_id
            model_uri = "runs:/%s/%s" % (registration_run_id, artifact_path)
            mlflow.register_model(model_uri, registered_model_name)
def register_model(run, model, model_name):
    """
    Register a model logged by ``run`` and describe the new version with the
    run's parameters and its accuracy and loss metrics.

    :param run: Run whose ``info.run_id`` and ``data.params`` are used.
    :param model: Name of the model directory under the run's ``artifacts/``
                  path.
    :param model_name: Registered model name.
    """
    result = mlflow.register_model(
        "runs:/" + run.info.run_id + "/artifacts/" + model, model_name
    )

    description = [
        "**{}:** {}\n".format(param, value) for param, value in run.data.params.items()
    ]
    # NOTE(review): ``client`` is a module-level name not defined in this
    # function, while the update below builds a fresh ``MlflowClient()`` -
    # confirm both are meant to talk to the same server.
    # ``[0]`` is the first entry of the metric history.
    accuracy = client.get_metric_history(run.info.run_id, "accuracy")[0].value
    loss = client.get_metric_history(run.info.run_id, "loss")[0].value
    # Each entry but the last ends with a newline so the entries stay separate
    # (Accuracy and Loss were previously concatenated on one line).
    description.append("**Accuracy:** {}\n".format(accuracy))
    description.append("**Loss:** {}".format(loss))

    MlflowClient().update_model_version(
        name=model_name,
        version=result.version,
        description="".join(description),
    )
def promote_to_prod(model_name, run_id):
    """
    Register the model logged by ``run_id`` as a new version of ``model_name``,
    move it to Production and archive the previous Production version, if any.

    :param model_name: Registered model name.
    :param run_id: ID of the run whose ``model`` artifact is registered.
    """
    client = MlflowClient()
    # Looked up before registering, so the new version is never in this list.
    # It is empty when nothing has been promoted yet.
    previous_prod = client.get_latest_versions(model_name, ['Production'])

    # Step 1: Register to prod
    new_model = mlflow.register_model(f'runs:/{run_id}/model', model_name)
    client.transition_model_version_stage(
        name=model_name, version=new_model.version, stage='Production'
    )
    print('Moved new model to Production')

    # Step 2: Move last prod to archive
    if previous_prod:
        old_model = previous_prod[0]
        client.transition_model_version_stage(
            name=model_name, version=old_model.version, stage='Archived'
        )
        print('Moved old model to Archived')

    # Link to the model that was actually promoted rather than a fixed name.
    displayHTML(
        f"<h2>Check your new model <a href='#mlflow/models/{model_name}/versions/{new_model.version}'>here</a></h2>"
    )
def on_train_end(self, *args, **kwargs):
    """
    Log the best model with MLflow and evaluate it on the train and validation
    data so that the metrics stored with MLflow reflect the logged model.
    """
    self._model.set_weights(self._best_weights)

    # Evaluate each split in turn and log its metrics under the split's prefix.
    for key_template, dataset in (("train_{}", self._train), ("valid_{}", self._valid)):
        scores = self._model.evaluate(x=dataset)
        for metric_name, score in zip(self._model.metrics_names, scores):
            mlflow.log_metric(key_template.format(metric_name), score)

    mlflow.keras.log_model(self._model, **self._pyfunc_params)

    active = mlflow.active_run()
    model_uri = "runs:/{}/model".format(active.info.run_id)
    print("model_uri: {}".format(model_uri))

    registered = mlflow.register_model(model_uri, "KerasFlowerClassifierModel")
    print("Name: {}".format(registered.name))
    print("Version: {}".format(registered.version))
def log(
    cls,
    artifact_path,
    flavor,
    registered_model_name=None,
    await_registration_for=DEFAULT_AWAIT_MAX_SLEEP_SECONDS,
    **kwargs,
):
    """
    Save a model with the given flavor module and log it to the current run.
    If no run is active, this method will create a new active run.

    :param artifact_path: Run relative path identifying the model.
    :param flavor: Flavor module to save the model with. The module must have
                   the ``save_model`` function that will persist the model as a valid
                   MLflow model.
    :param registered_model_name: If given, create a model version under
                                  ``registered_model_name``, also creating a registered
                                  model if one with the given name does not exist.
    :param signature: :py:class:`ModelSignature` describes model input
                      and output :py:class:`Schema <mlflow.types.Schema>`. The model
                      signature can be :py:func:`inferred <infer_signature>` from
                      datasets representing valid model input (e.g. the training
                      dataset) and valid model output (e.g. model predictions generated
                      on the training dataset), for example:

                      .. code-block:: python

                        from mlflow.models.signature import infer_signature
                        train = df.drop_column("target_label")
                        signature = infer_signature(train, model.predict(train))

    :param input_example: Input example provides one or several examples of valid
                          model input. The example can be used as a hint of what data
                          to feed the model. The given example will be converted to a
                          Pandas DataFrame and then serialized to json using the Pandas
                          split-oriented format. Bytes are base64-encoded.
    :param await_registration_for: Number of seconds to wait for the model version to
                                   finish being created and is in ``READY`` status. By
                                   default, the function waits for five minutes. Specify
                                   0 or None to skip waiting.
    :param kwargs: Extra args passed to the model flavor.
    """
    fluent = mlflow.tracking.fluent
    with TempDir() as workspace:
        model_dir = workspace.path("model")
        run_id = fluent._get_or_start_run().info.run_id
        model_record = cls(artifact_path=artifact_path, run_id=run_id)
        flavor.save_model(path=model_dir, mlflow_model=model_record, **kwargs)
        fluent.log_artifacts(model_dir, artifact_path)

        try:
            fluent._record_logged_model(model_record)
        except MlflowException:
            # Older tracking servers may not support recording model metadata;
            # the artifacts are already uploaded, so this only warns.
            _logger.warning(_LOG_MODEL_METADATA_WARNING_TEMPLATE, mlflow.get_artifact_uri())

        if registered_model_name is None:
            return

        registration_run_id = fluent.active_run().info.run_id
        mlflow.register_model(
            "runs:/%s/%s" % (registration_run_id, artifact_path),
            registered_model_name,
            await_registration_for=await_registration_for,
        )
# COMMAND ----------

# Run the deserialized pipeline over the test data and show the output.
exampleResults = deserializedPipeline.transform(test_df)
display(exampleResults)

# COMMAND ----------

# MAGIC %md Register model
# MAGIC
# MAGIC https://www.mlflow.org/docs/latest/model-registry.html#registering-a-model
# MAGIC https://docs.microsoft.com/fr-fr/azure/databricks/applications/machine-learning/manage-model-lifecycle/

# COMMAND ----------

# Template call: both arguments are placeholders to be filled in.
result = mlflow.register_model("runs:<model-path>", "<model-name>")

# COMMAND ----------

from mlflow.tracking import MlflowClient

# Same registration done explicitly through the client API: create the
# registered model first, then add a version pointing at the run's artifact.
client = MlflowClient()
name = "spark-lr-registered-model"
client.create_registered_model(name)

desc = "A new version of the model"
model_uri = "runs:/{}/sklearn-model".format(run.info.run_id)
mv = client.create_model_version(
    name,
    model_uri,
    run.info.run_id,
    description=desc,
)
def log_model(
    spark_model,
    artifact_path,
    conda_env=None,
    dfs_tmpdir=None,
    sample_input=None,
    registered_model_name=None,
    signature: ModelSignature = None,
    input_example: ModelInputExample = None,
    await_registration_for=DEFAULT_AWAIT_MAX_SLEEP_SECONDS,
    pip_requirements=None,
    extra_pip_requirements=None,
):
    """
    Log a Spark MLlib model as an MLflow artifact for the current run. This uses the MLlib
    persistence format and produces an MLflow Model with the Spark flavor.

    Note: If no run is active, it will instantiate a run to obtain a run_id.

    :param spark_model: Spark model to be saved - MLflow can only save descendants of
                        pyspark.ml.Model which implement MLReadable and MLWritable.
    :param artifact_path: Run relative artifact path.
    :param conda_env: Either a dictionary representation of a Conda environment or the path to a
                      Conda environment yaml file. If provided, this describes the environment
                      this model should be run in. At minimum, it should specify the dependencies
                      contained in :func:`get_default_conda_env()`. If `None`, the default
                      :func:`get_default_conda_env()` environment is added to the model.
                      The following is an *example* dictionary representation of a Conda
                      environment::

                        {
                            'name': 'mlflow-env',
                            'channels': ['defaults'],
                            'dependencies': [
                                'python=3.7.0',
                                'pyspark=2.3.0'
                            ]
                        }
    :param dfs_tmpdir: Temporary directory path on Distributed (Hadoop) File System (DFS) or local
                       filesystem if running in local mode. The model is written in this
                       destination and then copied into the model's artifact directory. This is
                       necessary as Spark ML models read from and write to DFS if running on a
                       cluster. If this operation completes successfully, all temporary files
                       created on the DFS are removed. Defaults to ``/tmp/mlflow``.
    :param sample_input: A sample input used to add the MLeap flavor to the model.
                         This must be a PySpark DataFrame that the model can evaluate. If
                         ``sample_input`` is ``None``, the MLeap flavor is not added.
    :param registered_model_name: If given, create a model version under
                                  ``registered_model_name``, also creating a registered model if one
                                  with the given name does not exist.
    :param signature: :py:class:`ModelSignature <mlflow.models.ModelSignature>`
                      describes model input and output :py:class:`Schema <mlflow.types.Schema>`.
                      The model signature can be :py:func:`inferred <mlflow.models.infer_signature>`
                      from datasets with valid model input (e.g. the training dataset with target
                      column omitted) and valid model output (e.g. model predictions generated on
                      the training dataset), for example:

                      .. code-block:: python

                        from mlflow.models.signature import infer_signature
                        train = df.drop_column("target_label")
                        predictions = ... # compute model predictions
                        signature = infer_signature(train, predictions)
    :param input_example: Input example provides one or several instances of valid
                          model input. The example can be used as a hint of what data to feed the
                          model. The given example will be converted to a Pandas DataFrame and then
                          serialized to json using the Pandas split-oriented format. Bytes are
                          base64-encoded.
    :param await_registration_for: Number of seconds to wait for the model version to finish
                            being created and is in ``READY`` status. By default, the function
                            waits for five minutes. Specify 0 or None to skip waiting.
    :param pip_requirements: {{ pip_requirements }}
    :param extra_pip_requirements: {{ extra_pip_requirements }}
    :return: A :py:class:`ModelInfo <mlflow.models.model.ModelInfo>` instance that contains the
             metadata of the logged model.

    .. code-block:: python
        :caption: Example

        from pyspark.ml import Pipeline
        from pyspark.ml.classification import LogisticRegression
        from pyspark.ml.feature import HashingTF, Tokenizer
        training = spark.createDataFrame([
            (0, "a b c d e spark", 1.0),
            (1, "b d", 0.0),
            (2, "spark f g h", 1.0),
            (3, "hadoop mapreduce", 0.0) ], ["id", "text", "label"])
        tokenizer = Tokenizer(inputCol="text", outputCol="words")
        hashingTF = HashingTF(inputCol=tokenizer.getOutputCol(), outputCol="features")
        lr = LogisticRegression(maxIter=10, regParam=0.001)
        pipeline = Pipeline(stages=[tokenizer, hashingTF, lr])
        model = pipeline.fit(training)
        mlflow.spark.log_model(model, "spark-model")
    """
    from py4j.protocol import Py4JError

    _validate_model(spark_model)
    from pyspark.ml import PipelineModel

    if not isinstance(spark_model, PipelineModel):
        spark_model = PipelineModel([spark_model])
    run_id = mlflow.tracking.fluent._get_or_start_run().info.run_id
    run_root_artifact_uri = mlflow.get_artifact_uri()

    def _log_via_model_log():
        # Generic path shared by both fallbacks below: save through the flavor
        # into a temporary directory and upload it with Model.log().
        return Model.log(
            artifact_path=artifact_path,
            flavor=mlflow.spark,
            spark_model=spark_model,
            conda_env=conda_env,
            dfs_tmpdir=dfs_tmpdir,
            sample_input=sample_input,
            registered_model_name=registered_model_name,
            signature=signature,
            input_example=input_example,
            await_registration_for=await_registration_for,
            pip_requirements=pip_requirements,
            extra_pip_requirements=extra_pip_requirements,
        )

    # If the artifact URI is a local filesystem path, defer to Model.log() to persist the model,
    # since Spark may not be able to write directly to the driver's filesystem. For example,
    # writing to `file:/uri` will write to the local filesystem from each executor, which will
    # be incorrect on multi-node clusters - to avoid such issues we just use the Model.log() path
    # here.
    if is_local_uri(run_root_artifact_uri):
        return _log_via_model_log()

    # The artifact root is a URI, so it is joined with forward slashes on every
    # platform; ``os.path.join`` would insert a backslash on Windows.
    model_dir = posixpath.join(run_root_artifact_uri, artifact_path)
    # Try to write directly to the artifact repo via Spark. If this fails, defer to Model.log()
    # to persist the model
    try:
        spark_model.save(posixpath.join(model_dir, _SPARK_MODEL_PATH_SUB))
    except Py4JError:
        return _log_via_model_log()

    # Otherwise, override the default model log behavior and save model directly to artifact repo
    mlflow_model = Model(artifact_path=artifact_path, run_id=run_id)
    with TempDir() as tmp:
        tmp_model_metadata_dir = tmp.path()
        # NOTE(review): unlike the Model.log() path, ``pip_requirements`` and
        # ``extra_pip_requirements`` are not forwarded here - confirm whether
        # ``_save_model_metadata`` should receive them.
        _save_model_metadata(
            tmp_model_metadata_dir,
            spark_model,
            mlflow_model,
            sample_input,
            conda_env,
            signature=signature,
            input_example=input_example,
        )
        mlflow.tracking.fluent.log_artifacts(tmp_model_metadata_dir, artifact_path)
        if registered_model_name is not None:
            mlflow.register_model(
                "runs:/%s/%s" % (run_id, artifact_path),
                registered_model_name,
                await_registration_for=await_registration_for,
            )
        return mlflow_model.get_model_info()
index=["validation", "test"])) # COMMAND ---------- # MAGIC %md # MAGIC ## Automatically push model to registry # COMMAND ---------- import time from mlflow.entities.model_registry.model_version_status import ModelVersionStatus artifact_name = "model" artifact_uri = f"runs:/{mlflow_run.info.run_id}/{artifact_name}" mlflow.set_registry_uri(cmr_uri) registered_mdl = mlflow.register_model(artifact_uri, model_name) # Wait until the model is ready def wait_until_ready(model_name, model_version): client = MlflowClient(registry_uri=cmr_uri) for _ in range(20): model_version_details = client.get_model_version( name=model_name, version=model_version, ) status = ModelVersionStatus.from_string(model_version_details.status) print("Model status: %s" % ModelVersionStatus.to_string(status)) if status == ModelVersionStatus.READY: break time.sleep(5)
#model_details = mlflow.register_model(model_uri=model_uri, name=model_registry_name)

# COMMAND ----------

## Create ModelRegistry (only have to do this once) TODO: Can probably delete this code
client = MlflowClient()
client.create_registered_model(modelRegistryName)

# COMMAND ----------

# MAGIC %md
# MAGIC ### Register the model associated to the runID to our model Registry

# COMMAND ----------

mlflow.register_model("runs:/" + runId + "/model", modelRegistryName)

# COMMAND ----------

# DBTITLE 1,Push old model(s) to archive stage if it exists
# Collect every version of this model currently in Production ...
model_list = client.search_model_versions("name='%s'" % modelRegistryName)
version_prod_list = [
    model_version.version
    for model_version in model_list
    if model_version.current_stage == "Production"
]

# ... and move each of them to the Archived stage.
for version in version_prod_list:
    client.transition_model_version_stage(
        name=modelRegistryName,
        version=version,
        stage="Archived",
    )
# Log the benchmark API as a pyfunc model and remember which run holds it.
mlflow.pyfunc.log_model(
    'pipeline',
    python_model=ESGBenchmarkAPI(pipeline),
    conda_env=conda_env,
    artifacts=artifacts,
)
api_run_id = mlflow.active_run().info.run_id
print(api_run_id)

# COMMAND ----------

# DBTITLE 1,Register model
client = mlflow.tracking.MlflowClient()
model_uri = "runs:/{}/pipeline".format(api_run_id)
model_name = "esg_lda_benchmark"

# Register the logged pipeline and report the version that was created.
result = mlflow.register_model(model_uri, model_name)
version = result.version
print(version)

# COMMAND ----------

# MAGIC %md
# MAGIC ---
# MAGIC + <a href="$./00_esg_context">STAGE0</a>: Home page
# MAGIC + <a href="$./01_esg_report">STAGE1</a>: Using NLP to extract key ESG initiatives PDF reports
# MAGIC + <a href="$./02_esg_scoring">STAGE2</a>: Introducing a novel approach to ESG scoring using graph analytics
# MAGIC + <a href="$./03_esg_market">STAGE3</a>: Applying ESG to market risk calculations
# MAGIC ---

# COMMAND ----------
# MAGIC %md ###2. Registering Model with Model Registry, a central model repository

# COMMAND ----------

run_id.info

# COMMAND ----------

import mlflow
from mlflow.tracking.client import MlflowClient

client = MlflowClient()

model_name = "linear-regression-model"
artifact_path = "best_model"
# ``run_id`` holds a run object (the previous cell reads ``run_id.info``), so
# the URI must be built from its ID; formatting the object itself would embed
# its repr in the URI.
model_uri = "runs:/{run_id}/{artifact_path}".format(
    run_id=run_id.info.run_id, artifact_path=artifact_path
)

registered_model = mlflow.register_model(
    model_uri=model_uri, name=model_name, await_registration_for=120
)

# Add model and model version descriptions to Model Registry
client.update_model_version(
    name=registered_model.name,
    version=registered_model.version,
    description="This predicts the age of a customer using transaction history."
)

# Transition a model version to Staging/Prod/Archived
client.transition_model_version_stage(
    name=registered_model.name,
    version=registered_model.version,
    stage='Staging',
)
def _archive_staging_and_pick_stage(client, model_name):
    """Archive versions currently in Staging and return the stage for a new version.

    :param client: ``MlflowClient`` used to query and transition model versions.
    :param model_name: Name of the registered model to inspect.
    :return: ``"Staging"`` if any existing version is in Staging or Production,
             otherwise ``"Production"``.
    """
    target_stage = "Production"
    for mv in client.search_model_versions(f"name='{model_name}'"):
        if mv.current_stage == "Staging":
            # Archive the currently staged model so the new one can take its place.
            client.transition_model_version_stage(name=mv.name,
                                                  version=mv.version,
                                                  stage="Archived")
            target_stage = "Staging"
        elif mv.current_stage == "Production":
            target_stage = "Staging"
    return target_stage


def train_model(df_orig, maxDepth, numTrees):
    """Train, log, register and stage a random-forest model, then predict.

    Reads ``airport_code``, ``training_start_date``, ``training_end_date`` and
    ``inference_date`` from the enclosing scope and calls ``load_assemblers``
    to obtain the feature stages.

    :param df_orig: Source DataFrame; filtered on ``DEST`` and
                    ``SCHEDULED_DEP_TIME``, with ``ARR_DELAY`` as the label.
    :param maxDepth: ``maxDepth`` passed to ``RandomForestRegressor``.
    :param numTrees: ``numTrees`` passed to ``RandomForestRegressor``.
    :return: The fitted pipeline applied to the rows of ``df_orig`` for the
             destination airport on ``inference_date``.
    """
    from pyspark.sql.functions import col, to_date
    import mlflow
    import mlflow.spark
    import pandas as pd
    from pyspark.ml import Pipeline
    from pyspark.ml.regression import RandomForestRegressor
    from pyspark.ml.evaluation import RegressionEvaluator
    from mlflow.tracking import MlflowClient

    # Rows for the destination airport inside the training date range. They are
    # used for training and testing; this is where performance is measured from.
    df = (df_orig.filter(df_orig.DEST == airport_code).filter(
        col("SCHEDULED_DEP_TIME").between(pd.to_datetime(training_start_date),
                                          pd.to_datetime(training_end_date))))

    # Rows for the destination airport on the inference date only. They are
    # used for predicting the actual values.
    df_inference = (df_orig.filter(df_orig.DEST == airport_code).filter(
        to_date(col("SCHEDULED_DEP_TIME")) == inference_date))

    dest = airport_code
    # One name for the artifact so the logged path and the registry URI agree.
    artifact_path = "{0}_rfr".format(dest)

    (trainDF, testDF) = df.randomSplit([0.8, 0.2], seed=42)
    stringIndexer, vecAssembler = load_assemblers(df_orig)

    with mlflow.start_run(
            run_name="flights-randomforest-with-regressors-{0}".format(dest)) as run:
        rf = RandomForestRegressor(featuresCol="features",
                                   labelCol="ARR_DELAY",
                                   maxDepth=maxDepth,
                                   numTrees=numTrees)
        pipeline = Pipeline(stages=[stringIndexer, vecAssembler, rf])
        mlflow.log_param("num_trees", rf.getNumTrees())
        mlflow.log_param("max_depth", rf.getMaxDepth())

        # Log model. Fitting is the point where the pipeline "modifies" the
        # training dataset and vectorizes it.
        pipelineModel = pipeline.fit(trainDF)
        mlflow.spark.log_model(pipelineModel, artifact_path)

        mlflow.set_tags({
            "training_start_date": training_start_date,
            "training_end_date": training_end_date,
        })

        # Log metrics: RMSE and R2
        predDF = pipelineModel.transform(testDF)
        regressionEvaluator = RegressionEvaluator(predictionCol="prediction",
                                                  labelCol="ARR_DELAY")
        rmse = regressionEvaluator.setMetricName("rmse").evaluate(predDF)
        r2 = regressionEvaluator.setMetricName("r2").evaluate(predDF)
        mlflow.log_metrics({"rmse": rmse, "r2": r2})

    # Take the ID from the run that was just created. Searching for the newest
    # run in the experiment could return another run started concurrently.
    run_id = run.info.run_id

    model_name = "rfr_{0}_{1}_{2}_{3}".format(airport_code, training_start_date,
                                              training_end_date, inference_date)
    model_uri = "runs:/{run_id}/{artifact_path}".format(run_id=run_id,
                                                        artifact_path=artifact_path)
    model_details = mlflow.register_model(model_uri=model_uri, name=model_name)

    # Move this latest version to Staging if a Production (or Staging) version
    # already exists; otherwise make it the Production version. The version
    # comes from the registration result, not from the order of search results.
    client = MlflowClient()
    model_stage = _archive_staging_and_pick_stage(client, model_name)
    client.transition_model_version_stage(name=model_name,
                                          version=model_details.version,
                                          stage=model_stage)

    # The idea is to return the predicted delay for each model version so it
    # can be saved in a table such as the one in notebook 06 RandomForest with
    # Time & Weather.
    predicted_inference_DF = pipelineModel.transform(df_inference)
    return predicted_inference_DF
# Fetch the most recently started run of the experiment and show its metrics.
client = MlflowClient()
runs = client.search_runs(run.info.experiment_id,
                          order_by=["attributes.start_time desc"],
                          max_results=1)
run_id = runs[0].info.run_id
runs[0].data.metrics

# COMMAND ----------

import uuid

# `run_uuid` is deprecated in favour of `run_id`, which the cell above already
# uses; both carry the same value.
runID = runs[0].info.run_id

# A random suffix gives every execution its own registered model name.
model_name = f"flight_delay_{uuid.uuid4().hex[:10]}"
model_uri = "runs:/{run_id}/model".format(run_id=runID)
model_details = mlflow.register_model(model_uri=model_uri, name=model_name)
model_details

# COMMAND ----------

# MAGIC %md
# MAGIC At this point look at the models tab and you can see the model registered.

# COMMAND ----------

# MAGIC %md
# MAGIC # 4. Load Model and Predict Arrival Delay

# COMMAND ----------

# Load saved model with MLflow
mlflow.sklearn.log_model(lr, "model")

if is_test:
    import json

    # Sample content that is written to disk and uploaded as example artifacts.
    features = "rooms, zipcode, median_price, school_rating, transport"
    data = {"state": "TX", "Available": 25, "Type": "Detached"}

    # Write both files under the local "data" directory, then log the whole
    # directory in one call.
    os.makedirs("data", exist_ok=True)
    with open("data/data.json", 'w', encoding='utf-8') as json_file:
        json_file.write(json.dumps(data, indent=2))
    with open("data/features.txt", 'w') as features_file:
        features_file.write(features)
    mlflow.log_artifacts("data", artifact_path="states")

mlflow.log_artifact(wine_path)
mlflow.log_params({"alpha":alpha, "l1_ratio":l1_ratio})
mlflow.log_metrics({"rmse":rmse, "r2":r2, "mae":mae})
mlflow.log_text("text test", "testtext.txt")

if is_test:
    # Register the model logged above and report the version that was created.
    model_uri = "runs:/{}/model".format(run.info.run_id)
    mv = mlflow.register_model(model_uri, "ElasticNetRegressionModel")
    print("Name: {}".format(mv.name))
    print("Version: {}".format(mv.version))