def load_model(client: MlflowClient, model_registry_name: str, stage: str, fallback_stage: str = "Production"):
    """Return the latest registered model version for a stage, with a fallback.

    The requested stage is tried first; if the registry returns no version
    for it, the fallback stage is tried next.

    Args:
        client (MlflowClient): Client used to query the model registry.
        model_registry_name (str): Name of the registered model.
        stage (str): Stage to look up first.
        fallback_stage (str, optional): Stage to look up when ``stage`` has
            no version. Defaults to "Production".

    Raises:
        NoModelFoundException: Neither ``stage`` nor ``fallback_stage``
            returned any version.

    Returns:
        The first model version object returned by
        ``client.get_latest_versions`` for the first stage that has one.
    """
    for candidate_stage in (stage, fallback_stage):
        versions = client.get_latest_versions(model_registry_name, stages=[candidate_stage])
        if len(versions) != 0:
            return versions[0]

    raise deploy.exceptions.NoModelFoundException(
        f"No model {model_registry_name} found for stage {stage} and for fallback stage {fallback_stage}"
    )
def run(self):
    """Score candidate runs against production and promote the best candidate.

    The test split comes from ``self.data_provider.run()``. The best
    candidate (per ``self.get_best_model``) is registered under
    ``self.model_name`` and moved to the "Production" stage when its score
    is greater than or equal to the best production score.
    """
    mlflow_client = MlflowClient()
    # Only the second and fourth items of the provider result are used here.
    _, X_test, _, Y_test = self.data_provider.run()
    cand_run_ids = self.get_candidate_models()
    best_cand_roc, best_cand_run_id = self.get_best_model(cand_run_ids, X_test, Y_test)
    print('Best ROC (candidate models): ', best_cand_roc)

    try:
        versions = mlflow_client.get_latest_versions(self.model_name, stages=['Production'])
        prod_run_ids = [v.run_id for v in versions]
        best_prod_roc, best_prod_run_id = self.get_best_model(prod_run_ids, X_test, Y_test)
    except RestException:
        # Any registry/REST failure in the block above is treated as "no
        # production model"; -1 is a sentinel score for the comparison below.
        best_prod_roc = -1
    print('ROC (production models): ', best_prod_roc)

    if best_cand_roc >= best_prod_roc:
        # deploy new model
        model_version = mlflow.register_model("runs:/" + best_cand_run_id + "/model", self.model_name)
        # presumably gives the registry time to finish creating the version
        # before the stage transition — TODO confirm
        time.sleep(10)
        try:
            mlflow_client.transition_model_version_stage(name=self.model_name, version=model_version.version, stage="Production")
            print('Deployed version: ', model_version.version)
        except RestException:
            # One retry after a longer wait; a second failure propagates.
            time.sleep(15)
            mlflow_client.transition_model_version_stage(name=self.model_name, version=model_version.version, stage="Production")
            print('Deployed version: ', model_version.version)
    # remove candidate tags
    # NOTE(review): placed at method level, i.e. tags are cleared whether or
    # not a deployment happened — confirm against the original indentation.
    for run_id in cand_run_ids:
        mlflow_client.set_tag(run_id, 'candidate', 'false')
def _parse_model_ref(parsed: ParseResult, client: MlflowClient):
    """Resolve a parsed model reference to a versioned URI plus run metadata.

    The host part of ``parsed`` is the registered model name. The path part
    is either a version number (all digits) or a stage name; an empty path
    selects the stage "none".

    Returns a 3-tuple ``(models:/<model>/<version>, run tags, run params)``
    taken from the run that produced the resolved model version.

    Raises SpecError when a stage lookup returns no versions.
    """
    model = parsed.hostname
    path = parsed.path.lstrip("/")

    if path.isdigit():
        # Explicit version: the URI keeps the path text exactly as given.
        source_run_id = client.get_model_version(model, int(path)).run_id
        version_label = path
    else:
        # TODO allow setting default stage from config
        stage = path.lower() if path else "none"
        results = client.get_latest_versions(model, stages=[stage])
        if not results:
            raise SpecError(
                "No versions found for model {} in stage {}".format(model, stage)
            )
        latest = results[0]
        source_run_id = latest.run_id
        version_label = latest.version

    run = client.get_run(source_run_id)
    return (
        "models:/{}/{}".format(model, version_label),
        run.data.tags,
        run.data.params,
    )
def evaluate_all_candidate_models():
    """Promote the best candidate run when it is at least as good as production.

    Candidate runs come from ``get_candidate_models()`` and are scored with
    ``get_best_model`` on the module-level ``x_test`` / ``y_test``. If the
    production lookup or scoring raises ``RestException``, the production
    metric falls back to -1. A winning candidate is registered under
    ``model_name`` and transitioned to the "Production" stage.
    """
    client = MlflowClient()

    candidate_run_ids = get_candidate_models()
    best_cand_metric, best_cand_run_id = get_best_model(
        candidate_run_ids, x_test, y_test)
    print('Best ROC AUC (candidate models): ', best_cand_metric)

    try:
        production_versions = client.get_latest_versions(model_name, stages=['Production'])
        production_run_ids = [mv.run_id for mv in production_versions]
        best_prod_metric, _ = get_best_model(production_run_ids, x_test, y_test)
    except RestException:
        best_prod_metric = -1
    print('ROC AUC (production models): ', best_prod_metric)

    # Written as a negated ">=" so the outcome matches the plain ">=" test
    # for every pair of values, including non-orderable ones such as NaN.
    if not (best_cand_metric >= best_prod_metric):
        return

    # deploy new model
    registered = mlflow.register_model(
        "runs:/" + best_cand_run_id + "/model", model_name)
    time.sleep(15)
    client.transition_model_version_stage(
        name=model_name, version=registered.version, stage="Production")
    print('Deployed version: ', registered.version)
def make_predictions(df, model_name, spark):
    """Score ``df`` with the current Production model and tag rows with its version.

    The Production version is resolved once and the scoring UDF is loaded by
    that explicit version number, so the ``model_version`` column always
    names the model that actually produced the ``prediction`` column.

    Args:
        df: Spark DataFrame; all of its columns are passed to the model.
        model_name: Name of the registered model.
        spark: Active Spark session used to build the UDF.

    Returns:
        ``df`` with two extra columns: ``prediction`` and ``model_version``.

    Raises:
        MlflowException: No version of ``model_name`` is in the Production stage.
    """
    from mlflow.exceptions import MlflowException

    client = MlflowClient()
    production_versions = client.get_latest_versions(model_name, stages=["Production"])
    if not production_versions:
        raise MlflowException(
            f"No version of model '{model_name}' found in stage 'Production'"
        )
    model_version = production_versions[0].version

    # Pin the UDF to the resolved version instead of the stage alias; a stage
    # transition between two separate lookups could otherwise make the
    # recorded version differ from the model that was loaded.
    model_udf = mlflow.pyfunc.spark_udf(spark, f"models:/{model_name}/{model_version}")

    return (
        df.withColumn("prediction", model_udf(*df.columns))
        .withColumn("model_version", F.lit(model_version))
    )
def get_underlying_uri(uri):
    """Return the download URI of the model version that ``uri`` refers to.

    ``uri`` is parsed into a model name plus either a version or a stage.
    For a stage, the latest version in that stage is looked up first.

    Raises MlflowException when the URI names a stage that currently has no
    model version.
    """
    # Note: to support a registry URI that is different from the tracking URI here,
    # we'll need to add setting of registry URIs via environment variables.
    from mlflow.exceptions import MlflowException
    from mlflow.tracking import MlflowClient

    client = MlflowClient()
    (name, version, stage) = ModelsArtifactRepository._parse_uri(uri)
    if stage is not None:
        latest = client.get_latest_versions(name, [stage])
        # An empty stage would otherwise surface as a bare IndexError.
        if len(latest) == 0:
            raise MlflowException(
                "No versions of model with name '{name}' and "
                "stage '{stage}' found".format(name=name, stage=stage)
            )
        version = latest[0].version
    return client.get_model_version_download_uri(name, version)
def get_underlying_uri(uri):
    """Map a model URI to the download URI of the model version behind it.

    A URI that names a version is resolved directly. A URI that names a
    stage is resolved to the first entry of the latest versions in that
    stage; MlflowException is raised when the stage has none.
    """
    # Note: to support a registry URI that is different from the tracking URI here,
    # we'll need to add setting of registry URIs via environment variables.
    from mlflow.tracking import MlflowClient

    registry_client = MlflowClient()
    model_name, model_version, model_stage = ModelsArtifactRepository._parse_uri(uri)

    if model_stage is None:
        return registry_client.get_model_version_download_uri(model_name, model_version)

    stage_versions = registry_client.get_latest_versions(model_name, [model_stage])
    if len(stage_versions) == 0:
        raise MlflowException("No versions of model with name '{name}' and "
                              "stage '{stage}' found".format(name=model_name, stage=model_stage))
    return registry_client.get_model_version_download_uri(model_name, stage_versions[0].version)
def main():
    """Download the artifacts of the latest "staging" model version.

    Command-line arguments:
        -o / --output_local_path: directory the artifacts are written to.
        -m / --model_name: name of the registered model.

    The registry is reached through the Databricks CLI profile "registry".
    Exits with an error message when the model has no version in the
    "staging" stage.
    """
    parser = argparse.ArgumentParser(
        description="Execute python scripts in Databricks")
    parser.add_argument("-o", "--output_local_path",
                        help="Output path where the artifacts will be written", required=True)
    parser.add_argument("-m", "--model_name",
                        help="Model Registry Name", required=True)
    args = parser.parse_args()
    model_name = args.model_name
    output_local_path = args.output_local_path

    cli_profile_name = "registry"
    # TODO: Document that we assume that the registry profile will be created in the local machine:
    # dbutils.fs.put(f"file:///root/.databrickscfg", f"[{cli_profile_name}]\nhost={shard}\ntoken={token}",
    #                overwrite=True)

    TRACKING_URI = f"databricks://{cli_profile_name}"
    print(f"TRACKING_URI: {TRACKING_URI}")
    artifact_path = 'model'

    from mlflow.tracking import MlflowClient
    remote_client = MlflowClient(tracking_uri=TRACKING_URI)
    mlflow.set_tracking_uri(TRACKING_URI)

    latest_model = remote_client.get_latest_versions(name=model_name, stages=["staging"])
    print(f"Latest Model: {latest_model}")
    # Fail with a readable message instead of an IndexError traceback when
    # nothing has been promoted to staging yet.
    if not latest_model:
        raise SystemExit(f"No version of model '{model_name}' found in stage 'staging'")

    run_id = latest_model[0].run_id
    artifact_uri = artifact_utils.get_artifact_uri(run_id)
    print(f"artifact_uri: {artifact_uri}")
    model_uri = f"runs:/{run_id}/{artifact_path}"
    print(f"model_uri: {model_uri}")

    print(f"Downloading model artifacts to : {output_local_path}")
    remote_client.download_artifacts(run_id=run_id, path=artifact_path, dst_path=output_local_path)
def handle_model_uri(model_uri, service_name):
    """
    Resolve a model URI to a registered model name and version.

    Two URI shapes are handled:

    * ``models:/<name>/<stage-or-version>`` - when the last segment is one of
      the stage names reported by the client, the latest version in that
      stage is used; otherwise the segment itself is taken as the version.
    * ``runs:/...`` or ``file://...`` - only when both the tracking URI and
      the registry URI start with ``azureml``; the model is registered as
      ``<service_name>-model`` and the new version is used.

    :param model_uri: URI of the model to resolve
    :type model_uri: str
    :param service_name: service name used to derive the registered model
        name when the model has to be registered first
    :type service_name: str
    :return: the model name and the resolved model version
    :rtype: tuple
    :raises MlflowException: if the URI shape is unsupported, the tracking
        or registry URI is not an AzureML URI, or the requested stage has
        no model version
    """
    client = MlflowClient()

    if model_uri.startswith("models:/"):
        uri_parts = model_uri.split("/")
        model_name = uri_parts[-2]
        model_stage_or_version = uri_parts[-1]
        if model_stage_or_version in client.get_model_version_stages(None, None):
            latest_versions = client.get_latest_versions(model_name, [model_stage_or_version])
            # An empty stage would otherwise surface as a bare IndexError.
            if not latest_versions:
                raise MlflowException(
                    "No versions of model with name '{}' and stage '{}' found".format(
                        model_name, model_stage_or_version
                    )
                )
            model_version = latest_versions[0].version
        else:
            model_version = model_stage_or_version
    elif (
        model_uri.startswith(("runs:/", "file://"))
        and get_tracking_uri().startswith("azureml")
        and get_registry_uri().startswith("azureml")
    ):
        # We will register the model for the user
        model_name = service_name + "-model"
        mlflow_model = mlflow_register_model(model_uri, model_name)
        model_version = mlflow_model.version

        _logger.info(
            "Registered an Azure Model with name: `%s` and version: `%s`",
            mlflow_model.name,
            mlflow_model.version,
        )
    else:
        raise MlflowException("Unsupported model uri provided, or tracking or registry uris are not set to "
                              "an AzureML uri.")

    return model_name, model_version
# Databricks notebook source import re from mlflow.tracking import MlflowClient mlflow_client = MlflowClient() ci_holder_name = "cet_debris_detection_cicd" versions = mlflow_client.get_latest_versions(ci_holder_name, stages=["Production"]) assert len(versions) == 1 ci_holder = versions[0] source_run = mlflow_client.get_run(ci_holder.run_id) dist_info = [fi for fi in mlflow_client.list_artifacts(source_run.info.run_id, 'dist') if fi.path.endswith('.whl')] assert len(dist_info) == 1 dist_info = dist_info[0] lib_path = f"{source_run.info.artifact_uri}/{dist_info.path}" lib_path = re.sub(r"^dbfs:/", "/dbfs/", lib_path) job_info = [fi for fi in mlflow_client.list_artifacts(source_run.info.run_id, 'job') if fi.path.endswith('runtime_requirements.txt')] assert len(job_info) == 1 job_info = job_info[0] req_path = f"{source_run.info.artifact_uri}/{job_info.path}" req_path = re.sub(r"^dbfs:/", "/dbfs/", req_path) print(lib_path) print(req_path) %pip install -r $req_path %pip install -U $lib_path
mlflow.register_model( f'runs:/{worst_run["run_id"]}/model', model_name ) # COMMAND ---------- # MAGIC %md # MAGIC Now go ahead and observe the model in the Model Registry: # MAGIC - Click "Models" on the left sidebar # MAGIC - Find your Model (if your username is "yan_moiseev", you should see it as `sensor_status__yan_moiseev`) # MAGIC - Click on "Version 1" # MAGIC - Click on "Stage", transition it to "Production" # COMMAND ---------- production_model = client.get_latest_versions(model_name, ['Production'])[0] model = mlflow.pyfunc.load_model(production_model.source) predictions_worst = model.predict(df) # COMMAND ---------- # MAGIC %md ## 3.2 Now let's add best run we had and assign "Production" tag to it automatically # COMMAND ---------- # Now push best model run to it best_run = mlflow.search_runs(experiment_ids=[experiment_id], order_by=['metrics.macro_avg__f1_score desc']).iloc[0] print('Our best run is:') print(f'\trun_id={best_run["run_id"]}') print(f'\tf1_score={best_run["metrics.macro_avg__f1_score"]}')
from central_model_registry.feature_engineering import engineer_features, rename_columns

# use central model registry
scope = "demo-cmr"
key = "cmr"
registry_uri = 'databricks://' + scope + ':' + key
mlflow.set_registry_uri(registry_uri)

model_name = "demo-cmr"
input_path = dbutils.widgets.get("input_path")
output_path = dbutils.widgets.get("output_path")

# COMMAND ----------

client = MlflowClient()

# Resolve the Production version once and load the UDF by that explicit
# version, so the "model_version" column always names the model that
# produced the predictions even if a stage transition happens meanwhile.
production_versions = client.get_latest_versions(model_name, stages=["Production"])
if not production_versions:
    raise mlflow.exceptions.MlflowException(
        f"No version of model '{model_name}' found in stage 'Production'"
    )
model_version = production_versions[0].version
model_udf = mlflow.pyfunc.spark_udf(spark, f"models:/{model_name}/{model_version}")

# Read the semicolon-separated CSV with a header row and drop the "quality" column.
raw_data = spark.read.format("csv").option("header", "true").option(
    "sep", ";").load(input_path).drop("quality")
features = engineer_features(raw_data)
data = rename_columns(features)

# Every column of `data` is passed to the model.
preds = (data.withColumn("prediction", model_udf(*data.columns)).withColumn(
    "model_version", F.lit(model_version)))
preds.write.format("delta").mode("overwrite").save(output_path)

# COMMAND ----------
# Log the VADER pyfunc model and register it under `reg_model_name`.
# NOTE(review): only the three logging calls are assumed to be inside the
# run context — confirm against the original indentation.
with mlflow.start_run(run_name="Vader Sentiment Analysis"):
    mlflow.log_param("algorithm", "VADER")
    mlflow.log_param("total_sentiments", len(INPUT_TEXTS))
    mlflow.pyfunc.log_model(artifact_path=model_path, python_model=vader_model, registered_model_name=reg_model_name)

# Load the model from the model registry
# NOTE(review): version 1 is hard-coded here and in the stage transition
# below; presumably the registered model is new on every execution — confirm.
model_uri = f"models:/{reg_model_name}/1"
loaded_model = mlflow.pyfunc.load_model(model_uri)

# Use inference to predict output from the customized PyFunc model
for i, text in enumerate(INPUT_TEXTS):
    # The loop variable is immediately replaced by the entry's 'text' field.
    text = INPUT_TEXTS[i]['text']
    m_input = pd.DataFrame([text])
    scores = loaded_model.predict(m_input)
    print(f"<{text}> -- {str(scores[0])}")

# If required, promote the model to staging
client = MlflowClient()
versions = client.get_latest_versions(reg_model_name, stages=["Staging"])

# If not in staging, promote to staging
print("--")
if not versions:
    mv = client.transition_model_version_stage(reg_model_name, 1, "Staging")
    print_model_version_info(mv)
else:
    for version in versions:
        print_model_version_info(version)
# NOTE(review): this call presumably belongs to a
# `with mlflow.start_run() as run1:` block that opens before this chunk
# (`run1` is used below) — confirm its indentation against the full file.
mlflow.sklearn.log_model(rfr, artifact_path="sklearn-model")

with mlflow.start_run() as run2:
    params = {"n_estimators": 6, "random_state": 42}
    rfr = RandomForestRegressor(**params).fit([[0, 1]], [1])
    mlflow.log_params(params)
    mlflow.sklearn.log_model(rfr, artifact_path="sklearn-model")

# Register model name in the model registry
name = "RandomForestRegression"
client = MlflowClient()
client.create_registered_model(name)

# Create two versions of the rfr model under the registered model name
for run_id in [run1.info.run_id, run2.info.run_id]:
    model_uri = "runs:/{}/sklearn-model".format(run_id)
    mv = client.create_model_version(name, model_uri, run_id)
    print("model version {} created".format(mv.version))
print("--")

# Fetch latest version; this will be version 2
models = client.get_latest_versions(name, stages=["None"])
print_models_info(models)
print("--")

# Delete the latest model version 2
# `mv` still refers to the version created in the last loop iteration.
print("Deleting model version {}\n".format(mv.version))
client.delete_model_version(name, mv.version)
# Query again to show what the registry reports after the deletion.
models = client.get_latest_versions(name, stages=["None"])
print_models_info(models)
mlflow.set_tracking_uri("sqlite:///mlruns.db") # Create two runs Log MLflow entities with mlflow.start_run() as run1: params = {"n_estimators": 3, "random_state": 42} rfr = RandomForestRegressor(**params).fit([[0, 1]], [1]) mlflow.log_params(params) mlflow.sklearn.log_model(rfr, artifact_path="sklearn-model") with mlflow.start_run() as run2: params = {"n_estimators": 6, "random_state": 42} rfr = RandomForestRegressor(**params).fit([[0, 1]], [1]) mlflow.log_params(params) mlflow.sklearn.log_model(rfr, artifact_path="sklearn-model") # Register model name in the model registry name = "RandomForestRegression" client = MlflowClient() client.create_registered_model(name) # Create a two versions of the rfr model under the registered model name for run_id in [run1.info.run_id, run2.info.run_id]: model_uri = "runs:/{}/sklearn-model".format(run_id) mv = client.create_model_version(name, model_uri, run_id) print("model version {} created".format(mv.version)) # Fetch latest version; this will be version 2 print("--") print_models_info(client.get_latest_versions(name, stages=["None"]))