Example no. 1
def load_model(client: MlflowClient,
               model_registry_name: str,
               stage: str,
               fallback_stage: str = "Production"):
    """
    Get the latest model version for the specified stage. Fall back to the fallback stage (Production by default)
    if no model is available for the specified stage.

    Args:
        client (MlflowClient): client for MlFlow server
        model_registry_name (str): Name of the model in registry
        stage (str): Get the latest version of the model in the specified stage
        fallback_stage (str, optional): Fallback stage if no model is found in stage. Defaults to "Production".

    Raises:
        NoModelFoundException: No model found in the stage and fallback stage

    Returns:
        ModelVersion: The model version object
    """
    models = client.get_latest_versions(model_registry_name, stages=[stage])

    if len(models) == 0:
        models = client.get_latest_versions(model_registry_name,
                                            stages=[fallback_stage])
        if len(models) == 0:
            raise deploy.exceptions.NoModelFoundException(
                f"No model {model_registry_name} found for stage {stage}  and for fallback stage {fallback_stage}"
            )

    return models[0]
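A minimal usage sketch for the helper above; the registry name is a placeholder, and a reachable MLflow tracking server with that model registered is assumed:
from mlflow.tracking import MlflowClient

# Hypothetical call: "churn-classifier" is an illustrative registry name.
client = MlflowClient()
model_version = load_model(client, "churn-classifier", stage="Staging")
print(model_version.name, model_version.version, model_version.current_stage)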
Example no. 2
    def run(self):
        mlflow_client = MlflowClient()

        _, X_test, _, Y_test = self.data_provider.run()
        cand_run_ids = self.get_candidate_models()
        best_cand_roc, best_cand_run_id = self.get_best_model(cand_run_ids, X_test, Y_test)
        print('Best ROC (candidate models): ', best_cand_roc)

        try:
            versions = mlflow_client.get_latest_versions(self.model_name, stages=['Production'])
            prod_run_ids = [v.run_id for v in versions]
            best_prod_roc, best_prod_run_id = self.get_best_model(prod_run_ids, X_test, Y_test)
        except RestException:
            best_prod_roc = -1
        print('ROC (production models): ', best_prod_roc)

        if best_cand_roc >= best_prod_roc:
            # deploy new model
            model_version = mlflow.register_model("runs:/" + best_cand_run_id + "/model", self.model_name)
            time.sleep(10)
            try:
                mlflow_client.transition_model_version_stage(
                    name=self.model_name, version=model_version.version, stage="Production")
                print('Deployed version: ', model_version.version)
            except RestException:
                time.sleep(15)
                mlflow_client.transition_model_version_stage(
                    name=self.model_name, version=model_version.version, stage="Production")
                print('Deployed version: ', model_version.version)
        # remove candidate tags
        for run_id in cand_run_ids:
            mlflow_client.set_tag(run_id, 'candidate', 'false')
Example no. 3
def _parse_model_ref(parsed: ParseResult, client: MlflowClient):
    model = parsed.hostname
    path = parsed.path.lstrip("/")
    if path.isdigit():
        mv = client.get_model_version(model, int(path))
        run = client.get_run(mv.run_id)
        return (
            "models:/{}/{}".format(model, path),
            run.data.tags,
            run.data.params,
        )
    if not path:
        stage = "none"  # TODO allow setting default stage from config
    else:
        stage = path.lower()
    results = client.get_latest_versions(model, stages=[stage])
    if not results:
        raise SpecError(
            "No versions found for model {} in stage {}".format(model, stage)
        )
    run = client.get_run(results[0].run_id)
    return (
        "models:/{}/{}".format(model, results[0].version),
        run.data.tags,
        run.data.params,
    )
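A sketch of how the parser above might be invoked; the URI is hypothetical and assumes the `models://<name>/<stage-or-version>` form implied by the use of `parsed.hostname` and `parsed.path`:
from urllib.parse import urlparse
from mlflow.tracking import MlflowClient

# Hypothetical model reference; urlparse lowercases the hostname, so a
# lower-case registered model name is assumed here.
parsed = urlparse("models://my-model/staging")
model_uri, tags, params = _parse_model_ref(parsed, MlflowClient())
print(model_uri)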
Example no. 4
def evaluate_all_candidate_models():
    mlflow_client = MlflowClient()

    cand_run_ids = get_candidate_models()
    best_cand_metric, best_cand_run_id = get_best_model(
        cand_run_ids, x_test, y_test)
    print('Best ROC AUC (candidate models): ', best_cand_metric)

    try:
        versions = mlflow_client.get_latest_versions(model_name,
                                                     stages=['Production'])
        prod_run_ids = [v.run_id for v in versions]
        best_prod_metric, best_prod_run_id = get_best_model(
            prod_run_ids, x_test, y_test)
    except RestException:
        best_prod_metric = -1
    print('ROC AUC (production models): ', best_prod_metric)

    if best_cand_metric >= best_prod_metric:
        # deploy new model
        model_version = mlflow.register_model(
            "runs:/" + best_cand_run_id + "/model", model_name)
        time.sleep(15)
        mlflow_client.transition_model_version_stage(
            name=model_name, version=model_version.version, stage="Production")
        print('Deployed version: ', model_version.version)
Example no. 5
def make_predictions(df, model_name, spark):
    client = MlflowClient()
    model_udf = mlflow.pyfunc.spark_udf(spark,
                                        f"models:/{model_name}/Production")
    model_version = client.get_latest_versions(
        model_name, stages=["Production"])[0].version

    return (df.withColumn("prediction", model_udf(*df.columns)).withColumn(
        "model_version", F.lit(model_version)))
Example no. 6
def get_underlying_uri(uri):
    # Note: to support a registry URI that is different from the tracking URI here,
    # we'll need to add setting of registry URIs via environment variables.
    from mlflow.tracking import MlflowClient
    client = MlflowClient()
    (name, version, stage) = ModelsArtifactRepository._parse_uri(uri)
    if stage is not None:
        latest = client.get_latest_versions(name, [stage])
        version = latest[0].version
    return client.get_model_version_download_uri(name, version)
Example no. 7
def get_underlying_uri(uri):
    # Note: to support a registry URI that is different from the tracking URI here,
    # we'll need to add setting of registry URIs via environment variables.
    from mlflow.tracking import MlflowClient
    client = MlflowClient()
    (name, version, stage) = ModelsArtifactRepository._parse_uri(uri)
    if stage is not None:
        latest = client.get_latest_versions(name, [stage])
        if len(latest) == 0:
            raise MlflowException("No versions of model with name '{name}' and "
                                  "stage '{stage}' found".format(name=name, stage=stage))
        version = latest[0].version
    return client.get_model_version_download_uri(name, version)
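An illustrative call to the method above, assuming it is the `ModelsArtifactRepository.get_underlying_uri` static method from the MLflow code base and that the named model has a version in the Production stage:
from mlflow.store.artifact.models_artifact_repo import ModelsArtifactRepository

# "my-model" is a placeholder registered model name.
download_uri = ModelsArtifactRepository.get_underlying_uri("models:/my-model/Production")
print(download_uri)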
Example no. 8
def main():
    parser = argparse.ArgumentParser(
        description="Execute python scripts in Databricks")
    parser.add_argument("-o",
                        "--output_local_path",
                        help="Output path where the artifacts will be written",
                        required=True)
    parser.add_argument("-m",
                        "--model_name",
                        help="Model Registry Name",
                        required=True)
    args = parser.parse_args()

    model_name = args.model_name
    output_local_path = args.output_local_path

    cli_profile_name = "registry"
    # TODO: Document that we assume that the registry profile will be created in the local machine:
    # dbutils.fs.put(f"file:///root/.databrickscfg", f"[{cli_profile_name}]\nhost={shard}\ntoken={token}",
    #                overwrite=True)

    TRACKING_URI = f"databricks://{cli_profile_name}"
    print(f"TRACKING_URI: {TRACKING_URI}")
    artifact_path = 'model'
    from mlflow.tracking import MlflowClient
    remote_client = MlflowClient(tracking_uri=TRACKING_URI)
    mlflow.set_tracking_uri(TRACKING_URI)
    # client = mlflow.tracking.MlflowClient()
    latest_model = remote_client.get_latest_versions(name=model_name,
                                                     stages=["staging"])
    print(f"Latest Model: {latest_model}")
    run_id = latest_model[0].run_id
    artifact_uri = artifact_utils.get_artifact_uri(run_id)
    print(f"artifact_uri: {artifact_uri}")
    model_uri = f"runs:/{latest_model[0].run_id}/{artifact_path}"
    print(f"model_uri: {model_uri}")

    print(f"Downloading model artifacts to : {output_local_path}")
    remote_client.download_artifacts(run_id=run_id,
                                     path=artifact_path,
                                     dst_path=output_local_path)
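A hypothetical invocation of the script above; the script file name and the output path are placeholders, and a `registry` profile is assumed to exist in `~/.databrickscfg`:
# python download_model_artifacts.py --model_name my_model --output_local_path /tmp/model_artifacts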
Example no. 9
def handle_model_uri(model_uri, service_name):
    """
    Handle the various types of model uris we could receive.

    :param model_uri:
    :type model_uri: str
    :param service_name:
    :type service_name: str
    :return:
    :rtype:
    """
    client = MlflowClient()

    if model_uri.startswith("models:/"):
        model_name = model_uri.split("/")[-2]
        model_stage_or_version = model_uri.split("/")[-1]
        if model_stage_or_version in client.get_model_version_stages(None, None):
            # TODO: Add exception handling for no models found with specified stage
            model_version = client.get_latest_versions(model_name, [model_stage_or_version])[0].version
        else:
            model_version = model_stage_or_version
    elif (model_uri.startswith("runs:/") or model_uri.startswith("file://")) \
            and get_tracking_uri().startswith("azureml") and get_registry_uri().startswith("azureml"):
        # We will register the model for the user
        model_name = service_name + "-model"
        mlflow_model = mlflow_register_model(model_uri, model_name)
        model_version = mlflow_model.version

        _logger.info(
            "Registered an Azure Model with name: `%s` and version: `%s`",
            mlflow_model.name,
            mlflow_model.version,
        )
    else:
        raise MlflowException("Unsupported model uri provided, or tracking or registry uris are not set to "
                              "an AzureML uri.")

    return model_name, model_version
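Hypothetical inputs that exercise the two branches of handle_model_uri above; the names are placeholders, and the second call assumes the tracking and registry URIs point at AzureML:
# models:/ URI: resolves the latest version in the given stage (or accepts a literal version number).
name, version = handle_model_uri("models:/my-model/Staging", "my-service")

# runs:/ URI: registers the run's model under "<service_name>-model" first.
name, version = handle_model_uri("runs:/0123456789abcdef0123456789abcdef/model", "my-service")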
Example no. 10
# Databricks notebook source
import re
from mlflow.tracking import MlflowClient
mlflow_client = MlflowClient()
ci_holder_name = "cet_debris_detection_cicd"
versions = mlflow_client.get_latest_versions(ci_holder_name, stages=["Production"])
assert len(versions) == 1
ci_holder = versions[0]
source_run = mlflow_client.get_run(ci_holder.run_id)
dist_info = [fi for fi in mlflow_client.list_artifacts(source_run.info.run_id, 'dist') if fi.path.endswith('.whl')]
assert len(dist_info) == 1
dist_info = dist_info[0]
lib_path = f"{source_run.info.artifact_uri}/{dist_info.path}"
lib_path = re.sub(r"^dbfs:/", "/dbfs/", lib_path)
job_info = [fi for fi in mlflow_client.list_artifacts(source_run.info.run_id, 'job') if fi.path.endswith('runtime_requirements.txt')]
assert len(job_info) == 1
job_info = job_info[0]
req_path = f"{source_run.info.artifact_uri}/{job_info.path}"
req_path = re.sub(r"^dbfs:/", "/dbfs/", req_path)
print(lib_path)
print(req_path)
# COMMAND ----------

# MAGIC %pip install -r $req_path
# MAGIC %pip install -U $lib_path
Example no. 11
mlflow.register_model(
    f'runs:/{worst_run["run_id"]}/model', model_name
)

# COMMAND ----------

# MAGIC %md 
# MAGIC Now go ahead and observe the model in the Model Registry:
# MAGIC - Click "Models" on the left sidebar
# MAGIC - Find your Model (if your username is "yan_moiseev", you should see it as `sensor_status__yan_moiseev`)
# MAGIC - Click on "Version 1"
# MAGIC - Click on "Stage", transition it to "Production"

# COMMAND ----------
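# The stage transition described in the manual steps above could also be done in
# code; a sketch, assuming `client` is the MlflowClient instance used below and
# that the freshly registered model is version 1:
# client.transition_model_version_stage(name=model_name, version=1, stage="Production")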

production_model = client.get_latest_versions(model_name, ['Production'])[0]
model = mlflow.pyfunc.load_model(production_model.source)

predictions_worst = model.predict(df)

# COMMAND ----------

# MAGIC %md ## 3.2 Now let's add the best run we had and assign the "Production" stage to it automatically

# COMMAND ----------

# Now push the best model run to it
best_run = mlflow.search_runs(experiment_ids=[experiment_id], order_by=['metrics.macro_avg__f1_score desc']).iloc[0]
print('Our best run is:')
print(f'\trun_id={best_run["run_id"]}')
print(f'\tf1_score={best_run["metrics.macro_avg__f1_score"]}')
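# COMMAND ----------

# A sketch of the automatic promotion described in the markdown cell above,
# assuming `client` is an MlflowClient instance and `model_name` is the same
# registered model used earlier in this notebook.
best_version = mlflow.register_model(f'runs:/{best_run["run_id"]}/model', model_name)
client.transition_model_version_stage(name=model_name,
                                      version=best_version.version,
                                      stage="Production")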
Example no. 12
from central_model_registry.feature_engineering import engineer_features, rename_columns

# use central model registry
scope = "demo-cmr"
key = "cmr"
registry_uri = 'databricks://' + scope + ':' + key
mlflow.set_registry_uri(registry_uri)

model_name = "demo-cmr"
input_path = dbutils.widgets.get("input_path")
output_path = dbutils.widgets.get("output_path")

# COMMAND ----------

client = MlflowClient()
model_udf = mlflow.pyfunc.spark_udf(spark, f"models:/{model_name}/Production")
model_version = client.get_latest_versions(
    model_name, stages=["Production"])[0].version

raw_data = spark.read.format("csv").option("header", "true").option(
    "sep", ";").load(input_path).drop("quality")
features = engineer_features(raw_data)
data = rename_columns(features)

preds = (data.withColumn("prediction", model_udf(*data.columns)).withColumn(
    "model_version", F.lit(model_version)))

preds.write.format("delta").mode("overwrite").save(output_path)

# COMMAND ----------
Example no. 13
    with mlflow.start_run(run_name="Vader Sentiment Analysis"):
        mlflow.log_param("algorithm", "VADER")
        mlflow.log_param("total_sentiments", len(INPUT_TEXTS))
        mlflow.pyfunc.log_model(artifact_path=model_path,
                                python_model=vader_model,
                                registered_model_name=reg_model_name)
    # Load the model from the model registry
    model_uri = f"models:/{reg_model_name}/1"
    loaded_model = mlflow.pyfunc.load_model(model_uri)

    # Use inference to predict output from the customized PyFunc model
    for i, text in enumerate(INPUT_TEXTS):
        text = INPUT_TEXTS[i]['text']
        m_input = pd.DataFrame([text])
        scores = loaded_model.predict(m_input)
        print(f"<{text}> -- {str(scores[0])}")

    # If required, promote the model to staging
    client = MlflowClient()
    versions = client.get_latest_versions(reg_model_name, stages=["Staging"])

    # If not in staging, promote to staging
    print("--")
    if not versions:
        mv = client.transition_model_version_stage(reg_model_name, 1,
                                                   "Staging")
        print_model_version_info(mv)
    else:
        for version in versions:
            print_model_version_info(version)
Example no. 14
        mlflow.sklearn.log_model(rfr, artifact_path="sklearn-model")

    with mlflow.start_run() as run2:
        params = {"n_estimators": 6, "random_state": 42}
        rfr = RandomForestRegressor(**params).fit([[0, 1]], [1])
        mlflow.log_params(params)
        mlflow.sklearn.log_model(rfr, artifact_path="sklearn-model")

    # Register model name in the model registry
    name = "RandomForestRegression"
    client = MlflowClient()
    client.create_registered_model(name)

    # Create two versions of the rfr model under the registered model name
    for run_id in [run1.info.run_id, run2.info.run_id]:
        model_uri = "runs:/{}/sklearn-model".format(run_id)
        mv = client.create_model_version(name, model_uri, run_id)
        print("model version {} created".format(mv.version))
    print("--")

    # Fetch latest version; this will be version 2
    models = client.get_latest_versions(name, stages=["None"])
    print_models_info(models)
    print("--")

    # Delete the latest model version 2
    print("Deleting model version {}\n".format(mv.version))
    client.delete_model_version(name, mv.version)
    models = client.get_latest_versions(name, stages=["None"])
    print_models_info(models)
Example no. 15
    mlflow.set_tracking_uri("sqlite:///mlruns.db")

    # Create two runs and log MLflow entities
    with mlflow.start_run() as run1:
        params = {"n_estimators": 3, "random_state": 42}
        rfr = RandomForestRegressor(**params).fit([[0, 1]], [1])
        mlflow.log_params(params)
        mlflow.sklearn.log_model(rfr, artifact_path="sklearn-model")

    with mlflow.start_run() as run2:
        params = {"n_estimators": 6, "random_state": 42}
        rfr = RandomForestRegressor(**params).fit([[0, 1]], [1])
        mlflow.log_params(params)
        mlflow.sklearn.log_model(rfr, artifact_path="sklearn-model")

    # Register model name in the model registry
    name = "RandomForestRegression"
    client = MlflowClient()
    client.create_registered_model(name)

    # Create two versions of the rfr model under the registered model name
    for run_id in [run1.info.run_id, run2.info.run_id]:
        model_uri = "runs:/{}/sklearn-model".format(run_id)
        mv = client.create_model_version(name, model_uri, run_id)
        print("model version {} created".format(mv.version))

    # Fetch latest version; this will be version 2
    print("--")
    print_models_info(client.get_latest_versions(name, stages=["None"]))