class ModelsClient:
    """Small wrapper around the MLflow model registry for PyTorch models."""

    def __init__(self):
        """
        Publish the MLflow connection settings (S3 endpoint, AWS credentials,
        tracking server) as environment variables and create the client.
        """
        os.environ.update(
            {
                "MLFLOW_S3_ENDPOINT_URL": MLFLOW_S3_ENDPOINT_URL,
                "AWS_ACCESS_KEY_ID": AWS_ACCESS_KEY_ID,
                "AWS_SECRET_ACCESS_KEY": AWS_SECRET_ACCESS_KEY,
                "MLFLOW_TRACKING_URI": MLFLOW_TRACKING_URI,
            }
        )
        # MlflowClient() is created only after the environment is populated.
        self._client = MlflowClient()

    def log_model(self, name, checkpoint):
        """Log ``checkpoint`` with the PyTorch flavor and register it as ``name``."""
        mlflow.pytorch.log_model(
            pytorch_model=checkpoint,
            artifact_path="pytorch-model",
            registered_model_name=name,
        )

    def load_model(self, name, version):
        """Return the registered model ``name`` at ``version`` loaded via pyfunc."""
        registry_uri = f"models:/{name}/{version}"
        return mlflow.pyfunc.load_model(model_uri=registry_uri)

    def update_model(self, name, new_name):
        """Rename the registered model ``name`` to ``new_name``."""
        self._client.rename_registered_model(new_name=new_name, name=name)

    def delete_model(self, name, version):
        """Delete ``version`` of the registered model ``name``."""
        self._client.delete_model_version(version=version, name=name)
from mlflow.tracking import MlflowClient
import warnings

if __name__ == "__main__":
    # Delete one version of a registered model from a local SQLite-backed
    # registry, then list the versions that remain for that same model.
    warnings.filterwarnings("ignore")
    print(mlflow.__version__)

    if len(sys.argv) != 2:
        print("Usage: Need version number for the model")
        sys.exit(1)

    # set the tracking server to be localhost with sqlite as tracking store
    local_registry = "sqlite:///mlruns.db"
    mlflow.set_tracking_uri(local_registry)
    print(f"Running local model registry={local_registry}")

    # Single source of truth for the model name, so the deletion and the
    # follow-up listing always refer to the same registered model.
    model_name = "WeatherForecastModel"
    version = int(sys.argv[1])

    client = MlflowClient()
    client.delete_model_version(name=model_name, version=version)

    # Show what is left for the model that was just modified.
    print("=" * 80)
    for mv in client.search_model_versions(f"name='{model_name}'"):
        pprint.pprint(dict(mv), indent=4)
class ModelsClient():
    """Wrapper around MLflow tracking and the model registry for training jobs."""

    def __init__(self, tracking_uri=None, registry_uri=None):
        """
        Set up mlflow server connection, including: s3 endpoint, aws, tracking server

        :param tracking_uri: Used for MLFLOW_TRACKING_URI when truthy,
            otherwise the module-level MLFLOW_TRACKING_URI is used.
        :param registry_uri: Used for MLFLOW_S3_ENDPOINT_URL when truthy,
            otherwise the module-level MLFLOW_S3_ENDPOINT_URL is used.
        """
        # The URLs are published through environment variables, so there is
        # no need to set them again via MlflowClient() or
        # mlflow.set_tracking_uri().
        # NOTE(review): registry_uri is exported as the S3 endpoint URL, not
        # as an MLflow registry URI - confirm this is intended.
        os.environ["MLFLOW_S3_ENDPOINT_URL"] = registry_uri or MLFLOW_S3_ENDPOINT_URL
        os.environ["AWS_ACCESS_KEY_ID"] = AWS_ACCESS_KEY_ID
        os.environ["AWS_SECRET_ACCESS_KEY"] = AWS_SECRET_ACCESS_KEY
        os.environ["MLFLOW_TRACKING_URI"] = tracking_uri or MLFLOW_TRACKING_URI
        self.client = MlflowClient()

    def start(self):
        """
        1. Start a new Mlflow run
        2. Direct the logging of the artifacts and metadata
            to the Run named "worker_i" under Experiment "job_id"
        3. If in distributed training, worker and job id would be parsed
            from environment variable
        4. If in local training, worker and job id will be generated.

        :return: Active Run
        """
        experiment_name = get_job_id()
        run_name = get_worker_index()
        experiment_id = self._get_or_create_experiment(experiment_name)
        return mlflow.start_run(run_name=run_name, experiment_id=experiment_id)

    def log_param(self, key, value):
        """Log a single parameter on the active run."""
        mlflow.log_param(key, value)

    def log_params(self, params):
        """Log a batch of parameters on the active run."""
        mlflow.log_params(params)

    def log_metric(self, key, value, step=None):
        """Log a single metric, optionally at ``step``, on the active run."""
        mlflow.log_metric(key, value, step)

    def log_metrics(self, metrics, step=None):
        """Log a batch of metrics, optionally at ``step``, on the active run."""
        mlflow.log_metrics(metrics, step)

    def log_model(self, name, checkpoint):
        """Log ``checkpoint`` with the PyTorch flavor and register it as ``name``."""
        mlflow.pytorch.log_model(registered_model_name=name,
                                 pytorch_model=checkpoint,
                                 artifact_path="pytorch-model")

    def load_model(self, name, version):
        """Return the registered model ``name`` at ``version`` loaded via pyfunc."""
        return mlflow.pyfunc.load_model(model_uri=f"models:/{name}/{version}")

    def update_model(self, name, new_name):
        """Rename the registered model ``name`` to ``new_name``."""
        self.client.rename_registered_model(name=name, new_name=new_name)

    def delete_model(self, name, version):
        """Delete ``version`` of the registered model ``name``."""
        self.client.delete_model_version(name=name, version=version)

    def _get_or_create_experiment(self, experiment_name):
        """
        Return the id of the experiment named ``experiment_name``.
        If it does not exist, create it; otherwise return the existing one.

        :return: Experiment id
        :raises MlflowException: if the experiment can neither be found
            nor created.
        """
        experiment = mlflow.get_experiment_by_name(experiment_name)
        if experiment is not None:
            return experiment.experiment_id
        try:
            # create_experiment returns the id of the new experiment.
            return mlflow.create_experiment(name=experiment_name)
        except MlflowException:
            # Another worker of the same job may have created the experiment
            # between the lookup and the create call; use theirs if so.
            experiment = mlflow.get_experiment_by_name(experiment_name)
            if experiment is None:
                raise
            return experiment.experiment_id
# Databricks notebook source
# Cleanup notebook: removes every version of a registered model and every
# run of the experiment stored at `path`.
from mlflow.tracking import MlflowClient

client = MlflowClient()

# COMMAND ----------

path = "/Users/[email protected]/customers/rolls-royce/02_Introduction-to-tracking"
model_name = "random-forest-model"

# COMMAND ----------

# Reset each version's stage to "None" before deleting it.
# NOTE(review): presumably required because versions in an active stage
# cannot be deleted - confirm against the registry in use.
for mv in client.search_model_versions(f"name='{model_name}'"):
    client.transition_model_version_stage(name=mv.name, version=mv.version, stage="None")
    client.delete_model_version(name=mv.name, version=mv.version)

# COMMAND ----------

# Look the experiment up directly by name instead of scanning every
# experiment, and fail with a clear message when it does not exist.
experiment = client.get_experiment_by_name(path)
if experiment is None:
    raise ValueError(f"No MLflow experiment found at {path!r}")
experimentID = experiment.experiment_id
runs = spark.read.format("mlflow-experiment").load(experimentID)

# COMMAND ----------

for run in runs.collect():
    client.delete_run(run.run_id)

# COMMAND ----------
# Get a list of all registered models
print("List of all registered models")
print("=" * 80)
# pprint.pprint() writes to stdout itself and returns None, so it must not be
# wrapped in print(); a plain loop also avoids building a throwaway list.
for rm in client.list_registered_models():
    pprint.pprint(dict(rm), indent=4)

# Get a list of specific versions of the named models
print(f"List of Model = {model_name} and Versions")
print("=" * 80)
for mv in client.search_model_versions("name='sk-learn-random-forest-reg-model'"):
    pprint.pprint(dict(mv), indent=4)

# Remove version 1, then list the versions again to show what remains.
client.delete_model_version(name="sk-learn-random-forest-reg-model", version=1)

print("=" * 80)
for mv in client.search_model_versions("name='sk-learn-random-forest-reg-model'"):
    pprint.pprint(dict(mv), indent=4)

client.delete_registered_model(model_name)

#
# Check that everything has been removed from the registry
#
print("=" * 80)
for rm in client.list_registered_models():
    pprint.pprint(dict(rm), indent=4)
# Databricks notebook source
from mlflow.tracking import MlflowClient

client = MlflowClient()

# COMMAND ----------

model_name = "spark-lr-model"

# COMMAND ----------

# Delete the individual versions first. This has to run before the registered
# model itself is deleted: once the model is gone its versions no longer
# exist, and these calls would fail when the notebook is run top to bottom.
versions = [1, 2, 3]
for version in versions:
    client.delete_model_version(name=model_name, version=version)

# COMMAND ----------

# Delete a registered model along with all its versions
client.delete_registered_model(name=model_name)

# COMMAND ----------
# NOTE(review): this first call is presumably the tail of an earlier
# `with mlflow.start_run() as run1:` block that lies outside this view
# (`run1` is referenced below) - confirm its indentation in the full file.
mlflow.sklearn.log_model(rfr, artifact_path="sklearn-model")

# Second run: fit a small random forest, then log its parameters and model.
with mlflow.start_run() as run2:
    params = {"n_estimators": 6, "random_state": 42}
    rfr = RandomForestRegressor(**params).fit([[0, 1]], [1])
    mlflow.log_params(params)
    mlflow.sklearn.log_model(rfr, artifact_path="sklearn-model")

# Register model name in the model registry
name = "RandomForestRegression"
client = MlflowClient()
client.create_registered_model(name)

# Create two versions of the rfr model under the registered model name,
# one from each run's logged "sklearn-model" artifact.
for run_id in [run1.info.run_id, run2.info.run_id]:
    model_uri = "runs:/{}/sklearn-model".format(run_id)
    mv = client.create_model_version(name, model_uri, run_id)
    print("model version {} created".format(mv.version))
print("--")

# Fetch latest version; this will be version 2
models = client.get_latest_versions(name, stages=["None"])
print_models_info(models)
print("--")

# Delete the latest model version 2.
# `mv` still refers to the last version created by the loop above.
print("Deleting model version {}\n".format(mv.version))
client.delete_model_version(name, mv.version)

# List the latest versions again to show the effect of the deletion.
models = client.get_latest_versions(name, stages=["None"])
print_models_info(models)