def log_production_model(config_path):
    """Promote the registered model version with the lowest MAE to Production.

    The configuration is read with ``read_params``; the registry URI and the
    registered model name come from its ``mlflow_config`` section. The run
    with the smallest ``metrics.mae`` is looked up, the registered model
    version created from that run is moved to the ``Production`` stage and
    every other version of the same registered model is moved to ``Staging``.
    The production model is then loaded as a pyfunc model and dumped with
    joblib to the path stored under ``webapp_model_dir``.

    Parameters
    ----------
    config_path :
        Location of the parameter file, passed unchanged to ``read_params``.

    Raises
    ------
    RuntimeError
        If no registered version of the model was created by the best run.
    """
    config = read_params(config_path)
    mlflow_config = config["mlflow_config"]
    model_name = mlflow_config["registered_model_name"]
    remote_server_uri = mlflow_config["remote_server_uri"]

    mlflow.set_registry_uri(remote_server_uri)

    # NOTE(review): the experiment id is hard-coded to 1 - confirm this is the
    # experiment the training stage logs to.
    runs = mlflow.search_runs(experiment_ids=1)

    # Positional access is required here: after sorting, ``[0]`` would look up
    # the row *labelled* 0 (the first run returned by the search), not the
    # smallest value.
    lowest = runs["metrics.mae"].sort_values(ascending=True).iloc[0]
    lowest_run_id = runs[runs["metrics.mae"] == lowest]["run_id"].iloc[0]

    client = MlflowClient()
    logged_model = None
    for mv in client.search_model_versions(f"name='{model_name}'"):
        mv = dict(mv)
        current_version = mv["version"]

        if mv["run_id"] == lowest_run_id:
            logged_model = mv["source"]
            pprint(mv, indent=4)
            stage = "Production"
        else:
            stage = "Staging"

        client.transition_model_version_stage(
            name=model_name,
            version=current_version,
            stage=stage,
        )

    if logged_model is None:
        # Previously this surfaced as an unbound-local error further down.
        raise RuntimeError(
            f"No registered version of '{model_name}' was created by run "
            f"{lowest_run_id}"
        )

    loaded_model = mlflow.pyfunc.load_model(logged_model)
    model_path = config["webapp_model_dir"]
    joblib.dump(loaded_model, model_path)
def test_save_mlflow_model_run(self):
    """Deploy a registered MLflow model that lives behind a tracking server URI.

    A linear regression is logged and registered inside an MLflow run, then
    stored in omegaml by reference (``mlflow+models://``) and retrieved
    again; predictions through the runtime must equal those of the model
    loaded directly.
    """
    import mlflow

    mlflow.set_tracking_uri(self.mlflow_tracking_db)
    mlflow.set_registry_uri(self.mlflow_registry_db)

    # training data: y = 2x + 3
    features = pd.Series(range(0, 10))
    targets = pd.Series(features) * 2 + 3
    regressor = LinearRegression()

    with mlflow.start_run() as run:
        regressor.fit(reshaped(features), reshaped(targets))
        mlflow.sklearn.log_model(
            sk_model=regressor,
            artifact_path='sklearn-model',
            registered_model_name='sklearn-model',
        )

    # pretend we are in a fresh session on another device
    # (the tracking URI is expected to come from the repo)
    mlflow.set_tracking_uri(None)
    om = self.om

    # store the model as a reference to the MLflow tracking server
    stored_meta = om.models.put('mlflow+models://sklearn-model/1', 'sklearn-model')
    self.assertEqual(stored_meta.kind, MLFlowRegistryBackend.KIND)

    # pretend this is a new mlflow session
    restored = om.models.get('sklearn-model')
    self.assertIsInstance(restored, mlflow.pyfunc.PyFuncModel)

    expected = restored.predict(reshaped(features))
    actual = om.runtime.model('sklearn-model').predict(features).get()
    assert_array_equal(actual, expected)
def test_save_mlflow_model_run_file_tracking(self):
    """Storing a registry reference must fail when tracking uses a file path.

    File-path tracking is not supported as a model registry, so the ``put``
    is expected to raise and leave no model behind.
    """
    import mlflow

    omega = self.om

    # No tracking URI is set, so mlflow falls back to a local file path and
    # refuses to use that file path as a model registry.
    mlflow.set_tracking_uri(None)
    mlflow.set_registry_uri(None)

    with self.assertRaises(MlflowException):
        omega.models.put('mlflow+models://sklearn-model/1', 'sklearn-model')

    self.assertNotIn('sklearn-model', omega.models.list())
def test_register_model_raises_exception_with_unsupported_registry_store():
    """
    This test case ensures that the `register_model` operation fails with an
    informative error message when the registry store URI refers to a store
    that does not support Model Registry features (e.g., FileStore).

    The registry URI that was active before the test is restored afterwards,
    whether or not the assertions pass.
    """
    with TempDir() as tmp:
        old_registry_uri = get_registry_uri()
        try:
            # Point the registry at a plain directory (a file-based store).
            set_registry_uri(tmp.path())
            with pytest.raises(MlflowException) as exc:
                register_model(model_uri="runs:/1234/some_model", name="testmodel")
            # Must run AFTER the `pytest.raises` block has exited: statements
            # placed inside the block after the raising call never execute,
            # and `exc.value` is only populated once the context has closed.
            assert exc.value.error_code == ErrorCode.Name(FEATURE_DISABLED)
        finally:
            set_registry_uri(old_registry_uri)
def __init__(
    self,
    experiment: str,
    run: Optional[str] = None,
    tracking_uri: Optional[str] = None,
    registry_uri: Optional[str] = None,
) -> None:
    """Store the logger settings and configure the global mlflow session.

    Args:
        experiment: experiment name handed to ``mlflow.set_experiment``.
        run: run name forwarded to ``_get_or_start_run``.
        tracking_uri: value handed to ``mlflow.set_tracking_uri``.
        registry_uri: value handed to ``mlflow.set_registry_uri``.
    """
    self._multistage = False
    self.experiment, self.run = experiment, run
    self.tracking_uri, self.registry_uri = tracking_uri, registry_uri

    # mlflow keeps these settings globally; apply them in a fixed order
    # before the run is obtained.
    for configure, value in (
        (mlflow.set_tracking_uri, self.tracking_uri),
        (mlflow.set_registry_uri, self.registry_uri),
        (mlflow.set_experiment, self.experiment),
    ):
        configure(value)

    _get_or_start_run(run_name=self.run)
def __init__(
    self,
    tracking_uri,
    experiment,
    run_name,
    save_models=False,
    registry_uri=None,
    tags=None,
):
    """Configure mlflow and remember the logging settings.

    Parameters
    ----------
    tracking_uri: str
        Address of local or remote tracking server.
    experiment: str
        Case sensitive name of an experiment to be activated.
    run_name: str
        Name of new run (stored as a mlflow.runName tag).
    save_models: bool, default=False
        If ``True``, it will log the estimator into mlflow artifacts
    registry_uri: str, default=None
        Address of local or remote model registry server.
    tags: dict, default=None
        Dictionary of tag_name: String -> value.
    """
    self.tracking_uri = tracking_uri
    self.experiment = experiment
    self.run_name = run_name
    self.save_models = save_models
    self.tags = tags
    self.registry_uri = registry_uri

    mlflow.set_registry_uri(self.registry_uri)
    mlflow.set_tracking_uri(self.tracking_uri)

    # The client resolves its tracking/registry URIs when it is constructed,
    # so it has to be created only after the URIs above are in place;
    # otherwise it would talk to whatever server was configured before.
    self.client = mlflow.tracking.MlflowClient()

    mlflow.set_experiment(self.experiment)
    self.experiment_id = mlflow.get_experiment_by_name(
        self.experiment).experiment_id

    if self.tags is not None:
        mlflow.set_tags(self.tags)
def __init__(
    self,
    experiment: str,
    run: Optional[str] = None,
    tracking_uri: Optional[str] = None,
    registry_uri: Optional[str] = None,
    exclude: Optional[List[str]] = None,
    log_batch_metrics: bool = SETTINGS.log_batch_metrics,
    log_epoch_metrics: bool = SETTINGS.log_epoch_metrics,
) -> None:
    """Initialise the base logger, keep the settings and set up mlflow.

    Args:
        experiment: experiment name handed to ``mlflow.set_experiment``.
        run: run name forwarded to ``_get_or_start_run``.
        tracking_uri: value handed to ``mlflow.set_tracking_uri``.
        registry_uri: value handed to ``mlflow.set_registry_uri``.
        exclude: stored on the instance as ``self.exclude``.
        log_batch_metrics: forwarded to the parent initialiser.
        log_epoch_metrics: forwarded to the parent initialiser.
    """
    metric_flags = {
        "log_batch_metrics": log_batch_metrics,
        "log_epoch_metrics": log_epoch_metrics,
    }
    super().__init__(**metric_flags)

    self.exclude = exclude
    self.experiment, self.run = experiment, run
    self.tracking_uri, self.registry_uri = tracking_uri, registry_uri

    # global mlflow configuration, then make sure a run is available
    mlflow.set_tracking_uri(self.tracking_uri)
    mlflow.set_registry_uri(self.registry_uri)
    mlflow.set_experiment(self.experiment)
    _get_or_start_run(run_name=self.run)
pd.DataFrame([skrf_val_metrics, skrf_test_metrics], index=["validation", "test"])) # COMMAND ---------- # MAGIC %md # MAGIC ## Automatically push model to registry # COMMAND ---------- import time from mlflow.entities.model_registry.model_version_status import ModelVersionStatus artifact_name = "model" artifact_uri = f"runs:/{mlflow_run.info.run_id}/{artifact_name}" mlflow.set_registry_uri(cmr_uri) registered_mdl = mlflow.register_model(artifact_uri, model_name) # Wait until the model is ready def wait_until_ready(model_name, model_version): client = MlflowClient(registry_uri=cmr_uri) for _ in range(20): model_version_details = client.get_model_version( name=model_name, version=model_version, ) status = ModelVersionStatus.from_string(model_version_details.status) print("Model status: %s" % ModelVersionStatus.to_string(status)) if status == ModelVersionStatus.READY: break
#
# Code snippet for https://mlflow.org/docs/latest/python_api/mlflow.html#set_registry_uri
#
import warnings

import mlflow

if __name__ == "__main__":
    warnings.filterwarnings("ignore")
    print(mlflow.__version__)

    # Point the model registry at a SQLite database; the tracking URI is
    # left untouched.
    mlflow.set_registry_uri("sqlite:////tmp/registry.db")

    # Read both settings back before reporting them.
    registry_location = mlflow.get_registry_uri()
    tracking_location = mlflow.get_tracking_uri()

    print("Current registry uri={}".format(registry_location))
    print("Current tracking uri: {}".format(tracking_location))

    # The two URIs are configured independently and must differ here.
    assert registry_location != tracking_location
from central_model_registry.feature_engineering import engineer_features, rename_columns from central_model_registry.mlflow_utils import register_best_model experiment_name = "/experiments/central-model-registry/v1" model_name = "demo-cmr" input_data_path = "dbfs:/databricks-datasets/wine-quality/winequality-red.csv" now = datetime.now() parent_run_name = now.strftime("%Y%m%d-%H%M") # use central model registry scope = "demo-cmr" key = "cmr" registry_uri = "databricks://" + scope + ":" + key mlflow.set_registry_uri(registry_uri) # COMMAND ---------- def evaluate_hyperparams_wrapper(X_train, X_test, y_train, y_test): def evaluate_hyperparams(params): min_samples_leaf = int(params['min_samples_leaf']) max_depth = params['max_depth'] n_estimators = int(params['n_estimators']) rf = RandomForestRegressor( max_depth=max_depth, min_samples_leaf=min_samples_leaf, n_estimators=n_estimators, )
def _enable_mlflow_exit_handling(self):
    """Restore the tracking URI so mlflow's exit handler does not error.

    See https://github.com/mlflow/mlflow/issues/3755. The registry URI is
    reset to ``None`` as well.
    """
    tracking_db = self.mlflow_tracking_db
    mlflow.set_tracking_uri(tracking_db)
    mlflow.set_registry_uri(None)