def _save(self, data: MetricsDict) -> None:
    """Save given MLflow metrics dataset and log it in MLflow as metrics.

    Args:
        data (MetricsDict): MLflow metrics dataset.
    """
    client = MlflowClient()
    try:
        run_id = self.run_id
    except DataSetError:
        # If run_id can't be found, log_metric will create a new run.
        run_id = None

    log_metric = (
        partial(client.log_metric, run_id)
        if run_id is not None
        else mlflow.log_metric
    )
    metrics = (
        self._build_args_list_from_metric_item(k, v) for k, v in data.items()
    )
    for k, v, i in chain.from_iterable(metrics):
        log_metric(k, v, step=i)
def print_experiment_details(experiment_id, run_id):
    """
    Print experiment run info and the details of a specific run.

    :param experiment_id: MLflow experiment ID
    :param run_id: MLflow run ID within an experiment
    :return: None
    """
    print("Finished MLflow Run with run_id {} and experiment_id {}".format(
        run_id, experiment_id))

    # Use the MlflowClient API to list experiments and run info
    client = MlflowClient()
    print("=" * 80)

    # Get a list of all experiments
    print("List of all Experiments")
    print("=" * 80)
    for exp in client.list_experiments():
        pprint.pprint(dict(exp), indent=4)

    print("=" * 80)
    print(f"List Run info for run_id={run_id}")
    pprint.pprint(dict(mlflow.get_run(run_id)))
def test_metric_name(tmpdir: py.path.local) -> None:
    tracking_file_name = "file:{}".format(tmpdir)
    metric_name = "my_metric_name"

    mlflc = MLflowCallback(tracking_uri=tracking_file_name, metric_name=metric_name)
    study = optuna.create_study(study_name="my_study")
    study.optimize(_objective_func, n_trials=3, callbacks=[mlflc])

    mlfl_client = MlflowClient(tracking_file_name)
    experiments = mlfl_client.list_experiments()
    experiment = experiments[0]
    experiment_id = experiment.experiment_id

    run_infos = mlfl_client.list_run_infos(experiment_id)
    first_run_id = run_infos[0].run_id
    first_run = mlfl_client.get_run(first_run_id)
    first_run_dict = first_run.to_dictionary()

    assert metric_name in first_run_dict["data"]["metrics"]
def __init__(self, name, verbose=False, artifacts_folder='.'):
    self._name = name
    self._params = dict()
    self._artifacts = list()
    self._metrics = dict()
    self._verbose = verbose
    self._client = MlflowClient()
    self._n_partitions = None
    self._n_samples = None
    self._tags = {}
    self._imgs_path = None
    self._model_path = None
    self._artifacts_folders = None
    self.create_artifacts_folders(artifacts_folder)
    self._actual_experiment_id = _get_experiment_id()

    logging.basicConfig(
        format='Date-Time : %(asctime)s : Line No. : %(lineno)d - %(message)s',
        level=logging.DEBUG)
    self.logger = logging.getLogger(__name__)

    if self._verbose:
        msg = 'New MlLogs object created.'
        self.logger.info(msg)
def _save(self, data: float):
    if self._logging_activated:
        self._validate_run_id()
        # we access it once instead of calling self.run_id everywhere
        # to avoid looking for an active run each time
        run_id = self.run_id

        mlflow_client = MlflowClient()

        # get the metric history if it has been saved previously to ensure
        # that the right data is retrieved
        # reminder: this is True even if no run_id was originally specified but a run is active
        metric_history = (
            mlflow_client.get_metric_history(run_id=run_id, key=self.key)
            if self._exists()
            else []
        )

        save_args = deepcopy(self._save_args)
        step = save_args.pop("step", None)
        if step is None:
            if self.mode == "overwrite":
                step = max([metric.step for metric in metric_history], default=0)
            elif self.mode == "append":
                # the max() default is -1 so that the default "step" equals 0
                step = max([metric.step for metric in metric_history], default=-1) + 1
            else:
                raise ValueError(
                    f"save_args['mode'] must be one of {self.SUPPORTED_SAVE_MODES}, "
                    f"got '{self.mode}' instead."
                )

        mlflow_client.log_metric(
            run_id=run_id,
            key=self.key,
            value=data,
            step=step,
            **save_args,
        )
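# Standalone sketch (illustrative values only, not part of the dataset class above)
# of how the "step" above is derived from an existing metric history:
# "overwrite" re-logs at the latest existing step, while "append" logs one step later.
existing_steps = [0, 1, 2]  # steps already recorded for this metric key

step_overwrite = max(existing_steps, default=0)     # -> 2
step_append = max(existing_steps, default=-1) + 1   # -> 3
step_append_empty = max([], default=-1) + 1         # -> 0 for a brand-new metric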
def log_production_model(config_path):
    config = read_params(config_path)
    mlflow_config = config["mlflow_config"]
    model_name = mlflow_config["registered_model_name"]
    remote_server_uri = mlflow_config["remote_server_uri"]
    mlflow.set_tracking_uri(remote_server_uri)

    # search all runs of the experiment with ID 1
    runs = mlflow.search_runs(experiment_ids=["1"])
    lowest = runs["metrics.mae"].sort_values(ascending=True).iloc[0]
    lowest_run_id = runs[runs["metrics.mae"] == lowest]["run_id"].iloc[0]

    client = MlflowClient()
    for mv in client.search_model_versions(f"name='{model_name}'"):
        mv = dict(mv)
        if mv["run_id"] == lowest_run_id:
            current_version = mv["version"]
            logged_model = mv["source"]
            pprint(mv, indent=4)
            client.transition_model_version_stage(
                name=model_name, version=current_version, stage="Production")
        else:
            current_version = mv["version"]
            client.transition_model_version_stage(
                name=model_name, version=current_version, stage="Staging")

    loaded_model = mlflow.pyfunc.load_model(logged_model)
    model_path = config["webapp_model_dir"]  # "prediction_service/model"
    joblib.dump(loaded_model, model_path)
def test_is_versioned_dataset_logged_correctly_in_mlflow(tmp_path, tracking_uri, df1):
    """Check that a versioned dataset is logged correctly in MLflow as an artifact.

    For versioned datasets, only artifacts from the current run should be logged.
    """
    mlflow.set_tracking_uri(tracking_uri.as_uri())
    mlflow_client = MlflowClient(tracking_uri=tracking_uri.as_uri())

    with mlflow.start_run():
        run_id = mlflow.active_run().info.run_id

        mlflow_csv_dataset = MlflowArtifactDataSet(
            data_set=dict(
                type=CSVDataSet,
                filepath=(tmp_path / "df1.csv").as_posix(),
                versioned=True,
            ),
            # run_id=run_id,
        )
        mlflow_csv_dataset.save(df1)

        run_artifacts = [
            fileinfo.path
            for fileinfo in mlflow_client.list_artifacts(run_id=run_id)
        ]

        # Check that exactly one artifact was created in the given run.
        assert len(run_artifacts) == 1

        artifact_path = mlflow_client.download_artifacts(
            run_id=run_id, path=run_artifacts[0])

        # Check that the saved artifact is a file and not the folder
        # where versioned datasets are stored.
        assert Path(artifact_path).is_file()
        assert df1.equals(mlflow_csv_dataset.load())  # and must be loadable
def configure(
    self,
    run_uuid,
    experiment_name,
    tracking_uri,
    always_log_artifacts=False,
    create_run=True,
    create_experiment=True,
    nest_run=True,
):
    if mlflow.active_run() and not nest_run:
        logger.info('Ending previous MLFlow run: {}.'.format(self.run_uuid))
        mlflow.end_run()

    self.always_log_artifacts = always_log_artifacts
    self._experiment_name = experiment_name

    # MLflow specific
    if tracking_uri:
        mlflow.set_tracking_uri(tracking_uri)

    if run_uuid:
        existing_run = MlflowClient().get_run(run_uuid)
        if not existing_run and not create_run:
            raise FileNotFoundError(
                'Run ID {} not found under {}'.format(
                    run_uuid, mlflow.get_tracking_uri()
                )
            )

    experiment_id = self._retrieve_mlflow_experiment_id(
        experiment_name, create=create_experiment
    )

    return mlflow.start_run(
        run_uuid=run_uuid, experiment_id=experiment_id, nested=nest_run
    )
def get_best_runs_by_model(experience_name):
    client = MlflowClient()
    experiments = client.list_experiments()
    experiment = next(ex for ex in experiments if ex.name == experience_name)
    experiment_id = experiment.experiment_id

    best_runs_by_model = []
    for model in models:
        for params in product_dict(**model["params"]):
            filter_string = f'params.model="{model["name"]}"'
            for param_key, param_value in params.items():
                filter_string += f' and params.{param_key}="{param_value}"'

            runs = client.search_runs(
                experiment_id,
                filter_string,
                order_by=["metric.auc DESC"],
                max_results=1,
            )
            if not runs:
                continue

            run = runs[0]
            run_name = run.data.tags["mlflow.runName"]
            for idx, (param_key, param_value) in enumerate(params.items()):
                run_name += " | " if idx == 0 else ", "
                run_name += f"{param_key}={param_value}"

            best_runs_by_model.append({"name": run_name, "run": runs[0]})

    return best_runs_by_model
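# Standalone sketch (hypothetical model name and params, not taken from the code above)
# of the filter string the loop in get_best_runs_by_model builds for client.search_runs:
example_model = {"name": "xgboost"}
example_params = {"max_depth": 5, "eta": 0.1}

example_filter = f'params.model="{example_model["name"]}"'
for param_key, param_value in example_params.items():
    example_filter += f' and params.{param_key}="{param_value}"'

# -> 'params.model="xgboost" and params.max_depth="5" and params.eta="0.1"'
print(example_filter)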
def test_create_model_version_explicitly_set_run_link(mock_registry_store):
    run_id = "runid"
    run_link = "my-run-link"
    hostname = "https://workspace.databricks.com/"
    workspace_id = "10002"
    mock_registry_store.create_model_version.return_value = ModelVersion(
        "name", 1, 0, 1, source="source", run_id=run_id, run_link=run_link)

    # mocks to make sure that even if you're in a notebook, this setting is respected.
    with mock.patch(
        "mlflow.tracking.client.is_in_databricks_notebook", return_value=True
    ), mock.patch(
        "mlflow.tracking.client.get_workspace_info_from_dbutils",
        return_value=(hostname, workspace_id),
    ):
        client = MlflowClient(tracking_uri="databricks", registry_uri="otherplace")
        model_version = client.create_model_version(
            "name", "source", "runid", run_link=run_link)
        assert model_version.run_link == run_link
        # verify that the store was provided with the explicitly passed-in run link
        mock_registry_store.create_model_version.assert_called_once_with(
            "name", "source", "runid", [], run_link, None)
def test_hyperopt_ray_mlflow(csv_filename, tmpdir):
    with ray_start_4_cpus():
        mlflow_uri = f"file://{tmpdir}/mlruns"
        mlflow.set_tracking_uri(mlflow_uri)
        client = MlflowClient(tracking_uri=mlflow_uri)

        num_samples = 2
        config = _get_config(
            {"type": "ray", "num_samples": num_samples}, {"type": "ray"})

        rel_path = generate_data(
            config["input_features"], config["output_features"], csv_filename)

        exp_name = "mlflow_test"
        run_hyperopt(
            config,
            rel_path,
            experiment_name=exp_name,
            callbacks=[MlflowCallback(mlflow_uri)],
        )

        experiment = client.get_experiment_by_name(exp_name)
        assert experiment is not None

        runs = client.search_runs([experiment.experiment_id])
        assert len(runs) > 0

        for run in runs:
            artifacts = [f.path for f in client.list_artifacts(run.info.run_id, "")]
            assert "config.yaml" in artifacts
            assert "model" in artifacts
def _load(self):
    self._validate_run_id()
    mode = self._load_args.get("mode", "list")
    mlflow_client = MlflowClient()

    metric_history = mlflow_client.get_metric_history(self.run_id, key=self.key)

    if mode == "list":
        simplified_history = [metric.value for metric in metric_history]
    elif mode == "dict":
        simplified_history = {
            metric.step: metric.value for metric in metric_history
        }
    elif mode == "history":
        # history is a list of dicts whose keys are "log_metric" arguments, e.g.:
        # [{"step": 0, "value": 0.1, "timestamp": ...}, {"step": 1, "value": 0.2, "timestamp": ...}]
        simplified_history = [
            {
                "step": metric.step,
                "value": metric.value,
                "timestamp": metric.timestamp,
            }
            for metric in metric_history
        ]

    return simplified_history
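# Illustration (hypothetical data, not part of the dataset class above) of what the
# three "mode" values return for the same metric history. The namedtuple only mimics
# the fields of mlflow.entities.Metric that _load() reads (value, step, timestamp).
from collections import namedtuple

FakeMetric = namedtuple("FakeMetric", ["key", "value", "timestamp", "step"])

history = [
    FakeMetric("accuracy", 0.1, 1600000000000, 0),
    FakeMetric("accuracy", 0.2, 1600000060000, 1),
    FakeMetric("accuracy", 0.3, 1600000120000, 2),
]

as_list = [m.value for m in history]          # mode="list" -> [0.1, 0.2, 0.3]
as_dict = {m.step: m.value for m in history}  # mode="dict" -> {0: 0.1, 1: 0.2, 2: 0.3}
as_history = [                                # mode="history" -> list of dicts
    {"step": m.step, "value": m.value, "timestamp": m.timestamp} for m in history
]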
def test_kedro_mlflow_config_experiment_was_deleted(kedro_project_with_mlflow_conf):
    # create an experiment with the same name and then delete it
    mlflow_tracking_uri = (kedro_project_with_mlflow_conf / "mlruns").as_uri()
    mlflow_client = MlflowClient(mlflow_tracking_uri)
    mlflow_client.create_experiment("exp1")
    mlflow_client.delete_experiment(
        mlflow_client.get_experiment_by_name("exp1").experiment_id
    )

    # the config must properly restore the experiment
    config = KedroMlflowConfig(
        server=dict(mlflow_tracking_uri="mlruns"),
        tracking=dict(experiment=dict(name="exp1")),
    )

    bootstrap_project(kedro_project_with_mlflow_conf)
    with KedroSession.create(project_path=kedro_project_with_mlflow_conf) as session:
        context = session.load_context()
        # setup config
        config.setup(context)

        assert "exp1" in [
            exp.name for exp in config.server._mlflow_client.list_experiments()
        ]
def setup_mlflow_tracking(self, URI, experiment_name, run_name):
    # select URI for server tracking
    set_tracking_uri(uri=URI)
    if is_tracking_uri_set():
        logging.debug('MLFlow URI: ' + str(get_tracking_uri()))

    # CRUD interface
    self.client = MlflowClient(tracking_uri=get_tracking_uri())

    # Experiment setup
    if self.client.get_experiment_by_name(name=experiment_name) is None:
        exp_id = self.client.create_experiment(name=experiment_name)
    else:
        exp = self.client.get_experiment_by_name(name=experiment_name)
        exp_id = exp.experiment_id

    # Run setup
    mlflow.start_run(experiment_id=exp_id, run_name=run_name)
    self.run_id = mlflow.active_run().info.run_id
    data = self.client.get_run(mlflow.active_run().info.run_id).data

    logging.info('MLFlow tracking started - Experiment: ' + str(experiment_name)
                 + " - Run: " + str(data.tags["mlflow.runName"]))
def __init__(
    self,
    experiment_name: str = 'default',
    tracking_uri: Optional[str] = None,
    tags: Optional[Dict[str, Any]] = None,
    save_dir: Optional[str] = './mlruns',
    prefix: str = '',
):
    if mlflow is None:
        raise ImportError(
            'You want to use `mlflow` logger which is not installed yet,'
            ' install it with `pip install mlflow`.'
        )
    super().__init__()
    if not tracking_uri:
        tracking_uri = f'{LOCAL_FILE_URI_PREFIX}{save_dir}'

    self._experiment_name = experiment_name
    self._experiment_id = None
    self._tracking_uri = tracking_uri
    self._run_id = None
    self.tags = tags
    self._prefix = prefix
    self._mlflow_client = MlflowClient(tracking_uri)
def __init__(self,
             experiment_name: str = 'default',
             run_name: str = 'test',
             tracking_uri: Optional[str] = None,
             tags: Optional[Dict[str, Any]] = None,
             save_dir: Optional[str] = './mlruns'):
    if not _MLFLOW_AVAILABLE:
        raise ImportError(
            'You want to use `mlflow` logger which is not installed yet,'
            ' install it with `pip install mlflow`.')
    super().__init__()
    if not tracking_uri:
        tracking_uri = f'{LOCAL_FILE_URI_PREFIX}{save_dir}'

    self._experiment_name = experiment_name
    self._tracking_uri = tracking_uri
    self.tags = tags

    mlflow.set_experiment(experiment_name)
    run = mlflow.start_run(run_name=run_name)
    self._run_id = run.info.run_id
    self._experiment_id = run.info.experiment_id
    self._mlflow_client = MlflowClient(tracking_uri)
def test_metric_name_multiobjective(
    tmpdir: py.path.local, names: Union[str, List[str]], expected: List[str]
) -> None:
    tracking_uri = f"file:{tmpdir}"

    mlflc = MLflowCallback(tracking_uri=tracking_uri, metric_name=names)
    study = optuna.create_study(study_name="my_study", directions=["minimize", "maximize"])
    study.optimize(_multiobjective_func, n_trials=3, callbacks=[mlflc])

    mlfl_client = MlflowClient(tracking_uri)
    experiments = mlfl_client.list_experiments()
    experiment = experiments[0]
    experiment_id = experiment.experiment_id

    run_infos = mlfl_client.list_run_infos(experiment_id)
    first_run_id = run_infos[0].run_id
    first_run = mlfl_client.get_run(first_run_id)
    first_run_dict = first_run.to_dictionary()

    assert all(e in first_run_dict["data"]["metrics"] for e in expected)
def test_node_hook_logging_above_limit_truncate_strategy(
        tmp_path, config_dir, dummy_run_params, dummy_node, param_length):
    # mocker.patch("kedro_mlflow.utils._is_kedro_project", return_value=True)

    _write_yaml(
        tmp_path / "conf" / "base" / "mlflow.yml",
        dict(hooks=dict(node=dict(long_parameters_strategy="truncate"))),
    )

    mlflow_tracking_uri = (tmp_path / "mlruns").as_uri()
    mlflow.set_tracking_uri(mlflow_tracking_uri)

    mlflow_node_hook = MlflowNodeHook()

    param_value = param_length * "a"
    node_inputs = {"params:my_param": param_value}

    with mlflow.start_run():
        mlflow_node_hook.before_pipeline_run(
            run_params=dummy_run_params,
            pipeline=Pipeline([]),
            catalog=DataCatalog(),
        )
        mlflow_node_hook.before_node_run(
            node=node(func=lambda x: x, inputs=dict(x="a"), outputs=None),
            catalog=DataCatalog(),  # can be empty
            inputs=node_inputs,
            is_async=False,
            run_id="132",
        )
        run_id = mlflow.active_run().info.run_id

    mlflow_client = MlflowClient(mlflow_tracking_uri)
    current_run = mlflow_client.get_run(run_id)
    assert current_run.data.params == {
        "my_param": param_value[0:MAX_PARAM_VAL_LENGTH]
    }
def __init__(self, name, uri, host, port):
    self.name = name
    self.uri = uri
    self.host = host
    self.port = port
    self.app = Flask(name)
    self.client = MlflowClient(tracking_uri=uri)

    @self.app.route("/log_artifact", methods=['POST'])
    def log_artifact():
        if request.method == 'POST':
            if request.files:
                if 'run_id' in request.form:
                    run_id = request.form['run_id']
                else:
                    return {
                        'status': 'fail',
                        'text': 'run_id is not found!'
                    }

                tmp_path = os.path.join(
                    str(Path.home()), "mlflow_artifacts_proxy", run_id)
                os.makedirs(tmp_path)

                for f in request.files:
                    file = request.files[f]
                    filename = secure_filename(file.filename)
                    file_path = os.path.join(tmp_path, filename)
                    file.save(file_path)
                    self.client.log_artifact(run_id, local_path=file_path)

                shutil.rmtree(
                    os.path.join(str(Path.home()), "mlflow_artifacts_proxy"))
                return {'status': 'success', 'text': "You're great!"}
            else:
                return {'status': 'fail', 'text': 'No files!'}
def test_log_metric_none(tmpdir: py.path.local) -> None:
    tracking_uri = f"file:{tmpdir}"
    metric_name = "metric"
    study_name = "my_study"

    mlflc = MLflowCallback(tracking_uri=tracking_uri, metric_name=metric_name)
    study = optuna.create_study(study_name=study_name)
    study.optimize(lambda _: np.nan, n_trials=1, callbacks=[mlflc])

    mlfl_client = MlflowClient(tracking_uri)
    experiments = mlfl_client.list_experiments()
    experiment = experiments[0]
    experiment_id = experiment.experiment_id

    run_infos = mlfl_client.list_run_infos(experiment_id)
    assert len(run_infos) == 1

    first_run_id = run_infos[0].run_id
    first_run = mlfl_client.get_run(first_run_id)
    first_run_dict = first_run.to_dictionary()

    # When `values` is `None`, do not save values with metric names.
    assert metric_name not in first_run_dict["data"]["metrics"]
def test_create_model_version_run_link_with_configured_profile(mock_registry_store):
    experiment_id = 'test-exp-id'
    hostname = 'https://workspace.databricks.com/'
    workspace_id = '10002'
    run_id = 'runid'
    workspace_url = construct_run_url(hostname, experiment_id, run_id, workspace_id)

    get_run_mock = mock.MagicMock()
    get_run_mock.return_value = Run(
        RunInfo(run_id, experiment_id, 'userid', 'status', 0, 1, None), None)

    with mock.patch('mlflow.tracking.client.is_in_databricks_notebook',
                    return_value=False), \
            mock.patch('mlflow.tracking.client.get_workspace_info_from_databricks_secrets',
                       return_value=(hostname, workspace_id)):
        client = MlflowClient(tracking_uri='databricks', registry_uri='otherplace')
        client.get_run = get_run_mock
        mock_registry_store.create_model_version.return_value = \
            ModelVersion('name', 1, 0, 1, source='source', run_id=run_id,
                         run_link=workspace_url)
        model_version = client.create_model_version('name', 'source', 'runid')
        assert model_version.run_link == workspace_url
        # verify that the client generated the right URL
        mock_registry_store.create_model_version.assert_called_once_with(
            "name", 'source', 'runid', [], workspace_url)
def register_model(run, model, model_name):
    result = mlflow.register_model(
        "runs:/" + run.info.run_id + "/artifacts/" + model, model_name
    )

    # client used to read the metric history and update the registered model's description
    client = MlflowClient()

    description = []
    for param in run.data.params:
        description.append(
            "**{}:** {}\n".format(param, run.data.params[param]))

    description.append(
        "**Accuracy:** {}".format(
            client.get_metric_history(run.info.run_id, "accuracy")[0].value))

    description.append(
        "**Loss:** {}".format(
            client.get_metric_history(run.info.run_id, "loss")[0].value))

    client.update_model_version(
        name=model_name, version=result.version, description="".join(description)
    )
    return output_df

# COMMAND ----------

input_data = table('bank_db.bank_marketing_train_set')
pdDF = input_data.toPandas()

# COMMAND ----------

returnDF = train_xgboost(pdDF)

# COMMAND ----------

# MAGIC %md
# MAGIC ##### Extra: Use the MLflow client to analyse the runs programmatically
# MAGIC Next to using the Experiments UI, or just putting all the results from a run into a Spark DF, the `MlflowClient` class can be used to look up runs programmatically from a given (list of) experiment(s). Below is an example of how we can retrieve the run_id with the highest area under the curve score by using the `client.search_runs` method.

# COMMAND ----------

from mlflow.tracking import MlflowClient

client = MlflowClient()
best_run_id = client.search_runs(
    experiment_ids=[experiment_id],
    order_by=["metrics.auc DESC"]
)[0].info.run_id

# COMMAND ----------

best_run_id

# COMMAND ----------
def main():
    df = pd.read_csv(config["train"]["data_path"])
    y = np.array(df[config["train"]["label_column"]])
    df = preprocess(df)

    label_nbr = len(df[config["train"]["label_column"]].unique())
    label_names = config["train"]["label"]

    y = np.array(df[config["train"]["label_column"]])
    df = df.drop([config["train"]["label_column"]] + config['train']['to_drop'], axis=1)
    X = np.array(df)
    print(X.shape, y.shape)

    try:
        device = torch.device(config["train"]["device"])
    except Exception:
        device = torch.device("cpu")

    classifier = Net(
        input_dim=df.shape[1],
        hidden_dim=config["train"]["hidden_dim"]).to(device)
    criterion = torch.nn.functional.mse_loss

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=.25, random_state=42)
    X_train, y_train = torch.tensor(X_train).float(), torch.tensor(y_train).float()
    X_test, y_test = torch.tensor(X_test).float(), torch.tensor(y_test).float()

    # create dataloaders with the specified batch_size
    ds_train = torch.utils.data.TensorDataset(X_train, y_train)
    dataloader_train = torch.utils.data.DataLoader(
        ds_train, batch_size=config["train"]["batch_size"], shuffle=True)

    ds_test = torch.utils.data.TensorDataset(X_test, y_test)
    dataloader_test = torch.utils.data.DataLoader(
        ds_test, batch_size=config["train"]["batch_size"], shuffle=True)

    trainer = Trainer(classifier, device=device, criterion=criterion)
    trainer.train(
        dataloader_train,
        dataloader_test,
        config["train"]["epochs"],
        config["train"]["log_every"],
        task="regression")

    # eval step
    metrics = {}
    metrics["mse"] = trainer.metric
    mlflow.log_params(metrics)

    mlflow.pytorch.log_model(
        pytorch_model=classifier,
        artifact_path="model",
        registered_model_name=config["mlflow"]["model_name"])

    api_request_model = get_request_features(df)
    with open("request_model.json", "w") as rmodel:
        json.dump(api_request_model, rmodel, indent=4)

    # checking if there are any production models,
    # so we can put at least one in production
    model_name = config['mlflow']['model_name']
    try:
        mlflow.pytorch.load_model(f"models:/{model_name}/Production")
    except Exception:
        client = MlflowClient()
        version = client.search_model_versions(
            f"name='{model_name}'")[0].version
        client.transition_model_version_stage(
            name=model_name, version=version, stage="Production")
def mlflow_client(self):
    mlflow.set_tracking_uri(MLFLOW_URI)
    return MlflowClient()
def test_node_hook_logging(
    tmp_path,
    mocker,
    monkeypatch,
    dummy_run_params,
    dummy_catalog,
    dummy_pipeline,
    dummy_node,
    config_dir,
    flatten_dict_params,
    expected,
):
    mocker.patch("logging.config.dictConfig")
    mocker.patch("kedro_mlflow.utils._is_kedro_project", return_value=True)
    monkeypatch.chdir(tmp_path)
    # config = KedroMlflowConfig(
    #     project_path=tmp_path,
    #     node_hook_opts={"flatten_dict_params": flatten_dict_params, "sep": "-"},
    # )
    # # the function is imported inside the other file and this is the file to patch
    # # see https://stackoverflow.com/questions/30987973/python-mock-patch-doesnt-work-as-expected-for-public-method
    # mocker.patch(
    #     "kedro_mlflow.framework.hooks.node_hook.get_mlflow_config", return_value=config
    # )
    _write_yaml(
        tmp_path / "conf" / "base" / "mlflow.yml",
        dict(
            hooks=dict(
                node=dict(
                    flatten_dict_params=flatten_dict_params, recursive=False, sep="-"
                )
            ),
        ),
    )

    mlflow_node_hook = MlflowNodeHook()

    node_inputs = {
        v: dummy_catalog._data_sets.get(v) for k, v in dummy_node._inputs.items()
    }

    mlflow_tracking_uri = (tmp_path / "mlruns").as_uri()
    mlflow.set_tracking_uri(mlflow_tracking_uri)
    with mlflow.start_run():
        mlflow_node_hook.before_pipeline_run(
            run_params=dummy_run_params, pipeline=dummy_pipeline, catalog=dummy_catalog
        )
        mlflow_node_hook.before_node_run(
            node=dummy_node,
            catalog=dummy_catalog,
            inputs=node_inputs,
            is_async=False,
            run_id="132",
        )
        run_id = mlflow.active_run().info.run_id

    mlflow_client = MlflowClient(mlflow_tracking_uri)
    current_run = mlflow_client.get_run(run_id)
    assert current_run.data.params == expected
def mlflow_client(tracking_server_uri):
    """Provides an MLflow Tracking API client pointed at the local tracking server."""
    mlflow.set_tracking_uri(tracking_server_uri)
    yield mock.Mock(wraps=MlflowClient(tracking_server_uri))
    mlflow.set_tracking_uri(None)
def mlflow_resources():
    uri = mlflow.get_tracking_uri()
    client = MlflowClient(uri)
    return (uri, client)
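# Minimal sketch (hypothetical test, assuming mlflow_resources is registered as a
# pytest fixture) showing how the (uri, client) pair returned above might be consumed.
def test_mlflow_resources_returns_usable_client(mlflow_resources):
    uri, client = mlflow_resources
    experiment_id = client.create_experiment("smoke-test-experiment")
    assert client.get_experiment(experiment_id).name == "smoke-test-experiment"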
def register_model(model_uri, name, await_registration_for=DEFAULT_AWAIT_MAX_SLEEP_SECONDS):
    """
    Create a new model version in model registry for the model files specified by ``model_uri``.
    Note that this method assumes the model registry backend URI is the same as that of the
    tracking backend.

    :param model_uri: URI referring to the MLmodel directory. Use a ``runs:/`` URI if you want to
                      record the run ID with the model in model registry. ``models:/`` URIs are
                      currently not supported.
    :param name: Name of the registered model under which to create a new model version. If a
                 registered model with the given name does not exist, it will be created
                 automatically.
    :param await_registration_for: Number of seconds to wait for the model version to finish
                      being created and be in ``READY`` status. By default, the function waits for
                      five minutes. Specify 0 or None to skip waiting.
    :return: Single :py:class:`mlflow.entities.model_registry.ModelVersion` object created by
             backend.

    .. code-block:: python
        :caption: Example

        import mlflow.sklearn
        from sklearn.ensemble import RandomForestRegressor

        mlflow.set_tracking_uri("sqlite:////tmp/mlruns.db")
        params = {"n_estimators": 3, "random_state": 42}

        # Log MLflow entities
        with mlflow.start_run() as run:
            rfr = RandomForestRegressor(**params).fit([[0, 1]], [1])
            mlflow.log_params(params)
            mlflow.sklearn.log_model(rfr, artifact_path="sklearn-model")

        model_uri = "runs:/{}/sklearn-model".format(run.info.run_id)
        mv = mlflow.register_model(model_uri, "RandomForestRegressionModel")
        print("Name: {}".format(mv.name))
        print("Version: {}".format(mv.version))

    .. code-block:: text
        :caption: Output

        Name: RandomForestRegressionModel
        Version: 1
    """
    client = MlflowClient()
    try:
        create_model_response = client.create_registered_model(name)
        eprint("Successfully registered model '%s'." % create_model_response.name)
    except MlflowException as e:
        if e.error_code == ErrorCode.Name(RESOURCE_ALREADY_EXISTS):
            eprint(
                "Registered model '%s' already exists. Creating a new version of this model..."
                % name)
        else:
            raise e

    if RunsArtifactRepository.is_runs_uri(model_uri):
        source = RunsArtifactRepository.get_underlying_uri(model_uri)
        (run_id, _) = RunsArtifactRepository.parse_runs_uri(model_uri)
        create_version_response = client.create_model_version(
            name, source, run_id, await_creation_for=await_registration_for)
    else:
        create_version_response = client.create_model_version(
            name, source=model_uri, run_id=None,
            await_creation_for=await_registration_for)
    eprint("Created version '{version}' of model '{model_name}'.".format(
        version=create_version_response.version,
        model_name=create_version_response.name))
    return create_version_response
def set_tag_mock():
    with mock.patch("mlflow.projects.databricks.tracking.MlflowClient") as m:
        mlflow_service_mock = mock.Mock(wraps=MlflowClient())
        m.return_value = mlflow_service_mock
        yield mlflow_service_mock.set_tag