def register_model(model_uri, name):
    """
    Create a new model version in model registry for the model files specified by ``model_uri``.
    Note that this method assumes the model registry backend URI is the same as that of the
    tracking backend.

    :param model_uri: URI referring to the MLmodel directory. Use a ``runs:/`` URI if you want to
                      record the run ID with the model in model registry. ``models:/`` URIs are
                      currently not supported.
    :param name: Name of the registered model under which to create a new model version. If a
                 registered model with the given name does not exist, it will be created
                 automatically.
    :return: Single :py:class:`mlflow.entities.model_registry.ModelVersion` object created by
             backend.
    """
    client = MlflowClient()
    try:
        client.create_registered_model(name)
    except MlflowException as e:
        if e.error_code == ErrorCode.Name(RESOURCE_ALREADY_EXISTS):
            eprint("Registered model %s already exists. Using it to create a new version." % name)
        else:
            raise e

    if RunsArtifactRepository.is_runs_uri(model_uri):
        source = RunsArtifactRepository.get_underlying_uri(model_uri)
        (run_id, _) = RunsArtifactRepository.parse_runs_uri(model_uri)
        return client.create_model_version(name, source, run_id)
    else:
        return client.create_model_version(name, source=model_uri, run_id=None)

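# A minimal usage sketch for the register_model function above. It assumes a tracking backend
# that also supports the model registry (a database-backed store); the flavor, artifact path,
# and registered model name are illustrative, not required by the API.
import mlflow
import mlflow.sklearn
from sklearn.linear_model import LinearRegression

mlflow.set_tracking_uri("sqlite:///mlruns.db")  # registry needs a database-backed store

with mlflow.start_run() as run:
    model = LinearRegression().fit([[0], [1]], [0, 1])
    mlflow.sklearn.log_model(model, artifact_path="model")

# A runs:/ URI records the originating run ID with the new model version.
model_version = register_model("runs:/{}/model".format(run.info.run_id), "ExampleModel")
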
def test_runs_artifact_repo_uses_repo_download_artifacts():
    """
    The RunsArtifactRepo should delegate `download_artifacts` to its self.repo.download_artifacts
    function.
    """
    artifact_location = "s3://blah_bucket/"
    experiment_id = mlflow.create_experiment("expr_abcd", artifact_location)
    with mlflow.start_run(experiment_id=experiment_id):
        run_id = mlflow.active_run().info.run_id

    runs_repo = RunsArtifactRepository('runs:/{}'.format(run_id))
    runs_repo.repo = Mock()
    runs_repo.download_artifacts('artifact_path', 'dst_path')
    runs_repo.repo.download_artifacts.assert_called_once()

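# For context, the behavior this test exercises: RunsArtifactRepository resolves the runs:/ URI
# to an underlying artifact repository (self.repo) and forwards calls to it. A simplified sketch
# of that delegation (not the project's exact implementation) looks like this:
class _RunsArtifactRepositorySketch:
    def __init__(self, underlying_repo):
        # e.g. an S3ArtifactRepository for s3:// artifact locations
        self.repo = underlying_repo

    def download_artifacts(self, artifact_path, dst_path=None):
        # Forward directly to the resolved repository, which is what the mock above asserts.
        return self.repo.download_artifacts(artifact_path, dst_path)
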
def load_model(model_uri, dfs_tmpdir=None):
    """
    Load the Spark MLlib model from the path.

    :param model_uri: The location, in URI format, of the MLflow model, for example:

                      - ``/Users/me/path/to/local/model``
                      - ``relative/path/to/local/model``
                      - ``s3://my_bucket/path/to/model``
                      - ``runs:/<mlflow_run_id>/run-relative/path/to/model``
                      - ``models:/<model_name>/<model_version>``
                      - ``models:/<model_name>/<stage>``

                      For more information about supported URI schemes, see
                      `Referencing Artifacts <https://www.mlflow.org/docs/latest/concepts.html#artifact-locations>`_.
    :param dfs_tmpdir: Temporary directory path on Distributed (Hadoop) File System (DFS) or local
                       filesystem if running in local mode. The model is loaded from this
                       destination. Defaults to ``/tmp/mlflow``.
    :return: pyspark.ml.pipeline.PipelineModel

    .. code-block:: python
        :caption: Example

        from mlflow import spark

        model = mlflow.spark.load_model("spark-model")
        # Prepare test documents, which are unlabeled (id, text) tuples.
        test = spark.createDataFrame([
            (4, "spark i j k"),
            (5, "l m n"),
            (6, "spark hadoop spark"),
            (7, "apache hadoop")], ["id", "text"])
        # Make predictions on test documents
        prediction = model.transform(test)
    """
    if RunsArtifactRepository.is_runs_uri(model_uri):
        runs_uri = model_uri
        model_uri = RunsArtifactRepository.get_underlying_uri(model_uri)
        _logger.info("'%s' resolved as '%s'", runs_uri, model_uri)
    elif ModelsArtifactRepository.is_models_uri(model_uri):
        runs_uri = model_uri
        model_uri = ModelsArtifactRepository.get_underlying_uri(model_uri)
        _logger.info("'%s' resolved as '%s'", runs_uri, model_uri)
    flavor_conf = _get_flavor_configuration_from_uri(model_uri, FLAVOR_NAME)
    model_uri = append_to_uri_path(model_uri, flavor_conf["model_data"])
    local_model_path = _download_artifact_from_uri(model_uri)
    _add_code_from_conf_to_system_path(local_model_path, flavor_conf)
    return _load_model(model_uri=model_uri, dfs_tmpdir_base=dfs_tmpdir)

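# Since this version of load_model also resolves ``models:/`` URIs, a registered model can be
# loaded by name and version or stage. A hedged sketch; the model name "MySparkModel" and the
# assumption that it has a version in the "Production" stage are illustrative.
import mlflow.spark

versioned_model = mlflow.spark.load_model("models:/MySparkModel/3")
staged_model = mlflow.spark.load_model("models:/MySparkModel/Production")
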
def register_model(model_uri, name, await_registration_for=DEFAULT_AWAIT_MAX_SLEEP_SECONDS):
    """
    Create a new model version in model registry for the model files specified by ``model_uri``.
    Note that this method assumes the model registry backend URI is the same as that of the
    tracking backend.

    :param model_uri: URI referring to the MLmodel directory. Use a ``runs:/`` URI if you want to
                      record the run ID with the model in model registry. ``models:/`` URIs are
                      currently not supported.
    :param name: Name of the registered model under which to create a new model version. If a
                 registered model with the given name does not exist, it will be created
                 automatically.
    :param await_registration_for: Number of seconds to wait for the model version to finish being
                                   created and be in ``READY`` status. By default, the function
                                   waits for five minutes. Specify 0 or None to skip waiting.
    :return: Single :py:class:`mlflow.entities.model_registry.ModelVersion` object created by
             backend.
    """
    client = MlflowClient()
    try:
        create_model_response = client.create_registered_model(name)
        eprint("Successfully registered model '%s'." % create_model_response.name)
    except MlflowException as e:
        if e.error_code == ErrorCode.Name(RESOURCE_ALREADY_EXISTS):
            eprint("Registered model '%s' already exists. Creating a new version of this model..."
                   % name)
        else:
            raise e

    if RunsArtifactRepository.is_runs_uri(model_uri):
        source = RunsArtifactRepository.get_underlying_uri(model_uri)
        (run_id, _) = RunsArtifactRepository.parse_runs_uri(model_uri)
        # Pass the wait timeout here as well so the runs:/ branch honors await_registration_for.
        create_version_response = client.create_model_version(
            name, source, run_id, await_creation_for=await_registration_for)
    else:
        create_version_response = client.create_model_version(
            name, source=model_uri, run_id=None, await_creation_for=await_registration_for)
    eprint("Created version '{version}' of model '{model_name}'.".format(
        version=create_version_response.version, model_name=create_version_response.name))
    return create_version_response

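# The await_registration_for parameter introduced in this version can be set to 0 (or None) to
# return as soon as the backend creates the version instead of polling for READY status. A short
# hedged sketch; the runs:/ URI and model name are hypothetical.
model_uri = "runs:/0123456789abcdef/model"  # hypothetical run ID and artifact path
mv = register_model(model_uri, "ExampleModel", await_registration_for=0)
print(mv.version)
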
def test_get_artifact_uri(uri, expected_tracking_uri, mock_uri, expected_result_uri):
    with mock.patch("mlflow.tracking.artifact_utils.get_artifact_uri",
                    return_value=mock_uri) as get_artifact_uri_mock:
        result_uri = RunsArtifactRepository.get_underlying_uri(uri)
        get_artifact_uri_mock.assert_called_once_with("1234abcdf1394asdfwer33", "path/model",
                                                      expected_tracking_uri)
        assert result_uri == expected_result_uri

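# test_get_artifact_uri takes its arguments from a @pytest.mark.parametrize decorator that is not
# shown in this excerpt. A hypothetical set of cases, consistent with the run ID
# ("1234abcdf1394asdfwer33") and artifact path ("path/model") asserted above; the mocked and
# expected URIs are made up for illustration, not the project's actual fixtures.
import pytest

get_artifact_uri_cases = pytest.mark.parametrize(
    "uri, expected_tracking_uri, mock_uri, expected_result_uri",
    [
        (
            "runs:/1234abcdf1394asdfwer33/path/model",
            None,
            "s3://bucket/1234abcdf1394asdfwer33/artifacts/path/model",
            "s3://bucket/1234abcdf1394asdfwer33/artifacts/path/model",
        ),
    ],
)
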
def test_runs_artifact_repo_init():
    artifact_location = "s3://blah_bucket/"
    experiment_id = mlflow.create_experiment("expr_abc", artifact_location)
    with mlflow.start_run(experiment_id=experiment_id):
        run_id = mlflow.active_run().info.run_id

    runs_uri = "runs:/%s/path/to/model" % run_id
    runs_repo = RunsArtifactRepository(runs_uri)

    assert runs_repo.artifact_uri == runs_uri
    assert isinstance(runs_repo.repo, S3ArtifactRepository)
    expected_absolute_uri = "%s%s/artifacts/path/to/model" % (artifact_location, run_id)
    assert runs_repo.repo.artifact_uri == expected_absolute_uri

def load_model(model_uri, dfs_tmpdir=None):
    """
    Load the Spark MLlib model from the path.

    :param model_uri: The location, in URI format, of the MLflow model, for example:

                      - ``/Users/me/path/to/local/model``
                      - ``relative/path/to/local/model``
                      - ``s3://my_bucket/path/to/model``
                      - ``runs:/<mlflow_run_id>/run-relative/path/to/model``

                      For more information about supported URI schemes, see
                      `Referencing Artifacts <https://www.mlflow.org/docs/latest/tracking.html#artifact-locations>`_.
    :param dfs_tmpdir: Temporary directory path on Distributed (Hadoop) File System (DFS) or local
                       filesystem if running in local mode. The model is loaded from this
                       destination. Defaults to ``/tmp/mlflow``.
    :return: pyspark.ml.pipeline.PipelineModel

    >>> from mlflow import spark
    >>> model = mlflow.spark.load_model("spark-model")
    >>> # Prepare test documents, which are unlabeled (id, text) tuples.
    >>> test = spark.createDataFrame([
    ...     (4, "spark i j k"),
    ...     (5, "l m n"),
    ...     (6, "spark hadoop spark"),
    ...     (7, "apache hadoop")], ["id", "text"])
    >>> # Make predictions on test documents.
    >>> prediction = model.transform(test)
    """
    if RunsArtifactRepository.is_runs_uri(model_uri):
        runs_uri = model_uri
        model_uri = RunsArtifactRepository.get_underlying_uri(model_uri)
        _logger.info("'%s' resolved as '%s'", runs_uri, model_uri)
    flavor_conf = _get_flavor_configuration_from_uri(model_uri, FLAVOR_NAME)
    model_uri = posixpath.join(model_uri, flavor_conf["model_data"])
    return _load_model(model_uri=model_uri, dfs_tmpdir=dfs_tmpdir)

def register_model(model_uri, name, await_registration_for=DEFAULT_AWAIT_MAX_SLEEP_SECONDS):
    """
    Create a new model version in model registry for the model files specified by ``model_uri``.
    Note that this method assumes the model registry backend URI is the same as that of the
    tracking backend.

    :param model_uri: URI referring to the MLmodel directory. Use a ``runs:/`` URI if you want to
                      record the run ID with the model in model registry. ``models:/`` URIs are
                      currently not supported.
    :param name: Name of the registered model under which to create a new model version. If a
                 registered model with the given name does not exist, it will be created
                 automatically.
    :param await_registration_for: Number of seconds to wait for the model version to finish being
                                   created and be in ``READY`` status. By default, the function
                                   waits for five minutes. Specify 0 or None to skip waiting.
    :return: Single :py:class:`mlflow.entities.model_registry.ModelVersion` object created by
             backend.

    .. code-block:: python
        :caption: Example

        import mlflow.sklearn
        from sklearn.ensemble import RandomForestRegressor

        mlflow.set_tracking_uri("sqlite:////tmp/mlruns.db")
        params = {"n_estimators": 3, "random_state": 42}

        # Log MLflow entities
        with mlflow.start_run() as run:
            rfr = RandomForestRegressor(**params).fit([[0, 1]], [1])
            mlflow.log_params(params)
            mlflow.sklearn.log_model(rfr, artifact_path="sklearn-model")

        model_uri = "runs:/{}/sklearn-model".format(run.info.run_id)
        mv = mlflow.register_model(model_uri, "RandomForestRegressionModel")
        print("Name: {}".format(mv.name))
        print("Version: {}".format(mv.version))

    .. code-block:: text
        :caption: Output

        Name: RandomForestRegressionModel
        Version: 1
    """
    client = MlflowClient()
    try:
        create_model_response = client.create_registered_model(name)
        eprint("Successfully registered model '%s'." % create_model_response.name)
    except MlflowException as e:
        if e.error_code == ErrorCode.Name(RESOURCE_ALREADY_EXISTS):
            eprint("Registered model '%s' already exists. Creating a new version of this model..."
                   % name)
        else:
            raise e

    if RunsArtifactRepository.is_runs_uri(model_uri):
        source = RunsArtifactRepository.get_underlying_uri(model_uri)
        (run_id, _) = RunsArtifactRepository.parse_runs_uri(model_uri)
        create_version_response = client.create_model_version(
            name, source, run_id, await_creation_for=await_registration_for)
    else:
        create_version_response = client.create_model_version(
            name, source=model_uri, run_id=None, await_creation_for=await_registration_for)
    eprint("Created version '{version}' of model '{model_name}'.".format(
        version=create_version_response.version, model_name=create_version_response.name))
    return create_version_response

def test_parse_runs_uri_invalid_input(uri):
    with pytest.raises(MlflowException):
        RunsArtifactRepository.parse_runs_uri(uri)

def test_parse_runs_uri_valid_input(uri, expected_run_id, expected_artifact_path):
    (run_id, artifact_path) = RunsArtifactRepository.parse_runs_uri(uri)
    assert run_id == expected_run_id
    assert artifact_path == expected_artifact_path

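# test_parse_runs_uri_valid_input is driven by a parametrization that is not shown in this
# excerpt. A hypothetical set of cases, consistent with the runs:/ URIs used elsewhere in these
# tests (a runs:/ URI parses into the run ID and the run-relative artifact path):
parse_runs_uri_valid_cases = pytest.mark.parametrize(
    "uri, expected_run_id, expected_artifact_path",
    [
        ("runs:/1234abcdf1394asdfwer33/path/model", "1234abcdf1394asdfwer33", "path/model"),
        ("runs:/abc123/path/to/model", "abc123", "path/to/model"),
    ],
)
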
def test_parse_runs_uri_invalid_input(uri):
    with pytest.raises(MlflowException, match="Not a proper runs"):
        RunsArtifactRepository.parse_runs_uri(uri)

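# Similarly, the invalid-input test presumably receives its URIs from a parametrization such as
# this hypothetical one; each value is expected to trigger the "Not a proper runs:/ URI"
# MlflowException matched above (the example URIs are illustrative, not the project's fixtures).
parse_runs_uri_invalid_cases = pytest.mark.parametrize(
    "uri",
    [
        "notruns:/1234abcdf1394asdfwer33/path/model",  # wrong scheme
        "runs:/",                                      # missing run ID
    ],
)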