def test_models_artifact_repo_uses_repo_download_artifacts(
    mock_get_model_version_download_uri,
):  # pylint: disable=unused-argument
    """
    Check that calling ``download_artifacts`` on a ``ModelsArtifactRepository`` results in
    exactly one call to ``download_artifacts`` on the repository stored in ``self.repo``.
    """
    repo_under_test = ModelsArtifactRepository("models:/MyModel/12")
    # Replace the wrapped repository with a mock so the forwarded call can be observed.
    delegate = Mock()
    repo_under_test.repo = delegate
    repo_under_test.download_artifacts("artifact_path", "dst_path")
    delegate.download_artifacts.assert_called_once()
def test_models_artifact_repo_uses_repo_download_artifacts():
    """
    Check that calling ``download_artifacts`` on a ``ModelsArtifactRepository`` results in
    exactly one call to ``download_artifacts`` on the repository stored in ``self.repo``.
    """
    # The client lookup is patched so construction always sees this download location.
    download_uri_patch = mock.patch.object(
        MlflowClient, "get_model_version_download_uri", return_value="s3://blah_bucket/"
    )
    with download_uri_patch:
        repo_under_test = ModelsArtifactRepository("models:/MyModel/12")
        # Replace the wrapped repository with a mock so the forwarded call can be observed.
        delegate = Mock()
        repo_under_test.repo = delegate
        repo_under_test.download_artifacts("artifact_path", "dst_path")
        delegate.download_artifacts.assert_called_once()
def _get_flavor_backend(model_uri, **kwargs):
    """
    Pick the flavor backend for the model referenced by ``model_uri``.

    The model's ``MLmodel`` file is downloaded into a temporary directory, loaded with
    ``Model.load`` and passed to ``get_flavor_backend`` together with ``kwargs``.

    :param model_uri: URI of the model. ``models:/`` URIs are first translated to their
                      underlying URI via ``ModelsArtifactRepository.get_underlying_uri``.
    :param kwargs: Forwarded unchanged to ``get_flavor_backend``.
    :return: The flavor backend returned by ``get_flavor_backend``.
    :raises Exception: If ``get_flavor_backend`` returns no backend.
    """
    with TempDir() as scratch:
        resolved_uri = (
            ModelsArtifactRepository.get_underlying_uri(model_uri)
            if ModelsArtifactRepository.is_models_uri(model_uri)
            else model_uri
        )
        mlmodel_uri = posixpath.join(resolved_uri, "MLmodel")
        mlmodel_path = _download_artifact_from_uri(mlmodel_uri, output_path=scratch.path())
        # Load while the temporary directory still exists.
        model = Model.load(mlmodel_path)

    flavor_name, flavor_backend = get_flavor_backend(model, **kwargs)
    if flavor_backend is not None:
        _logger.info("Selected backend for flavor '%s'", flavor_name)
        return flavor_backend
    raise Exception("No suitable flavor backend was found for the model.")
def load_model(model_uri, dfs_tmpdir=None):
    """
    Load the Spark MLlib model from the path.

    :param model_uri: The location, in URI format, of the MLflow model, for example:

                      - ``/Users/me/path/to/local/model``
                      - ``relative/path/to/local/model``
                      - ``s3://my_bucket/path/to/model``
                      - ``runs:/<mlflow_run_id>/run-relative/path/to/model``
                      - ``models:/<model_name>/<model_version>``
                      - ``models:/<model_name>/<stage>``

                      For more information about supported URI schemes, see
                      `Referencing Artifacts <https://www.mlflow.org/docs/latest/concepts.html#
                      artifact-locations>`_.
    :param dfs_tmpdir: Temporary directory path on Distributed (Hadoop) File System (DFS) or local
                       filesystem if running in local mode. The model is loaded from this
                       destination. Defaults to ``/tmp/mlflow``.
    :return: pyspark.ml.pipeline.PipelineModel

    .. code-block:: python
        :caption: Example

        import mlflow.spark

        model = mlflow.spark.load_model("spark-model")
        # Prepare test documents, which are unlabeled (id, text) tuples.
        # ``spark`` below refers to an active SparkSession.
        test = spark.createDataFrame([
            (4, "spark i j k"),
            (5, "l m n"),
            (6, "spark hadoop spark"),
            (7, "apache hadoop")], ["id", "text"])
        # Make predictions on test documents
        prediction = model.transform(test)
    """
    # ``runs:/`` and ``models:/`` URIs are indirections; pick the matching resolver, if any.
    resolve_underlying = None
    if RunsArtifactRepository.is_runs_uri(model_uri):
        resolve_underlying = RunsArtifactRepository.get_underlying_uri
    elif ModelsArtifactRepository.is_models_uri(model_uri):
        resolve_underlying = ModelsArtifactRepository.get_underlying_uri

    if resolve_underlying is not None:
        requested_uri = model_uri
        model_uri = resolve_underlying(requested_uri)
        _logger.info("'%s' resolved as '%s'", requested_uri, model_uri)

    flavor_conf = _get_flavor_configuration_from_uri(model_uri, FLAVOR_NAME)
    model_uri = append_to_uri_path(model_uri, flavor_conf["model_data"])
    # The local copy is only used to put the model's bundled code on the system path;
    # the model itself is loaded from ``model_uri``.
    local_model_path = _download_artifact_from_uri(model_uri)
    _add_code_from_conf_to_system_path(local_model_path, flavor_conf)

    return _load_model(model_uri=model_uri, dfs_tmpdir_base=dfs_tmpdir)
def test_models_artifact_repo_init_with_stage_uri_and_not_using_databricks_registry():
    """
    Constructing the repository from a stage URI should request an artifact repository for the
    download location reported by the (patched) client.
    """
    stage_uri = "models:/MyModel/Staging"
    download_location = "s3://blah_bucket/"
    latest_version = ModelVersion(
        "MyModel",
        "10",
        "2345671890",
        "234567890",
        "some description",
        "UserID",
        "Production",
        "source",
        "run12345",
    )
    latest_versions_patch = mock.patch.object(
        MlflowClient, "get_latest_versions", return_value=[latest_version]
    )
    download_uri_patch = mock.patch.object(
        MlflowClient, "get_model_version_download_uri", return_value=download_location
    )
    get_repo_patch = mock.patch(
        "mlflow.store.artifact.artifact_repository_registry.get_artifact_repository"
    )
    with latest_versions_patch, download_uri_patch, get_repo_patch as get_repo_mock:
        get_repo_mock.return_value = None
        ModelsArtifactRepository(stage_uri)
        get_repo_mock.assert_called_once_with(download_location)
def test_models_artifact_repo_init_with_stage_uri_and_db_profile():
    """
    A stage URI that carries a Databricks profile should produce a DBFS repository built from
    the download location with the same profile embedded in it.
    """
    stage_uri = "models://profile@databricks/MyModel/Staging"
    download_location = "dbfs:/databricks/mlflow-registry/12345/models/keras-model"
    expected_repo_uri = (
        "dbfs://profile@databricks/databricks/mlflow-registry/12345/models/keras-model"
    )
    latest_version = ModelVersion(
        "MyModel",
        "10",
        "2345671890",
        "234567890",
        "some description",
        "UserID",
        "Production",
        "source",
        "run12345",
    )
    latest_versions_patch = mock.patch.object(
        MlflowClient, "get_latest_versions", return_value=[latest_version]
    )
    download_uri_patch = mock.patch.object(
        MlflowClient, "get_model_version_download_uri", return_value=download_location
    )
    dbfs_repo_patch = mock.patch(
        "mlflow.store.artifact.dbfs_artifact_repo.DbfsRestArtifactRepository", autospec=True
    )
    with latest_versions_patch, download_uri_patch, dbfs_repo_patch as mock_repo:
        created_repo = ModelsArtifactRepository(stage_uri)
        assert created_repo.artifact_uri == stage_uri
        assert isinstance(created_repo.repo, DbfsRestArtifactRepository)
        mock_repo.assert_called_once_with(expected_repo_uri)
def _download_artifact_from_uri(artifact_uri, output_path=None):
    """
    Download the artifact at ``artifact_uri`` through the artifact repository of its parent
    location.

    :param artifact_uri: The *absolute* URI of the artifact to download.
    :param output_path: The local filesystem path to which to download the artifact. If
                        unspecified, a local output path will be created.
    :return: Whatever the repository's ``download_artifacts`` returns.
    """
    uri_parts = urllib.parse.urlparse(artifact_uri)
    scheme_prefix = ""
    if uri_parts.scheme and not uri_parts.path.startswith("/"):
        # relative path is a special case, urllib does not reconstruct it properly,
        # so the scheme is set aside and re-attached by hand below.
        scheme_prefix = uri_parts.scheme + ":"
        uri_parts = uri_parts._replace(scheme="")

    if ModelsArtifactRepository.is_models_uri(artifact_uri):
        # For models:/ URIs, it doesn't make sense to initialize a ModelsArtifactRepository
        # with only the model name portion of the URI, then call download_artifacts with the
        # version info. The whole URI is the repository root instead.
        root_uri, artifact_path = artifact_uri, ""
    else:
        # Split the URI path into its parent (repository root) and its last component.
        full_path = uri_parts.path
        artifact_path = posixpath.basename(full_path)
        parent_parts = uri_parts._replace(path=posixpath.dirname(full_path))
        root_uri = scheme_prefix + urllib.parse.urlunparse(parent_parts)

    repo = get_artifact_repository(artifact_uri=root_uri)
    return repo.download_artifacts(artifact_path=artifact_path, dst_path=output_path)
def test_models_artifact_repo_init_with_stage_uri(
    host_creds_mock,
):  # pylint: disable=unused-argument
    """
    A stage URI should produce a ``DbfsRestArtifactRepository`` whose artifact URI is the
    download location reported by the (patched) client.
    """
    stage_uri = "models:/MyModel/Production"
    download_location = "dbfs:/databricks/mlflow-registry/12345/models/keras-model"
    latest_version = ModelVersion(
        "MyModel",
        "10",
        "2345671890",
        "234567890",
        "some description",
        "UserID",
        "Production",
        "source",
        "run12345",
    )
    latest_versions_patch = mock.patch.object(
        MlflowClient, "get_latest_versions", return_value=[latest_version]
    )
    download_uri_patch = mock.patch.object(
        MlflowClient, "get_model_version_download_uri", return_value=download_location
    )
    with latest_versions_patch, download_uri_patch:
        created_repo = ModelsArtifactRepository(stage_uri)
        assert created_repo.artifact_uri == stage_uri
        assert isinstance(created_repo.repo, DbfsRestArtifactRepository)
        assert created_repo.repo.artifact_uri == download_location
def test_models_artifact_repo_init_with_version_uri_and_bad_db_profile_from_context(
    mock_get_model_version_download_uri,
):  # pylint: disable=unused-argument
    """
    Constructing the repository while the registry URI is ``databricks://scope:key:invalid``
    should raise an ``MlflowException`` that mentions the offending key prefix.
    """
    registry_uri_patch = mock.patch(
        "mlflow.get_registry_uri", return_value="databricks://scope:key:invalid"
    )
    with registry_uri_patch, pytest.raises(MlflowException) as ex:
        ModelsArtifactRepository("models:/MyModel/12")
    # Checked after the ``raises`` block: statements following the raising call inside it
    # would never execute.
    assert "Key prefixes cannot contain" in ex.value.message
def _get_flavor_backend(model_uri, **kwargs):
    """
    Pick the flavor backend for the model referenced by ``model_uri``.

    The model's ``MLMODEL_FILE_NAME`` file is downloaded into a temporary directory, loaded
    with ``Model.load`` and passed to ``get_flavor_backend`` together with ``kwargs``.

    :param model_uri: URI of the model. ``models:/`` URIs are first translated to their
                      underlying URI via ``ModelsArtifactRepository.get_underlying_uri``.
    :param kwargs: Forwarded unchanged to ``get_flavor_backend``.
    :return: The flavor backend returned by ``get_flavor_backend``.
    :raises Exception: If ``get_flavor_backend`` returns no backend.
    """
    from mlflow.models.flavor_backend_registry import get_flavor_backend

    with TempDir() as scratch:
        resolved_uri = (
            ModelsArtifactRepository.get_underlying_uri(model_uri)
            if ModelsArtifactRepository.is_models_uri(model_uri)
            else model_uri
        )
        mlmodel_uri = append_to_uri_path(resolved_uri, MLMODEL_FILE_NAME)
        mlmodel_path = _download_artifact_from_uri(mlmodel_uri, output_path=scratch.path())
        # Load while the temporary directory still exists.
        model = Model.load(mlmodel_path)

    flavor_name, flavor_backend = get_flavor_backend(model, **kwargs)
    if flavor_backend is not None:
        _logger.info("Selected backend for flavor '%s'", flavor_name)
        return flavor_backend
    raise Exception("No suitable flavor backend was found for the model.")
def test_models_artifact_repo_init_with_uri_containing_profile(uri_with_profile):
    """
    A models URI that carries a profile should be handed, unchanged, to
    ``DatabricksModelsArtifactRepository``.
    """
    databricks_repo_patch = mock.patch(
        MODELS_ARTIFACT_REPOSITORY_PACKAGE + ".DatabricksModelsArtifactRepository",
        autospec=True,
    )
    with databricks_repo_patch as mock_repo:
        created_repo = ModelsArtifactRepository(uri_with_profile)
        assert created_repo.artifact_uri == uri_with_profile
        assert isinstance(created_repo.repo, DatabricksModelsArtifactRepository)
        mock_repo.assert_called_once_with(uri_with_profile)
def test_models_artifact_repo_init_with_version_uri(
    host_creds_mock, mock_get_model_version_download_uri, artifact_location
):  # pylint: disable=unused-argument
    """
    A version URI should produce a ``DbfsRestArtifactRepository`` rooted at the download
    location, with no profile information added to the DBFS URI.
    """
    version_uri = "models:/MyModel/12"

    created_repo = ModelsArtifactRepository(version_uri)
    assert created_repo.artifact_uri == version_uri
    assert isinstance(created_repo.repo, DbfsRestArtifactRepository)
    assert created_repo.repo.artifact_uri == artifact_location

    # Also confirm that since no databricks:// registry|tracking URI is set in the environment,
    # databricks profile information is not added to the final DBFS URI.
    dbfs_repo_patch = mock.patch(
        "mlflow.store.artifact.dbfs_artifact_repo.DbfsRestArtifactRepository", autospec=True
    )
    with dbfs_repo_patch as mock_repo:
        created_repo = ModelsArtifactRepository(version_uri)
        assert created_repo.artifact_uri == version_uri
        assert isinstance(created_repo.repo, DbfsRestArtifactRepository)
        mock_repo.assert_called_once_with(
            "dbfs:/databricks/mlflow-registry/12345/models/keras-model"
        )
def build_image_local_from_model_uri(self, model_uri, base_image, mlflow_home=None, **kwargs):
    """Build a PythonModel-backed service image from ``model_uri``.

    :param model_uri: directory containing the pyfunc model filesystem, see
                      `pyfunc-filename-system
                      <https://mlflow.org/docs/latest/python_api/mlflow.pyfunc.html#pyfunc-filename-system>`_.
    :param base_image: base image from which the model image is built.
    :param mlflow_home: local mlflow copy used to start up the model service in the container;
                        if None, mlflow is installed from pip.
    :param kwargs: forwarded unchanged to ``get_flavor_backend``.
    :return: ``True`` when the backend's ``build_image`` returns a falsy return code,
             ``False`` otherwise.
    :raises TypeError: if no flavor is found for the model.
    :raises AttributeError: if the selected flavor backend cannot build images.
    """
    with tempfile.TemporaryDirectory() as scratch_dir:
        resolved_uri = (
            ModelsArtifactRepository.get_underlying_uri(model_uri)
            if ModelsArtifactRepository.is_models_uri(model_uri)
            else model_uri
        )
        mlmodel_path = _download_artifact_from_uri(
            append_to_uri_path(resolved_uri, MLMODEL_FILE_NAME), output_path=scratch_dir
        )
        model_meta = Model.load(mlmodel_path)

        flavor_name, flavor_backend = get_flavor_backend(model_meta, **kwargs)
        if flavor_name is None:
            raise TypeError("no suitable backend was found for the model")
        if not flavor_backend.can_build_image():
            raise AttributeError("flavor {} not support build image".format(flavor_name))

        # Always install mlflow so it overrides the official mlflow package in the container.
        return_code = flavor_backend.build_image(
            model_uri,
            self.image_name,
            install_mlflow=True,
            mlflow_home=mlflow_home,
            base_image=base_image,
        )
        return not return_code
def test_models_artifact_repo_init_with_version_uri_and_db_profile(
    mock_get_model_version_download_uri,
):  # pylint: disable=unused-argument
    """
    A version URI that carries a Databricks profile should produce a DBFS repository built
    from a URI with the same profile embedded in it.
    """
    version_uri = "models://profile@databricks/MyModel/12"
    expected_repo_uri = (
        "dbfs://profile@databricks/databricks/mlflow-registry/12345/models/keras-model"
    )
    dbfs_repo_patch = mock.patch(
        "mlflow.store.artifact.dbfs_artifact_repo.DbfsRestArtifactRepository", autospec=True
    )
    with dbfs_repo_patch as mock_repo:
        created_repo = ModelsArtifactRepository(version_uri)
        assert created_repo.artifact_uri == version_uri
        assert isinstance(created_repo.repo, DbfsRestArtifactRepository)
        mock_repo.assert_called_once_with(expected_repo_uri)
def test_models_artifact_repo_init_with_version_uri(
    host_creds_mock,
):  # pylint: disable=unused-argument
    """
    A version URI should produce a ``DbfsRestArtifactRepository`` whose artifact URI is the
    download location reported by the (patched) client.
    """
    version_uri = "models:/MyModel/12"
    download_location = "dbfs://databricks/mlflow-registry/12345/models/keras-model"
    download_uri_patch = mock.patch.object(
        MlflowClient, "get_model_version_download_uri", return_value=download_location
    )
    with download_uri_patch:
        created_repo = ModelsArtifactRepository(version_uri)
        assert created_repo.artifact_uri == version_uri
        assert isinstance(created_repo.repo, DbfsRestArtifactRepository)
        assert created_repo.repo.artifact_uri == download_location
def test_models_artifact_repo_init_with_db_profile_inferred_from_context(uri_without_profile):
    """
    With the registry URI patched to a ``databricks://`` value, a models URI without a profile
    should still be handed, unchanged, to ``DatabricksModelsArtifactRepository``.
    """
    databricks_repo_patch = mock.patch(
        MODELS_ARTIFACT_REPOSITORY_PACKAGE + ".DatabricksModelsArtifactRepository",
        autospec=True,
    )
    registry_uri_patch = mock.patch(
        "mlflow.store.artifact.utils.models.mlflow.get_registry_uri",
        return_value="databricks://getRegistryUriDefault",
    )
    with databricks_repo_patch as mock_repo, registry_uri_patch:
        created_repo = ModelsArtifactRepository(uri_without_profile)
        assert created_repo.artifact_uri == uri_without_profile
        assert isinstance(created_repo.repo, DatabricksModelsArtifactRepository)
        mock_repo.assert_called_once_with(uri_without_profile)
def test_models_artifact_repo_init_with_version_uri_and_db_profile():
    """
    A version URI that carries a Databricks profile should produce a DBFS repository built
    from the download location with the same profile embedded in it.
    """
    version_uri = "models://profile@databricks/MyModel/12"
    download_location = "dbfs:/databricks/mlflow-registry/12345/models/keras-model"
    expected_repo_uri = (
        "dbfs://profile@databricks/databricks/mlflow-registry/12345/models/keras-model"
    )
    download_uri_patch = mock.patch.object(
        MlflowClient, "get_model_version_download_uri", return_value=download_location
    )
    dbfs_repo_patch = mock.patch(
        "mlflow.store.artifact.dbfs_artifact_repo.DbfsRestArtifactRepository", autospec=True
    )
    with download_uri_patch, dbfs_repo_patch as mock_repo:
        created_repo = ModelsArtifactRepository(version_uri)
        assert created_repo.artifact_uri == version_uri
        assert isinstance(created_repo.repo, DbfsRestArtifactRepository)
        mock_repo.assert_called_once_with(expected_repo_uri)
def test_models_artifact_repo_init_with_version_uri_and_not_using_databricks_registry():
    """
    With a non-Databricks registry URI, constructing the repository from a version URI should
    request an artifact repository for the download location reported by the (patched) client.
    """
    download_location = "s3://blah_bucket/"
    download_uri_patch = mock.patch.object(
        MlflowClient, "get_model_version_download_uri", return_value=download_location
    )
    registry_uri_patch = mock.patch(
        "mlflow.store.artifact.utils.models.mlflow.get_registry_uri",
        return_value="non_databricks_uri",
    )
    get_repo_patch = mock.patch(
        "mlflow.store.artifact.artifact_repository_registry.get_artifact_repository"
    )
    with download_uri_patch, registry_uri_patch, get_repo_patch as get_repo_mock:
        get_repo_mock.return_value = None
        ModelsArtifactRepository("models:/MyModel/12")
        get_repo_mock.assert_called_once_with(download_location)
def _download_artifact_from_uri(artifact_uri, output_path=None):
    """
    Download the artifact at ``artifact_uri`` through the artifact repository of its parent
    location.

    :param artifact_uri: The *absolute* URI of the artifact to download.
    :param output_path: The local filesystem path to which to download the artifact. If
                        unspecified, a local output path will be created.
    :return: Whatever the repository's ``download_artifacts`` returns.
    """
    if os.path.exists(artifact_uri):
        if os.name == "nt":
            # if we're dealing with nt-based systems, we need to utilize pathname2url to
            # encode, then continue through the URI handling below.
            artifact_uri = path_to_local_file_uri(artifact_uri)
        else:
            # If we're dealing with local files, just reference the direct pathing.
            # non-nt-based file systems can directly reference path information, while
            # nt-based systems need to url-encode special characters in directory listings
            # to be able to resolve them (i.e., spaces converted to %20 within a file name
            # or path listing)
            parent_dir = os.path.dirname(artifact_uri)
            leaf_name = os.path.basename(artifact_uri)
            local_repo = get_artifact_repository(artifact_uri=parent_dir)
            return local_repo.download_artifacts(artifact_path=leaf_name, dst_path=output_path)

    uri_parts = urllib.parse.urlparse(str(artifact_uri))
    scheme_prefix = ""
    if uri_parts.scheme and not uri_parts.path.startswith("/"):
        # relative path is a special case, urllib does not reconstruct it properly,
        # so the scheme is set aside and re-attached by hand below.
        scheme_prefix = uri_parts.scheme + ":"
        uri_parts = uri_parts._replace(scheme="")

    if ModelsArtifactRepository.is_models_uri(artifact_uri):
        # For models:/ URIs, it doesn't make sense to initialize a ModelsArtifactRepository
        # with only the model name portion of the URI, then call download_artifacts with the
        # version info. The whole URI is the repository root instead.
        root_uri, artifact_path = artifact_uri, ""
    else:
        # Split the URI path into its parent (repository root) and its last component.
        full_path = uri_parts.path
        artifact_path = posixpath.basename(full_path)
        parent_parts = uri_parts._replace(path=posixpath.dirname(full_path))
        root_uri = scheme_prefix + urllib.parse.urlunparse(parent_parts)

    repo = get_artifact_repository(artifact_uri=root_uri)
    return repo.download_artifacts(artifact_path=artifact_path, dst_path=output_path)
def test_parse_models_uri_invalid_input(uri):
    """``ModelsArtifactRepository._parse_uri`` should raise ``MlflowException`` for ``uri``."""
    parse_uri = ModelsArtifactRepository._parse_uri
    with pytest.raises(MlflowException):
        parse_uri(uri)
def test_parse_models_uri_with_stage(uri, expected_name, expected_stage):
    """
    ``ModelsArtifactRepository._parse_uri`` should return the expected name and stage, and no
    version, for a stage-style ``uri``.
    """
    parsed_name, parsed_version, parsed_stage = ModelsArtifactRepository._parse_uri(uri)
    assert parsed_name == expected_name
    assert parsed_version is None
    assert parsed_stage == expected_stage