def test_store_object_can_be_serialized_by_pickle(tmpdir):
    """
    Verify that store objects returned by `_get_store` can be serialized with
    pickle, guarding against regressions such as
    https://github.com/mlflow/mlflow/issues/2954
    """
    uris = [
        f"file:///{tmpdir.join('mlflow').strpath}",
        "databricks",
        "https://example.com",
    ]
    for uri in uris:
        # Dumping into an in-memory buffer is enough to exercise picklability.
        pickle.dump(_get_store(uri), io.BytesIO())
def get_artifact_uri(run_id, artifact_path=None, tracking_uri=None):
    """
    Get the absolute URI of the specified artifact in the specified run.

    If `path` is not specified, the artifact root URI of the specified run will
    be returned; calls to ``log_artifact`` and ``log_artifacts`` write
    artifact(s) to subdirectories of the artifact root URI.

    :param run_id: The ID of the run for which to obtain an absolute artifact URI.
    :param artifact_path: The run-relative artifact path, e.g. ``path/to/artifact``.
                          If unspecified, the artifact root URI for the specified
                          run is returned.
    :param tracking_uri: The tracking URI from which to get the run and its
                         artifact location. If not given, the current default
                         tracking URI is used.
    :return: An *absolute* URI referring to the specified artifact or the run's
             artifact root. For example, with an S3-backed store this may be
             ``s3://<bucket_name>/path/to/artifact/root/path/to/artifact`` when an
             artifact path is given, or ``s3://<bucket_name>/path/to/artifact/root``
             when it is not.
    """
    if not run_id:
        raise MlflowException(
            message="A run_id must be specified in order to obtain an artifact uri!",
            error_code=INVALID_PARAMETER_VALUE,
        )

    store = _get_store(tracking_uri)
    run = store.get_run(run_id)
    # A "runs:/" artifact URI would resolve back through this function, so reject
    # it to avoid an infinite loop. Maybe move this method to
    # RunsArtifactRepository so the circular dependency is clearer.
    assert urllib.parse.urlparse(run.info.artifact_uri).scheme != "runs"
    if artifact_path is None:
        return run.info.artifact_uri
    return append_to_uri_path(run.info.artifact_uri, artifact_path)
def test_get_store_basic_rest_store():
    """An https tracking URI yields a RestStore with host set and no token."""
    patched_env = {_TRACKING_URI_ENV_VAR: "https://my-tracking-server:5050"}
    with mock.patch.dict(os.environ, patched_env):
        rest_store = _get_store()
        assert isinstance(rest_store, RestStore)
        creds = rest_store.get_host_creds()
        assert creds.host == "https://my-tracking-server:5050"
        assert creds.token is None
def test_get_store_file_store_from_env(tmp_wkdir, uri):
    """A file-style tracking URI from the environment yields a FileStore rooted at it."""
    with mock.patch.dict(os.environ, {_TRACKING_URI_ENV_VAR: uri}):
        file_store = _get_store()
        assert isinstance(file_store, FileStore)
        expected_root = os.path.abspath("other/path")
        assert os.path.abspath(file_store.root_directory) == expected_root
def test_get_store_file_store_from_arg(tmp_wkdir):
    """A path passed directly to `_get_store` yields a FileStore rooted at it."""
    # Patch with an empty mapping so env-var mutations are undone on exit.
    with mock.patch.dict(os.environ, {}):
        file_store = _get_store("other/path")
        assert isinstance(file_store, FileStore)
        expected_root = os.path.abspath("other/path")
        assert os.path.abspath(file_store.root_directory) == expected_root
def _get_host_creds_from_default_store():
    """
    Return a host-credentials provider derived from the default tracking store.

    :raises MlflowException: if the default store is not REST-backed (e.g. a
        local file store), since only a REST store carries host credentials.
    """
    store = utils._get_store()
    if not isinstance(store, RestStore):
        raise MlflowException(
            "Failed to get credentials for DBFS; they are read from the "
            "Databricks CLI credentials or MLFLOW_TRACKING* environment "
            "variables."
        )
    # Intentionally return the bound method itself — callers invoke it later to
    # fetch fresh credentials.
    return store.get_host_creds
def test_get_store_rest_store_with_insecure():
    """TLS verification is disabled when the insecure-TLS env var is set."""
    patched_env = {
        _TRACKING_URI_ENV_VAR: "https://my-tracking-server:5050",
        _TRACKING_INSECURE_TLS_ENV_VAR: "true",
    }
    with mock.patch.dict(os.environ, patched_env):
        rest_store = _get_store()
        assert isinstance(rest_store, RestStore)
        assert rest_store.get_host_creds().ignore_tls_verification
def test_get_store_rest_store_with_token():
    """The tracking-token env var is surfaced through the store's host creds."""
    patched_env = {
        _TRACKING_URI_ENV_VAR: "https://my-tracking-server:5050",
        _TRACKING_TOKEN_ENV_VAR: "my-token",
    }
    with mock.patch.dict(os.environ, patched_env):
        rest_store = _get_store()
        assert isinstance(rest_store, RestStore)
        assert rest_store.get_host_creds().token == "my-token"
def test_get_store_databricks():
    """A 'databricks' tracking URI reads host/token from DATABRICKS_* env vars."""
    patched_env = {
        _TRACKING_URI_ENV_VAR: "databricks",
        "DATABRICKS_HOST": "https://my-tracking-server",
        "DATABRICKS_TOKEN": "abcdef",
    }
    with mock.patch.dict(os.environ, patched_env):
        rest_store = _get_store()
        assert isinstance(rest_store, RestStore)
        creds = rest_store.get_host_creds()
        assert creds.host == "https://my-tracking-server"
        assert creds.token == "abcdef"
def test_get_store_databricks_profile():
    """A 'databricks://<profile>' URI surfaces the profile name in cred errors."""
    patched_env = {_TRACKING_URI_ENV_VAR: "databricks://mycoolprofile"}
    # It's kind of annoying to setup a profile, and we're not really trying to
    # test that anyway, so just check if we raise a relevant exception.
    with mock.patch.dict(os.environ, patched_env):
        rest_store = _get_store()
        assert isinstance(rest_store, RestStore)
        with pytest.raises(MlflowException, match="mycoolprofile"):
            rest_store.get_host_creds()
def test_get_store_rest_store_with_password():
    """Username/password env vars are surfaced through the store's host creds."""
    patched_env = {
        _TRACKING_URI_ENV_VAR: "https://my-tracking-server:5050",
        _TRACKING_USERNAME_ENV_VAR: "Bob",
        _TRACKING_PASSWORD_ENV_VAR: "Ross",
    }
    with mock.patch.dict(os.environ, patched_env):
        rest_store = _get_store()
        assert isinstance(rest_store, RestStore)
        creds = rest_store.get_host_creds()
        assert creds.host == "https://my-tracking-server:5050"
        assert creds.username == "Bob"
        assert creds.password == "Ross"
def test_get_store_sqlalchemy_store(tmp_wkdir, db_type):
    """A database URI yields a SqlAlchemyStore with the default artifact root."""
    uri = "{}://hostname/database".format(db_type)
    with mock.patch.dict(os.environ, {_TRACKING_URI_ENV_VAR: uri}), mock.patch(
        "sqlalchemy.create_engine"
    ) as mock_create_engine, mock.patch(
        "mlflow.store.db.utils._verify_schema"
    ), mock.patch("mlflow.store.db.utils._initialize_tables"):
        store = _get_store()
        assert isinstance(store, SqlAlchemyStore)
        assert store.db_uri == uri
        assert store.artifact_root_uri == "./mlruns"
    mock_create_engine.assert_called_once_with(uri, pool_pre_ping=True)
def test_get_store_sqlalchemy_store(tmp_wkdir, db_type):
    """A database URI yields a SqlAlchemyStore with the default artifact root."""
    uri = "{}://hostname/database".format(db_type)
    with mock.patch.dict(os.environ, {_TRACKING_URI_ENV_VAR: uri}), mock.patch(
        "sqlalchemy.create_engine"
    ) as mock_create_engine, mock.patch(
        "mlflow.store.db.utils._verify_schema"
    ), mock.patch(
        "mlflow.store.db.utils._initialize_tables"
    ), mock.patch(
        # In sqlalchemy 1.4.0, `SqlAlchemyStore.list_experiments`, which is
        # called when fetching the store, errors out against a mocked sqlalchemy
        # engine; accordingly, mock `SqlAlchemyStore.list_experiments` too.
        "mlflow.store.tracking.sqlalchemy_store.SqlAlchemyStore.list_experiments",
        return_value=[],
    ):
        store = _get_store()
        assert isinstance(store, SqlAlchemyStore)
        assert store.db_uri == uri
        assert store.artifact_root_uri == "./mlruns"
    mock_create_engine.assert_called_once_with(uri, pool_pre_ping=True)
def __init__(self, tracking_uri):
    """
    :param tracking_uri: Address of local or remote tracking server.
    """
    self.tracking_uri = tracking_uri
    # Resolve the backing tracking store eagerly from the given URI.
    self.store = utils._get_store(tracking_uri)
def store(self):
    """Tracking store resolved from ``self.tracking_uri`` on each access."""
    tracking_store = utils._get_store(self.tracking_uri)
    return tracking_store
import os

import mlflow
from mlflow.tracking._tracking_service import utils

if __name__ == "__main__":
    mlflow.set_tracking_uri('databricks')
    # Note: get_host_creds will be undefined if not logging to a remote tracking
    # server, e.g. if logging to the local filesystem.
    creds = utils._get_store().get_host_creds()
    print(creds.host, creds.token, os.getgid())