def _get_run_link(self, tracking_uri, run_id):
    """
    Build the URL of the run identified by ``run_id`` on a Databricks tracking server.

    :param tracking_uri: Tracking URI used to decide where workspace information comes from.
    :param run_id: ID of the run to link to.
    :return: The value produced by ``construct_run_url``, or ``None`` when the workspace
             host, the run ID or the run's experiment ID is missing.
    """
    # With the default Databricks tracking URI, inside a notebook or a job, the
    # workspace information can be read straight from DBUtils.
    can_use_dbutils = is_databricks_default_tracking_uri(tracking_uri) and (
        is_in_databricks_notebook() or is_in_databricks_job()
    )
    if can_use_dbutils:
        host, ws_id = get_workspace_info_from_dbutils()
    else:
        # Otherwise the workspace ID cannot be extracted automatically: users need to pass
        # a databricks profile with the scheme databricks://scope:prefix and store the host
        # and workspace-ID as a secret in the Databricks Secret Manager with scope=<scope>
        # and key=<prefix>-workspaceid.
        host, ws_id = get_workspace_info_from_databricks_secrets(tracking_uri)
        if not ws_id:
            missing_id_notice = (
                "No workspace ID specified; if your Databricks workspaces share the same"
                " host URL, you may want to specify the workspace ID (along with the host"
                " information in the secret manager) for run lineage tracking. For more"
                " details on how to specify this information in the secret manager,"
                " please refer to the model registry documentation."
            )
            print(missing_id_notice)

    # The experiment ID of the run is part of the URL.
    exp_id = self.get_run(run_id).info.experiment_id
    if not (host and run_id and exp_id):
        return None
    return construct_run_url(host, exp_id, run_id, ws_id)
def test_get_workspace_info_from_dbutils_old_runtimes():
    """Host and workspace ID are still returned when calling ``workspaceId`` raises."""
    fake_dbutils = mock.MagicMock()
    context_chain = ["notebook.entry_point.getDbutils", "notebook", "getContext"]
    context_json = '{"tags": {"orgId" : "1111", "browserHostName": "mlflow.databricks.com"}}'

    mock_method_chain(
        fake_dbutils,
        context_chain + ["toJson", "get"],
        return_value=context_json,
    )
    mock_method_chain(
        fake_dbutils,
        context_chain + ["browserHostName", "get"],
        return_value="mlflow.databricks.com",
    )

    # Stand-in for the workspace ID tag option
    org_id_tag = mock.MagicMock()
    org_id_tag.isDefined.return_value = True
    org_id_tag.get.return_value = "1111"
    mock_method_chain(
        fake_dbutils,
        context_chain + ["tags", "get"],
        return_value=org_id_tag,
    )

    # Old runtimes have no "workspaceId" method; imitate that by raising when it is called
    missing_method_error = Exception("workspaceId method not defined!")
    mock_method_chain(
        fake_dbutils,
        context_chain + ["workspaceId"],
        side_effect=missing_method_error,
    )

    dbutils_patch = mock.patch(
        "mlflow.utils.databricks_utils._get_dbutils", return_value=fake_dbutils
    )
    with dbutils_patch:
        host, ws_id = get_workspace_info_from_dbutils()
        assert host == "https://mlflow.databricks.com"
        assert ws_id == "1111"
def tags(self):
    """
    Collect the run tags describing the current Databricks job.

    The source name and source type tags are always present (the source name is ``None``
    unless both the job ID and the job run ID are known). Every other tag is added only
    when its value is not ``None``.

    :return: Dictionary mapping tag keys to their values.
    """
    job_id = databricks_utils.get_job_id()
    job_run_id = databricks_utils.get_job_run_id()
    job_type = databricks_utils.get_job_type()
    webapp_url = databricks_utils.get_webapp_url()
    workspace_url, workspace_id = databricks_utils.get_workspace_info_from_dbutils()

    if job_id is not None and job_run_id is not None:
        source_name = "jobs/{job_id}/run/{job_run_id}".format(
            job_id=job_id, job_run_id=job_run_id
        )
    else:
        source_name = None

    result = {
        MLFLOW_SOURCE_NAME: source_name,
        MLFLOW_SOURCE_TYPE: SourceType.to_string(SourceType.JOB),
    }

    # Optional tags, listed in the order they are inserted.
    optional_tags = (
        (MLFLOW_DATABRICKS_JOB_ID, job_id),
        (MLFLOW_DATABRICKS_JOB_RUN_ID, job_run_id),
        (MLFLOW_DATABRICKS_JOB_TYPE, job_type),
        (MLFLOW_DATABRICKS_WEBAPP_URL, webapp_url),
        (MLFLOW_DATABRICKS_WORKSPACE_URL, workspace_url),
        (MLFLOW_DATABRICKS_WORKSPACE_ID, workspace_id),
    )
    for tag_key, tag_value in optional_tags:
        if tag_value is not None:
            result[tag_key] = tag_value
    return result
def test_get_workspace_info_from_dbutils():
    """Host and workspace ID are read from the JSON returned by the context's ``toJson``."""
    fake_dbutils = mock.MagicMock()

    # Walk the mocked call chain step by step instead of in one long attribute path.
    inner_dbutils = fake_dbutils.notebook.entry_point.getDbutils.return_value
    context = inner_dbutils.notebook.return_value.getContext.return_value
    context.toJson.return_value = (
        '{"tags": {"orgId" : "1111", "browserHostName": "mlflow.databricks.com"}}'
    )

    dbutils_patch = mock.patch(
        "mlflow.utils.databricks_utils._get_dbutils", return_value=fake_dbutils
    )
    with dbutils_patch:
        host, ws_id = get_workspace_info_from_dbutils()
        assert host == "https://mlflow.databricks.com"
        assert ws_id == "1111"
def create_model_version(self, name, source, run_id, tags=None, run_link=None):
    """
    Register a new model version, filling in the run link for Databricks backends.

    When the tracking URI is a Databricks URI that differs from the registry URI and no
    ``run_link`` was supplied, a link to the run is constructed before the request is
    forwarded to the registry client.

    :param name: Name ID for containing registered model.
    :param source: Source path where the MLflow model is stored.
    :param run_id: Run ID from MLflow tracking server that generated the model
    :param tags: A dictionary of key-value pairs that are converted into
                 :py:class:`mlflow.entities.model_registry.ModelVersionTag` objects.
    :param run_link: Link to the run from an MLflow tracking server that generated this model.
    :return: Single :py:class:`mlflow.entities.model_registry.ModelVersion` object created by
             backend.
    """
    tracking_uri = self._tracking_client.tracking_uri

    # The run link is only populated automatically for Databricks backends.
    should_build_link = (
        is_databricks_uri(tracking_uri)
        and tracking_uri != self._registry_uri
        and not run_link
    )
    if should_build_link:
        # With the default Databricks tracking URI, inside a notebook, the workspace
        # information can be read straight from DBUtils.
        if is_databricks_default_tracking_uri(tracking_uri) and is_in_databricks_notebook():
            host, ws_id = get_workspace_info_from_dbutils()
        else:
            # Otherwise the workspace ID cannot be extracted automatically: users need to
            # pass a databricks profile with the scheme databricks://scope/prefix and store
            # the host and workspace-ID as a secret in the Databricks Secret Manager with
            # scope=<scope> and key=<prefix>-workspaceid.
            host, ws_id = get_workspace_info_from_databricks_secrets(tracking_uri)
            if not ws_id:
                missing_id_notice = (
                    "No workspace ID specified; if your Databricks workspaces share the same"
                    " host URL, you may want to specify the workspace ID (along with the host"
                    " information in the secret manager) for run lineage tracking. For more"
                    " details on how to specify this information in the secret manager,"
                    " please refer to the model registry documentation."
                )
                print(missing_id_notice)

        # The experiment ID of the run is part of the URL.
        exp_id = self.get_run(run_id).info.experiment_id
        if host and run_id and exp_id:
            run_link = construct_run_url(host, exp_id, run_id, ws_id)

    registry_client = self._get_registry_client()
    return registry_client.create_model_version(
        name=name,
        source=source,
        run_id=run_id,
        tags=tags,
        run_link=run_link,
    )
def test_get_workspace_info_from_dbutils():
    """Host comes from ``extraContext.api_url`` and workspace ID from the ``orgId`` tag."""
    fake_dbutils = mock.MagicMock()

    # Walk the mocked call chain step by step instead of in one long attribute path.
    inner_dbutils = fake_dbutils.notebook.entry_point.getDbutils.return_value
    context = inner_dbutils.notebook.return_value.getContext.return_value
    context.toJson.return_value = (
        '{"extraContext":'
        '{"api_url": "https://mlflow.databricks.com"},'
        '"tags": {"orgId" : "1111"}}'
    )

    dbutils_patch = mock.patch(
        "mlflow.utils.databricks_utils._get_dbutils", return_value=fake_dbutils
    )
    with dbutils_patch:
        host, ws_id = get_workspace_info_from_dbutils()
        assert host == "https://mlflow.databricks.com"
        assert ws_id == "1111"
def test_get_workspace_info_from_dbutils():
    """Host and workspace ID are read from the context's ``browserHostName``/``workspaceId``."""
    fake_dbutils = mock.MagicMock()

    # Walk the mocked call chain step by step instead of in one long attribute path.
    inner_dbutils = fake_dbutils.notebook.entry_point.getDbutils.return_value
    context = inner_dbutils.notebook.return_value.getContext.return_value
    context.browserHostName.return_value.get.return_value = "mlflow.databricks.com"
    context.workspaceId.return_value.get.return_value = "1111"

    dbutils_patch = mock.patch(
        "mlflow.utils.databricks_utils._get_dbutils", return_value=fake_dbutils
    )
    with dbutils_patch:
        host, ws_id = get_workspace_info_from_dbutils()
        assert host == "https://mlflow.databricks.com"
        assert ws_id == "1111"
def test_get_workspace_info_from_dbutils_no_browser_host_name():
    """The ``apiUrl`` value is returned as host when ``browserHostName`` yields ``None``."""
    fake_dbutils = mock.MagicMock()
    context_chain = ["notebook.entry_point.getDbutils", "notebook", "getContext"]

    mock_method_chain(
        fake_dbutils,
        context_chain + ["browserHostName", "get"],
        return_value=None,
    )
    mock_method_chain(
        fake_dbutils,
        context_chain + ["apiUrl", "get"],
        return_value="https://mlflow.databricks.com",
    )
    mock_method_chain(
        fake_dbutils,
        context_chain + ["workspaceId", "get"],
        return_value="1111",
    )

    dbutils_patch = mock.patch(
        "mlflow.utils.databricks_utils._get_dbutils", return_value=fake_dbutils
    )
    with dbutils_patch:
        host, ws_id = get_workspace_info_from_dbutils()
        assert host == "https://mlflow.databricks.com"
        assert ws_id == "1111"
def tags(self):
    """
    Collect the run tags describing the current Databricks notebook.

    The source name (the notebook path) and source type tags are always present. Every
    other tag is added only when its value is not ``None``.

    :return: Dictionary mapping tag keys to their values.
    """
    notebook_id = databricks_utils.get_notebook_id()
    notebook_path = databricks_utils.get_notebook_path()
    webapp_url = databricks_utils.get_webapp_url()
    workspace_url, workspace_id = databricks_utils.get_workspace_info_from_dbutils()

    result = {
        MLFLOW_SOURCE_NAME: notebook_path,
        MLFLOW_SOURCE_TYPE: SourceType.to_string(SourceType.NOTEBOOK),
    }

    # Optional tags, listed in the order they are inserted.
    optional_tags = (
        (MLFLOW_DATABRICKS_NOTEBOOK_ID, notebook_id),
        (MLFLOW_DATABRICKS_NOTEBOOK_PATH, notebook_path),
        (MLFLOW_DATABRICKS_WEBAPP_URL, webapp_url),
        (MLFLOW_DATABRICKS_WORKSPACE_URL, workspace_url),
        (MLFLOW_DATABRICKS_WORKSPACE_ID, workspace_id),
    )
    for tag_key, tag_value in optional_tags:
        if tag_value is not None:
            result[tag_key] = tag_value
    return result
def test_get_workspace_info_from_dbutils_when_no_dbutils_available():
    """Both host and workspace ID are ``None`` when ``_get_dbutils`` returns ``None``."""
    dbutils_patch = mock.patch(
        "mlflow.utils.databricks_utils._get_dbutils", return_value=None
    )
    with dbutils_patch:
        host, ws_id = get_workspace_info_from_dbutils()
        assert host is None
        assert ws_id is None