Example #1
0
 def _get_run_link(self, tracking_uri, run_id):
     """
     Build the URL of the run page for ``run_id`` from Databricks workspace information.

     :param tracking_uri: Tracking URI; decides whether workspace information is read through
                          DBUtils or through the Databricks secret manager.
     :param run_id: ID of the run to link to. The run is fetched with ``self.get_run`` to
                    obtain its experiment ID.
     :return: The result of ``construct_run_url`` when the workspace host, the run ID and the
              experiment ID are all truthy; ``None`` otherwise.
     """
     dbutils_usable = is_databricks_default_tracking_uri(tracking_uri) and (
         is_in_databricks_notebook() or is_in_databricks_job())

     if dbutils_usable:
         # Default Databricks tracking URI inside a notebook or a job: DBUtils can report
         # the workspace host and ID directly.
         host, ws_id = get_workspace_info_from_dbutils()
     else:
         # The workspace cannot be detected automatically here. Users need to pass a
         # Databricks profile of the form databricks://scope:prefix and store the host and
         # workspace ID as secrets in the Databricks Secret Manager with scope=<scope> and
         # key=<prefix>-workspaceid.
         host, ws_id = get_workspace_info_from_databricks_secrets(tracking_uri)
         if not ws_id:
             print(
                 "No workspace ID specified; if your Databricks workspaces share the same"
                 " host URL, you may want to specify the workspace ID (along with the host"
                 " information in the secret manager) for run lineage tracking. For more"
                 " details on how to specify this information in the secret manager,"
                 " please refer to the model registry documentation.")

     # The run URL embeds the experiment ID, so the run is looked up before the final check.
     exp_id = self.get_run(run_id).info.experiment_id
     if not (host and run_id and exp_id):
         return None
     return construct_run_url(host, exp_id, run_id, ws_id)
def test_get_workspace_info_from_dbutils_old_runtimes():
    """Host and workspace ID are still returned when calling ``workspaceId`` raises."""
    dbutils = mock.MagicMock()
    context_path = ["notebook.entry_point.getDbutils", "notebook", "getContext"]

    context_json = '{"tags": {"orgId" : "1111", "browserHostName": "mlflow.databricks.com"}}'
    mock_method_chain(dbutils, context_path + ["toJson", "get"], return_value=context_json)
    mock_method_chain(
        dbutils,
        context_path + ["browserHostName", "get"],
        return_value="mlflow.databricks.com",
    )

    # Option-like object returned for the workspace ID tag lookup.
    org_id_option = mock.MagicMock()
    org_id_option.isDefined.return_value = True
    org_id_option.get.return_value = "1111"
    mock_method_chain(dbutils, context_path + ["tags", "get"], return_value=org_id_option)

    # Old runtimes have no "workspaceId" method on the context; simulate that by raising
    # as soon as it is called.
    missing_method_error = Exception("workspaceId method not defined!")
    mock_method_chain(dbutils, context_path + ["workspaceId"], side_effect=missing_method_error)

    with mock.patch("mlflow.utils.databricks_utils._get_dbutils", return_value=dbutils):
        host, ws_id = get_workspace_info_from_dbutils()
        assert host == "https://mlflow.databricks.com"
        assert ws_id == "1111"
 def tags(self):
     """
     Assemble the MLflow tags describing the current Databricks job run.

     The source-name and source-type tags are always present; the source name is ``None``
     unless both the job ID and the job run ID are available. Every other tag is added only
     when its value is not ``None``.

     :return: Dictionary mapping tag keys to their values.
     """
     job_id = databricks_utils.get_job_id()
     run_id = databricks_utils.get_job_run_id()
     job_kind = databricks_utils.get_job_type()
     webapp = databricks_utils.get_webapp_url()
     ws_url, ws_id = databricks_utils.get_workspace_info_from_dbutils()

     if job_id is not None and run_id is not None:
         source_name = "jobs/{job_id}/run/{job_run_id}".format(job_id=job_id,
                                                               job_run_id=run_id)
     else:
         source_name = None

     result = {
         MLFLOW_SOURCE_NAME: source_name,
         MLFLOW_SOURCE_TYPE: SourceType.to_string(SourceType.JOB),
     }

     # Optional tags, listed in the order they are inserted into the result.
     optional_tags = (
         (MLFLOW_DATABRICKS_JOB_ID, job_id),
         (MLFLOW_DATABRICKS_JOB_RUN_ID, run_id),
         (MLFLOW_DATABRICKS_JOB_TYPE, job_kind),
         (MLFLOW_DATABRICKS_WEBAPP_URL, webapp),
         (MLFLOW_DATABRICKS_WORKSPACE_URL, ws_url),
         (MLFLOW_DATABRICKS_WORKSPACE_ID, ws_id),
     )
     for key, value in optional_tags:
         if value is not None:
             result[key] = value
     return result
def test_get_workspace_info_from_dbutils():
    """Check the host and workspace ID returned when the context JSON carries both tags."""
    dbutils = mock.MagicMock()
    # Walk the mocked call chain step by step instead of in one long attribute expression.
    inner_dbutils = dbutils.notebook.entry_point.getDbutils.return_value
    context = inner_dbutils.notebook.return_value.getContext.return_value
    context.toJson.return_value = (
        '{"tags": {"orgId" : "1111", "browserHostName": "mlflow.databricks.com"}}'
    )

    with mock.patch("mlflow.utils.databricks_utils._get_dbutils", return_value=dbutils):
        host, ws_id = get_workspace_info_from_dbutils()
        assert host == "https://mlflow.databricks.com"
        assert ws_id == "1111"
Example #5
0
    def create_model_version(self, name, source, run_id, tags=None, run_link=None):
        """
        Create a new model version from the given source or run ID.

        When the tracking URI is a Databricks URI that differs from the registry URI and no
        ``run_link`` was supplied, a run link is built from the workspace information before
        the request is forwarded to the registry client.

        :param name: Name ID for containing registered model.
        :param source: Source path where the MLflow model is stored.
        :param run_id: Run ID from MLflow tracking server that generated the model.
        :param tags: A dictionary of key-value pairs that are converted into
                     :py:class:`mlflow.entities.model_registry.ModelVersionTag` objects.
        :param run_link: Link to the run from an MLflow tracking server that generated this
                         model.
        :return: Single :py:class:`mlflow.entities.model_registry.ModelVersion` object created
                 by backend.
        """
        tracking_uri = self._tracking_client.tracking_uri

        # Run links are only auto-populated for Databricks tracking backends, and only when
        # the caller did not provide one.
        should_build_link = (
            is_databricks_uri(tracking_uri)
            and tracking_uri != self._registry_uri
            and not run_link
        )
        if should_build_link:
            if is_databricks_default_tracking_uri(tracking_uri) and is_in_databricks_notebook():
                # Default Databricks tracking URI inside a notebook: DBUtils can report the
                # workspace host and ID directly.
                host, ws_id = get_workspace_info_from_dbutils()
            else:
                # The workspace cannot be detected automatically here. Users need to pass a
                # Databricks profile of the form databricks://scope/prefix and store the host
                # and workspace ID as secrets in the Databricks Secret Manager with
                # scope=<scope> and key=<prefix>-workspaceid.
                host, ws_id = get_workspace_info_from_databricks_secrets(tracking_uri)
                if not ws_id:
                    print(
                        "No workspace ID specified; if your Databricks workspaces share the same"
                        " host URL, you may want to specify the workspace ID (along with the host"
                        " information in the secret manager) for run lineage tracking. For more"
                        " details on how to specify this information in the secret manager,"
                        " please refer to the model registry documentation.")

            # The run URL embeds the experiment ID of the run.
            exp_id = self.get_run(run_id).info.experiment_id
            if host and run_id and exp_id:
                run_link = construct_run_url(host, exp_id, run_id, ws_id)

        registry_client = self._get_registry_client()
        return registry_client.create_model_version(
            name=name,
            source=source,
            run_id=run_id,
            tags=tags,
            run_link=run_link,
        )
Example #6
0
def test_get_workspace_info_from_dbutils():
    """Check the host and workspace ID returned for a context JSON with api_url and orgId."""
    dbutils = mock.MagicMock()
    inner_dbutils = dbutils.notebook.entry_point.getDbutils.return_value
    context = inner_dbutils.notebook.return_value.getContext.return_value
    # The adjacent literals concatenate into a single JSON document.
    context.toJson.return_value = (
        '{"extraContext":'
        '{"api_url": "https://mlflow.databricks.com"},'
        '"tags": {"orgId" : "1111"}}'
    )

    with mock.patch("mlflow.utils.databricks_utils._get_dbutils", return_value=dbutils):
        host, ws_id = get_workspace_info_from_dbutils()
        assert host == "https://mlflow.databricks.com"
        assert ws_id == "1111"
Example #7
0
def test_get_workspace_info_from_dbutils():
    """Check the host and workspace ID returned when the context exposes both accessors."""
    dbutils = mock.MagicMock()
    inner_dbutils = dbutils.notebook.entry_point.getDbutils.return_value
    context = inner_dbutils.notebook.return_value.getContext.return_value
    # Both accessors are mocked as call chains ending in ``.get()``.
    context.browserHostName.return_value.get.return_value = "mlflow.databricks.com"
    context.workspaceId.return_value.get.return_value = "1111"

    with mock.patch("mlflow.utils.databricks_utils._get_dbutils", return_value=dbutils):
        host, ws_id = get_workspace_info_from_dbutils()
        assert host == "https://mlflow.databricks.com"
        assert ws_id == "1111"
def test_get_workspace_info_from_dbutils_no_browser_host_name():
    """The expected host is returned when ``browserHostName`` yields ``None`` and ``apiUrl`` is set."""
    dbutils = mock.MagicMock()
    context_path = ["notebook.entry_point.getDbutils", "notebook", "getContext"]

    # No browser host name is available on the context...
    mock_method_chain(dbutils, context_path + ["browserHostName", "get"], return_value=None)
    # ...but the API URL and the workspace ID are.
    mock_method_chain(
        dbutils,
        context_path + ["apiUrl", "get"],
        return_value="https://mlflow.databricks.com",
    )
    mock_method_chain(dbutils, context_path + ["workspaceId", "get"], return_value="1111")

    with mock.patch("mlflow.utils.databricks_utils._get_dbutils", return_value=dbutils):
        host, ws_id = get_workspace_info_from_dbutils()
        assert host == "https://mlflow.databricks.com"
        assert ws_id == "1111"
Example #9
0
 def tags(self):
     """
     Assemble the MLflow tags describing the current Databricks notebook.

     The source-name tag (set to the notebook path, which may be ``None``) and the
     source-type tag are always present. Every other tag is added only when its value is
     not ``None``.

     :return: Dictionary mapping tag keys to their values.
     """
     nb_id = databricks_utils.get_notebook_id()
     nb_path = databricks_utils.get_notebook_path()
     webapp = databricks_utils.get_webapp_url()
     ws_url, ws_id = databricks_utils.get_workspace_info_from_dbutils()

     result = {
         MLFLOW_SOURCE_NAME: nb_path,
         MLFLOW_SOURCE_TYPE: SourceType.to_string(SourceType.NOTEBOOK),
     }

     # Optional tags, listed in the order they are inserted into the result.
     optional_tags = (
         (MLFLOW_DATABRICKS_NOTEBOOK_ID, nb_id),
         (MLFLOW_DATABRICKS_NOTEBOOK_PATH, nb_path),
         (MLFLOW_DATABRICKS_WEBAPP_URL, webapp),
         (MLFLOW_DATABRICKS_WORKSPACE_URL, ws_url),
         (MLFLOW_DATABRICKS_WORKSPACE_ID, ws_id),
     )
     for key, value in optional_tags:
         if value is not None:
             result[key] = value
     return result
def test_get_workspace_info_from_dbutils_when_no_dbutils_available():
    """Both the host and the workspace ID are ``None`` when ``_get_dbutils`` returns ``None``."""
    no_dbutils = mock.patch("mlflow.utils.databricks_utils._get_dbutils", return_value=None)
    with no_dbutils:
        host, ws_id = get_workspace_info_from_dbutils()
        assert host is None
        assert ws_id is None