Example #1
0
def _get_experiment_id():
    """Resolve the experiment ID for the current context.

    Precedence: explicitly activated experiment, then the environment
    variable, then the notebook ID when running inside a Databricks
    notebook, and finally the deprecated default.
    """
    # TODO: Replace with None for 1.0, leaving for 0.9.1 release backcompat with existing servers
    fallback_experiment_id = "0"

    resolved = _active_experiment_id or _get_experiment_id_from_env()
    if resolved:
        return resolved
    notebook_experiment_id = is_in_databricks_notebook() and get_notebook_id()
    return notebook_experiment_id or fallback_experiment_id
def test_use_repl_context_if_available(tmpdir):
    """Verify REPL-context lookups are preferred when available, with fallbacks otherwise."""
    # At this point `dbruntime.databricks_repl_context.get_context` must be unimportable.
    with pytest.raises(ModuleNotFoundError,
                       match="No module named 'dbruntime'"):
        from dbruntime.databricks_repl_context import get_context  # pylint: disable=unused-import

    cmd_ctx_mock = mock.MagicMock()
    cmd_ctx_mock.jobId().get.return_value = "job_id"
    with mock.patch(
            "mlflow.utils.databricks_utils._get_command_context",
            return_value=cmd_ctx_mock) as get_cmd_ctx_mock:
        assert databricks_utils.get_job_id() == "job_id"
        get_cmd_ctx_mock.assert_called_once()

    # Install a stub databricks_repl_context module so it becomes importable.
    tmpdir.mkdir("dbruntime").join("databricks_repl_context.py").write("""
def get_context():
    pass
""")
    sys.path.append(tmpdir.strpath)

    # An uninitialized (None) REPL context falls back to the command context.
    with mock.patch(
            "dbruntime.databricks_repl_context.get_context",
            return_value=None,
    ) as repl_ctx_mock, mock.patch(
            "mlflow.utils.databricks_utils._get_command_context",
            return_value=cmd_ctx_mock) as get_cmd_ctx_mock:
        assert databricks_utils.get_job_id() == "job_id"
        get_cmd_ctx_mock.assert_called_once()

    # A populated REPL context answers directly; dbutils is never consulted.
    with mock.patch(
            "dbruntime.databricks_repl_context.get_context",
            return_value=mock.MagicMock(jobId="job_id"),
    ) as repl_ctx_mock, mock.patch(
            "mlflow.utils.databricks_utils._get_dbutils") as dbutils_mock:
        assert databricks_utils.get_job_id() == "job_id"
        repl_ctx_mock.assert_called_once()
        dbutils_mock.assert_not_called()

    # Notebook ID comes from the REPL context, not the Spark context.
    with mock.patch(
            "dbruntime.databricks_repl_context.get_context",
            return_value=mock.MagicMock(notebookId="notebook_id"),
    ) as repl_ctx_mock, mock.patch(
            "mlflow.utils.databricks_utils._get_property_from_spark_context"
    ) as spark_ctx_mock:
        assert databricks_utils.get_notebook_id() == "notebook_id"
        repl_ctx_mock.assert_called_once()
        spark_ctx_mock.assert_not_called()

    # Cluster detection also short-circuits via the REPL context.
    with mock.patch(
            "dbruntime.databricks_repl_context.get_context",
            return_value=mock.MagicMock(isInCluster=True),
    ) as repl_ctx_mock, mock.patch(
            "mlflow.utils._spark_utils._get_active_spark_session"
    ) as spark_session_mock:
        assert databricks_utils.is_in_cluster()
        repl_ctx_mock.assert_called_once()
        spark_session_mock.assert_not_called()
Example #3
0
    def request_headers(self):
        """Build request headers describing the current Databricks execution context."""
        headers = {}
        if databricks_utils.is_in_databricks_notebook():
            headers["notebook_id"] = databricks_utils.get_notebook_id()
        if databricks_utils.is_in_databricks_job():
            # Attach all job-related identifiers in one pass.
            for header_name, header_value in (
                    ("job_id", databricks_utils.get_job_id()),
                    ("job_run_id", databricks_utils.get_job_run_id()),
                    ("job_type", databricks_utils.get_job_type()),
            ):
                headers[header_name] = header_value
        if databricks_utils.is_in_cluster():
            headers["cluster_id"] = databricks_utils.get_cluster_id()

        return headers
Example #4
0
 def tags(self):
     """Return MLflow source tags for the current Databricks notebook context."""
     nb_id = databricks_utils.get_notebook_id()
     nb_path = databricks_utils.get_notebook_path()
     url = databricks_utils.get_webapp_url()
     result = {
         MLFLOW_SOURCE_NAME: nb_path,
         MLFLOW_SOURCE_TYPE: SourceType.to_string(SourceType.NOTEBOOK)
     }
     # Only attach notebook metadata that could actually be resolved.
     for tag_key, tag_value in (
             (MLFLOW_DATABRICKS_NOTEBOOK_ID, nb_id),
             (MLFLOW_DATABRICKS_NOTEBOOK_PATH, nb_path),
             (MLFLOW_DATABRICKS_WEBAPP_URL, url),
     ):
         if tag_value is not None:
             result[tag_key] = tag_value
     return result
Example #5
0
 def tags(self):
     """Return MLflow source tags, including workspace info, for the current notebook."""
     nb_id = databricks_utils.get_notebook_id()
     nb_path = databricks_utils.get_notebook_path()
     webapp = databricks_utils.get_webapp_url()
     ws_url = databricks_utils.get_workspace_url()
     ws_url_fallback, ws_id = databricks_utils.get_workspace_info_from_dbutils()
     result = {
         MLFLOW_SOURCE_NAME: nb_path,
         MLFLOW_SOURCE_TYPE: SourceType.to_string(SourceType.NOTEBOOK),
     }
     if nb_id is not None:
         result[MLFLOW_DATABRICKS_NOTEBOOK_ID] = nb_id
     if nb_path is not None:
         result[MLFLOW_DATABRICKS_NOTEBOOK_PATH] = nb_path
     if webapp is not None:
         result[MLFLOW_DATABRICKS_WEBAPP_URL] = webapp
     # Prefer the directly-resolved workspace URL; otherwise use the dbutils-derived one.
     preferred_ws_url = ws_url if ws_url is not None else ws_url_fallback
     if preferred_ws_url is not None:
         result[MLFLOW_DATABRICKS_WORKSPACE_URL] = preferred_ws_url
     if ws_id is not None:
         result[MLFLOW_DATABRICKS_WORKSPACE_ID] = ws_id
     return result
Example #6
0
def _build_databricks_notebook_tags(notebook_id, notebook_path, webapp_url):
    """Build the Databricks notebook tag dict, skipping any value that is None."""
    tags = {}
    if notebook_id is not None:
        tags[MLFLOW_DATABRICKS_NOTEBOOK_ID] = notebook_id
    if notebook_path is not None:
        tags[MLFLOW_DATABRICKS_NOTEBOOK_PATH] = notebook_path
    if webapp_url is not None:
        tags[MLFLOW_DATABRICKS_WEBAPP_URL] = webapp_url
    return tags


def start_run(run_uuid=None,
              experiment_id=None,
              source_name=None,
              source_version=None,
              entry_point_name=None,
              source_type=None,
              run_name=None):
    """
    Start a new MLflow run, setting it as the active run under which metrics and parameters
    will be logged. The return value can be used as a context manager within a ``with`` block;
    otherwise, you must call ``end_run()`` to terminate the current run.

    If you pass a ``run_uuid`` or the ``MLFLOW_RUN_ID`` environment variable is set,
    ``start_run`` attempts to resume a run with the specified run ID and
    other parameters are ignored. ``run_uuid`` takes precedence over ``MLFLOW_RUN_ID``.

    :param run_uuid: If specified, get the run with the specified UUID and log parameters
                     and metrics under that run. The run's end time is unset and its status
                     is set to running, but the run's other attributes (``source_version``,
                     ``source_type``, etc.) are not changed.
    :param experiment_id: ID of the experiment under which to create the current run (applicable
                          only when ``run_uuid`` is not specified). If ``experiment_id`` argument
                          is unspecified, will look for valid experiment in the following order:
                          activated using ``set_experiment``, ``MLFLOW_EXPERIMENT_ID`` env variable,
                          or the default experiment.
    :param source_name: Name of the source file or URI of the project to be associated with the run.
                        If none provided defaults to the current file.
    :param source_version: Optional Git commit hash to associate with the run.
    :param entry_point_name: Optional name of the entry point for the current run.
    :param source_type: Integer :py:class:`mlflow.entities.SourceType` describing the type
                        of the run ("local", "project", etc.). Defaults to
                        :py:class:`mlflow.entities.SourceType.LOCAL` ("local").
    :param run_name: Name of new run. Used only when ``run_uuid`` is unspecified.
    :return: :py:class:`mlflow.ActiveRun` object that acts as a context manager wrapping
             the run's state.
    """
    global _active_run
    # Nested runs are not supported: refuse to start while another run is active.
    if _active_run:
        raise Exception(
            "Run with UUID %s is already active, unable to start nested "
            "run" % _active_run.info.run_uuid)
    # An explicit run_uuid argument takes precedence over the MLFLOW_RUN_ID env var.
    existing_run_uuid = run_uuid or os.environ.get(_RUN_ID_ENV_VAR, None)
    if existing_run_uuid:
        # Resume the existing run; creation-time parameters are ignored.
        _validate_run_id(existing_run_uuid)
        active_run_obj = MlflowClient().get_run(existing_run_uuid)
    else:
        exp_id_for_run = experiment_id or _get_experiment_id()
        if is_in_databricks_notebook():
            # Inside a Databricks notebook, the notebook itself is the source and
            # its metadata is attached as run tags.
            notebook_id = get_notebook_id()
            notebook_path = get_notebook_path()
            webapp_url = get_webapp_url()
            databricks_tags = _build_databricks_notebook_tags(
                notebook_id, notebook_path, webapp_url)
            active_run_obj = MlflowClient().create_run(
                experiment_id=exp_id_for_run,
                run_name=run_name,
                source_name=notebook_path,
                source_version=source_version or _get_source_version(),
                entry_point_name=entry_point_name,
                source_type=SourceType.NOTEBOOK,
                tags=databricks_tags)
        else:
            active_run_obj = MlflowClient().create_run(
                experiment_id=exp_id_for_run,
                run_name=run_name,
                source_name=source_name or _get_source_name(),
                source_version=source_version or _get_source_version(),
                entry_point_name=entry_point_name,
                source_type=source_type or _get_source_type())
    _active_run = ActiveRun(active_run_obj)
    return _active_run
Example #7
0
def _get_experiment_id():
    """Resolve the active experiment ID as an int.

    Precedence: explicitly activated experiment, then the env variable,
    then (when auto-detection is enabled) the Databricks notebook ID,
    and finally the default experiment.
    """
    candidate = (_active_experiment_id
                 or env.get_env(_EXPERIMENT_ID_ENV_VAR)
                 or (env.get_env(_AUTODETECT_EXPERIMENT)
                     and is_in_databricks_notebook()
                     and get_notebook_id())
                 or Experiment.DEFAULT_EXPERIMENT_ID)
    return int(candidate)
Example #8
0
 def get_experiment_id(self):
     """Use the current Databricks notebook ID as the experiment ID."""
     notebook_id = databricks_utils.get_notebook_id()
     return notebook_id
Example #9
0
def _get_experiment_id():
    """Resolve the active experiment ID as an int, falling back to the default.

    Each fallback is only evaluated when the previous source yielded nothing,
    mirroring short-circuit ``or`` chaining.
    """
    exp_id = _active_experiment_id
    if not exp_id:
        exp_id = _get_experiment_id_from_env()
    if not exp_id:
        exp_id = is_in_databricks_notebook() and get_notebook_id()
    if not exp_id:
        exp_id = Experiment.DEFAULT_EXPERIMENT_ID
    return int(exp_id)