def test_databricks_notebook_run_context_tags_nones():
    patch_notebook_id = mock.patch(
        "mlflow.utils.databricks_utils.get_notebook_id", return_value=None)
    patch_notebook_path = mock.patch(
        "mlflow.utils.databricks_utils.get_notebook_path", return_value=None)
    patch_webapp_url = mock.patch(
        "mlflow.utils.databricks_utils.get_webapp_url", return_value=None)

    with patch_notebook_id, patch_notebook_path, patch_webapp_url:
        assert DatabricksNotebookRunContext().tags() == {
            MLFLOW_SOURCE_NAME: None,
            MLFLOW_SOURCE_TYPE: SourceType.to_string(SourceType.NOTEBOOK),
        }
Example #2
def test_run_local_git_repo(
        local_git_repo,
        local_git_repo_uri,
        tracking_uri_mock,  # pylint: disable=unused-argument
        use_start_run,
        version):
    if version is not None:
        uri = local_git_repo_uri + "#" + TEST_PROJECT_NAME
    else:
        uri = os.path.join("%s/" % local_git_repo, TEST_PROJECT_NAME)
    if version == "git-commit":
        version = _get_version_local_git_repo(local_git_repo)
    submitted_run = mlflow.projects.run(
        uri,
        entry_point="test_tracking",
        version=version,
        parameters={"use_start_run": use_start_run},
        use_conda=False,
        experiment_id=0)

    # Blocking runs should be finished when they return
    validate_exit_status(submitted_run.get_status(), RunStatus.FINISHED)
    # Test that we can call wait() on a synchronous run & that the run has the correct
    # status after calling wait().
    submitted_run.wait()
    validate_exit_status(submitted_run.get_status(), RunStatus.FINISHED)
    # Validate run contents in the FileStore
    run_uuid = submitted_run.run_id
    mlflow_service = mlflow.tracking.MlflowClient()
    run_infos = mlflow_service.list_run_infos(
        experiment_id=0, run_view_type=ViewType.ACTIVE_ONLY)
    assert "file:" in run_infos[0].source_name
    assert len(run_infos) == 1
    store_run_uuid = run_infos[0].run_uuid
    assert run_uuid == store_run_uuid
    run = mlflow_service.get_run(run_uuid)

    assert run.info.status == RunStatus.FINISHED
    assert run.data.params == {"use_start_run": use_start_run}
    assert run.data.metrics == {"some_key": 3}

    tags = run.data.tags
    assert "file:" in tags[MLFLOW_SOURCE_NAME]
    assert tags[MLFLOW_SOURCE_TYPE] == SourceType.to_string(SourceType.PROJECT)
    assert tags[MLFLOW_PROJECT_ENTRY_POINT] == "test_tracking"

    if version == "master":
        assert tags[MLFLOW_GIT_BRANCH] == "master"
        assert tags[MLFLOW_GIT_REPO_URL] == local_git_repo_uri
        assert tags[LEGACY_MLFLOW_GIT_BRANCH_NAME] == "master"
        assert tags[LEGACY_MLFLOW_GIT_REPO_URL] == local_git_repo_uri
Example #3
def test_start_run_defaults_databricks_notebook(empty_active_run_stack):

    mock_experiment_id = mock.Mock()
    experiment_id_patch = mock.patch(
        "mlflow.tracking.fluent._get_experiment_id", return_value=mock_experiment_id
    )
    databricks_notebook_patch = mock.patch(
        "mlflow.utils.databricks_utils.is_in_databricks_notebook", return_value=True
    )
    mock_user = mock.Mock()
    user_patch = mock.patch(
        "mlflow.tracking.context.default_context._get_user", return_value=mock_user
    )
    mock_source_version = mock.Mock()
    source_version_patch = mock.patch(
        "mlflow.tracking.context.git_context._get_source_version", return_value=mock_source_version
    )
    mock_notebook_id = mock.Mock()
    notebook_id_patch = mock.patch(
        "mlflow.utils.databricks_utils.get_notebook_id", return_value=mock_notebook_id
    )
    mock_notebook_path = mock.Mock()
    notebook_path_patch = mock.patch(
        "mlflow.utils.databricks_utils.get_notebook_path", return_value=mock_notebook_path
    )
    mock_webapp_url = mock.Mock()
    webapp_url_patch = mock.patch(
        "mlflow.utils.databricks_utils.get_webapp_url", return_value=mock_webapp_url
    )

    expected_tags = {
        mlflow_tags.MLFLOW_USER: mock_user,
        mlflow_tags.MLFLOW_SOURCE_NAME: mock_notebook_path,
        mlflow_tags.MLFLOW_SOURCE_TYPE: SourceType.to_string(SourceType.NOTEBOOK),
        mlflow_tags.MLFLOW_GIT_COMMIT: mock_source_version,
        mlflow_tags.MLFLOW_DATABRICKS_NOTEBOOK_ID: mock_notebook_id,
        mlflow_tags.MLFLOW_DATABRICKS_NOTEBOOK_PATH: mock_notebook_path,
        mlflow_tags.MLFLOW_DATABRICKS_WEBAPP_URL: mock_webapp_url
    }

    create_run_patch = mock.patch.object(MlflowClient, "create_run")

    with experiment_id_patch, databricks_notebook_patch, user_patch, source_version_patch, \
            notebook_id_patch, notebook_path_patch, webapp_url_patch, create_run_patch:
        active_run = start_run()
        MlflowClient.create_run.assert_called_once_with(
            experiment_id=mock_experiment_id,
            tags=expected_tags
        )
        assert is_from_run(active_run, MlflowClient.create_run.return_value)
Example #4
def test_databricks_job_run_context_tags_nones():
    patch_job_id = mock.patch("mlflow.utils.databricks_utils.get_job_id", return_value=None)
    patch_job_run_id = mock.patch("mlflow.utils.databricks_utils.get_job_run_id", return_value=None)
    patch_job_type = mock.patch("mlflow.utils.databricks_utils.get_job_type", return_value=None)
    patch_webapp_url = mock.patch("mlflow.utils.databricks_utils.get_webapp_url", return_value=None)
    patch_workspace_info = mock.patch(
        "mlflow.utils.databricks_utils.get_workspace_info_from_dbutils", return_value=(None, None)
    )

    with patch_job_id, patch_job_run_id, patch_job_type, patch_webapp_url, patch_workspace_info:
        assert DatabricksJobRunContext().tags() == {
            MLFLOW_SOURCE_NAME: None,
            MLFLOW_SOURCE_TYPE: SourceType.to_string(SourceType.JOB),
        }
Example #5
def test_start_run_overrides_databricks_notebook(empty_active_run_stack):

    databricks_notebook_patch = mock.patch(
        "mlflow.utils.databricks_utils.is_in_databricks_notebook",
        return_value=True)
    mock_notebook_id = mock.Mock()
    notebook_id_patch = mock.patch(
        "mlflow.utils.databricks_utils.get_notebook_id",
        return_value=mock_notebook_id)
    mock_notebook_path = mock.Mock()
    notebook_path_patch = mock.patch(
        "mlflow.utils.databricks_utils.get_notebook_path",
        return_value=mock_notebook_path)
    mock_webapp_url = mock.Mock()
    webapp_url_patch = mock.patch(
        "mlflow.utils.databricks_utils.get_webapp_url",
        return_value=mock_webapp_url)

    mock_experiment_id = mock.Mock()
    mock_source_name = mock.Mock()
    source_type = SourceType.JOB
    mock_source_version = mock.Mock()
    mock_entry_point_name = mock.Mock()
    mock_run_name = mock.Mock()

    expected_tags = {
        mlflow_tags.MLFLOW_SOURCE_NAME: mock_source_name,
        mlflow_tags.MLFLOW_SOURCE_TYPE: SourceType.to_string(source_type),
        mlflow_tags.MLFLOW_GIT_COMMIT: mock_source_version,
        mlflow_tags.MLFLOW_PROJECT_ENTRY_POINT: mock_entry_point_name,
        mlflow_tags.MLFLOW_DATABRICKS_NOTEBOOK_ID: mock_notebook_id,
        mlflow_tags.MLFLOW_DATABRICKS_NOTEBOOK_PATH: mock_notebook_path,
        mlflow_tags.MLFLOW_DATABRICKS_WEBAPP_URL: mock_webapp_url
    }

    create_run_patch = mock.patch.object(MlflowClient, "create_run")

    with databricks_notebook_patch, notebook_id_patch, notebook_path_patch, webapp_url_patch, \
            create_run_patch:
        active_run = start_run(experiment_id=mock_experiment_id,
                               source_name=mock_source_name,
                               source_version=mock_source_version,
                               entry_point_name=mock_entry_point_name,
                               source_type=source_type,
                               run_name=mock_run_name)
        MlflowClient.create_run.assert_called_once_with(
            experiment_id=mock_experiment_id,
            run_name=mock_run_name,
            tags=expected_tags)
        assert is_from_run(active_run, MlflowClient.create_run.return_value)
Example #6
def test_start_run_creates_new_run_with_user_specified_tags():

    mock_experiment_id = mock.Mock()
    experiment_id_patch = mock.patch(
        "mlflow.tracking.fluent._get_experiment_id",
        return_value=mock_experiment_id)
    mock_user = mock.Mock()
    user_patch = mock.patch(
        "mlflow.tracking.context.default_context._get_user",
        return_value=mock_user)
    mock_source_name = mock.Mock()
    source_name_patch = mock.patch(
        "mlflow.tracking.context.default_context._get_source_name",
        return_value=mock_source_name)
    source_type_patch = mock.patch(
        "mlflow.tracking.context.default_context._get_source_type",
        return_value=SourceType.NOTEBOOK)
    mock_source_version = mock.Mock()
    source_version_patch = mock.patch(
        "mlflow.tracking.context.git_context._get_source_version",
        return_value=mock_source_version)
    user_specified_tags = {
        "ml_task": "regression",
        "num_layers": 7,
        mlflow_tags.MLFLOW_USER: "******",
    }
    expected_tags = {
        mlflow_tags.MLFLOW_SOURCE_NAME: mock_source_name,
        mlflow_tags.MLFLOW_SOURCE_TYPE: SourceType.to_string(SourceType.NOTEBOOK),
        mlflow_tags.MLFLOW_GIT_COMMIT: mock_source_version,
        mlflow_tags.MLFLOW_USER: "******",
        "ml_task": "regression",
        "num_layers": 7,
    }

    create_run_patch = mock.patch.object(MlflowClient, "create_run")

    with multi_context(
            experiment_id_patch,
            user_patch,
            source_name_patch,
            source_type_patch,
            source_version_patch,
            create_run_patch,
    ):
        active_run = start_run(tags=user_specified_tags)
        MlflowClient.create_run.assert_called_once_with(
            experiment_id=mock_experiment_id, tags=expected_tags)
        assert is_from_run(active_run, MlflowClient.create_run.return_value)
Example #7
def test_databricks_notebook_run_context_tags():
    patch_notebook_id = mock.patch("mlflow.utils.databricks_utils.get_notebook_id")
    patch_notebook_path = mock.patch("mlflow.utils.databricks_utils.get_notebook_path")
    patch_webapp_url = mock.patch("mlflow.utils.databricks_utils.get_webapp_url")

    with patch_notebook_id as notebook_id_mock, patch_notebook_path as notebook_path_mock, \
            patch_webapp_url as webapp_url_mock:
        assert DatabricksNotebookRunContext().tags() == {
            MLFLOW_SOURCE_NAME: notebook_path_mock.return_value,
            MLFLOW_SOURCE_TYPE: SourceType.to_string(SourceType.NOTEBOOK),
            MLFLOW_DATABRICKS_NOTEBOOK_ID: notebook_id_mock.return_value,
            MLFLOW_DATABRICKS_NOTEBOOK_PATH: notebook_path_mock.return_value,
            MLFLOW_DATABRICKS_WEBAPP_URL: webapp_url_mock.return_value
        }
Example #8
def _create_run(uri, experiment_id, work_dir, version, entry_point, parameters):
    """
    Create a ``Run`` against the current MLflow tracking server, logging metadata about the run
    (e.g. the project's URI, entry point, and parameters). Return an ``ActiveRun`` that can be
    used to report additional data about the run (metrics/params) to the tracking server.
    """
    if _is_local_uri(uri):
        source_name = tracking._tracking_service.utils._get_git_url_if_present(_expand_uri(uri))
    else:
        source_name = _expand_uri(uri)
    source_version = _get_git_commit(work_dir)
    existing_run = fluent.active_run()
    if existing_run:
        parent_run_id = existing_run.info.run_id
    else:
        parent_run_id = None

    tags = {
        MLFLOW_USER: _get_user(),
        MLFLOW_SOURCE_NAME: source_name,
        MLFLOW_SOURCE_TYPE: SourceType.to_string(SourceType.PROJECT),
        MLFLOW_PROJECT_ENTRY_POINT: entry_point,
    }
    if source_version is not None:
        tags[MLFLOW_GIT_COMMIT] = source_version
    if parent_run_id is not None:
        tags[MLFLOW_PARENT_RUN_ID] = parent_run_id

    repo_url = _get_git_repo_url(work_dir)
    if repo_url is not None:
        tags[MLFLOW_GIT_REPO_URL] = repo_url
        tags[LEGACY_MLFLOW_GIT_REPO_URL] = repo_url

    # Add branch name tag if a branch is specified through -version
    if _is_valid_branch_name(work_dir, version):
        tags[MLFLOW_GIT_BRANCH] = version
        tags[LEGACY_MLFLOW_GIT_BRANCH_NAME] = version
    active_run = tracking.MlflowClient().create_run(experiment_id=experiment_id, tags=tags)

    project = _project_spec.load_project(work_dir)
    # Consolidate parameters for logging.
    # `storage_dir` is `None` since we want to log the actual path, not a downloaded local path
    entry_point_obj = project.get_entry_point(entry_point)
    final_params, extra_params = entry_point_obj.compute_parameters(parameters, storage_dir=None)
    params_list = [
        Param(key, value) for key, value in list(final_params.items()) + list(extra_params.items())
    ]
    tracking.MlflowClient().log_batch(active_run.info.run_id, params=params_list)
    return active_run
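
The helper above only assembles source metadata as plain tag key/value pairs and hands them to MlflowClient.create_run. A minimal sketch of the same idea outside the projects machinery, assuming a reachable tracking server and that the tag constants are importable from mlflow.utils.mlflow_tags as in the other examples here (the URI and parameter values are hypothetical):

from mlflow.entities import SourceType
from mlflow.tracking import MlflowClient
from mlflow.utils.mlflow_tags import (
    MLFLOW_PROJECT_ENTRY_POINT,
    MLFLOW_SOURCE_NAME,
    MLFLOW_SOURCE_TYPE,
)

client = MlflowClient()
# Create an empty run that carries the same kind of source metadata that
# _create_run attaches, then report a parameter against it.
run = client.create_run(
    experiment_id="0",
    tags={
        MLFLOW_SOURCE_NAME: "https://example.com/my-project#subdir",  # hypothetical project URI
        MLFLOW_SOURCE_TYPE: SourceType.to_string(SourceType.PROJECT),
        MLFLOW_PROJECT_ENTRY_POINT: "main",
    },
)
client.log_param(run.info.run_id, "alpha", "0.5")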
Example #9
    def test_create_run(self):
        experiment_id = self._experiment_factory('test_create_run')
        expected = self._get_run_configs('booyya', experiment_id=experiment_id)

        tags = [RunTag('3', '4'), RunTag('1', '2')]
        actual = self.store.create_run(expected["experiment_id"], expected["user_id"],
                                       expected["name"],
                                       SourceType.from_string(expected["source_type"]),
                                       expected["source_name"], expected["entry_point_name"],
                                       expected["start_time"], expected["source_version"],
                                       tags, None)

        self.assertEqual(actual.info.experiment_id, expected["experiment_id"])
        self.assertEqual(actual.info.user_id, expected["user_id"])
        self.assertEqual(actual.info.name, 'booyya')
        self.assertEqual(actual.info.source_type, SourceType.from_string(expected["source_type"]))
        self.assertEqual(actual.info.source_name, expected["source_name"])
        self.assertEqual(actual.info.source_version, expected["source_version"])
        self.assertEqual(actual.info.entry_point_name, expected["entry_point_name"])
        self.assertEqual(actual.info.start_time, expected["start_time"])
        self.assertEqual(len(actual.data.tags), 3)

        name_tag = models.SqlTag(key='mlflow.runName', value='booyya').to_mlflow_entity()
        self.assertListEqual(actual.data.tags, tags + [name_tag])
Example #10
    def tags(self):
        notebook_id = databricks_utils.get_notebook_id()
        notebook_path = databricks_utils.get_notebook_path()
        webapp_url = databricks_utils.get_webapp_url()
        tags = {
            MLFLOW_SOURCE_NAME: notebook_path,
            MLFLOW_SOURCE_TYPE: SourceType.to_string(SourceType.NOTEBOOK)
        }
        if notebook_id is not None:
            tags[MLFLOW_DATABRICKS_NOTEBOOK_ID] = notebook_id
        if notebook_path is not None:
            tags[MLFLOW_DATABRICKS_NOTEBOOK_PATH] = notebook_path
        if webapp_url is not None:
            tags[MLFLOW_DATABRICKS_WEBAPP_URL] = webapp_url
        return tags
Example #11
    def create_run(self, experiment_id, user_id=None, run_name=None, start_time=None,
                   parent_run_id=None, tags=None):
        """
        Create a :py:class:`mlflow.entities.Run` object that can be associated with
        metrics, parameters, artifacts, etc.
        Unlike :py:func:`mlflow.projects.run`, creates objects but does not run code.
        Unlike :py:func:`mlflow.start_run`, does not change the "active run" used by
        :py:func:`mlflow.log_param`.

        :param user_id: If not provided, use the current user as a default.
        :param start_time: If not provided, use the current timestamp.
        :param parent_run_id: Optional parent run ID - takes precedence over parent run ID included
                              in the ``tags`` argument.
        :param tags: A dictionary of key-value pairs that are converted into
                     :py:class:`mlflow.entities.RunTag` objects.
        :return: :py:class:`mlflow.entities.Run` that was created.
        """

        tags = tags if tags else {}

        # Extract run attributes from tags
        # This logic is temporary; by the 1.0 release, this information will only be stored in tags
        # and will not be available as attributes of the run
        final_parent_run_id =\
            tags.get(MLFLOW_PARENT_RUN_ID) if parent_run_id is None else parent_run_id
        source_name = tags.get(MLFLOW_SOURCE_NAME, "Python Application")
        source_version = tags.get(MLFLOW_GIT_COMMIT)
        entry_point_name = tags.get(MLFLOW_PROJECT_ENTRY_POINT)

        source_type_string = tags.get(MLFLOW_SOURCE_TYPE)
        if source_type_string is None:
            source_type = SourceType.LOCAL
        else:
            source_type = SourceType.from_string(source_type_string)

        return self.store.create_run(
            experiment_id=experiment_id,
            user_id=user_id if user_id is not None else _get_user_id(),
            run_name=run_name,
            start_time=start_time or int(time.time() * 1000),
            tags=[RunTag(key, value) for (key, value) in iteritems(tags)],
            # The below arguments remain set for backwards compatibility:
            parent_run_id=final_parent_run_id,
            source_type=source_type,
            source_name=source_name,
            entry_point_name=entry_point_name,
            source_version=source_version
        )
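
For this version of the client, run source attributes are extracted from the tags dictionary when the caller does not pass them explicitly, and an explicit parent_run_id argument wins over the MLFLOW_PARENT_RUN_ID tag (test_client_create_run_overrides further down in this list asserts exactly that). A small standalone sketch of the precedence rules, using only constants that already appear in these examples; the concrete string values are made up:

from mlflow.entities import SourceType
from mlflow.utils.mlflow_tags import (
    MLFLOW_PARENT_RUN_ID,
    MLFLOW_SOURCE_NAME,
    MLFLOW_SOURCE_TYPE,
)

tags = {
    MLFLOW_PARENT_RUN_ID: "parent-run-id-from-tag",
    MLFLOW_SOURCE_NAME: "train.py",
    MLFLOW_SOURCE_TYPE: SourceType.to_string(SourceType.JOB),
}
explicit_parent_run_id = "explicit-parent-run-id"

# Mirrors the precedence rules in create_run above.
final_parent_run_id = (
    tags.get(MLFLOW_PARENT_RUN_ID) if explicit_parent_run_id is None else explicit_parent_run_id
)
source_name = tags.get(MLFLOW_SOURCE_NAME, "Python Application")
source_type = SourceType.from_string(tags.get(MLFLOW_SOURCE_TYPE, "LOCAL"))

assert final_parent_run_id == "explicit-parent-run-id"
assert source_name == "train.py"
assert source_type == SourceType.JOB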
Example #12
def test_start_run_defaults(empty_active_run_stack):  # pylint: disable=unused-argument

    mock_experiment_id = mock.Mock()
    experiment_id_patch = mock.patch(
        "mlflow.tracking.fluent._get_experiment_id", return_value=mock_experiment_id
    )
    databricks_notebook_patch = mock.patch(
        "mlflow.tracking.fluent.is_in_databricks_notebook", return_value=False
    )
    mock_user = mock.Mock()
    user_patch = mock.patch(
        "mlflow.tracking.context.default_context._get_user", return_value=mock_user
    )
    mock_source_name = mock.Mock()
    source_name_patch = mock.patch(
        "mlflow.tracking.context.default_context._get_source_name", return_value=mock_source_name
    )
    source_type_patch = mock.patch(
        "mlflow.tracking.context.default_context._get_source_type", return_value=SourceType.NOTEBOOK
    )
    mock_source_version = mock.Mock()
    source_version_patch = mock.patch(
        "mlflow.tracking.context.git_context._get_source_version", return_value=mock_source_version
    )

    expected_tags = {
        mlflow_tags.MLFLOW_USER: mock_user,
        mlflow_tags.MLFLOW_SOURCE_NAME: mock_source_name,
        mlflow_tags.MLFLOW_SOURCE_TYPE: SourceType.to_string(SourceType.NOTEBOOK),
        mlflow_tags.MLFLOW_GIT_COMMIT: mock_source_version,
    }

    create_run_patch = mock.patch.object(MlflowClient, "create_run")

    with multi_context(
        experiment_id_patch,
        databricks_notebook_patch,
        user_patch,
        source_name_patch,
        source_type_patch,
        source_version_patch,
        create_run_patch,
    ):
        active_run = start_run()
        MlflowClient.create_run.assert_called_once_with(
            experiment_id=mock_experiment_id, tags=expected_tags
        )
        assert is_from_run(active_run, MlflowClient.create_run.return_value)
Example #13
    def _get_run_configs(self, name='test', experiment_id=None):
        return {
            'experiment_id': experiment_id,
            'name': name,
            'user_id': 'Anderson',
            'run_uuid': uuid.uuid4().hex,
            'status': RunStatus.to_string(RunStatus.SCHEDULED),
            'source_type': SourceType.to_string(SourceType.NOTEBOOK),
            'source_name': 'Python application',
            'entry_point_name': 'main.py',
            'start_time': int(time.time()),
            'end_time': int(time.time()),
            'source_version': mlflow.__version__,
            'lifecycle_stage': entities.LifecycleStage.ACTIVE,
            'artifact_uri': '//'
        }
Example #14
def test_databricks_job_run_context_tags():
    patch_job_id = mock.patch("mlflow.utils.databricks_utils.get_job_id")
    patch_job_run_id = mock.patch("mlflow.utils.databricks_utils.get_job_run_id")
    patch_job_type = mock.patch("mlflow.utils.databricks_utils.get_job_type")
    patch_webapp_url = mock.patch("mlflow.utils.databricks_utils.get_webapp_url")

    with patch_job_id as job_id_mock, patch_job_run_id as job_run_id_mock, patch_job_type as job_type_mock, patch_webapp_url as webapp_url_mock:  # noqa
        assert DatabricksJobRunContext().tags() == {
            MLFLOW_SOURCE_NAME: "jobs/{job_id}/run/{job_run_id}".format(
                job_id=job_id_mock.return_value, job_run_id=job_run_id_mock.return_value
            ),
            MLFLOW_SOURCE_TYPE: SourceType.to_string(SourceType.JOB),
            MLFLOW_DATABRICKS_JOB_ID: job_id_mock.return_value,
            MLFLOW_DATABRICKS_JOB_RUN_ID: job_run_id_mock.return_value,
            MLFLOW_DATABRICKS_JOB_TYPE: job_type_mock.return_value,
            MLFLOW_DATABRICKS_WEBAPP_URL: webapp_url_mock.return_value,
        }
Example #15
def test_client_create_run_overrides(mock_store):

    experiment_id = mock.Mock()
    user_id = mock.Mock()
    run_name = mock.Mock()
    start_time = mock.Mock()
    tags = {
        MLFLOW_PARENT_RUN_ID: mock.Mock(),
        MLFLOW_SOURCE_TYPE: SourceType.to_string(SourceType.JOB),
        MLFLOW_SOURCE_NAME: mock.Mock(),
        MLFLOW_PROJECT_ENTRY_POINT: mock.Mock(),
        MLFLOW_GIT_COMMIT: mock.Mock(),
        "other-key": "other-value"
    }

    MlflowClient().create_run(experiment_id, user_id, run_name, start_time,
                              None, tags)

    mock_store.create_run.assert_called_once_with(
        experiment_id=experiment_id,
        user_id=user_id,
        run_name=run_name,
        start_time=start_time,
        tags=[RunTag(key, value) for key, value in tags.items()],
        parent_run_id=tags[MLFLOW_PARENT_RUN_ID],
        source_type=SourceType.JOB,
        source_name=tags[MLFLOW_SOURCE_NAME],
        entry_point_name=tags[MLFLOW_PROJECT_ENTRY_POINT],
        source_version=tags[MLFLOW_GIT_COMMIT])
    mock_store.reset_mock()
    parent_run_id = "mock-parent-run-id"
    MlflowClient().create_run(experiment_id, user_id, run_name, start_time,
                              parent_run_id, tags)
    mock_store.create_run.assert_called_once_with(
        experiment_id=experiment_id,
        user_id=user_id,
        run_name=run_name,
        start_time=start_time,
        tags=[RunTag(key, value) for key, value in tags.items()],
        parent_run_id=parent_run_id,
        source_type=SourceType.JOB,
        source_name=tags[MLFLOW_SOURCE_NAME],
        entry_point_name=tags[MLFLOW_PROJECT_ENTRY_POINT],
        source_version=tags[MLFLOW_GIT_COMMIT])
Example #16
    def create_run(self, experiment_id, user_id, run_name, source_type,
                   source_name, entry_point_name, start_time, source_version,
                   tags, parent_run_id):
        with self.ManagedSessionMaker() as session:
            experiment = self.get_experiment(experiment_id)

            if experiment.lifecycle_stage != LifecycleStage.ACTIVE:
                raise MlflowException(
                    'Experiment id={} must be active'.format(experiment_id),
                    INVALID_STATE)

            run_uuid = uuid.uuid4().hex
            artifact_location = build_path(
                experiment.artifact_location, run_uuid,
                SqlAlchemyStore.ARTIFACTS_FOLDER_NAME)
            run = SqlRun(name=run_name or "",
                         artifact_uri=artifact_location,
                         run_uuid=run_uuid,
                         experiment_id=experiment_id,
                         source_type=SourceType.to_string(source_type),
                         source_name=source_name,
                         entry_point_name=entry_point_name,
                         user_id=user_id,
                         status=RunStatus.to_string(RunStatus.RUNNING),
                         start_time=start_time,
                         end_time=None,
                         source_version=source_version,
                         lifecycle_stage=LifecycleStage.ACTIVE)

            tags_dict = {}
            for tag in tags:
                tags_dict[tag.key] = tag.value
            if parent_run_id:
                tags_dict[MLFLOW_PARENT_RUN_ID] = parent_run_id
            if run_name:
                tags_dict[MLFLOW_RUN_NAME] = run_name
            run.tags = [
                SqlTag(key=key, value=value)
                for key, value in tags_dict.items()
            ]
            self._save_to_db(objs=run, session=session)

            return run.to_mlflow_entity()
Example #17
def _create_entity(base, model):

    # create dict of kwargs properties for entity and return the initialized entity
    config = {}
    for k in base._properties():
        # check if it's an MLflow entity and build it
        obj = getattr(model, k)

        if isinstance(model, SqlRun):
            if base is RunData:
                # RunData holds lists for metrics, params, and tags,
                # so obj is a list whose items need to be converted
                if k == 'metrics':
                    # only get latest recorded metrics per key
                    metrics = {}
                    for o in obj:
                        existing_metric = metrics.get(o.key)
                        if (existing_metric is None)\
                            or ((o.step, o.timestamp, o.value) >=
                                (existing_metric.step, existing_metric.timestamp,
                                 existing_metric.value)):
                            metrics[o.key] = Metric(o.key, o.value,
                                                    o.timestamp, o.step)
                    obj = list(metrics.values())
                elif k == 'params':
                    obj = [Param(o.key, o.value) for o in obj]
                elif k == 'tags':
                    obj = [RunTag(o.key, o.value) for o in obj]
            elif base is RunInfo:
                if k == 'source_type':
                    obj = SourceType.from_string(obj)
                elif k == "status":
                    obj = RunStatus.from_string(obj)
                elif k == "experiment_id":
                    obj = str(obj)

        # Our data model defines experiment_ids as ints, but the in-memory representation was
        # changed to be a string in time for 1.0.
        if isinstance(model, SqlExperiment) and k == "experiment_id":
            obj = str(obj)

        config[k] = obj
    return base(**config)
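
The function above relies on each entity class exposing a _properties() list naming the constructor kwargs to copy off the backing SQLAlchemy model. A toy sketch of that generic pattern with hypothetical stand-in classes (these are not MLflow APIs):

class ToyEntity:
    def __init__(self, key, value):
        self.key, self.value = key, value

    @classmethod
    def _properties(cls):
        # constructor kwargs to pull from the backing model
        return ["key", "value"]


class ToyModel:
    key = "alpha"
    value = 42


def create_entity(base, model):
    # copy each declared property off the model and build the entity
    return base(**{k: getattr(model, k) for k in base._properties()})


entity = create_entity(ToyEntity, ToyModel)
assert (entity.key, entity.value) == ("alpha", 42)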
Example #18
def test_run(
        tmpdir,  # pylint: disable=unused-argument
        patch_user,  # pylint: disable=unused-argument
        tracking_uri_mock,  # pylint: disable=unused-argument
        use_start_run):
    submitted_run = mlflow.projects.run(
        TEST_PROJECT_DIR,
        entry_point="test_tracking",
        parameters={"use_start_run": use_start_run},
        use_conda=False,
        experiment_id=FileStore.DEFAULT_EXPERIMENT_ID)
    assert submitted_run.run_id is not None
    # Blocking runs should be finished when they return
    validate_exit_status(submitted_run.get_status(), RunStatus.FINISHED)
    # Test that we can call wait() on a synchronous run & that the run has the correct
    # status after calling wait().
    submitted_run.wait()
    validate_exit_status(submitted_run.get_status(), RunStatus.FINISHED)
    # Validate run contents in the FileStore
    run_id = submitted_run.run_id
    mlflow_service = mlflow.tracking.MlflowClient()

    run_infos = mlflow_service.list_run_infos(
        experiment_id=FileStore.DEFAULT_EXPERIMENT_ID,
        run_view_type=ViewType.ACTIVE_ONLY)
    assert len(run_infos) == 1
    store_run_id = run_infos[0].run_id
    assert run_id == store_run_id
    run = mlflow_service.get_run(run_id)

    assert run.info.status == RunStatus.to_string(RunStatus.FINISHED)

    assert run.data.params == {"use_start_run": use_start_run}
    assert run.data.metrics == {"some_key": 3}

    tags = run.data.tags
    assert tags[MLFLOW_USER] == MOCK_USER
    assert "file:" in tags[MLFLOW_SOURCE_NAME]
    assert tags[MLFLOW_SOURCE_TYPE] == SourceType.to_string(SourceType.PROJECT)
    assert tags[MLFLOW_PROJECT_ENTRY_POINT] == "test_tracking"
Example #19
def test_start_run_with_parent():

    parent_run = mock.Mock()
    mock_experiment_id = mock.Mock()
    mock_source_name = mock.Mock()

    active_run_stack_patch = mock.patch("mlflow.tracking.fluent._active_run_stack", [parent_run])

    databricks_notebook_patch = mock.patch(
        "mlflow.tracking.fluent.is_in_databricks_notebook", return_value=False
    )
    mock_user = mock.Mock()
    user_patch = mock.patch(
        "mlflow.tracking.context.default_context._get_user", return_value=mock_user
    )
    source_name_patch = mock.patch(
        "mlflow.tracking.context.default_context._get_source_name", return_value=mock_source_name
    )

    expected_tags = {
        mlflow_tags.MLFLOW_USER: mock_user,
        mlflow_tags.MLFLOW_SOURCE_NAME: mock_source_name,
        mlflow_tags.MLFLOW_SOURCE_TYPE: SourceType.to_string(SourceType.LOCAL),
        mlflow_tags.MLFLOW_PARENT_RUN_ID: parent_run.info.run_id,
    }

    create_run_patch = mock.patch.object(MlflowClient, "create_run")

    with multi_context(
        databricks_notebook_patch,
        active_run_stack_patch,
        create_run_patch,
        user_patch,
        source_name_patch,
    ):
        active_run = start_run(experiment_id=mock_experiment_id, nested=True)
        MlflowClient.create_run.assert_called_once_with(
            experiment_id=mock_experiment_id, tags=expected_tags
        )
        assert is_from_run(active_run, MlflowClient.create_run.return_value)
Example #20
def test_databricks_job_run_context_tags():
    patch_job_id = mock.patch("mlflow.utils.databricks_utils.get_job_id")
    patch_job_run_id = mock.patch(
        "mlflow.utils.databricks_utils.get_job_run_id")
    patch_job_type = mock.patch("mlflow.utils.databricks_utils.get_job_type")
    patch_webapp_url = mock.patch(
        "mlflow.utils.databricks_utils.get_webapp_url")
    patch_workspace_info = mock.patch(
        "mlflow.utils.databricks_utils.get_workspace_info_from_dbutils",
        return_value=("https://databricks.com", "123456"),
    )

    with multi_context(patch_job_id, patch_job_run_id, patch_job_type,
                       patch_webapp_url, patch_workspace_info) as (
                           job_id_mock,
                           job_run_id_mock,
                           job_type_mock,
                           webapp_url_mock,
                           workspace_info_mock,
                       ):
        assert DatabricksJobRunContext().tags() == {
            MLFLOW_SOURCE_NAME: "jobs/{job_id}/run/{job_run_id}".format(
                job_id=job_id_mock.return_value,
                job_run_id=job_run_id_mock.return_value),
            MLFLOW_SOURCE_TYPE: SourceType.to_string(SourceType.JOB),
            MLFLOW_DATABRICKS_JOB_ID: job_id_mock.return_value,
            MLFLOW_DATABRICKS_JOB_RUN_ID: job_run_id_mock.return_value,
            MLFLOW_DATABRICKS_JOB_TYPE: job_type_mock.return_value,
            MLFLOW_DATABRICKS_WEBAPP_URL: webapp_url_mock.return_value,
            MLFLOW_DATABRICKS_WORKSPACE_URL: workspace_info_mock.return_value[0],
            MLFLOW_DATABRICKS_WORKSPACE_ID: workspace_info_mock.return_value[1],
        }
Example #21
    def tags(self):
        job_id = databricks_utils.get_job_id()
        job_run_id = databricks_utils.get_job_run_id()
        job_type = databricks_utils.get_job_type()
        webapp_url = databricks_utils.get_webapp_url()
        tags = {
            MLFLOW_SOURCE_NAME: (
                "jobs/{job_id}/run/{job_run_id}".format(job_id=job_id, job_run_id=job_run_id)
                if job_id is not None and job_run_id is not None else None),
            MLFLOW_SOURCE_TYPE: SourceType.to_string(SourceType.JOB),
        }
        if job_id is not None:
            tags[MLFLOW_DATABRICKS_JOB_ID] = job_id
        if job_run_id is not None:
            tags[MLFLOW_DATABRICKS_JOB_RUN_ID] = job_run_id
        if job_type is not None:
            tags[MLFLOW_DATABRICKS_JOB_TYPE] = job_type
        if webapp_url is not None:
            tags[MLFLOW_DATABRICKS_WEBAPP_URL] = webapp_url
        return tags
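
Purely as an illustration of the source-name format used above, with concrete made-up job identifiers and no Databricks utilities involved:

# Standalone illustration of the "jobs/<id>/run/<run id>" source name.
job_id, job_run_id = "123", "456"
source_name = (
    "jobs/{job_id}/run/{job_run_id}".format(job_id=job_id, job_run_id=job_run_id)
    if job_id is not None and job_run_id is not None
    else None
)
assert source_name == "jobs/123/run/456"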
Example #22
def test_start_run_with_parent():

    parent_run = mock.Mock()
    active_run_stack_patch = mock.patch(
        "mlflow.tracking.fluent._active_run_stack", [parent_run])

    databricks_notebook_patch = mock.patch(
        "mlflow.tracking.fluent.is_in_databricks_notebook", return_value=False)

    mock_experiment_id = mock.Mock()
    mock_source_name = mock.Mock()
    source_type = SourceType.JOB
    mock_source_version = mock.Mock()
    mock_entry_point_name = mock.Mock()
    mock_run_name = mock.Mock()

    expected_tags = {
        mlflow_tags.MLFLOW_SOURCE_NAME: mock_source_name,
        mlflow_tags.MLFLOW_SOURCE_TYPE: SourceType.to_string(source_type),
        mlflow_tags.MLFLOW_GIT_COMMIT: mock_source_version,
        mlflow_tags.MLFLOW_PROJECT_ENTRY_POINT: mock_entry_point_name,
        mlflow_tags.MLFLOW_PARENT_RUN_ID: parent_run.info.run_id
    }

    create_run_patch = mock.patch.object(MlflowClient, "create_run")

    with databricks_notebook_patch, active_run_stack_patch, create_run_patch:
        active_run = start_run(experiment_id=mock_experiment_id,
                               source_name=mock_source_name,
                               source_version=mock_source_version,
                               entry_point_name=mock_entry_point_name,
                               source_type=source_type,
                               run_name=mock_run_name,
                               nested=True)
        MlflowClient.create_run.assert_called_once_with(
            experiment_id=mock_experiment_id,
            run_name=mock_run_name,
            tags=expected_tags)
        assert is_from_run(active_run, MlflowClient.create_run.return_value)
Example #23
    def create_run(self, experiment_id, user_id, run_name, source_type,
                   source_name, entry_point_name, start_time, source_version,
                   tags, parent_run_id):
        experiment = self.get_experiment(experiment_id)

        if experiment.lifecycle_stage != LifecycleStage.ACTIVE:
            raise MlflowException(
                'Experiment id={} must be active'.format(experiment_id),
                INVALID_STATE)

        run_uuid = uuid.uuid4().hex
        artifact_location = build_path(experiment.artifact_location, run_uuid,
                                       SqlAlchemyStore.ARTIFACTS_FOLDER_NAME)
        run = SqlRun(name=run_name or "",
                     artifact_uri=artifact_location,
                     run_uuid=run_uuid,
                     experiment_id=experiment_id,
                     source_type=SourceType.to_string(source_type),
                     source_name=source_name,
                     entry_point_name=entry_point_name,
                     user_id=user_id,
                     status=RunStatus.to_string(RunStatus.RUNNING),
                     start_time=start_time,
                     end_time=None,
                     source_version=source_version,
                     lifecycle_stage=LifecycleStage.ACTIVE)

        for tag in tags:
            run.tags.append(SqlTag(key=tag.key, value=tag.value))
        if parent_run_id:
            run.tags.append(
                SqlTag(key=MLFLOW_PARENT_RUN_ID, value=parent_run_id))
        if run_name:
            run.tags.append(SqlTag(key=MLFLOW_RUN_NAME, value=run_name))

        self._save_to_db([run])

        return run.to_mlflow_entity()
Example #24
    def to_mlflow_entity(self):
        """
        Convert DB model to corresponding MLflow entity.

        :return: :py:class:`mlflow.entities.Run`.
        """
        run_info = RunInfo(run_uuid=self.run_uuid,
                           run_id=self.run_uuid,
                           experiment_id=str(self.experiment_id),
                           name=self.name,
                           source_type=SourceType.from_string(
                               self.source_type),
                           source_name=self.source_name,
                           entry_point_name=self.entry_point_name,
                           user_id=self.user_id,
                           status=RunStatus.from_string(self.status),
                           start_time=self.start_time,
                           end_time=self.end_time,
                           source_version=self.source_version,
                           lifecycle_stage=self.lifecycle_stage,
                           artifact_uri=self.artifact_uri)

        # only get latest recorded metrics per key
        all_metrics = [m.to_mlflow_entity() for m in self.metrics]
        metrics = {}
        for m in all_metrics:
            existing_metric = metrics.get(m.key)
            if (existing_metric is None)\
                or ((m.step, m.timestamp, m.value) >=
                    (existing_metric.step, existing_metric.timestamp,
                        existing_metric.value)):
                metrics[m.key] = m

        run_data = RunData(metrics=list(metrics.values()),
                           params=[p.to_mlflow_entity() for p in self.params],
                           tags=[t.to_mlflow_entity() for t in self.tags])

        return Run(run_info=run_info, run_data=run_data)
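
The "latest recorded metric per key" rule above keeps, for each key, the entry with the largest (step, timestamp, value) tuple. A small standalone sketch of that rule, assuming the Metric(key, value, timestamp, step) constructor used elsewhere in these examples; the metric values are made up:

from mlflow.entities import Metric

history = [
    Metric("loss", 0.9, 100, 1),  # key, value, timestamp, step
    Metric("loss", 0.5, 200, 2),
    Metric("acc", 0.8, 150, 1),
]

latest = {}
for m in history:
    existing = latest.get(m.key)
    if existing is None or (m.step, m.timestamp, m.value) >= (
            existing.step, existing.timestamp, existing.value):
        latest[m.key] = m

assert latest["loss"].value == 0.5  # the step-2 entry wins over step 1
assert latest["acc"].value == 0.8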
Example #25
def _setup_remote(azureml_run):
    from azureml.mlflow._internal.constants import MLflowRunEnvVars
    logger.debug("Setting up a Remote MLflow run")
    tracking_uri = azureml_run.experiment.workspace.get_mlflow_tracking_uri() + "&is-remote=True"
    logger.debug("Setting MLflow tracking uri env var")
    os.environ[MLflowRunEnvVars.TRACKING_URI] = tracking_uri
    logger.debug("Setting MLflow run id env var with {}".format(azureml_run.id))
    os.environ[MLflowRunEnvVars.ID] = azureml_run.id
    logger.debug("Setting Mlflow experiment with {}".format(azureml_run.experiment.name))
    os.environ[MLflowRunEnvVars.EXPERIMENT_NAME] = azureml_run.experiment.name
    from mlflow.entities import SourceType

    if not os.environ.get("AZUREML_SECONDARY_INSTANCE"):
        mlflow_tags = {}
        mlflow_source_type_key = 'mlflow.source.type'
        if mlflow_source_type_key not in azureml_run.tags:
            logger.debug("Setting the mlflow tag {}".format(mlflow_source_type_key))
            mlflow_tags[mlflow_source_type_key] = SourceType.to_string(SourceType.JOB)
        mlflow_source_name_key = 'mlflow.source.name'
        if mlflow_source_name_key not in azureml_run.tags:
            logger.debug("Setting the mlflow tag {}".format(mlflow_source_name_key))
            mlflow_tags[mlflow_source_name_key] = azureml_run.get_details()['runDefinition']['script']
        azureml_run.set_tags(mlflow_tags)
Example #26
    def create_run(self, experiment_id, user_id, start_time, tags):
        with self.ManagedSessionMaker() as session:
            experiment = self.get_experiment(experiment_id)
            self._check_experiment_is_active(experiment)

            run_id = uuid.uuid4().hex
            artifact_location = append_to_uri_path(experiment.artifact_location, run_id,
                                                   SqlAlchemyStore.ARTIFACTS_FOLDER_NAME)
            run = SqlRun(name="", artifact_uri=artifact_location, run_uuid=run_id,
                         experiment_id=experiment_id,
                         source_type=SourceType.to_string(SourceType.UNKNOWN),
                         source_name="", entry_point_name="",
                         user_id=user_id, status=RunStatus.to_string(RunStatus.RUNNING),
                         start_time=start_time, end_time=None,
                         source_version="", lifecycle_stage=LifecycleStage.ACTIVE)

            tags_dict = {}
            for tag in tags:
                tags_dict[tag.key] = tag.value
            run.tags = [SqlTag(key=key, value=value) for key, value in tags_dict.items()]
            self._save_to_db(objs=run, session=session)

            return run.to_mlflow_entity()
Example #27
    def tags(self):
        notebook_id = databricks_utils.get_notebook_id()
        notebook_path = databricks_utils.get_notebook_path()
        webapp_url = databricks_utils.get_webapp_url()
        workspace_url = databricks_utils.get_workspace_url()
        workspace_url_fallback, workspace_id = databricks_utils.get_workspace_info_from_dbutils()
        tags = {
            MLFLOW_SOURCE_NAME: notebook_path,
            MLFLOW_SOURCE_TYPE: SourceType.to_string(SourceType.NOTEBOOK),
        }
        if notebook_id is not None:
            tags[MLFLOW_DATABRICKS_NOTEBOOK_ID] = notebook_id
        if notebook_path is not None:
            tags[MLFLOW_DATABRICKS_NOTEBOOK_PATH] = notebook_path
        if webapp_url is not None:
            tags[MLFLOW_DATABRICKS_WEBAPP_URL] = webapp_url
        if workspace_url is not None:
            tags[MLFLOW_DATABRICKS_WORKSPACE_URL] = workspace_url
        elif workspace_url_fallback is not None:
            tags[MLFLOW_DATABRICKS_WORKSPACE_URL] = workspace_url_fallback
        if workspace_id is not None:
            tags[MLFLOW_DATABRICKS_WORKSPACE_ID] = workspace_id
        return tags
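
As an aside, the workspace-URL handling above prefers get_workspace_url() and only falls back to the dbutils-derived value; the multi_context job-context test earlier in this list exercises the fallback branch. A trivial standalone sketch of that preference (the URLs are made up):

def pick_workspace_url(workspace_url, workspace_url_fallback):
    # Mirrors the if/elif above: prefer the directly resolved URL.
    if workspace_url is not None:
        return workspace_url
    return workspace_url_fallback  # may itself be None

assert pick_workspace_url(None, "https://databricks.com") == "https://databricks.com"
assert pick_workspace_url("https://shard.example.com", None) == "https://shard.example.com"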
Example #28
from mlflow.entities import (
    RunTag,
    Metric,
    Param,
    RunData,
    RunInfo,
    SourceType,
    RunStatus,
    Run,
    ViewType,
    ExperimentTag,
)
from mlflow.entities.lifecycle_stage import LifecycleStage
from mlflow.store.db.base_sql_model import Base

SourceTypes = [
    SourceType.to_string(SourceType.NOTEBOOK),
    SourceType.to_string(SourceType.JOB),
    SourceType.to_string(SourceType.LOCAL),
    SourceType.to_string(SourceType.UNKNOWN),
    SourceType.to_string(SourceType.PROJECT),
]

RunStatusTypes = [
    RunStatus.to_string(RunStatus.SCHEDULED),
    RunStatus.to_string(RunStatus.FAILED),
    RunStatus.to_string(RunStatus.FINISHED),
    RunStatus.to_string(RunStatus.RUNNING),
    RunStatus.to_string(RunStatus.KILLED),
]
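
For reference, to_string returns the enum member's name, so (assuming current mlflow.entities behavior) both lists evaluate to plain strings that the SqlRun models below use as the allowed values in their CHECK constraints:

from mlflow.entities import RunStatus, SourceType

assert SourceType.to_string(SourceType.NOTEBOOK) == "NOTEBOOK"
assert RunStatus.to_string(RunStatus.KILLED) == "KILLED"
# so SourceTypes == ["NOTEBOOK", "JOB", "LOCAL", "UNKNOWN", "PROJECT"]
# and RunStatusTypes == ["SCHEDULED", "FAILED", "FINISHED", "RUNNING", "KILLED"]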

Example #29
class SqlRun(Base):
    """
    DB model for :py:class:`mlflow.entities.Run`. These are recorded in ``runs`` table.
    """

    __tablename__ = "runs"

    run_uuid = Column(String(32), nullable=False)
    """
    Run UUID: `String` (limit 32 characters). *Primary Key* for ``runs`` table.
    """
    name = Column(String(250))
    """
    Run name: `String` (limit 250 characters).
    """
    source_type = Column(String(20),
                         default=SourceType.to_string(SourceType.LOCAL))
    """
    Source Type: `String` (limit 20 characters). Can be one of ``NOTEBOOK``, ``JOB``, ``PROJECT``,
                 ``LOCAL`` (default), or ``UNKNOWN``.
    """
    source_name = Column(String(500))
    """
    Name of source recording the run: `String` (limit 500 characters).
    """
    entry_point_name = Column(String(50))
    """
    Entry-point name that launched the run: `String` (limit 50 characters).
    """
    user_id = Column(String(256), nullable=True, default=None)
    """
    User ID: `String` (limit 256 characters). Defaults to ``null``.
    """
    status = Column(String(20),
                    default=RunStatus.to_string(RunStatus.SCHEDULED))
    """
    Run Status: `String` (limit 20 characters). Can be one of ``RUNNING``, ``SCHEDULED`` (default),
                ``FINISHED``, ``FAILED``, or ``KILLED``.
    """
    start_time = Column(BigInteger, default=int(time.time()))
    """
    Run start time: `BigInteger`. Defaults to current system time.
    """
    end_time = Column(BigInteger, nullable=True, default=None)
    """
    Run end time: `BigInteger`.
    """
    source_version = Column(String(50))
    """
    Source version: `String` (limit 50 characters).
    """
    lifecycle_stage = Column(String(20), default=LifecycleStage.ACTIVE)
    """
    Lifecycle Stage of run: `String` (limit 20 characters).
                            Can be either ``active`` (default) or ``deleted``.
    """
    artifact_uri = Column(String(200), default=None)
    """
    Default artifact location for this run: `String` (limit 200 characters).
    """
    experiment_id = Column(Integer, ForeignKey("experiments.experiment_id"))
    """
    Experiment ID to which this run belongs: *Foreign Key* into the ``experiments`` table.
    """
    experiment = relationship("SqlExperiment",
                              backref=backref("runs", cascade="all"))
    """
    SQLAlchemy relationship (many:one) with :py:class:`mlflow.store.dbmodels.models.SqlExperiment`.
    """

    __table_args__ = (
        CheckConstraint(source_type.in_(SourceTypes), name="source_type"),
        CheckConstraint(status.in_(RunStatusTypes), name="status"),
        CheckConstraint(
            lifecycle_stage.in_(
                LifecycleStage.view_type_to_stages(ViewType.ALL)),
            name="runs_lifecycle_stage",
        ),
        PrimaryKeyConstraint("run_uuid", name="run_pk"),
    )

    @staticmethod
    def get_attribute_name(mlflow_attribute_name):
        """
        Resolves an MLflow attribute name to a `SqlRun` attribute name.
        """
        # Currently, MLflow Search attributes defined in `SearchUtils.VALID_SEARCH_ATTRIBUTE_KEYS`
        # share the same names as their corresponding `SqlRun` attributes. Therefore, this function
        # returns the same attribute name
        return mlflow_attribute_name

    def to_mlflow_entity(self):
        """
        Convert DB model to corresponding MLflow entity.

        :return: :py:class:`mlflow.entities.Run`.
        """
        run_info = RunInfo(
            run_uuid=self.run_uuid,
            run_id=self.run_uuid,
            experiment_id=str(self.experiment_id),
            user_id=self.user_id,
            status=self.status,
            start_time=self.start_time,
            end_time=self.end_time,
            lifecycle_stage=self.lifecycle_stage,
            artifact_uri=self.artifact_uri,
        )

        run_data = RunData(
            metrics=[m.to_mlflow_entity() for m in self.latest_metrics],
            params=[p.to_mlflow_entity() for p in self.params],
            tags=[t.to_mlflow_entity() for t in self.tags],
        )

        return Run(run_info=run_info, run_data=run_data)
Example #30
class SqlRun(Base):
    """
    DB model for :py:class:`mlflow.entities.Run`. These are recorded in ``runs`` table.
    """
    __tablename__ = 'runs'

    run_uuid = Column(String(32), nullable=False)
    """
    Run UUID: `String` (limit 32 characters). *Primary Key* for ``runs`` table.
    """
    name = Column(String(250))
    """
    Run name: `String` (limit 250 characters).
    """
    source_type = Column(String(20),
                         default=SourceType.to_string(SourceType.LOCAL))
    """
    Source Type: `String` (limit 20 characters). Can be one of ``NOTEBOOK``, ``JOB``, ``PROJECT``,
                 ``LOCAL`` (default), or ``UNKNOWN``.
    """
    source_name = Column(String(500))
    """
    Name of source recording the run: `String` (limit 500 characters).
    """
    entry_point_name = Column(String(50))
    """
    Entry-point name that launched the run: `String` (limit 50 characters).
    """
    user_id = Column(String(256), nullable=True, default=None)
    """
    User ID: `String` (limit 256 characters). Defaults to ``null``.
    """
    status = Column(String(20),
                    default=RunStatus.to_string(RunStatus.SCHEDULED))
    """
    Run Status: `String` (limit 20 characters). Can be one of ``RUNNING``, ``SCHEDULED`` (default),
                ``FINISHED``, ``FAILED``, or ``KILLED``.
    """
    start_time = Column(BigInteger, default=int(time.time()))
    """
    Run start time: `BigInteger`. Defaults to current system time.
    """
    end_time = Column(BigInteger, nullable=True, default=None)
    """
    Run end time: `BigInteger`.
    """
    source_version = Column(String(50))
    """
    Source version: `String` (limit 50 characters).
    """
    lifecycle_stage = Column(String(20), default=LifecycleStage.ACTIVE)
    """
    Lifecycle Stage of run: `String` (limit 20 characters).
                            Can be either ``active`` (default) or ``deleted``.
    """
    artifact_uri = Column(String(200), default=None)
    """
    Default artifact location for this run: `String` (limit 200 characters).
    """
    experiment_id = Column(Integer, ForeignKey('experiments.experiment_id'))
    """
    Experiment ID to which this run belongs: *Foreign Key* into the ``experiments`` table.
    """
    experiment = relationship('SqlExperiment',
                              backref=backref('runs', cascade='all'))
    """
    SQLAlchemy relationship (many:one) with :py:class:`mlflow.store.dbmodels.models.SqlExperiment`.
    """

    __table_args__ = (CheckConstraint(source_type.in_(SourceTypes),
                                      name='source_type'),
                      CheckConstraint(status.in_(RunStatusTypes),
                                      name='status'),
                      CheckConstraint(lifecycle_stage.in_(
                          LifecycleStage.view_type_to_stages(ViewType.ALL)),
                                      name='runs_lifecycle_stage'),
                      PrimaryKeyConstraint('run_uuid', name='run_pk'))

    def to_mlflow_entity(self):
        """
        Convert DB model to corresponding MLflow entity.

        :return: :py:class:`mlflow.entities.Run`.
        """
        run_info = RunInfo(run_uuid=self.run_uuid,
                           run_id=self.run_uuid,
                           experiment_id=str(self.experiment_id),
                           user_id=self.user_id,
                           status=self.status,
                           start_time=self.start_time,
                           end_time=self.end_time,
                           lifecycle_stage=self.lifecycle_stage,
                           artifact_uri=self.artifact_uri)

        # only get latest recorded metrics per key
        all_metrics = [m.to_mlflow_entity() for m in self.metrics]
        metrics = {}
        for m in all_metrics:
            existing_metric = metrics.get(m.key)
            if (existing_metric is None)\
                or ((m.step, m.timestamp, m.value) >=
                    (existing_metric.step, existing_metric.timestamp,
                        existing_metric.value)):
                metrics[m.key] = m

        run_data = RunData(metrics=list(metrics.values()),
                           params=[p.to_mlflow_entity() for p in self.params],
                           tags=[t.to_mlflow_entity() for t in self.tags])

        return Run(run_info=run_info, run_data=run_data)