Example #1
    def test_run_info(self):
        experiment_id = self._experiment_factory('test exp')
        config = {
            'experiment_id': experiment_id,
            'name': 'test run',
            'user_id': 'Anderson',
            'run_uuid': 'test',
            'status': RunStatus.to_string(RunStatus.SCHEDULED),
            'source_type': SourceType.to_string(SourceType.LOCAL),
            'source_name': 'Python application',
            'entry_point_name': 'main.py',
            'start_time': int(time.time()),
            'end_time': int(time.time()),
            'source_version': mlflow.__version__,
            'lifecycle_stage': entities.LifecycleStage.ACTIVE,
            'artifact_uri': '//'
        }
        run = models.SqlRun(**config).to_mlflow_entity()

        for k, v in config.items():
            v2 = getattr(run.info, k)
            if k == 'source_type':
                self.assertEqual(v, SourceType.to_string(v2))
            elif k == 'status':
                self.assertEqual(v, RunStatus.to_string(v2))
            else:
                self.assertEqual(v, v2)
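The loop above compares enum-valued fields by converting them back to their string names. A minimal round-trip sketch, assuming both mlflow.entities enums expose the to_string/from_string pair used throughout these examples:

from mlflow.entities import RunStatus, SourceType

# Enum value -> canonical string name (the form the SQL store persists) ...
status_str = RunStatus.to_string(RunStatus.SCHEDULED)  # e.g. "SCHEDULED"
source_str = SourceType.to_string(SourceType.LOCAL)    # e.g. "LOCAL"

# ... and string -> enum value again, so either representation can be asserted against.
assert RunStatus.from_string(status_str) == RunStatus.SCHEDULED
assert SourceType.from_string(source_str) == SourceType.LOCAL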
Example #2
def test_client_create_run_overrides(mock_store):

    experiment_id = mock.Mock()
    user_id = mock.Mock()
    run_name = mock.Mock()
    start_time = mock.Mock()
    tags = {
        MLFLOW_PARENT_RUN_ID: mock.Mock(),
        MLFLOW_SOURCE_TYPE: SourceType.to_string(SourceType.JOB),
        MLFLOW_SOURCE_NAME: mock.Mock(),
        MLFLOW_PROJECT_ENTRY_POINT: mock.Mock(),
        MLFLOW_GIT_COMMIT: mock.Mock(),
        "other-key": "other-value"
    }

    MlflowClient().create_run(experiment_id, user_id, run_name, start_time,
                              tags)

    mock_store.create_run.assert_called_once_with(
        experiment_id=experiment_id,
        user_id=user_id,
        run_name=run_name,
        start_time=start_time,
        tags=[RunTag(key, value) for key, value in tags.items()],
        parent_run_id=tags[MLFLOW_PARENT_RUN_ID],
        source_type=SourceType.JOB,
        source_name=tags[MLFLOW_SOURCE_NAME],
        entry_point_name=tags[MLFLOW_PROJECT_ENTRY_POINT],
        source_version=tags[MLFLOW_GIT_COMMIT])
Example #3
def _create_run(uri, experiment_id, work_dir, entry_point):
    """
    Create a ``Run`` against the current MLflow tracking server, logging metadata (e.g. the URI,
    entry point, and parameters of the project) about the run. Return an ``ActiveRun`` that can be
    used to report additional data about the run (metrics/params) to the tracking server.
    """
    if _is_local_uri(uri):
        source_name = tracking.utils._get_git_url_if_present(_expand_uri(uri))
    else:
        source_name = _expand_uri(uri)
    source_version = _get_git_commit(work_dir)
    existing_run = fluent.active_run()
    if existing_run:
        parent_run_id = existing_run.info.run_id
    else:
        parent_run_id = None

    tags = {
        MLFLOW_USER: _get_user(),
        MLFLOW_SOURCE_NAME: source_name,
        MLFLOW_SOURCE_TYPE: SourceType.to_string(SourceType.PROJECT),
        MLFLOW_PROJECT_ENTRY_POINT: entry_point
    }
    if source_version is not None:
        tags[MLFLOW_GIT_COMMIT] = source_version
    if parent_run_id is not None:
        tags[MLFLOW_PARENT_RUN_ID] = parent_run_id

    active_run = tracking.MlflowClient().create_run(experiment_id=experiment_id, tags=tags)
    return active_run
Example #4
def test_get_experiment_by_id_with_is_in_databricks_job():
    exp_id = 768
    job_id = 123
    exp_name = "jobs:/" + str(job_id)
    job_type_info = "NORMAL"
    with mock.patch(
            "mlflow.tracking.fluent.is_in_databricks_job"
    ) as job_detection_mock, mock.patch(
            "mlflow.tracking.fluent.get_job_type_info"
    ) as job_type_info_mock, mock.patch(
            "mlflow.tracking.fluent.get_job_id") as job_id_mock, mock.patch(
                "mlflow.tracking.fluent.get_experiment_name_from_job_id"
            ) as job_to_experiment_name_mapping_mock, mock.patch.object(
                MlflowClient, "create_experiment", return_value=exp_id):
        job_detection_mock.return_value = True
        job_type_info_mock.return_value = job_type_info
        job_id_mock.return_value = job_id
        job_to_experiment_name_mapping_mock.return_value = exp_name
        tags = {}
        tags[MLFLOW_DATABRICKS_JOB_TYPE_INFO] = job_type_info
        tags[MLFLOW_EXPERIMENT_SOURCE_TYPE] = SourceType.to_string(
            SourceType.JOB)
        tags[MLFLOW_EXPERIMENT_SOURCE_ID] = job_id

        assert _get_experiment_id() == exp_id
        MlflowClient.create_experiment.assert_called_with(exp_name, None, tags)
Example #5
def test_databricks_notebook_run_context_tags():
    patch_notebook_id = mock.patch(
        "mlflow.utils.databricks_utils.get_notebook_id")
    patch_notebook_path = mock.patch(
        "mlflow.utils.databricks_utils.get_notebook_path")
    patch_webapp_url = mock.patch(
        "mlflow.utils.databricks_utils.get_webapp_url")
    patch_workspace_info = mock.patch(
        "mlflow.utils.databricks_utils.get_workspace_info_from_dbutils",
        return_value=("https://databricks.com", "123456"),
    )

    with multi_context(patch_notebook_id, patch_notebook_path,
                       patch_webapp_url, patch_workspace_info) as (
                           notebook_id_mock,
                           notebook_path_mock,
                           webapp_url_mock,
                           workspace_info_mock,
                       ):
        assert DatabricksNotebookRunContext().tags() == {
            MLFLOW_SOURCE_NAME: notebook_path_mock.return_value,
            MLFLOW_SOURCE_TYPE: SourceType.to_string(SourceType.NOTEBOOK),
            MLFLOW_DATABRICKS_NOTEBOOK_ID: notebook_id_mock.return_value,
            MLFLOW_DATABRICKS_NOTEBOOK_PATH: notebook_path_mock.return_value,
            MLFLOW_DATABRICKS_WEBAPP_URL: webapp_url_mock.return_value,
            MLFLOW_DATABRICKS_WORKSPACE_URL:
            workspace_info_mock.return_value[0],
            MLFLOW_DATABRICKS_WORKSPACE_ID:
            workspace_info_mock.return_value[1],
        }
Example #6
def test_databricks_job_default_experiment_id():
    job_id = "job_id"
    exp_name = "jobs:/" + str(job_id)
    patch_job_id = mock.patch("mlflow.utils.databricks_utils.get_job_id",
                              return_value=job_id)
    patch_job_type = mock.patch(
        "mlflow.utils.databricks_utils.get_job_type_info",
        return_value="NORMAL")
    patch_experiment_name_from_job_id = mock.patch(
        "mlflow.utils.databricks_utils.get_experiment_name_from_job_id",
        return_value=exp_name)
    experiment_id = "experiment_id"

    create_experiment = mock.patch.object(MlflowClient,
                                          "create_experiment",
                                          return_value=experiment_id)

    with multi_context(patch_job_id, patch_job_type,
                       patch_experiment_name_from_job_id,
                       create_experiment) as (
                           job_id_mock,
                           job_type_info_mock,
                           experiment_name_from_job_id_mock,
                           create_experiment_mock,
                       ):
        tags = {}
        tags[MLFLOW_DATABRICKS_JOB_TYPE_INFO] = job_type_info_mock.return_value
        tags[MLFLOW_EXPERIMENT_SOURCE_TYPE] = SourceType.to_string(
            SourceType.JOB)
        tags[MLFLOW_EXPERIMENT_SOURCE_ID] = job_id_mock.return_value

        assert DatabricksJobExperimentProvider().get_experiment_id(
        ) == experiment_id
        create_experiment_mock.assert_called_once_with(
            experiment_name_from_job_id_mock.return_value, None, tags)
Example #7
def test_databricks_job_run_context_tags():
    patch_job_id = mock.patch("mlflow.utils.databricks_utils.get_job_id")
    patch_job_run_id = mock.patch(
        "mlflow.utils.databricks_utils.get_job_run_id")
    patch_job_type = mock.patch("mlflow.utils.databricks_utils.get_job_type")
    patch_webapp_url = mock.patch(
        "mlflow.utils.databricks_utils.get_webapp_url")

    with multi_context(patch_job_id, patch_job_run_id, patch_job_type,
                       patch_webapp_url) as (
                           job_id_mock,
                           job_run_id_mock,
                           job_type_mock,
                           webapp_url_mock,
                       ):
        assert DatabricksJobRunContext().tags() == {
            MLFLOW_SOURCE_NAME:
            "jobs/{job_id}/run/{job_run_id}".format(
                job_id=job_id_mock.return_value,
                job_run_id=job_run_id_mock.return_value),
            MLFLOW_SOURCE_TYPE:
            SourceType.to_string(SourceType.JOB),
            MLFLOW_DATABRICKS_JOB_ID:
            job_id_mock.return_value,
            MLFLOW_DATABRICKS_JOB_RUN_ID:
            job_run_id_mock.return_value,
            MLFLOW_DATABRICKS_JOB_TYPE:
            job_type_mock.return_value,
            MLFLOW_DATABRICKS_WEBAPP_URL:
            webapp_url_mock.return_value,
        }
Example #8
class SqlRun(Base):
    __tablename__ = 'runs'

    run_uuid = Column(String(32), nullable=False)
    name = Column(String(250))
    source_type = Column(String(20),
                         default=SourceType.to_string(SourceType.LOCAL))
    source_name = Column(String(500))
    entry_point_name = Column(String(50))
    user_id = Column(String(256), nullable=True, default=None)
    status = Column(String(20),
                    default=RunStatus.to_string(RunStatus.SCHEDULED))
    start_time = Column(BigInteger, default=int(time.time()))
    end_time = Column(BigInteger, nullable=True, default=None)
    source_version = Column(String(50))
    lifecycle_stage = Column(String(20), default=LifecycleStage.ACTIVE)
    artifact_uri = Column(String(200), default=None)
    experiment_id = Column(Integer, ForeignKey('experiments.experiment_id'))
    experiment = relationship('SqlExperiment',
                              backref=backref('runs', cascade='all'))

    __table_args__ = (CheckConstraint(source_type.in_(SourceTypes),
                                      name='source_type'),
                      CheckConstraint(status.in_(RunStatusTypes),
                                      name='status'),
                      CheckConstraint(lifecycle_stage.in_(
                          LifecycleStage.view_type_to_stages(ViewType.ALL)),
                                      name='lifecycle_stage'),
                      PrimaryKeyConstraint('run_uuid', name='run_pk'))

    def to_mlflow_entity(self):
        # Run has different parameter names in __init__ than in its properties, so build the entities manually.
        info = _create_entity(RunInfo, self)
        data = _create_entity(RunData, self)
        return Run(run_info=info, run_data=data)
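The CheckConstraints above limit source_type, status, and lifecycle_stage to their string forms. A rough sketch of how the allowed-value lists they reference (SourceTypes, RunStatusTypes) could be built with to_string; the exact definitions here are an assumption, not the module's verbatim code:

from mlflow.entities import RunStatus, SourceType

# Assumed allowed-value lists backing the CheckConstraints in SqlRun.
SourceTypes = [
    SourceType.to_string(SourceType.NOTEBOOK),
    SourceType.to_string(SourceType.JOB),
    SourceType.to_string(SourceType.PROJECT),
    SourceType.to_string(SourceType.LOCAL),
    SourceType.to_string(SourceType.UNKNOWN),
]

RunStatusTypes = [
    RunStatus.to_string(RunStatus.SCHEDULED),
    RunStatus.to_string(RunStatus.FAILED),
    RunStatus.to_string(RunStatus.FINISHED),
    RunStatus.to_string(RunStatus.RUNNING),
]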
Example #9
def test_run(tmpdir, tracking_uri_mock, use_start_run):  # pylint: disable=unused-argument
    submitted_run = mlflow.projects.run(
        TEST_PROJECT_DIR,
        entry_point="test_tracking",
        parameters={"use_start_run": use_start_run},
        use_conda=False,
        experiment_id=0)
    assert submitted_run.run_id is not None
    # Blocking runs should be finished when they return
    validate_exit_status(submitted_run.get_status(), RunStatus.FINISHED)
    # Test that we can call wait() on a synchronous run & that the run has the correct
    # status after calling wait().
    submitted_run.wait()
    validate_exit_status(submitted_run.get_status(), RunStatus.FINISHED)
    # Validate run contents in the FileStore
    run_uuid = submitted_run.run_id
    mlflow_service = mlflow.tracking.MlflowClient()

    run_infos = mlflow_service.list_run_infos(
        experiment_id=0, run_view_type=ViewType.ACTIVE_ONLY)
    assert len(run_infos) == 1
    store_run_uuid = run_infos[0].run_uuid
    assert run_uuid == store_run_uuid
    run = mlflow_service.get_run(run_uuid)

    assert run.info.status == RunStatus.FINISHED

    assert run.data.params == {"use_start_run": use_start_run}
    assert run.data.metrics == {"some_key": 3}

    tags = run.data.tags
    assert "file:" in tags[MLFLOW_SOURCE_NAME]
    assert tags[MLFLOW_SOURCE_TYPE] == SourceType.to_string(SourceType.PROJECT)
    assert tags[MLFLOW_PROJECT_ENTRY_POINT] == "test_tracking"
Example #10
 def tags(self):
     job_id = databricks_utils.get_job_id()
     job_run_id = databricks_utils.get_job_run_id()
     job_type = databricks_utils.get_job_type()
     webapp_url = databricks_utils.get_webapp_url()
     workspace_url, workspace_id = databricks_utils.get_workspace_info_from_dbutils(
     )
     tags = {
         MLFLOW_SOURCE_NAME:
         ("jobs/{job_id}/run/{job_run_id}".format(job_id=job_id,
                                                  job_run_id=job_run_id)
          if job_id is not None and job_run_id is not None else None),
         MLFLOW_SOURCE_TYPE:
         SourceType.to_string(SourceType.JOB),
     }
     if job_id is not None:
         tags[MLFLOW_DATABRICKS_JOB_ID] = job_id
     if job_run_id is not None:
         tags[MLFLOW_DATABRICKS_JOB_RUN_ID] = job_run_id
     if job_type is not None:
         tags[MLFLOW_DATABRICKS_JOB_TYPE] = job_type
     if webapp_url is not None:
         tags[MLFLOW_DATABRICKS_WEBAPP_URL] = webapp_url
     if workspace_url is not None:
         tags[MLFLOW_DATABRICKS_WORKSPACE_URL] = workspace_url
     if workspace_id is not None:
         tags[MLFLOW_DATABRICKS_WORKSPACE_ID] = workspace_id
     return tags
Example #11
def test_client_create_run_overrides(mock_store):
    experiment_id = mock.Mock()
    user = mock.Mock()
    start_time = mock.Mock()
    tags = {
        MLFLOW_USER: user,
        MLFLOW_PARENT_RUN_ID: mock.Mock(),
        MLFLOW_SOURCE_TYPE: SourceType.to_string(SourceType.JOB),
        MLFLOW_SOURCE_NAME: mock.Mock(),
        MLFLOW_PROJECT_ENTRY_POINT: mock.Mock(),
        MLFLOW_GIT_COMMIT: mock.Mock(),
        "other-key": "other-value"
    }

    MlflowClient().create_run(experiment_id, start_time, tags)

    mock_store.create_run.assert_called_once_with(
        experiment_id=experiment_id,
        user_id=user,
        start_time=start_time,
        tags=[RunTag(key, value) for key, value in tags.items()],
    )
    mock_store.reset_mock()
    MlflowClient().create_run(experiment_id, start_time, tags)
    mock_store.create_run.assert_called_once_with(
        experiment_id=experiment_id,
        user_id=user,
        start_time=start_time,
        tags=[RunTag(key, value) for key, value in tags.items()]
    )
Example #12
def test_start_run_with_parent():
    parent_run = mock.Mock()
    mock_experiment_id = "123456"
    mock_source_name = mock.Mock()

    active_run_stack_patch = mock.patch(
        "mlflow.tracking.fluent._active_run_stack", [parent_run])

    mock_user = mock.Mock()
    user_patch = mock.patch(
        "mlflow.tracking.context.default_context._get_user",
        return_value=mock_user)
    source_name_patch = mock.patch(
        "mlflow.tracking.context.default_context._get_source_name",
        return_value=mock_source_name)

    expected_tags = {
        mlflow_tags.MLFLOW_USER: mock_user,
        mlflow_tags.MLFLOW_SOURCE_NAME: mock_source_name,
        mlflow_tags.MLFLOW_SOURCE_TYPE: SourceType.to_string(SourceType.LOCAL),
        mlflow_tags.MLFLOW_PARENT_RUN_ID: parent_run.info.run_id,
    }

    create_run_patch = mock.patch.object(MlflowClient, "create_run")

    with multi_context(
            active_run_stack_patch,
            create_run_patch,
            user_patch,
            source_name_patch,
    ):
        active_run = start_run(experiment_id=mock_experiment_id, nested=True)
        MlflowClient.create_run.assert_called_once_with(
            experiment_id=mock_experiment_id, tags=expected_tags)
        assert is_from_run(active_run, MlflowClient.create_run.return_value)
Example #13
def test_start_run_defaults(empty_active_run_stack):

    mock_experiment_id = mock.Mock()
    experiment_id_patch = mock.patch(
        "mlflow.tracking.fluent._get_experiment_id",
        return_value=mock_experiment_id)
    databricks_notebook_patch = mock.patch(
        "mlflow.tracking.fluent.is_in_databricks_notebook", return_value=False)
    mock_source_name = mock.Mock()
    source_name_patch = mock.patch("mlflow.tracking.context._get_source_name",
                                   return_value=mock_source_name)
    source_type_patch = mock.patch("mlflow.tracking.context._get_source_type",
                                   return_value=SourceType.NOTEBOOK)
    mock_source_version = mock.Mock()
    source_version_patch = mock.patch(
        "mlflow.tracking.context._get_source_version",
        return_value=mock_source_version)

    expected_tags = {
        mlflow_tags.MLFLOW_SOURCE_NAME: mock_source_name,
        mlflow_tags.MLFLOW_SOURCE_TYPE:
        SourceType.to_string(SourceType.NOTEBOOK),
        mlflow_tags.MLFLOW_GIT_COMMIT: mock_source_version
    }

    create_run_patch = mock.patch.object(MlflowClient, "create_run")

    with experiment_id_patch, databricks_notebook_patch, source_name_patch, source_type_patch, \
            source_version_patch, create_run_patch:
        active_run = start_run()
        MlflowClient.create_run.assert_called_once_with(
            experiment_id=mock_experiment_id, tags=expected_tags)
        assert is_from_run(active_run, MlflowClient.create_run.return_value)
Example #14
def test_start_run_with_parent():

    parent_run = mock.Mock()
    mock_experiment_id = mock.Mock()
    mock_source_name = mock.Mock()
    mock_run_name = mock.Mock()

    active_run_stack_patch = mock.patch(
        "mlflow.tracking.fluent._active_run_stack", [parent_run])

    databricks_notebook_patch = mock.patch(
        "mlflow.tracking.fluent.is_in_databricks_notebook", return_value=False)
    source_name_patch = mock.patch("mlflow.tracking.context._get_source_name",
                                   return_value=mock_source_name)

    expected_tags = {
        mlflow_tags.MLFLOW_SOURCE_NAME: mock_source_name,
        mlflow_tags.MLFLOW_SOURCE_TYPE: SourceType.to_string(SourceType.LOCAL),
        mlflow_tags.MLFLOW_PARENT_RUN_ID: parent_run.info.run_id
    }

    create_run_patch = mock.patch.object(MlflowClient, "create_run")

    with databricks_notebook_patch, active_run_stack_patch, create_run_patch, source_name_patch:
        active_run = start_run(experiment_id=mock_experiment_id, nested=True)
        MlflowClient.create_run.assert_called_once_with(
            experiment_id=mock_experiment_id, tags=expected_tags)
        assert is_from_run(active_run, MlflowClient.create_run.return_value)
Example #15
    def create_run(self, experiment_id, user_id, run_name, source_type, source_name,
                   entry_point_name, start_time, source_version, tags, parent_run_id):
        experiment = self.get_experiment(experiment_id)

        if experiment.lifecycle_stage != LifecycleStage.ACTIVE:
            raise MlflowException('Experiment id={} must be active'.format(experiment_id),
                                  INVALID_STATE)

        run_uuid = uuid.uuid4().hex
        artifact_location = build_path(experiment.artifact_location, run_uuid,
                                       SqlAlchemyStore.ARTIFACTS_FOLDER_NAME)
        run = SqlRun(name=run_name or "", artifact_uri=artifact_location, run_uuid=run_uuid,
                     experiment_id=experiment_id, source_type=SourceType.to_string(source_type),
                     source_name=source_name, entry_point_name=entry_point_name,
                     user_id=user_id, status=RunStatus.to_string(RunStatus.RUNNING),
                     start_time=start_time, end_time=None,
                     source_version=source_version, lifecycle_stage=LifecycleStage.ACTIVE)

        for tag in tags:
            run.tags.append(SqlTag(key=tag.key, value=tag.value))
        if parent_run_id:
            run.tags.append(SqlTag(key=MLFLOW_PARENT_RUN_ID, value=parent_run_id))
        if run_name:
            run.tags.append(SqlTag(key=MLFLOW_RUN_NAME, value=run_name))

        self._save_to_db([run])

        return run.to_mlflow_entity()
Example #16
    def create_run(self, experiment_id, user_id, start_time, tags):
        with self.ManagedSessionMaker() as session:
            experiment = self.get_experiment(experiment_id)
            self._check_experiment_is_active(experiment)

            run_id = uuid.uuid4().hex
            artifact_location = posixpath.join(
                experiment.artifact_location, run_id,
                SqlAlchemyStore.ARTIFACTS_FOLDER_NAME)
            run = SqlRun(name="",
                         artifact_uri=artifact_location,
                         run_uuid=run_id,
                         experiment_id=experiment_id,
                         source_type=SourceType.to_string(SourceType.UNKNOWN),
                         source_name="",
                         entry_point_name="",
                         user_id=user_id,
                         status=RunStatus.to_string(RunStatus.RUNNING),
                         start_time=start_time,
                         end_time=None,
                         source_version="",
                         lifecycle_stage=LifecycleStage.ACTIVE)

            tags_dict = {}
            for tag in tags:
                tags_dict[tag.key] = tag.value
            run.tags = [
                SqlTag(key=key, value=value)
                for key, value in tags_dict.items()
            ]
            self._save_to_db(objs=run, session=session)

            return run.to_mlflow_entity()
Example #17
def test_start_run_overrides(empty_active_run_stack):

    databricks_notebook_patch = mock.patch(
        "mlflow.tracking.fluent.is_in_databricks_notebook", return_value=False)

    mock_experiment_id = mock.Mock()
    mock_source_name = mock.Mock()
    source_type = SourceType.JOB
    mock_source_version = mock.Mock()
    mock_entry_point_name = mock.Mock()
    mock_run_name = mock.Mock()

    expected_tags = {
        mlflow_tags.MLFLOW_SOURCE_NAME: mock_source_name,
        mlflow_tags.MLFLOW_SOURCE_TYPE: SourceType.to_string(source_type),
        mlflow_tags.MLFLOW_GIT_COMMIT: mock_source_version,
        mlflow_tags.MLFLOW_PROJECT_ENTRY_POINT: mock_entry_point_name
    }

    create_run_patch = mock.patch.object(MlflowClient, "create_run")

    with databricks_notebook_patch, create_run_patch:
        active_run = start_run(experiment_id=mock_experiment_id,
                               source_name=mock_source_name,
                               source_version=mock_source_version,
                               entry_point_name=mock_entry_point_name,
                               source_type=source_type,
                               run_name=mock_run_name)
        MlflowClient.create_run.assert_called_once_with(
            experiment_id=mock_experiment_id,
            run_name=mock_run_name,
            tags=expected_tags)
        assert is_from_run(active_run, MlflowClient.create_run.return_value)
Example #18
def test_default_run_context_tags(patch_script_name):
    mock_user = mock.Mock()
    with mock.patch("getpass.getuser", return_value=mock_user):
        assert DefaultRunContext().tags() == {
            MLFLOW_USER: mock_user,
            MLFLOW_SOURCE_NAME: MOCK_SCRIPT_NAME,
            MLFLOW_SOURCE_TYPE: SourceType.to_string(SourceType.LOCAL)
        }
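Several of the surrounding examples test run-context providers whose tags() output supplies MLFLOW_SOURCE_NAME and MLFLOW_SOURCE_TYPE. A hedged sketch of that provider shape; the class and its tag values are hypothetical, and the RunContextProvider import path is assumed from the mlflow.tracking.context package these tests patch:

from mlflow.entities import SourceType
from mlflow.tracking.context.abstract_context import RunContextProvider
from mlflow.utils.mlflow_tags import MLFLOW_SOURCE_NAME, MLFLOW_SOURCE_TYPE


class ExampleCiRunContext(RunContextProvider):
    """Hypothetical provider that tags runs launched from a CI job."""

    def in_context(self):
        # Only contribute tags when this environment actually applies.
        return True

    def tags(self):
        return {
            MLFLOW_SOURCE_NAME: "ci-pipeline",
            MLFLOW_SOURCE_TYPE: SourceType.to_string(SourceType.JOB),
        }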
Example #19
def test_start_run_defaults_databricks_notebook(empty_active_run_stack, ):  # pylint: disable=unused-argument

    mock_experiment_id = mock.Mock()
    experiment_id_patch = mock.patch(
        "mlflow.tracking.fluent._get_experiment_id",
        return_value=mock_experiment_id)
    databricks_notebook_patch = mock.patch(
        "mlflow.utils.databricks_utils.is_in_databricks_notebook",
        return_value=True)
    mock_user = mock.Mock()
    user_patch = mock.patch(
        "mlflow.tracking.context.default_context._get_user",
        return_value=mock_user)
    mock_source_version = mock.Mock()
    source_version_patch = mock.patch(
        "mlflow.tracking.context.git_context._get_source_version",
        return_value=mock_source_version)
    mock_notebook_id = mock.Mock()
    notebook_id_patch = mock.patch(
        "mlflow.utils.databricks_utils.get_notebook_id",
        return_value=mock_notebook_id)
    mock_notebook_path = mock.Mock()
    notebook_path_patch = mock.patch(
        "mlflow.utils.databricks_utils.get_notebook_path",
        return_value=mock_notebook_path)
    mock_webapp_url = mock.Mock()
    webapp_url_patch = mock.patch(
        "mlflow.utils.databricks_utils.get_webapp_url",
        return_value=mock_webapp_url)

    expected_tags = {
        mlflow_tags.MLFLOW_USER: mock_user,
        mlflow_tags.MLFLOW_SOURCE_NAME: mock_notebook_path,
        mlflow_tags.MLFLOW_SOURCE_TYPE:
        SourceType.to_string(SourceType.NOTEBOOK),
        mlflow_tags.MLFLOW_GIT_COMMIT: mock_source_version,
        mlflow_tags.MLFLOW_DATABRICKS_NOTEBOOK_ID: mock_notebook_id,
        mlflow_tags.MLFLOW_DATABRICKS_NOTEBOOK_PATH: mock_notebook_path,
        mlflow_tags.MLFLOW_DATABRICKS_WEBAPP_URL: mock_webapp_url,
    }

    create_run_patch = mock.patch.object(MlflowClient, "create_run")

    with multi_context(
            experiment_id_patch,
            databricks_notebook_patch,
            user_patch,
            source_version_patch,
            notebook_id_patch,
            notebook_path_patch,
            webapp_url_patch,
            create_run_patch,
    ):
        active_run = start_run()
        MlflowClient.create_run.assert_called_once_with(
            experiment_id=mock_experiment_id, tags=expected_tags)
        assert is_from_run(active_run, MlflowClient.create_run.return_value)
Example #20
def _create_run(uri, experiment_id, work_dir, version, entry_point,
                parameters):
    """
    Create a ``Run`` against the current MLflow tracking server, logging metadata (e.g. the URI,
    entry point, and parameters of the project) about the run. Return an ``ActiveRun`` that can be
    used to report additional data about the run (metrics/params) to the tracking server.
    """
    if _is_local_uri(uri):
        source_name = tracking._tracking_service.utils._get_git_url_if_present(
            _expand_uri(uri))
    else:
        source_name = _expand_uri(uri)
    source_version = _get_git_commit(work_dir)
    existing_run = fluent.active_run()
    if existing_run:
        parent_run_id = existing_run.info.run_id
    else:
        parent_run_id = None

    tags = {
        MLFLOW_USER: _get_user(),
        MLFLOW_SOURCE_NAME: source_name,
        MLFLOW_SOURCE_TYPE: SourceType.to_string(SourceType.PROJECT),
        MLFLOW_PROJECT_ENTRY_POINT: entry_point,
    }
    if source_version is not None:
        tags[MLFLOW_GIT_COMMIT] = source_version
    if parent_run_id is not None:
        tags[MLFLOW_PARENT_RUN_ID] = parent_run_id

    repo_url = _get_git_repo_url(work_dir)
    if repo_url is not None:
        tags[MLFLOW_GIT_REPO_URL] = repo_url
        tags[LEGACY_MLFLOW_GIT_REPO_URL] = repo_url

    # Add branch name tag if a branch is specified through -version
    if _is_valid_branch_name(work_dir, version):
        tags[MLFLOW_GIT_BRANCH] = version
        tags[LEGACY_MLFLOW_GIT_BRANCH_NAME] = version
    active_run = tracking.MlflowClient().create_run(
        experiment_id=experiment_id, tags=tags)

    project = _project_spec.load_project(work_dir)
    # Consolidate parameters for logging.
    # `storage_dir` is `None` since we want to log actual path not downloaded local path
    entry_point_obj = project.get_entry_point(entry_point)
    final_params, extra_params = entry_point_obj.compute_parameters(
        parameters, storage_dir=None)
    params_list = [
        Param(key, value) for key, value in list(final_params.items()) +
        list(extra_params.items())
    ]
    tracking.MlflowClient().log_batch(active_run.info.run_id,
                                      params=params_list)
    return active_run
Example #21
def test_databricks_job_run_context_tags_nones():
    patch_job_id = mock.patch("mlflow.utils.databricks_utils.get_job_id", return_value=None)
    patch_job_run_id = mock.patch("mlflow.utils.databricks_utils.get_job_run_id", return_value=None)
    patch_job_type = mock.patch("mlflow.utils.databricks_utils.get_job_type", return_value=None)
    patch_webapp_url = mock.patch("mlflow.utils.databricks_utils.get_webapp_url", return_value=None)

    with patch_job_id, patch_job_run_id, patch_job_type, patch_webapp_url:
        assert DatabricksJobRunContext().tags() == {
            MLFLOW_SOURCE_NAME: None,
            MLFLOW_SOURCE_TYPE: SourceType.to_string(SourceType.JOB),
        }
Example #22
def test_run_local_git_repo(
        patch_user,  # pylint: disable=unused-argument
        local_git_repo,
        local_git_repo_uri,
        tracking_uri_mock,  # pylint: disable=unused-argument
        use_start_run,
        version):
    if version is not None:
        uri = local_git_repo_uri + "#" + TEST_PROJECT_NAME
    else:
        uri = os.path.join("%s/" % local_git_repo, TEST_PROJECT_NAME)
    if version == "git-commit":
        version = _get_version_local_git_repo(local_git_repo)
    submitted_run = mlflow.projects.run(
        uri,
        entry_point="test_tracking",
        version=version,
        parameters={"use_start_run": use_start_run},
        use_conda=False,
        experiment_id=FileStore.DEFAULT_EXPERIMENT_ID)

    # Blocking runs should be finished when they return
    validate_exit_status(submitted_run.get_status(), RunStatus.FINISHED)
    # Test that we can call wait() on a synchronous run & that the run has the correct
    # status after calling wait().
    submitted_run.wait()
    validate_exit_status(submitted_run.get_status(), RunStatus.FINISHED)
    # Validate run contents in the FileStore
    run_id = submitted_run.run_id
    mlflow_service = mlflow.tracking.MlflowClient()
    run_infos = mlflow_service.list_run_infos(
        experiment_id=FileStore.DEFAULT_EXPERIMENT_ID,
        run_view_type=ViewType.ACTIVE_ONLY)
    assert len(run_infos) == 1
    store_run_id = run_infos[0].run_id
    assert run_id == store_run_id
    run = mlflow_service.get_run(run_id)

    assert run.info.status == RunStatus.to_string(RunStatus.FINISHED)
    assert run.data.params == {"use_start_run": use_start_run}
    assert run.data.metrics == {"some_key": 3}

    tags = run.data.tags
    assert tags[MLFLOW_USER] == MOCK_USER
    assert "file:" in tags[MLFLOW_SOURCE_NAME]
    assert tags[MLFLOW_SOURCE_TYPE] == SourceType.to_string(SourceType.PROJECT)
    assert tags[MLFLOW_PROJECT_ENTRY_POINT] == "test_tracking"
    assert tags[MLFLOW_PROJECT_BACKEND] == "local"

    if version == "master":
        assert tags[MLFLOW_GIT_BRANCH] == "master"
        assert tags[MLFLOW_GIT_REPO_URL] == local_git_repo_uri
        assert tags[LEGACY_MLFLOW_GIT_BRANCH_NAME] == "master"
        assert tags[LEGACY_MLFLOW_GIT_REPO_URL] == local_git_repo_uri
Example #23
def test_start_run_creates_new_run_with_user_specified_tags():

    mock_experiment_id = mock.Mock()
    experiment_id_patch = mock.patch(
        "mlflow.tracking.fluent._get_experiment_id", return_value=mock_experiment_id
    )
    databricks_notebook_patch = mock.patch(
        "mlflow.tracking.fluent.is_in_databricks_notebook", return_value=False
    )
    mock_user = mock.Mock()
    user_patch = mock.patch(
        "mlflow.tracking.context.default_context._get_user", return_value=mock_user
    )
    mock_source_name = mock.Mock()
    source_name_patch = mock.patch(
        "mlflow.tracking.context.default_context._get_source_name", return_value=mock_source_name
    )
    source_type_patch = mock.patch(
        "mlflow.tracking.context.default_context._get_source_type", return_value=SourceType.NOTEBOOK
    )
    mock_source_version = mock.Mock()
    source_version_patch = mock.patch(
        "mlflow.tracking.context.git_context._get_source_version", return_value=mock_source_version
    )
    user_specified_tags = {
        "ml_task": "regression",
        "num_layers": 7,
        mlflow_tags.MLFLOW_USER: "******",
    }
    expected_tags = {
        mlflow_tags.MLFLOW_SOURCE_NAME: mock_source_name,
        mlflow_tags.MLFLOW_SOURCE_TYPE: SourceType.to_string(SourceType.NOTEBOOK),
        mlflow_tags.MLFLOW_GIT_COMMIT: mock_source_version,
        mlflow_tags.MLFLOW_USER: "******",
        "ml_task": "regression",
        "num_layers": 7,
    }

    create_run_patch = mock.patch.object(MlflowClient, "create_run")

    with multi_context(
        experiment_id_patch,
        databricks_notebook_patch,
        user_patch,
        source_name_patch,
        source_type_patch,
        source_version_patch,
        create_run_patch,
    ):
        active_run = start_run(tags=user_specified_tags)
        MlflowClient.create_run.assert_called_once_with(
            experiment_id=mock_experiment_id, tags=expected_tags
        )
        assert is_from_run(active_run, MlflowClient.create_run.return_value)
Example #24
def test_databricks_notebook_run_context_tags_nones():
    patch_notebook_id = mock.patch("mlflow.utils.databricks_utils.get_notebook_id",
                                   return_value=None)
    patch_notebook_path = mock.patch("mlflow.utils.databricks_utils.get_notebook_path",
                                     return_value=None)
    patch_webapp_url = mock.patch("mlflow.utils.databricks_utils.get_webapp_url",
                                  return_value=None)

    with patch_notebook_id, patch_notebook_path, patch_webapp_url:
        assert DatabricksNotebookRunContext().tags() == {
            MLFLOW_SOURCE_NAME: None,
            MLFLOW_SOURCE_TYPE: SourceType.to_string(SourceType.NOTEBOOK),
        }
Example #25
def test_start_run_overrides_databricks_notebook(empty_active_run_stack):

    databricks_notebook_patch = mock.patch(
        "mlflow.utils.databricks_utils.is_in_databricks_notebook",
        return_value=True)
    mock_notebook_id = mock.Mock()
    notebook_id_patch = mock.patch(
        "mlflow.utils.databricks_utils.get_notebook_id",
        return_value=mock_notebook_id)
    mock_notebook_path = mock.Mock()
    notebook_path_patch = mock.patch(
        "mlflow.utils.databricks_utils.get_notebook_path",
        return_value=mock_notebook_path)
    mock_webapp_url = mock.Mock()
    webapp_url_patch = mock.patch(
        "mlflow.utils.databricks_utils.get_webapp_url",
        return_value=mock_webapp_url)

    mock_experiment_id = mock.Mock()
    mock_source_name = mock.Mock()
    source_type = SourceType.JOB
    mock_source_version = mock.Mock()
    mock_entry_point_name = mock.Mock()
    mock_run_name = mock.Mock()

    expected_tags = {
        mlflow_tags.MLFLOW_SOURCE_NAME: mock_source_name,
        mlflow_tags.MLFLOW_SOURCE_TYPE: SourceType.to_string(source_type),
        mlflow_tags.MLFLOW_GIT_COMMIT: mock_source_version,
        mlflow_tags.MLFLOW_PROJECT_ENTRY_POINT: mock_entry_point_name,
        mlflow_tags.MLFLOW_DATABRICKS_NOTEBOOK_ID: mock_notebook_id,
        mlflow_tags.MLFLOW_DATABRICKS_NOTEBOOK_PATH: mock_notebook_path,
        mlflow_tags.MLFLOW_DATABRICKS_WEBAPP_URL: mock_webapp_url
    }

    create_run_patch = mock.patch.object(MlflowClient, "create_run")

    with databricks_notebook_patch, notebook_id_patch, notebook_path_patch, webapp_url_patch, \
            create_run_patch:
        active_run = start_run(experiment_id=mock_experiment_id,
                               source_name=mock_source_name,
                               source_version=mock_source_version,
                               entry_point_name=mock_entry_point_name,
                               source_type=source_type,
                               run_name=mock_run_name)
        MlflowClient.create_run.assert_called_once_with(
            experiment_id=mock_experiment_id,
            run_name=mock_run_name,
            tags=expected_tags)
        assert is_from_run(active_run, MlflowClient.create_run.return_value)
Example #26
def test_databricks_notebook_run_context_tags():
    patch_notebook_id = mock.patch("mlflow.utils.databricks_utils.get_notebook_id")
    patch_notebook_path = mock.patch("mlflow.utils.databricks_utils.get_notebook_path")
    patch_webapp_url = mock.patch("mlflow.utils.databricks_utils.get_webapp_url")

    with patch_notebook_id as notebook_id_mock, patch_notebook_path as notebook_path_mock, \
            patch_webapp_url as webapp_url_mock:
        assert DatabricksNotebookRunContext().tags() == {
            MLFLOW_SOURCE_NAME: notebook_path_mock.return_value,
            MLFLOW_SOURCE_TYPE: SourceType.to_string(SourceType.NOTEBOOK),
            MLFLOW_DATABRICKS_NOTEBOOK_ID: notebook_id_mock.return_value,
            MLFLOW_DATABRICKS_NOTEBOOK_PATH: notebook_path_mock.return_value,
            MLFLOW_DATABRICKS_WEBAPP_URL: webapp_url_mock.return_value
        }
Example #27
 def tags(self):
     notebook_id = databricks_utils.get_notebook_id()
     notebook_path = databricks_utils.get_notebook_path()
     webapp_url = databricks_utils.get_webapp_url()
     tags = {
         MLFLOW_SOURCE_NAME: notebook_path,
         MLFLOW_SOURCE_TYPE: SourceType.to_string(SourceType.NOTEBOOK)
     }
     if notebook_id is not None:
         tags[MLFLOW_DATABRICKS_NOTEBOOK_ID] = notebook_id
     if notebook_path is not None:
         tags[MLFLOW_DATABRICKS_NOTEBOOK_PATH] = notebook_path
     if webapp_url is not None:
         tags[MLFLOW_DATABRICKS_WEBAPP_URL] = webapp_url
     return tags
Example #28
 def _get_run_configs(self, name='test', experiment_id=None):
     return {
         'experiment_id': experiment_id,
         'name': name,
         'user_id': 'Anderson',
         'run_uuid': uuid.uuid4().hex,
         'status': RunStatus.to_string(RunStatus.SCHEDULED),
         'source_type': SourceType.to_string(SourceType.NOTEBOOK),
         'source_name': 'Python application',
         'entry_point_name': 'main.py',
         'start_time': int(time.time()),
         'end_time': int(time.time()),
         'source_version': mlflow.__version__,
         'lifecycle_stage': entities.LifecycleStage.ACTIVE,
         'artifact_uri': '//'
     }
Example #29
def test_start_run_with_description(empty_active_run_stack):  # pylint: disable=unused-argument
    mock_experiment_id = mock.Mock()
    experiment_id_patch = mock.patch(
        "mlflow.tracking.fluent._get_experiment_id",
        return_value=mock_experiment_id)
    mock_user = mock.Mock()
    user_patch = mock.patch(
        "mlflow.tracking.context.default_context._get_user",
        return_value=mock_user)
    mock_source_name = mock.Mock()
    source_name_patch = mock.patch(
        "mlflow.tracking.context.default_context._get_source_name",
        return_value=mock_source_name)
    source_type_patch = mock.patch(
        "mlflow.tracking.context.default_context._get_source_type",
        return_value=SourceType.NOTEBOOK)
    mock_source_version = mock.Mock()
    source_version_patch = mock.patch(
        "mlflow.tracking.context.git_context._get_source_version",
        return_value=mock_source_version)

    description = "Test description"

    expected_tags = {
        mlflow_tags.MLFLOW_SOURCE_NAME: mock_source_name,
        mlflow_tags.MLFLOW_SOURCE_TYPE:
        SourceType.to_string(SourceType.NOTEBOOK),
        mlflow_tags.MLFLOW_GIT_COMMIT: mock_source_version,
        mlflow_tags.MLFLOW_USER: mock_user,
        mlflow_tags.MLFLOW_RUN_NOTE: description,
    }

    create_run_patch = mock.patch.object(MlflowClient, "create_run")

    with multi_context(
            experiment_id_patch,
            user_patch,
            source_name_patch,
            source_type_patch,
            source_version_patch,
            create_run_patch,
    ):
        active_run = start_run(description=description)
        MlflowClient.create_run.assert_called_once_with(
            experiment_id=mock_experiment_id, tags=expected_tags)
        assert is_from_run(active_run, MlflowClient.create_run.return_value)
Example #30
    def create_run(self, experiment_id, user_id, run_name, source_type,
                   source_name, entry_point_name, start_time, source_version,
                   tags, parent_run_id):
        with self.ManagedSessionMaker() as session:
            experiment = self.get_experiment(experiment_id)

            if experiment.lifecycle_stage != LifecycleStage.ACTIVE:
                raise MlflowException(
                    'Experiment id={} must be active'.format(experiment_id),
                    INVALID_STATE)

            run_uuid = uuid.uuid4().hex
            artifact_location = build_path(
                experiment.artifact_location, run_uuid,
                SqlAlchemyStore.ARTIFACTS_FOLDER_NAME)
            run = SqlRun(name=run_name or "",
                         artifact_uri=artifact_location,
                         run_uuid=run_uuid,
                         experiment_id=experiment_id,
                         source_type=SourceType.to_string(source_type),
                         source_name=source_name,
                         entry_point_name=entry_point_name,
                         user_id=user_id,
                         status=RunStatus.to_string(RunStatus.RUNNING),
                         start_time=start_time,
                         end_time=None,
                         source_version=source_version,
                         lifecycle_stage=LifecycleStage.ACTIVE)

            tags_dict = {}
            for tag in tags:
                tags_dict[tag.key] = tag.value
            if parent_run_id:
                tags_dict[MLFLOW_PARENT_RUN_ID] = parent_run_id
            if run_name:
                tags_dict[MLFLOW_RUN_NAME] = run_name
            run.tags = [
                SqlTag(key=key, value=value)
                for key, value in tags_dict.items()
            ]
            self._save_to_db(objs=run, session=session)

            return run.to_mlflow_entity()