def test_run_info(self):
    """SqlRun.to_mlflow_entity must copy every configured field into RunInfo."""
    experiment_id = self._experiment_factory('test exp')
    config = {
        'experiment_id': experiment_id,
        'name': 'test run',
        'user_id': 'Anderson',
        'run_uuid': 'test',
        'status': RunStatus.to_string(RunStatus.SCHEDULED),
        'source_type': SourceType.to_string(SourceType.LOCAL),
        'source_name': 'Python application',
        'entry_point_name': 'main.py',
        'start_time': int(time.time()),
        'end_time': int(time.time()),
        'source_version': mlflow.__version__,
        'lifecycle_stage': entities.LifecycleStage.ACTIVE,
        'artifact_uri': '//',
    }
    run = models.SqlRun(**config).to_mlflow_entity()
    # status and source_type are stored as strings but surfaced as enum values
    # on the entity, so round-trip the entity value through to_string before
    # comparing against the string in ``config``.
    for key, expected in config.items():
        actual = getattr(run.info, key)
        if key == 'source_type':
            self.assertEqual(expected, SourceType.to_string(actual))
        elif key == 'status':
            self.assertEqual(expected, RunStatus.to_string(actual))
        else:
            self.assertEqual(expected, actual)
def test_client_create_run_overrides(mock_store):
    """create_run must translate MLflow system tags into the legacy store
    keyword arguments (parent run, source type/name, entry point, commit)."""
    experiment_id = mock.Mock()
    user_id = mock.Mock()
    run_name = mock.Mock()
    start_time = mock.Mock()
    tags = {
        MLFLOW_PARENT_RUN_ID: mock.Mock(),
        MLFLOW_SOURCE_TYPE: SourceType.to_string(SourceType.JOB),
        MLFLOW_SOURCE_NAME: mock.Mock(),
        MLFLOW_PROJECT_ENTRY_POINT: mock.Mock(),
        MLFLOW_GIT_COMMIT: mock.Mock(),
        "other-key": "other-value",
    }
    MlflowClient().create_run(experiment_id, user_id, run_name, start_time, tags)
    expected_tag_entities = [RunTag(key, value) for key, value in tags.items()]
    mock_store.create_run.assert_called_once_with(
        experiment_id=experiment_id,
        user_id=user_id,
        run_name=run_name,
        start_time=start_time,
        tags=expected_tag_entities,
        parent_run_id=tags[MLFLOW_PARENT_RUN_ID],
        source_type=SourceType.JOB,
        source_name=tags[MLFLOW_SOURCE_NAME],
        entry_point_name=tags[MLFLOW_PROJECT_ENTRY_POINT],
        source_version=tags[MLFLOW_GIT_COMMIT],
    )
def _create_run(uri, experiment_id, work_dir, entry_point):
    """
    Create a ``Run`` against the current MLflow tracking server, logging metadata
    (e.g. the URI, entry point, and parameters of the project) about the run.
    Return an ``ActiveRun`` that can be used to report additional data about the
    run (metrics/params) to the tracking server.
    """
    expanded_uri = _expand_uri(uri)
    # For local URIs, prefer the project's git remote URL when one exists.
    if _is_local_uri(uri):
        source_name = tracking.utils._get_git_url_if_present(expanded_uri)
    else:
        source_name = expanded_uri
    source_version = _get_git_commit(work_dir)
    existing_run = fluent.active_run()
    parent_run_id = existing_run.info.run_id if existing_run else None
    tags = {
        MLFLOW_USER: _get_user(),
        MLFLOW_SOURCE_NAME: source_name,
        MLFLOW_SOURCE_TYPE: SourceType.to_string(SourceType.PROJECT),
        MLFLOW_PROJECT_ENTRY_POINT: entry_point,
    }
    # Optional tags: only attach when the information is actually available.
    if source_version is not None:
        tags[MLFLOW_GIT_COMMIT] = source_version
    if parent_run_id is not None:
        tags[MLFLOW_PARENT_RUN_ID] = parent_run_id
    return tracking.MlflowClient().create_run(experiment_id=experiment_id,
                                              tags=tags)
def test_get_experiment_by_id_with_is_in_databricks_job():
    """_get_experiment_id should auto-create a job-backed experiment (named
    "jobs:/<job id>") and return its id when running inside a Databricks job."""
    exp_id = 768
    job_id = 123
    exp_name = "jobs:/" + str(job_id)
    job_type_info = "NORMAL"
    # Patch every job-detection helper as seen from mlflow.tracking.fluent so
    # the fluent API believes it is inside a Databricks job, and stub out
    # MlflowClient.create_experiment so no server call happens.
    with mock.patch(
        "mlflow.tracking.fluent.is_in_databricks_job"
    ) as job_detection_mock, mock.patch(
        "mlflow.tracking.fluent.get_job_type_info"
    ) as job_type_info_mock, mock.patch(
        "mlflow.tracking.fluent.get_job_id"
    ) as job_id_mock, mock.patch(
        "mlflow.tracking.fluent.get_experiment_name_from_job_id"
    ) as job_to_experiment_name_mapping_mock, mock.patch.object(
        MlflowClient, "create_experiment", return_value=exp_id
    ):
        job_detection_mock.return_value = True
        job_type_info_mock.return_value = job_type_info
        job_id_mock.return_value = job_id
        job_to_experiment_name_mapping_mock.return_value = exp_name
        # Tags we expect the auto-created experiment to carry.
        tags = {}
        tags[MLFLOW_DATABRICKS_JOB_TYPE_INFO] = job_type_info
        tags[MLFLOW_EXPERIMENT_SOURCE_TYPE] = SourceType.to_string(
            SourceType.JOB)
        tags[MLFLOW_EXPERIMENT_SOURCE_ID] = job_id
        assert _get_experiment_id() == exp_id
        MlflowClient.create_experiment.assert_called_with(exp_name, None, tags)
def test_databricks_notebook_run_context_tags():
    """DatabricksNotebookRunContext.tags() should surface notebook id/path,
    webapp URL, and the workspace URL/id pair as MLflow tags."""
    patch_notebook_id = mock.patch(
        "mlflow.utils.databricks_utils.get_notebook_id")
    patch_notebook_path = mock.patch(
        "mlflow.utils.databricks_utils.get_notebook_path")
    patch_webapp_url = mock.patch(
        "mlflow.utils.databricks_utils.get_webapp_url")
    patch_workspace_info = mock.patch(
        "mlflow.utils.databricks_utils.get_workspace_info_from_dbutils",
        return_value=("https://databricks.com", "123456"),
    )
    with multi_context(
        patch_notebook_id, patch_notebook_path, patch_webapp_url,
        patch_workspace_info
    ) as (notebook_id_mock, notebook_path_mock, webapp_url_mock,
          workspace_info_mock):
        workspace_url, workspace_id = workspace_info_mock.return_value
        expected = {
            MLFLOW_SOURCE_NAME: notebook_path_mock.return_value,
            MLFLOW_SOURCE_TYPE: SourceType.to_string(SourceType.NOTEBOOK),
            MLFLOW_DATABRICKS_NOTEBOOK_ID: notebook_id_mock.return_value,
            MLFLOW_DATABRICKS_NOTEBOOK_PATH: notebook_path_mock.return_value,
            MLFLOW_DATABRICKS_WEBAPP_URL: webapp_url_mock.return_value,
            MLFLOW_DATABRICKS_WORKSPACE_URL: workspace_url,
            MLFLOW_DATABRICKS_WORKSPACE_ID: workspace_id,
        }
        assert DatabricksNotebookRunContext().tags() == expected
def test_databricks_job_default_experiment_id():
    """DatabricksJobExperimentProvider should create a "jobs:/<id>" experiment
    tagged with the job's type info and id, and return the new experiment id."""
    job_id = "job_id"
    exp_name = "jobs:/" + str(job_id)
    patch_job_id = mock.patch("mlflow.utils.databricks_utils.get_job_id",
                              return_value=job_id)
    patch_job_type = mock.patch(
        "mlflow.utils.databricks_utils.get_job_type_info",
        return_value="NORMAL")
    patch_experiment_name_from_job_id = mock.patch(
        "mlflow.utils.databricks_utils.get_experiment_name_from_job_id",
        return_value=exp_name)
    experiment_id = "experiment_id"
    create_experiment = mock.patch.object(MlflowClient, "create_experiment",
                                          return_value=experiment_id)
    # NOTE: the unpacking order below must match the order the patches are
    # passed to multi_context.
    with multi_context(patch_job_id, patch_job_type,
                       patch_experiment_name_from_job_id,
                       create_experiment) as (
            job_id_mock,
            job_type_info_mock,
            experiment_name_from_job_id_mock,
            create_experiment_mock,
    ):
        # Tags the provider is expected to attach to the new experiment.
        tags = {}
        tags[MLFLOW_DATABRICKS_JOB_TYPE_INFO] = job_type_info_mock.return_value
        tags[MLFLOW_EXPERIMENT_SOURCE_TYPE] = SourceType.to_string(
            SourceType.JOB)
        tags[MLFLOW_EXPERIMENT_SOURCE_ID] = job_id_mock.return_value
        assert DatabricksJobExperimentProvider().get_experiment_id(
        ) == experiment_id
        create_experiment_mock.assert_called_once_with(
            experiment_name_from_job_id_mock.return_value, None, tags)
def test_databricks_job_run_context_tags():
    """DatabricksJobRunContext.tags() should expose the composed job source
    name plus job id, run id, job type, and webapp URL."""
    patch_job_id = mock.patch("mlflow.utils.databricks_utils.get_job_id")
    patch_job_run_id = mock.patch(
        "mlflow.utils.databricks_utils.get_job_run_id")
    patch_job_type = mock.patch("mlflow.utils.databricks_utils.get_job_type")
    patch_webapp_url = mock.patch(
        "mlflow.utils.databricks_utils.get_webapp_url")
    with multi_context(patch_job_id, patch_job_run_id, patch_job_type,
                       patch_webapp_url) as (job_id_mock, job_run_id_mock,
                                             job_type_mock, webapp_url_mock):
        expected_source = "jobs/{job_id}/run/{job_run_id}".format(
            job_id=job_id_mock.return_value,
            job_run_id=job_run_id_mock.return_value)
        expected = {
            MLFLOW_SOURCE_NAME: expected_source,
            MLFLOW_SOURCE_TYPE: SourceType.to_string(SourceType.JOB),
            MLFLOW_DATABRICKS_JOB_ID: job_id_mock.return_value,
            MLFLOW_DATABRICKS_JOB_RUN_ID: job_run_id_mock.return_value,
            MLFLOW_DATABRICKS_JOB_TYPE: job_type_mock.return_value,
            MLFLOW_DATABRICKS_WEBAPP_URL: webapp_url_mock.return_value,
        }
        assert DatabricksJobRunContext().tags() == expected
class SqlRun(Base):
    """SQLAlchemy model backing the ``runs`` table.

    Each row holds the metadata of a single MLflow run. ``to_mlflow_entity``
    converts a row into the corresponding ``mlflow.entities.Run``.
    """
    __tablename__ = 'runs'

    run_uuid = Column(String(32), nullable=False)
    name = Column(String(250))
    source_type = Column(String(20),
                         default=SourceType.to_string(SourceType.LOCAL))
    source_name = Column(String(500))
    entry_point_name = Column(String(50))
    user_id = Column(String(256), nullable=True, default=None)
    status = Column(String(20),
                    default=RunStatus.to_string(RunStatus.SCHEDULED))
    # BUG FIX: the default must be a callable. The previous
    # ``default=int(time.time())`` was evaluated once at class-definition
    # (import) time, so every row relying on the default was stamped with the
    # process start time instead of the row's insert time. SQLAlchemy invokes
    # a callable default per INSERT.
    start_time = Column(BigInteger, default=lambda: int(time.time()))
    end_time = Column(BigInteger, nullable=True, default=None)
    source_version = Column(String(50))
    lifecycle_stage = Column(String(20), default=LifecycleStage.ACTIVE)
    artifact_uri = Column(String(200), default=None)
    experiment_id = Column(Integer, ForeignKey('experiments.experiment_id'))
    experiment = relationship('SqlExperiment',
                              backref=backref('runs', cascade='all'))

    # Enum-like string columns are enforced with CHECK constraints.
    __table_args__ = (
        CheckConstraint(source_type.in_(SourceTypes), name='source_type'),
        CheckConstraint(status.in_(RunStatusTypes), name='status'),
        CheckConstraint(
            lifecycle_stage.in_(
                LifecycleStage.view_type_to_stages(ViewType.ALL)),
            name='lifecycle_stage'),
        PrimaryKeyConstraint('run_uuid', name='run_pk'),
    )

    def to_mlflow_entity(self):
        """Return this row as an ``mlflow.entities.Run``.

        ``Run``/``RunInfo``/``RunData`` use different parameter names in
        ``__init__`` than in their properties, so the conversion goes through
        ``_create_entity`` manually instead of a generic mapping.
        """
        info = _create_entity(RunInfo, self)
        data = _create_entity(RunData, self)
        return Run(run_info=info, run_data=data)
def test_run(tmpdir, tracking_uri_mock, use_start_run):  # pylint: disable=unused-argument
    """Run the test project synchronously and validate the resulting run's
    status, params, metrics, and MLflow source tags in the FileStore."""
    submitted_run = mlflow.projects.run(
        TEST_PROJECT_DIR,
        entry_point="test_tracking",
        parameters={"use_start_run": use_start_run},
        use_conda=False,
        experiment_id=0)
    assert submitted_run.run_id is not None
    # Blocking runs should be finished when they return
    validate_exit_status(submitted_run.get_status(), RunStatus.FINISHED)
    # Test that we can call wait() on a synchronous run & that the run has the
    # correct status after calling wait().
    submitted_run.wait()
    validate_exit_status(submitted_run.get_status(), RunStatus.FINISHED)
    # Validate run contents in the FileStore
    run_uuid = submitted_run.run_id
    mlflow_service = mlflow.tracking.MlflowClient()
    run_infos = mlflow_service.list_run_infos(
        experiment_id=0, run_view_type=ViewType.ACTIVE_ONLY)
    assert len(run_infos) == 1
    store_run_uuid = run_infos[0].run_uuid
    assert run_uuid == store_run_uuid
    run = mlflow_service.get_run(run_uuid)
    # NOTE(review): this compares run.info.status against the RunStatus enum
    # value directly, while other tests in this file compare against
    # RunStatus.to_string(...) — confirm which representation this MLflow
    # version stores on RunInfo.
    assert run.info.status == RunStatus.FINISHED
    assert run.data.params == {"use_start_run": use_start_run}
    assert run.data.metrics == {"some_key": 3}
    tags = run.data.tags
    assert "file:" in tags[MLFLOW_SOURCE_NAME]
    assert tags[MLFLOW_SOURCE_TYPE] == SourceType.to_string(SourceType.PROJECT)
    assert tags[MLFLOW_PROJECT_ENTRY_POINT] == "test_tracking"
def tags(self):
    """Build the MLflow tag dictionary describing the current Databricks job
    run; optional metadata is included only when the lookup returned a value."""
    job_id = databricks_utils.get_job_id()
    job_run_id = databricks_utils.get_job_run_id()
    job_type = databricks_utils.get_job_type()
    webapp_url = databricks_utils.get_webapp_url()
    workspace_url, workspace_id = \
        databricks_utils.get_workspace_info_from_dbutils()
    # The source name is only meaningful when both ids are known.
    if job_id is not None and job_run_id is not None:
        source_name = "jobs/{job_id}/run/{job_run_id}".format(
            job_id=job_id, job_run_id=job_run_id)
    else:
        source_name = None
    tags = {
        MLFLOW_SOURCE_NAME: source_name,
        MLFLOW_SOURCE_TYPE: SourceType.to_string(SourceType.JOB),
    }
    optional_tags = (
        (MLFLOW_DATABRICKS_JOB_ID, job_id),
        (MLFLOW_DATABRICKS_JOB_RUN_ID, job_run_id),
        (MLFLOW_DATABRICKS_JOB_TYPE, job_type),
        (MLFLOW_DATABRICKS_WEBAPP_URL, webapp_url),
        (MLFLOW_DATABRICKS_WORKSPACE_URL, workspace_url),
        (MLFLOW_DATABRICKS_WORKSPACE_ID, workspace_id),
    )
    for key, value in optional_tags:
        if value is not None:
            tags[key] = value
    return tags
def test_client_create_run_overrides(mock_store):
    """create_run should read user_id from the MLFLOW_USER tag and forward all
    tags (including unknown ones) to the store as RunTag entities."""
    experiment_id = mock.Mock()
    user = mock.Mock()
    start_time = mock.Mock()
    tags = {
        MLFLOW_USER: user,
        MLFLOW_PARENT_RUN_ID: mock.Mock(),
        MLFLOW_SOURCE_TYPE: SourceType.to_string(SourceType.JOB),
        MLFLOW_SOURCE_NAME: mock.Mock(),
        MLFLOW_PROJECT_ENTRY_POINT: mock.Mock(),
        MLFLOW_GIT_COMMIT: mock.Mock(),
        "other-key": "other-value"
    }
    MlflowClient().create_run(experiment_id, start_time, tags)
    mock_store.create_run.assert_called_once_with(
        experiment_id=experiment_id,
        user_id=user,
        start_time=start_time,
        tags=[RunTag(key, value) for key, value in tags.items()],
    )
    mock_store.reset_mock()
    # A second call with the same inputs must behave identically — this guards
    # against create_run mutating or consuming the caller's tags dict.
    MlflowClient().create_run(experiment_id, start_time, tags)
    mock_store.create_run.assert_called_once_with(
        experiment_id=experiment_id,
        user_id=user,
        start_time=start_time,
        tags=[RunTag(key, value) for key, value in tags.items()]
    )
def test_start_run_with_parent():
    """A nested start_run should tag the new run with its parent's run_id."""
    parent_run = mock.Mock()
    mock_experiment_id = "123456"
    mock_source_name = mock.Mock()
    mock_user = mock.Mock()
    active_run_stack_patch = mock.patch(
        "mlflow.tracking.fluent._active_run_stack", [parent_run])
    user_patch = mock.patch(
        "mlflow.tracking.context.default_context._get_user",
        return_value=mock_user)
    source_name_patch = mock.patch(
        "mlflow.tracking.context.default_context._get_source_name",
        return_value=mock_source_name)
    create_run_patch = mock.patch.object(MlflowClient, "create_run")
    expected_tags = {
        mlflow_tags.MLFLOW_USER: mock_user,
        mlflow_tags.MLFLOW_SOURCE_NAME: mock_source_name,
        mlflow_tags.MLFLOW_SOURCE_TYPE: SourceType.to_string(SourceType.LOCAL),
        mlflow_tags.MLFLOW_PARENT_RUN_ID: parent_run.info.run_id,
    }
    with multi_context(active_run_stack_patch, create_run_patch, user_patch,
                       source_name_patch):
        active_run = start_run(experiment_id=mock_experiment_id, nested=True)
        MlflowClient.create_run.assert_called_once_with(
            experiment_id=mock_experiment_id, tags=expected_tags)
        assert is_from_run(active_run, MlflowClient.create_run.return_value)
def test_start_run_defaults(empty_active_run_stack):
    """start_run with no arguments should derive source name/type and git
    commit tags from the tracking context and pass them to create_run."""
    mock_experiment_id = mock.Mock()
    experiment_id_patch = mock.patch(
        "mlflow.tracking.fluent._get_experiment_id",
        return_value=mock_experiment_id)
    # Force the non-Databricks code path.
    databricks_notebook_patch = mock.patch(
        "mlflow.tracking.fluent.is_in_databricks_notebook",
        return_value=False)
    mock_source_name = mock.Mock()
    source_name_patch = mock.patch("mlflow.tracking.context._get_source_name",
                                   return_value=mock_source_name)
    source_type_patch = mock.patch("mlflow.tracking.context._get_source_type",
                                   return_value=SourceType.NOTEBOOK)
    mock_source_version = mock.Mock()
    source_version_patch = mock.patch(
        "mlflow.tracking.context._get_source_version",
        return_value=mock_source_version)
    expected_tags = {
        mlflow_tags.MLFLOW_SOURCE_NAME: mock_source_name,
        mlflow_tags.MLFLOW_SOURCE_TYPE:
            SourceType.to_string(SourceType.NOTEBOOK),
        mlflow_tags.MLFLOW_GIT_COMMIT: mock_source_version
    }
    create_run_patch = mock.patch.object(MlflowClient, "create_run")
    with experiment_id_patch, databricks_notebook_patch, source_name_patch, \
            source_type_patch, source_version_patch, create_run_patch:
        active_run = start_run()
        MlflowClient.create_run.assert_called_once_with(
            experiment_id=mock_experiment_id, tags=expected_tags)
        assert is_from_run(active_run, MlflowClient.create_run.return_value)
def test_start_run_with_parent():
    """A nested start_run must attach MLFLOW_PARENT_RUN_ID taken from the
    run currently on the active-run stack."""
    parent_run = mock.Mock()
    mock_experiment_id = mock.Mock()
    mock_source_name = mock.Mock()
    # FIX: removed the unused ``mock_run_name`` local — nothing in this test
    # passes a run name to start_run or asserts on one.
    active_run_stack_patch = mock.patch(
        "mlflow.tracking.fluent._active_run_stack", [parent_run])
    databricks_notebook_patch = mock.patch(
        "mlflow.tracking.fluent.is_in_databricks_notebook",
        return_value=False)
    source_name_patch = mock.patch(
        "mlflow.tracking.context._get_source_name",
        return_value=mock_source_name)
    expected_tags = {
        mlflow_tags.MLFLOW_SOURCE_NAME: mock_source_name,
        mlflow_tags.MLFLOW_SOURCE_TYPE: SourceType.to_string(SourceType.LOCAL),
        mlflow_tags.MLFLOW_PARENT_RUN_ID: parent_run.info.run_id
    }
    create_run_patch = mock.patch.object(MlflowClient, "create_run")
    with databricks_notebook_patch, active_run_stack_patch, \
            create_run_patch, source_name_patch:
        active_run = start_run(experiment_id=mock_experiment_id, nested=True)
        MlflowClient.create_run.assert_called_once_with(
            experiment_id=mock_experiment_id, tags=expected_tags)
        assert is_from_run(active_run, MlflowClient.create_run.return_value)
def create_run(self, experiment_id, user_id, run_name, source_type,
               source_name, entry_point_name, start_time,
               source_version, tags, parent_run_id):
    """Create a new RUNNING run under ``experiment_id`` and return it as an
    MLflow ``Run`` entity.

    Raises ``MlflowException`` (INVALID_STATE) if the experiment is not
    active. ``parent_run_id`` and ``run_name`` are additionally persisted as
    reserved tags.
    """
    experiment = self.get_experiment(experiment_id)
    # Runs may only be created under active (non-deleted) experiments.
    if experiment.lifecycle_stage != LifecycleStage.ACTIVE:
        raise MlflowException(
            'Experiment id={} must be active'.format(experiment_id),
            INVALID_STATE)
    run_uuid = uuid.uuid4().hex
    # Artifacts live under <experiment artifact root>/<run id>/artifacts.
    artifact_location = build_path(experiment.artifact_location, run_uuid,
                                   SqlAlchemyStore.ARTIFACTS_FOLDER_NAME)
    run = SqlRun(name=run_name or "", artifact_uri=artifact_location,
                 run_uuid=run_uuid, experiment_id=experiment_id,
                 source_type=SourceType.to_string(source_type),
                 source_name=source_name,
                 entry_point_name=entry_point_name, user_id=user_id,
                 status=RunStatus.to_string(RunStatus.RUNNING),
                 start_time=start_time, end_time=None,
                 source_version=source_version,
                 lifecycle_stage=LifecycleStage.ACTIVE)
    for tag in tags:
        run.tags.append(SqlTag(key=tag.key, value=tag.value))
    # Parent linkage and run name are stored as reserved MLflow tags.
    if parent_run_id:
        run.tags.append(SqlTag(key=MLFLOW_PARENT_RUN_ID, value=parent_run_id))
    if run_name:
        run.tags.append(SqlTag(key=MLFLOW_RUN_NAME, value=run_name))
    self._save_to_db([run])
    return run.to_mlflow_entity()
def create_run(self, experiment_id, user_id, start_time, tags):
    """Create a new RUNNING run under ``experiment_id`` within a managed
    session and return it as an MLflow ``Run`` entity."""
    with self.ManagedSessionMaker() as session:
        experiment = self.get_experiment(experiment_id)
        self._check_experiment_is_active(experiment)
        run_id = uuid.uuid4().hex
        # Artifacts live under <experiment artifact root>/<run id>/artifacts.
        artifact_location = posixpath.join(
            experiment.artifact_location, run_id,
            SqlAlchemyStore.ARTIFACTS_FOLDER_NAME)
        run = SqlRun(
            name="",
            artifact_uri=artifact_location,
            run_uuid=run_id,
            experiment_id=experiment_id,
            source_type=SourceType.to_string(SourceType.UNKNOWN),
            source_name="",
            entry_point_name="",
            user_id=user_id,
            status=RunStatus.to_string(RunStatus.RUNNING),
            start_time=start_time,
            end_time=None,
            source_version="",
            lifecycle_stage=LifecycleStage.ACTIVE,
        )
        # Collapse duplicate tag keys (last occurrence wins) before persisting.
        deduped_tags = {tag.key: tag.value for tag in tags}
        run.tags = [SqlTag(key=k, value=v) for k, v in deduped_tags.items()]
        self._save_to_db(objs=run, session=session)
        return run.to_mlflow_entity()
def test_start_run_overrides(empty_active_run_stack):
    """Explicit source/entry-point arguments to start_run should be turned
    into the corresponding tags and forwarded to MlflowClient.create_run."""
    databricks_notebook_patch = mock.patch(
        "mlflow.tracking.fluent.is_in_databricks_notebook",
        return_value=False)
    mock_experiment_id = mock.Mock()
    mock_source_name = mock.Mock()
    source_type = SourceType.JOB
    mock_source_version = mock.Mock()
    mock_entry_point_name = mock.Mock()
    mock_run_name = mock.Mock()
    expected_tags = {
        mlflow_tags.MLFLOW_SOURCE_NAME: mock_source_name,
        mlflow_tags.MLFLOW_SOURCE_TYPE: SourceType.to_string(source_type),
        mlflow_tags.MLFLOW_GIT_COMMIT: mock_source_version,
        mlflow_tags.MLFLOW_PROJECT_ENTRY_POINT: mock_entry_point_name
    }
    # FIX: ``create_run_patch`` was previously assigned twice; the first
    # assignment was dead code (its patcher object was discarded unused).
    # Keep a single definition.
    create_run_patch = mock.patch.object(MlflowClient, "create_run")
    with databricks_notebook_patch, create_run_patch:
        active_run = start_run(
            experiment_id=mock_experiment_id, source_name=mock_source_name,
            source_version=mock_source_version,
            entry_point_name=mock_entry_point_name,
            source_type=source_type, run_name=mock_run_name)
        MlflowClient.create_run.assert_called_once_with(
            experiment_id=mock_experiment_id, run_name=mock_run_name,
            tags=expected_tags)
        assert is_from_run(active_run, MlflowClient.create_run.return_value)
def test_default_run_context_tags(patch_script_name):
    """DefaultRunContext should report the OS user, the running script name,
    and a LOCAL source type."""
    mock_user = mock.Mock()
    with mock.patch("getpass.getuser", return_value=mock_user):
        expected = {
            MLFLOW_USER: mock_user,
            MLFLOW_SOURCE_NAME: MOCK_SCRIPT_NAME,
            MLFLOW_SOURCE_TYPE: SourceType.to_string(SourceType.LOCAL),
        }
        assert DefaultRunContext().tags() == expected
def test_start_run_defaults_databricks_notebook(empty_active_run_stack, ):  # pylint: disable=unused-argument
    """Inside a Databricks notebook, start_run should add notebook id/path and
    webapp URL tags on top of the default context-derived tags."""
    mock_experiment_id = mock.Mock()
    experiment_id_patch = mock.patch(
        "mlflow.tracking.fluent._get_experiment_id",
        return_value=mock_experiment_id)
    # Force the Databricks-notebook code path.
    databricks_notebook_patch = mock.patch(
        "mlflow.utils.databricks_utils.is_in_databricks_notebook",
        return_value=True)
    mock_user = mock.Mock()
    user_patch = mock.patch(
        "mlflow.tracking.context.default_context._get_user",
        return_value=mock_user)
    mock_source_version = mock.Mock()
    source_version_patch = mock.patch(
        "mlflow.tracking.context.git_context._get_source_version",
        return_value=mock_source_version)
    mock_notebook_id = mock.Mock()
    notebook_id_patch = mock.patch(
        "mlflow.utils.databricks_utils.get_notebook_id",
        return_value=mock_notebook_id)
    mock_notebook_path = mock.Mock()
    notebook_path_patch = mock.patch(
        "mlflow.utils.databricks_utils.get_notebook_path",
        return_value=mock_notebook_path)
    mock_webapp_url = mock.Mock()
    webapp_url_patch = mock.patch(
        "mlflow.utils.databricks_utils.get_webapp_url",
        return_value=mock_webapp_url)
    # In a notebook, the notebook path doubles as the source name.
    expected_tags = {
        mlflow_tags.MLFLOW_USER: mock_user,
        mlflow_tags.MLFLOW_SOURCE_NAME: mock_notebook_path,
        mlflow_tags.MLFLOW_SOURCE_TYPE:
            SourceType.to_string(SourceType.NOTEBOOK),
        mlflow_tags.MLFLOW_GIT_COMMIT: mock_source_version,
        mlflow_tags.MLFLOW_DATABRICKS_NOTEBOOK_ID: mock_notebook_id,
        mlflow_tags.MLFLOW_DATABRICKS_NOTEBOOK_PATH: mock_notebook_path,
        mlflow_tags.MLFLOW_DATABRICKS_WEBAPP_URL: mock_webapp_url,
    }
    create_run_patch = mock.patch.object(MlflowClient, "create_run")
    with multi_context(
            experiment_id_patch,
            databricks_notebook_patch,
            user_patch,
            source_version_patch,
            notebook_id_patch,
            notebook_path_patch,
            webapp_url_patch,
            create_run_patch,
    ):
        active_run = start_run()
        MlflowClient.create_run.assert_called_once_with(
            experiment_id=mock_experiment_id, tags=expected_tags)
        assert is_from_run(active_run, MlflowClient.create_run.return_value)
def _create_run(uri, experiment_id, work_dir, version, entry_point,
                parameters):
    """
    Create a ``Run`` against the current MLflow tracking server, logging
    metadata (e.g. the URI, entry point, and parameters of the project) about
    the run. Return an ``ActiveRun`` that can be used to report additional
    data about the run (metrics/params) to the tracking server.
    """
    # For local URIs prefer the project's git remote URL as the source name.
    if _is_local_uri(uri):
        source_name = tracking._tracking_service.utils._get_git_url_if_present(
            _expand_uri(uri))
    else:
        source_name = _expand_uri(uri)
    source_version = _get_git_commit(work_dir)
    existing_run = fluent.active_run()
    # A run started inside another active run is linked to it as a child.
    if existing_run:
        parent_run_id = existing_run.info.run_id
    else:
        parent_run_id = None
    tags = {
        MLFLOW_USER: _get_user(),
        MLFLOW_SOURCE_NAME: source_name,
        MLFLOW_SOURCE_TYPE: SourceType.to_string(SourceType.PROJECT),
        MLFLOW_PROJECT_ENTRY_POINT: entry_point,
    }
    if source_version is not None:
        tags[MLFLOW_GIT_COMMIT] = source_version
    if parent_run_id is not None:
        tags[MLFLOW_PARENT_RUN_ID] = parent_run_id
    # Record the git repo URL under both the current and legacy tag keys.
    repo_url = _get_git_repo_url(work_dir)
    if repo_url is not None:
        tags[MLFLOW_GIT_REPO_URL] = repo_url
        tags[LEGACY_MLFLOW_GIT_REPO_URL] = repo_url
    # Add branch name tag if a branch is specified through --version
    if _is_valid_branch_name(work_dir, version):
        tags[MLFLOW_GIT_BRANCH] = version
        tags[LEGACY_MLFLOW_GIT_BRANCH_NAME] = version
    active_run = tracking.MlflowClient().create_run(
        experiment_id=experiment_id, tags=tags)
    project = _project_spec.load_project(work_dir)
    # Consolidate parameters for logging.
    # `storage_dir` is `None` since we want to log actual path not downloaded
    # local path
    entry_point_obj = project.get_entry_point(entry_point)
    final_params, extra_params = entry_point_obj.compute_parameters(
        parameters, storage_dir=None)
    params_list = [
        Param(key, value)
        for key, value in list(final_params.items()) +
        list(extra_params.items())
    ]
    tracking.MlflowClient().log_batch(active_run.info.run_id,
                                      params=params_list)
    return active_run
def test_databricks_job_run_context_tags_nones():
    """When no job metadata is available, only the source name (None) and the
    JOB source type should appear in the tags."""
    helper_names = ["get_job_id", "get_job_run_id", "get_job_type",
                    "get_webapp_url"]
    patches = [
        mock.patch("mlflow.utils.databricks_utils." + name, return_value=None)
        for name in helper_names
    ]
    with patches[0], patches[1], patches[2], patches[3]:
        assert DatabricksJobRunContext().tags() == {
            MLFLOW_SOURCE_NAME: None,
            MLFLOW_SOURCE_TYPE: SourceType.to_string(SourceType.JOB),
        }
def test_run_local_git_repo(
        patch_user,  # pylint: disable=unused-argument
        local_git_repo,
        local_git_repo_uri,
        tracking_uri_mock,  # pylint: disable=unused-argument
        use_start_run,
        version):
    """Run the test project out of a local git repo (optionally pinned to a
    branch or commit) and validate run status, data, and source/git tags."""
    # Build the project URI: "repo#subdir" form when a version is requested,
    # a plain filesystem path otherwise.
    if version is not None:
        uri = local_git_repo_uri + "#" + TEST_PROJECT_NAME
    else:
        uri = os.path.join("%s/" % local_git_repo, TEST_PROJECT_NAME)
    if version == "git-commit":
        version = _get_version_local_git_repo(local_git_repo)
    submitted_run = mlflow.projects.run(
        uri, entry_point="test_tracking", version=version,
        parameters={"use_start_run": use_start_run},
        use_conda=False, experiment_id=FileStore.DEFAULT_EXPERIMENT_ID)
    # Blocking runs should be finished when they return
    validate_exit_status(submitted_run.get_status(), RunStatus.FINISHED)
    # Test that we can call wait() on a synchronous run & that the run has the
    # correct status after calling wait().
    submitted_run.wait()
    validate_exit_status(submitted_run.get_status(), RunStatus.FINISHED)
    # Validate run contents in the FileStore
    run_id = submitted_run.run_id
    mlflow_service = mlflow.tracking.MlflowClient()
    run_infos = mlflow_service.list_run_infos(
        experiment_id=FileStore.DEFAULT_EXPERIMENT_ID,
        run_view_type=ViewType.ACTIVE_ONLY)
    assert len(run_infos) == 1
    store_run_id = run_infos[0].run_id
    assert run_id == store_run_id
    run = mlflow_service.get_run(run_id)
    assert run.info.status == RunStatus.to_string(RunStatus.FINISHED)
    assert run.data.params == {"use_start_run": use_start_run}
    assert run.data.metrics == {"some_key": 3}
    tags = run.data.tags
    assert tags[MLFLOW_USER] == MOCK_USER
    assert "file:" in tags[MLFLOW_SOURCE_NAME]
    assert tags[MLFLOW_SOURCE_TYPE] == SourceType.to_string(SourceType.PROJECT)
    assert tags[MLFLOW_PROJECT_ENTRY_POINT] == "test_tracking"
    assert tags[MLFLOW_PROJECT_BACKEND] == "local"
    # Branch/repo-URL tags (current and legacy keys) are only written when the
    # project was run from a named branch.
    if version == "master":
        assert tags[MLFLOW_GIT_BRANCH] == "master"
        assert tags[MLFLOW_GIT_REPO_URL] == local_git_repo_uri
        assert tags[LEGACY_MLFLOW_GIT_BRANCH_NAME] == "master"
        assert tags[LEGACY_MLFLOW_GIT_REPO_URL] == local_git_repo_uri
def test_start_run_creates_new_run_with_user_specified_tags():
    """User-supplied tags must be merged with, and take precedence over, the
    tags derived from the tracking context."""
    mock_experiment_id = mock.Mock()
    experiment_id_patch = mock.patch(
        "mlflow.tracking.fluent._get_experiment_id",
        return_value=mock_experiment_id
    )
    databricks_notebook_patch = mock.patch(
        "mlflow.tracking.fluent.is_in_databricks_notebook", return_value=False
    )
    mock_user = mock.Mock()
    user_patch = mock.patch(
        "mlflow.tracking.context.default_context._get_user",
        return_value=mock_user
    )
    mock_source_name = mock.Mock()
    source_name_patch = mock.patch(
        "mlflow.tracking.context.default_context._get_source_name",
        return_value=mock_source_name
    )
    source_type_patch = mock.patch(
        "mlflow.tracking.context.default_context._get_source_type",
        return_value=SourceType.NOTEBOOK
    )
    mock_source_version = mock.Mock()
    source_version_patch = mock.patch(
        "mlflow.tracking.context.git_context._get_source_version",
        return_value=mock_source_version
    )
    # MLFLOW_USER is also produced by the default context; the user-specified
    # value must win in the merged tags.
    user_specified_tags = {
        "ml_task": "regression",
        "num_layers": 7,
        mlflow_tags.MLFLOW_USER: "******",
    }
    expected_tags = {
        mlflow_tags.MLFLOW_SOURCE_NAME: mock_source_name,
        mlflow_tags.MLFLOW_SOURCE_TYPE:
            SourceType.to_string(SourceType.NOTEBOOK),
        mlflow_tags.MLFLOW_GIT_COMMIT: mock_source_version,
        mlflow_tags.MLFLOW_USER: "******",
        "ml_task": "regression",
        "num_layers": 7,
    }
    create_run_patch = mock.patch.object(MlflowClient, "create_run")
    with multi_context(
        experiment_id_patch,
        databricks_notebook_patch,
        user_patch,
        source_name_patch,
        source_type_patch,
        source_version_patch,
        create_run_patch,
    ):
        active_run = start_run(tags=user_specified_tags)
        MlflowClient.create_run.assert_called_once_with(
            experiment_id=mock_experiment_id, tags=expected_tags
        )
        assert is_from_run(active_run, MlflowClient.create_run.return_value)
def test_databricks_notebook_run_context_tags_nones():
    """With no notebook metadata available, the optional notebook tags must be
    omitted; only the (None) source name and NOTEBOOK source type remain."""
    helper_names = ["get_notebook_id", "get_notebook_path", "get_webapp_url"]
    patches = [
        mock.patch("mlflow.utils.databricks_utils." + name, return_value=None)
        for name in helper_names
    ]
    with patches[0], patches[1], patches[2]:
        assert DatabricksNotebookRunContext().tags() == {
            MLFLOW_SOURCE_NAME: None,
            MLFLOW_SOURCE_TYPE: SourceType.to_string(SourceType.NOTEBOOK),
        }
def test_start_run_overrides_databricks_notebook(empty_active_run_stack):
    """In a Databricks notebook, explicit start_run overrides must replace the
    notebook-derived source tags while the notebook metadata tags are kept."""
    databricks_notebook_patch = mock.patch(
        "mlflow.utils.databricks_utils.is_in_databricks_notebook",
        return_value=True)
    mock_notebook_id = mock.Mock()
    notebook_id_patch = mock.patch(
        "mlflow.utils.databricks_utils.get_notebook_id",
        return_value=mock_notebook_id)
    mock_notebook_path = mock.Mock()
    notebook_path_patch = mock.patch(
        "mlflow.utils.databricks_utils.get_notebook_path",
        return_value=mock_notebook_path)
    mock_webapp_url = mock.Mock()
    webapp_url_patch = mock.patch(
        "mlflow.utils.databricks_utils.get_webapp_url",
        return_value=mock_webapp_url)
    mock_experiment_id = mock.Mock()
    mock_source_name = mock.Mock()
    source_type = SourceType.JOB
    mock_source_version = mock.Mock()
    mock_entry_point_name = mock.Mock()
    mock_run_name = mock.Mock()
    # Overridden source tags plus the notebook metadata that is always added.
    expected_tags = {
        mlflow_tags.MLFLOW_SOURCE_NAME: mock_source_name,
        mlflow_tags.MLFLOW_SOURCE_TYPE: SourceType.to_string(source_type),
        mlflow_tags.MLFLOW_GIT_COMMIT: mock_source_version,
        mlflow_tags.MLFLOW_PROJECT_ENTRY_POINT: mock_entry_point_name,
        mlflow_tags.MLFLOW_DATABRICKS_NOTEBOOK_ID: mock_notebook_id,
        mlflow_tags.MLFLOW_DATABRICKS_NOTEBOOK_PATH: mock_notebook_path,
        mlflow_tags.MLFLOW_DATABRICKS_WEBAPP_URL: mock_webapp_url
    }
    create_run_patch = mock.patch.object(MlflowClient, "create_run")
    with databricks_notebook_patch, notebook_id_patch, notebook_path_patch, \
            webapp_url_patch, create_run_patch:
        active_run = start_run(
            experiment_id=mock_experiment_id, source_name=mock_source_name,
            source_version=mock_source_version,
            entry_point_name=mock_entry_point_name,
            source_type=source_type, run_name=mock_run_name)
        MlflowClient.create_run.assert_called_once_with(
            experiment_id=mock_experiment_id, run_name=mock_run_name,
            tags=expected_tags)
        assert is_from_run(active_run, MlflowClient.create_run.return_value)
def test_databricks_notebook_run_context_tags():
    """DatabricksNotebookRunContext.tags() should expose the notebook id,
    path (also used as the source name), and webapp URL."""
    with mock.patch(
            "mlflow.utils.databricks_utils.get_notebook_id"
    ) as notebook_id_mock, mock.patch(
            "mlflow.utils.databricks_utils.get_notebook_path"
    ) as notebook_path_mock, mock.patch(
            "mlflow.utils.databricks_utils.get_webapp_url"
    ) as webapp_url_mock:
        expected = {
            MLFLOW_SOURCE_NAME: notebook_path_mock.return_value,
            MLFLOW_SOURCE_TYPE: SourceType.to_string(SourceType.NOTEBOOK),
            MLFLOW_DATABRICKS_NOTEBOOK_ID: notebook_id_mock.return_value,
            MLFLOW_DATABRICKS_NOTEBOOK_PATH: notebook_path_mock.return_value,
            MLFLOW_DATABRICKS_WEBAPP_URL: webapp_url_mock.return_value
        }
        assert DatabricksNotebookRunContext().tags() == expected
def tags(self):
    """Return MLflow tags describing the current Databricks notebook context;
    optional metadata is attached only when the lookup returned a value."""
    notebook_id = databricks_utils.get_notebook_id()
    notebook_path = databricks_utils.get_notebook_path()
    webapp_url = databricks_utils.get_webapp_url()
    tags = {
        MLFLOW_SOURCE_NAME: notebook_path,
        MLFLOW_SOURCE_TYPE: SourceType.to_string(SourceType.NOTEBOOK)
    }
    optional_tags = (
        (MLFLOW_DATABRICKS_NOTEBOOK_ID, notebook_id),
        (MLFLOW_DATABRICKS_NOTEBOOK_PATH, notebook_path),
        (MLFLOW_DATABRICKS_WEBAPP_URL, webapp_url),
    )
    for key, value in optional_tags:
        if value is not None:
            tags[key] = value
    return tags
def _get_run_configs(self, name='test', experiment_id=None):
    """Return a kwargs dict suitable for constructing a SqlRun in tests;
    the run uuid is freshly generated on each call."""
    return {
        'experiment_id': experiment_id,
        'name': name,
        'user_id': 'Anderson',
        'run_uuid': uuid.uuid4().hex,
        'status': RunStatus.to_string(RunStatus.SCHEDULED),
        'source_type': SourceType.to_string(SourceType.NOTEBOOK),
        'source_name': 'Python application',
        'entry_point_name': 'main.py',
        'start_time': int(time.time()),
        'end_time': int(time.time()),
        'source_version': mlflow.__version__,
        'lifecycle_stage': entities.LifecycleStage.ACTIVE,
        'artifact_uri': '//',
    }
def test_start_run_with_description(empty_active_run_stack):  # pylint: disable=unused-argument
    """start_run(description=...) should store the description under the
    MLFLOW_RUN_NOTE tag alongside the context-derived tags."""
    mock_experiment_id = mock.Mock()
    experiment_id_patch = mock.patch(
        "mlflow.tracking.fluent._get_experiment_id",
        return_value=mock_experiment_id)
    mock_user = mock.Mock()
    user_patch = mock.patch(
        "mlflow.tracking.context.default_context._get_user",
        return_value=mock_user)
    mock_source_name = mock.Mock()
    source_name_patch = mock.patch(
        "mlflow.tracking.context.default_context._get_source_name",
        return_value=mock_source_name)
    source_type_patch = mock.patch(
        "mlflow.tracking.context.default_context._get_source_type",
        return_value=SourceType.NOTEBOOK)
    mock_source_version = mock.Mock()
    source_version_patch = mock.patch(
        "mlflow.tracking.context.git_context._get_source_version",
        return_value=mock_source_version)
    description = "Test description"
    expected_tags = {
        mlflow_tags.MLFLOW_SOURCE_NAME: mock_source_name,
        mlflow_tags.MLFLOW_SOURCE_TYPE:
            SourceType.to_string(SourceType.NOTEBOOK),
        mlflow_tags.MLFLOW_GIT_COMMIT: mock_source_version,
        mlflow_tags.MLFLOW_USER: mock_user,
        mlflow_tags.MLFLOW_RUN_NOTE: description,
    }
    create_run_patch = mock.patch.object(MlflowClient, "create_run")
    with multi_context(
            experiment_id_patch,
            user_patch,
            source_name_patch,
            source_type_patch,
            source_version_patch,
            create_run_patch,
    ):
        active_run = start_run(description=description)
        MlflowClient.create_run.assert_called_once_with(
            experiment_id=mock_experiment_id, tags=expected_tags)
        assert is_from_run(active_run, MlflowClient.create_run.return_value)
def create_run(self, experiment_id, user_id, run_name, source_type,
               source_name, entry_point_name, start_time, source_version,
               tags, parent_run_id):
    """Create a new RUNNING run under ``experiment_id`` within a managed
    session and return it as an MLflow ``Run`` entity.

    Raises ``MlflowException`` (INVALID_STATE) if the experiment is not
    active. ``parent_run_id`` and ``run_name`` are additionally persisted as
    reserved tags.
    """
    with self.ManagedSessionMaker() as session:
        experiment = self.get_experiment(experiment_id)
        # Runs may only be created under active (non-deleted) experiments.
        if experiment.lifecycle_stage != LifecycleStage.ACTIVE:
            raise MlflowException(
                'Experiment id={} must be active'.format(experiment_id),
                INVALID_STATE)
        run_uuid = uuid.uuid4().hex
        # Artifacts live under <experiment artifact root>/<run id>/artifacts.
        artifact_location = build_path(
            experiment.artifact_location, run_uuid,
            SqlAlchemyStore.ARTIFACTS_FOLDER_NAME)
        run = SqlRun(name=run_name or "", artifact_uri=artifact_location,
                     run_uuid=run_uuid, experiment_id=experiment_id,
                     source_type=SourceType.to_string(source_type),
                     source_name=source_name,
                     entry_point_name=entry_point_name, user_id=user_id,
                     status=RunStatus.to_string(RunStatus.RUNNING),
                     start_time=start_time, end_time=None,
                     source_version=source_version,
                     lifecycle_stage=LifecycleStage.ACTIVE)
        # Collapse duplicate tag keys (last occurrence wins); reserved tags
        # for the parent run and run name are merged in afterwards so they
        # take precedence over caller-supplied values with the same keys.
        tags_dict = {}
        for tag in tags:
            tags_dict[tag.key] = tag.value
        if parent_run_id:
            tags_dict[MLFLOW_PARENT_RUN_ID] = parent_run_id
        if run_name:
            tags_dict[MLFLOW_RUN_NAME] = run_name
        run.tags = [
            SqlTag(key=key, value=value) for key, value in tags_dict.items()
        ]
        self._save_to_db(objs=run, session=session)
        return run.to_mlflow_entity()