def _create_experiment_with_id(self, name, experiment_id, artifact_uri):
    self._check_root_dir()
    meta_dir = mkdir(self.root_directory, str(experiment_id))
    artifact_uri = artifact_uri or build_path(self.artifact_root_uri, str(experiment_id))
    experiment = Experiment(experiment_id, name, artifact_uri, LifecycleStage.ACTIVE)
    write_yaml(meta_dir, FileStore.META_DATA_FILE_NAME, dict(experiment))
    return experiment_id
def test_message_to_json():
    json_out = message_to_json(Experiment(123, "name", "arty", "active").to_proto())
    assert json.loads(json_out) == {
        "experiment_id": "123",
        "name": "name",
        "artifact_location": "arty",
        "lifecycle_stage": "active",
    }
def get_experiment(self, experiment_id):
    """
    Fetches the experiment from the backend store.

    :param experiment_id: Integer id for the experiment
    :return: A single Experiment object if it exists, otherwise raises an Exception.
    """
    req_body = message_to_json(GetExperiment(experiment_id=experiment_id))
    response_proto = self._call_endpoint(GetExperiment, req_body)
    return Experiment.from_proto(response_proto.experiment)
def to_mlflow_entity(self):
    """
    Convert DB model to corresponding MLflow entity.

    :return: :py:class:`mlflow.entities.Experiment`.
    """
    return Experiment(
        experiment_id=str(self.experiment_id),
        name=self.name,
        artifact_location=self.artifact_location,
        lifecycle_stage=self.lifecycle_stage,
    )
def test_string_repr(self):
    exp = Experiment(
        experiment_id=0,
        name="myname",
        artifact_location="hi",
        lifecycle_stage=LifecycleStage.ACTIVE,
    )
    assert (
        str(exp) == "<Experiment: artifact_location='hi', experiment_id=0, "
        "lifecycle_stage='active', name='myname', tags={}>"
    )
def list_experiments(self, view_type=ViewType.ACTIVE_ONLY):
    """
    :return: a list of all known Experiment objects
    """
    req_body = message_to_json(ListExperiments(view_type=view_type))
    response_proto = self._call_endpoint(ListExperiments, req_body)
    return [
        Experiment.from_proto(experiment_proto)
        for experiment_proto in response_proto.experiments
    ]
def test_creation_and_hydration(self):
    exp_id = random_int()
    name = "exp_%d_%d" % (random_int(), random_int())
    lifecycle_stage = Experiment.ACTIVE_LIFECYCLE
    location = random_file(".json")

    exp = Experiment(exp_id, name, location, lifecycle_stage)
    self._check(exp, exp_id, name, location, lifecycle_stage)

    as_dict = {
        "experiment_id": exp_id,
        "name": name,
        "artifact_location": location,
        "lifecycle_stage": lifecycle_stage,
    }
    self.assertEqual(dict(exp), as_dict)

    proto = exp.to_proto()
    exp2 = Experiment.from_proto(proto)
    self._check(exp2, exp_id, name, location, lifecycle_stage)

    exp3 = Experiment.from_dictionary(as_dict)
    self._check(exp3, exp_id, name, location, lifecycle_stage)
def test_resolve_experiment_id(experiment_id, experiment_name, expected):
    with mock.patch('mlflow.tracking.MlflowClient.get_experiment_by_name') \
            as get_experiment_by_name_mock:
        get_experiment_by_name_mock.return_value = Experiment(
            experiment_id=33, name='Name', artifact_location=None, lifecycle_stage=None)
        exp_id = mlflow.projects._resolve_experiment_id(
            experiment_name=experiment_name, experiment_id=experiment_id)
        assert exp_id == expected
def get_experiment(self, experiment_id):
    """
    Fetch the experiment from the backend store.

    :param experiment_id: String id for the experiment
    :return: A single :py:class:`mlflow.entities.Experiment` object if it exists,
             otherwise raises an Exception.
    """
    req_body = message_to_json(GetExperiment(experiment_id=str(experiment_id)))
    response_proto = self._call_endpoint(GetExperiment, req_body)
    return Experiment.from_proto(response_proto.experiment)
def _get_experiment(self, experiment_id, view_type=ViewType.ALL):
    self._check_root_dir()
    experiment_dir = self._get_experiment_path(experiment_id, view_type)
    if experiment_dir is None:
        raise Exception("Could not find experiment with ID %s" % experiment_id)
    meta = read_yaml(experiment_dir, FileStore.META_DATA_FILE_NAME)
    if experiment_dir.startswith(self.trash_folder):
        meta['lifecycle_stage'] = Experiment.DELETED_LIFECYCLE
    else:
        meta['lifecycle_stage'] = Experiment.ACTIVE_LIFECYCLE
    return Experiment.from_dictionary(meta)
def _dict_to_experiment(experiment_dict):
    dict_copy = experiment_dict.copy()
    # 'experiment_id' was changed from int to string, so we must cast to string
    # when reading legacy experiments
    if isinstance(dict_copy["experiment_id"], int):
        dict_copy["experiment_id"] = str(dict_copy["experiment_id"])
    # Turn the key/value tags into list of experiment tags
    if "tags" in dict_copy:
        dict_copy["tags"] = [
            ExperimentTag(kv[0], kv[1]) for kv in dict_copy["tags"].items()
        ]
    return Experiment.from_dictionary(dict_copy)
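A minimal usage sketch of the legacy conversion above; the meta dict values are illustrative assumptions, only the helper and entity names come from the snippet itself.

# Hypothetical legacy meta dict, for illustration only.
legacy_meta = {
    "experiment_id": 7,  # legacy int id; _dict_to_experiment casts it to "7"
    "name": "legacy-exp",
    "artifact_location": "file:///tmp/mlruns/7",
    "lifecycle_stage": "active",
    "tags": {"team": "ml"},  # converted to [ExperimentTag("team", "ml")]
}
experiment = _dict_to_experiment(legacy_meta)
assert experiment.experiment_id == "7"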
def _create_experiment_with_id(self, name, experiment_id, artifact_uri):
    artifact_uri = artifact_uri or posixpath.join(self.artifact_root_uri, str(experiment_id))
    self._check_root_dir()
    meta_dir = mkdir(self.root_directory, str(experiment_id))
    experiment = Experiment(experiment_id, name, artifact_uri, LifecycleStage.ACTIVE)
    experiment_dict = dict(experiment)
    # tags are added to the file system and are not written to this dict on write
    # As such, we should not include them in the meta file.
    del experiment_dict['tags']
    write_yaml(meta_dir, FileStore.META_DATA_FILE_NAME, experiment_dict)
    return experiment_id
def test_deploy_with_requirements_and_branch(self, *_):
    with self.project_dir:
        ws_dir = "/Shared/dbx/projects/%s" % self.project_name
        configure_result = invoke_cli_runner(
            configure,
            [
                "--environment", "test",
                "--profile", self.profile_name,
                "--workspace-dir", ws_dir,
            ],
        )
        self.assertEqual(configure_result.exit_code, 0)

        deployment_content = {"test": {"jobs": []}}
        write_json(deployment_content, DEFAULT_DEPLOYMENT_FILE_PATH)

        sample_requirements = "\n".join(["pyspark=3.0.0", "xgboost=0.6.0", "pyspark3d"])
        pathlib.Path("runtime_requirements.txt").write_text(sample_requirements)

        with patch(
            "mlflow.get_experiment_by_name",
            return_value=Experiment("id", None, f"dbfs:/dbx/{self.project_name}", None, None),
        ):
            deploy_result = invoke_cli_runner(
                deploy,
                [
                    "--environment", "test",
                    "--requirements-file", "runtime_requirements.txt",
                    "--branch-name", "test-branch",
                ],
            )

            deleted_dependency_lines = [
                line for line in deploy_result.stdout.splitlines()
                if "pyspark dependency deleted" in line
            ]
            self.assertEqual(len(deleted_dependency_lines), 1)
            self.assertEqual(deploy_result.exit_code, 0)
def test_no_runs(self, *_):
    with self.project_dir:
        ws_dir = "/Shared/dbx/projects/%s" % self.project_name
        configure_result = invoke_cli_runner(
            configure,
            [
                "--environment", "test",
                "--profile", self.profile_name,
                "--workspace-dir", ws_dir,
            ],
        )
        self.assertEqual(configure_result.exit_code, 0)

        deployment_content = {"test": {"dbfs": {}, "jobs": []}}
        write_json(deployment_content, DEFAULT_DEPLOYMENT_FILE_PATH)

        with patch(
            "mlflow.get_experiment_by_name",
            return_value=Experiment("id", None, f"dbfs:/dbx/{self.project_name}", None, None),
        ):
            deploy_result = invoke_cli_runner(
                deploy, ["--environment", "test", "--tags", "cake=cheesecake"])
            self.assertEqual(deploy_result.exit_code, 0)

            launch_result = invoke_cli_runner(
                launch,
                [
                    "--environment", "test",
                    "--job", "sample",
                    "--tags", "cake=cheesecake",
                ],
                expected_error=True,
            )
            self.assertIsNotNone(launch_result.exception)
            self.assertTrue(
                "not found in underlying MLflow experiment" in str(launch_result.exception))
def _create_experiment_with_id(self, name, experiment_id, artifact_uri, tags):
    artifact_uri = artifact_uri or append_to_uri_path(
        self.artifact_root_uri, str(experiment_id)
    )
    self._check_root_dir()
    meta_dir = mkdir(self.root_directory, str(experiment_id))
    experiment = Experiment(experiment_id, name, artifact_uri, LifecycleStage.ACTIVE)
    experiment_dict = dict(experiment)
    # tags are added to the file system and are not written to this dict on write
    # As such, we should not include them in the meta file.
    del experiment_dict["tags"]
    write_yaml(meta_dir, FileStore.META_DATA_FILE_NAME, experiment_dict)
    if tags is not None:
        for tag in tags:
            self.set_experiment_tag(experiment_id, tag)
    return experiment_id
def test_with_permissions(self, *_):
    with self.project_dir:
        ws_dir = "/Shared/dbx/projects/%s" % self.project_name
        configure_result = invoke_cli_runner(
            configure,
            [
                "--environment", "default",
                "--profile", self.profile_name,
                "--workspace-dir", ws_dir,
            ],
        )
        self.assertEqual(configure_result.exit_code, 0)

        deployment_file = pathlib.Path(DEFAULT_DEPLOYMENT_FILE_PATH)
        deploy_content = json.loads(deployment_file.read_text())

        sample_job = deploy_content.get("default").get("jobs")[0]
        sample_job["permissions"] = {
            "access_control_list": [
                {
                    "user_name": "*****@*****.**",
                    "permission_level": "IS_OWNER",
                },
                {"group_name": "some-user-group", "permission_level": "CAN_VIEW"},
            ]
        }

        deployment_file.write_text(json.dumps(deploy_content, indent=4))

        with patch(
            "mlflow.get_experiment_by_name",
            return_value=Experiment("id", None, f"dbfs:/dbx/{self.project_name}", None, None),
        ):
            deploy_result = invoke_cli_runner(deploy, ["--environment", "default"])
            self.assertEqual(deploy_result.exit_code, 0)
def get_experiment_by_name(self, experiment_name):
    try:
        req_body = message_to_json(GetExperimentByName(experiment_name=experiment_name))
        response_proto = self._call_endpoint(GetExperimentByName, req_body)
        return Experiment.from_proto(response_proto.experiment)
    except MlflowException as e:
        if e.error_code == databricks_pb2.ErrorCode.Name(
            databricks_pb2.RESOURCE_DOES_NOT_EXIST
        ):
            return None
        elif e.error_code == databricks_pb2.ErrorCode.Name(databricks_pb2.ENDPOINT_NOT_FOUND):
            # Fall back to using ListExperiments-based implementation.
            for experiment in self.list_experiments(ViewType.ALL):
                if experiment.name == experiment_name:
                    return experiment
            return None
        raise e
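A hedged usage sketch, not from the source: because the lookup above returns None for a missing experiment instead of raising, callers can branch on the result. The `store` object and the create call are assumptions for illustration.

# `store` is assumed to be an instance of the REST store defining get_experiment_by_name.
experiment = store.get_experiment_by_name("my-experiment")
if experiment is None:
    # Assumed follow-up: register the name with the backend if it is unknown.
    experiment_id = store.create_experiment("my-experiment")
else:
    experiment_id = experiment.experiment_id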
def _get_experiment(self, experiment_id, view_type=ViewType.ALL):
    self._check_root_dir()
    _validate_experiment_id(experiment_id)
    experiment_dir = self._get_experiment_path(experiment_id, view_type)
    if experiment_dir is None:
        raise MlflowException("Could not find experiment with ID %s" % experiment_id,
                              databricks_pb2.RESOURCE_DOES_NOT_EXIST)
    meta = read_yaml(experiment_dir, FileStore.META_DATA_FILE_NAME)
    if experiment_dir.startswith(self.trash_folder):
        meta['lifecycle_stage'] = Experiment.DELETED_LIFECYCLE
    else:
        meta['lifecycle_stage'] = Experiment.ACTIVE_LIFECYCLE
    experiment = Experiment.from_dictionary(meta)
    if int(experiment_id) != experiment.experiment_id:
        logging.warning("Experiment ID mismatch for exp %s. ID recorded as '%s' in meta data. "
                        "Experiment will be ignored.",
                        str(experiment_id), str(experiment.experiment_id), exc_info=True)
        return None
    return experiment
def _create_experiment_with_id(self, name, experiment_id, artifact_uri):
    artifact_uri = artifact_uri or append_to_uri_path(
        self.artifact_root_uri, str(experiment_id))
    dynamodb = self._get_dynamodb_resource()
    table_name = "_".join([self.table_prefix, DynamodbStore.EXPERIMENT_TABLE])
    table = dynamodb.Table(table_name)
    exp = Experiment(
        experiment_id=experiment_id,
        name=name,
        artifact_location=artifact_uri,
        lifecycle_stage=LifecycleStage.ACTIVE,
    )
    response = table.put_item(
        Item=_entity_to_dict(exp),
        ReturnConsumedCapacity="TOTAL",
    )
    if response["ResponseMetadata"]["HTTPStatusCode"] != 200:
        raise MlflowException("DynamoDB connection error")
    return experiment_id
def test_launch_run_submit(self, *_):
    with self.project_dir:
        ws_dir = "/Shared/dbx/projects/%s" % self.project_name
        configure_result = invoke_cli_runner(
            configure,
            [
                "--environment", "test",
                "--profile", self.profile_name,
                "--workspace-dir", ws_dir,
            ],
        )
        self.assertEqual(configure_result.exit_code, 0)

        deployment_content = {"test": {"dbfs": {}, "jobs": []}}
        write_json(deployment_content, DEFAULT_DEPLOYMENT_FILE_PATH)

        with patch(
            "mlflow.get_experiment_by_name",
            return_value=Experiment("id", None, f"dbfs:/dbx/{self.project_name}", None, None),
        ):
            deploy_result = invoke_cli_runner(
                deploy, ["--environment", "test", "--tags", "cake=cheesecake"])
            self.assertEqual(deploy_result.exit_code, 0)

            launch_result = invoke_cli_runner(
                launch,
                [
                    "--environment", "test",
                    "--job", "sample",
                    "--tags", "cake=cheesecake",
                    "--as-run-submit",
                ],
            )
            self.assertEqual(launch_result.exit_code, 0)
def test_write_specs_to_file(self, *_):
    with self.project_dir:
        ws_dir = "/Shared/dbx/projects/%s" % self.project_name
        configure_result = invoke_cli_runner(
            configure,
            [
                "--environment", "default",
                "--profile", self.profile_name,
                "--workspace-dir", ws_dir,
            ],
        )
        self.assertEqual(configure_result.exit_code, 0)

        spec_file = ".dbx/deployment-result.json"

        with patch(
            "mlflow.get_experiment_by_name",
            return_value=Experiment("id", None, f"dbfs:/dbx/{self.project_name}", None, None),
        ):
            deploy_result = invoke_cli_runner(
                deploy, ["--environment", "default", "--write-specs-to-file", spec_file])
            self.assertEqual(deploy_result.exit_code, 0)

            spec_result = json.loads(pathlib.Path(spec_file).read_text())
            self.assertIsNotNone(spec_result)

            deploy_overwrite = invoke_cli_runner(
                deploy, ["--environment", "default", "--write-specs-to-file", spec_file])
            self.assertEqual(deploy_overwrite.exit_code, 0)
def test_deploy_incorrect_artifact_location(self, *_):
    with self.project_dir:
        ws_dir = "/Shared/dbx/projects/%s" % self.project_name
        configure_result = invoke_cli_runner(
            configure,
            [
                "--environment", "test",
                "--profile", self.profile_name,
                "--workspace-dir", ws_dir,
            ],
        )
        self.assertEqual(configure_result.exit_code, 0)

        with patch(
            "mlflow.get_experiment_by_name",
            return_value=Experiment("id", None, "dbfs:/some/correct-location", None, None),
        ):
            deployment_content = {"test": {"jobs": []}}
            write_json(deployment_content, DEFAULT_DEPLOYMENT_FILE_PATH)

            sample_config = read_json(INFO_FILE_PATH)
            sample_config["environments"]["test"]["artifact_location"] = \
                "dbfs:/some/another-location"
            write_json(sample_config, INFO_FILE_PATH)

            deploy_result = invoke_cli_runner(deploy, ["--environment", "test"],
                                              expected_error=True)
            self.assertIsInstance(deploy_result.exception, Exception)
            self.assertIn(
                "Please change the experiment name to create a new experiment",
                str(deploy_result.exception))
def test_deploy_non_existent_env(self, *_):
    with self.project_dir:
        ws_dir = "/Shared/dbx/projects/%s" % self.project_name
        configure_result = invoke_cli_runner(
            configure,
            [
                "--environment", "test",
                "--profile", self.profile_name,
                "--workspace-dir", ws_dir,
            ],
        )
        self.assertEqual(configure_result.exit_code, 0)

        deployment_content = {"misconfigured-environment": {"dbfs": {}, "jobs": []}}
        write_json(deployment_content, DEFAULT_DEPLOYMENT_FILE_PATH)

        with patch(
            "mlflow.get_experiment_by_name",
            return_value=Experiment("id", None, f"dbfs:/dbx/{self.project_name}", None, None),
        ):
            deploy_result = invoke_cli_runner(deploy, ["--environment", "test"],
                                              expected_error=True)
            self.assertIsInstance(deploy_result.exception, NameError)
            self.assertIn("non-existent in the deployment file", str(deploy_result.exception))
def test_deploy_listed_jobs(self, *_):
    with self.project_dir:
        ws_dir = "/Shared/dbx/projects/%s" % self.project_name
        configure_result = invoke_cli_runner(
            configure,
            [
                "--environment", "test",
                "--profile", self.profile_name,
                "--workspace-dir", ws_dir,
            ],
        )
        self.assertEqual(configure_result.exit_code, 0)

        deployment_content = {"test": {"jobs": [{"name": "job-1"}, {"name": "job-2"}]}}
        write_json(deployment_content, DEFAULT_DEPLOYMENT_FILE_PATH)

        with patch(
            "mlflow.get_experiment_by_name",
            return_value=Experiment("id", None, f"dbfs:/dbx/{self.project_name}", None, None),
        ):
            deploy_result = invoke_cli_runner(
                deploy, ["--environment", "test", "--jobs", "job-1,job-2"])
            self.assertEqual(deploy_result.exit_code, 0)
def test_execute(self, *args):  # noqa
    with self.project_dir:
        ws_dir = "/Shared/dbx/projects/%s" % self.project_name
        configure_result = invoke_cli_runner(
            configure,
            [
                "--environment", "test",
                "--profile", self.profile_name,
                "--workspace-dir", ws_dir,
            ],
        )
        self.assertEqual(configure_result.exit_code, 0)

        with patch(
            "mlflow.get_experiment_by_name",
            return_value=Experiment(
                "id", None, f"dbfs:/Shared/dbx/projects/{self.project_name}", None, None),
        ):
            execute_result = invoke_cli_runner(
                execute,
                [
                    "--environment", "default",
                    "--cluster-id", "000-some-cluster-id",
                    "--job", f"{self.project_name}-sample",
                ],
            )
            self.assertEqual(execute_result.exit_code, 0)
def list_experiments(
    self, view_type=ViewType.ACTIVE_ONLY, max_results=None, page_token=None,
):
    """
    :param view_type: Qualify requested type of experiments.
    :param max_results: If passed, specifies the maximum number of experiments desired. If not
                        passed, the server will pick a maximum number of results to return.
    :param page_token: Token specifying the next page of results. It should be obtained from
                       a ``list_experiments`` call.
    :return: A :py:class:`PagedList <mlflow.store.entities.PagedList>` of
             :py:class:`Experiment <mlflow.entities.Experiment>` objects. The pagination token
             for the next page can be obtained via the ``token`` attribute of the object.
    """
    req_body = message_to_json(
        ListExperiments(view_type=view_type, max_results=max_results, page_token=page_token)
    )
    response_proto = self._call_endpoint(ListExperiments, req_body)
    experiments = [Experiment.from_proto(x) for x in response_proto.experiments]
    # If the response doesn't contain `next_page_token`, `response_proto.next_page_token`
    # returns an empty string (default value for a string proto field).
    token = (
        response_proto.next_page_token if response_proto.HasField("next_page_token") else None
    )
    return PagedList(experiments, token)
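A minimal sketch of consuming the paginated API above, assuming `store` exposes this `list_experiments` signature; the page size of 100 is an arbitrary example value.

def iter_all_experiments(store, view_type=ViewType.ALL):
    # Follow the pagination token until the server stops returning one.
    page_token = None
    while True:
        page = store.list_experiments(
            view_type=view_type, max_results=100, page_token=page_token)
        for experiment in page:
            yield experiment
        page_token = page.token
        if page_token is None:
            break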
class ExecuteTest(DbxTest):
    @patch(
        "databricks_cli.configure.provider.ProfileConfigProvider.get_config",
        return_value=test_dbx_config,
    )
    @patch("databricks_cli.workspace.api.WorkspaceService.mkdirs", return_value=True)
    @patch(
        "databricks_cli.clusters.api.ClusterService.get_cluster",
        return_value={"cluster_name": "some-name", "state": "RUNNING"},
    )
    @patch(
        "mlflow.get_experiment_by_name",
        return_value=Experiment("id", None, "location", None, None),
    )
    @patch("dbx.utils.common.ApiV1Client.create_context", return_value={"id": 1})
    @patch("dbx.utils.common.ApiV1Client.execute_command", return_value={"id": 1})
    @patch(
        "dbx.utils.common.ApiV1Client.get_command_status",
        return_value={
            "status": "Finished",
            "results": {"resultType": "Ok", "data": "Ok!"},
        },
    )
    @patch("mlflow.start_run", return_value=run_mock)
    @patch("mlflow.log_artifact", return_value=None)
    @patch("mlflow.set_tags", return_value=None)
    @patch(
        "mlflow.get_experiment_by_name",
        return_value=Experiment("id", None, "location", None, None),
    )
    @patch("mlflow.set_experiment", return_value=None)
    def test_execute(self, *args):  # noqa
        with self.project_dir:
            ws_dir = "/Shared/dbx/projects/%s" % self.project_name
            configure_result = invoke_cli_runner(
                configure,
                [
                    "--environment", "test",
                    "--profile", self.profile_name,
                    "--workspace-dir", ws_dir,
                ],
            )
            self.assertEqual(configure_result.exit_code, 0)

            execute_result = invoke_cli_runner(
                execute,
                [
                    "--environment", "default",
                    "--cluster-id", "000-some-cluster-id",
                    "--job", f"{self.project_name}-sample",
                ],
            )
            self.assertEqual(execute_result.exit_code, 0)

    @patch(
        "databricks_cli.clusters.api.ClusterService.list_clusters",
        return_value={
            "clusters": [
                {"cluster_name": "some-cluster-name", "cluster_id": "aaa-111"},
                {"cluster_name": "other-cluster-name", "cluster_id": "aaa-bbb-ccc"},
                {"cluster_name": "duplicated-name", "cluster_id": "duplicated-1"},
                {"cluster_name": "duplicated-name", "cluster_id": "duplicated-2"},
            ]
        },
    )
    @patch(
        "databricks_cli.clusters.api.ClusterService.get_cluster",
        side_effect=lambda cid: "something" if cid in ("aaa-bbb-ccc", "aaa-111") else None,
    )
    def test_preprocess_cluster_args(self, *args):  # noqa
        api_client = Mock(ApiClient)

        self.assertRaises(RuntimeError, _preprocess_cluster_args, api_client, None, None)

        id_by_name = _preprocess_cluster_args(api_client, "some-cluster-name", None)
        self.assertEqual(id_by_name, "aaa-111")

        id_by_id = _preprocess_cluster_args(api_client, None, "aaa-bbb-ccc")
        self.assertEqual(id_by_id, "aaa-bbb-ccc")

        self.assertRaises(NameError, _preprocess_cluster_args, api_client,
                          "non-existent-cluster-by-name", None)
        self.assertRaises(NameError, _preprocess_cluster_args, api_client,
                          "duplicated-name", None)
        self.assertRaises(NameError, _preprocess_cluster_args, api_client,
                          None, "non-existent-id")

    def test_awake_cluster(self):
        # normal behavior
        cluster_service_mock = Mock(ClusterService)
        cluster_service_mock.get_cluster.side_effect = [
            {"state": "TERMINATED"},
            {"state": "PENDING"},
            {"state": "RUNNING"},
            {"state": "RUNNING"},
        ]
        awake_cluster(cluster_service_mock, "aaa-bbb")
        self.assertEqual(cluster_service_mock.get_cluster("aaa-bbb").get("state"), "RUNNING")

        # error behavior
        error_mock = Mock(ClusterService)
        error_mock.get_cluster.return_value = {"state": "ERROR"}
        self.assertRaises(RuntimeError, awake_cluster, error_mock, "aaa-bbb")
def test_message_to_json():
    json_out = message_to_json(Experiment("123", "name", "arty", "active").to_proto())
    assert json.loads(json_out) == {
        "experiment_id": "123",
        "name": "name",
        "artifact_location": "arty",
        "lifecycle_stage": "active",
    }

    original_proto_message = RegisteredModel(
        name="model_1",
        creation_timestamp=111,
        last_updated_timestamp=222,
        description="Test model",
        latest_versions=[
            ModelVersion(
                name="mv-1",
                version="1",
                creation_timestamp=333,
                last_updated_timestamp=444,
                description="v 1",
                user_id="u1",
                current_stage="Production",
                source="A/B",
                run_id="9245c6ce1e2d475b82af84b0d36b52f4",
                status="READY",
                status_message=None,
            ),
            ModelVersion(
                name="mv-2",
                version="2",
                creation_timestamp=555,
                last_updated_timestamp=666,
                description="v 2",
                user_id="u2",
                current_stage="Staging",
                source="A/C",
                run_id="123",
                status="READY",
                status_message=None,
            ),
        ],
    ).to_proto()
    json_out = message_to_json(original_proto_message)
    json_dict = json.loads(json_out)
    assert json_dict == {
        "name": "model_1",
        "creation_timestamp": 111,
        "last_updated_timestamp": 222,
        "description": "Test model",
        "latest_versions": [
            {
                "name": "mv-1",
                "version": "1",
                "creation_timestamp": 333,
                "last_updated_timestamp": 444,
                "current_stage": "Production",
                "description": "v 1",
                "user_id": "u1",
                "source": "A/B",
                "run_id": "9245c6ce1e2d475b82af84b0d36b52f4",
                "status": "READY",
            },
            {
                "name": "mv-2",
                "version": "2",
                "creation_timestamp": 555,
                "last_updated_timestamp": 666,
                "current_stage": "Staging",
                "description": "v 2",
                "user_id": "u2",
                "source": "A/C",
                "run_id": "123",
                "status": "READY",
            },
        ],
    }

    new_proto_message = ProtoRegisteredModel()
    parse_dict(json_dict, new_proto_message)
    assert original_proto_message == new_proto_message

    test_message = ParseTextIntoProto(
        """
        field_int32: 11
        field_int64: 12
        field_uint32: 13
        field_uint64: 14
        field_sint32: 15
        field_sint64: 16
        field_fixed32: 17
        field_fixed64: 18
        field_sfixed32: 19
        field_sfixed64: 20
        field_bool: true
        field_string: "Im a string"
        field_with_default1: 111
        field_repeated_int64: [1, 2, 3]
        field_enum: ENUM_VALUE1
        field_inner_message {
          field_inner_int64: 101
          field_inner_repeated_int64: [102, 103]
        }
        field_inner_message {
          field_inner_int64: 104
          field_inner_repeated_int64: [105, 106]
        }
        oneof1: 207
        [mlflow.ExtensionMessage.field_extended_int64]: 100
        field_map1: [{key: 51 value: "52"}, {key: 53 value: "54"}]
        field_map2: [{key: "61" value: 62}, {key: "63" value: 64}]
        field_map3: [{key: 561 value: 562}, {key: 563 value: 564}]
        field_map4: [{key: 71 value: {field_inner_int64: 72
                                      field_inner_repeated_int64: [81, 82]
                                      field_inner_string: "str1"}},
                     {key: 73 value: {field_inner_int64: 74
                                      field_inner_repeated_int64: 83
                                      field_inner_string: "str2"}}]
        """,
        TestMessage(),
    )
    json_out = message_to_json(test_message)
    json_dict = json.loads(json_out)
    assert json_dict == {
        "field_int32": 11,
        "field_int64": 12,
        "field_uint32": 13,
        "field_uint64": 14,
        "field_sint32": 15,
        "field_sint64": 16,
        "field_fixed32": 17,
        "field_fixed64": 18,
        "field_sfixed32": 19,
        "field_sfixed64": 20,
        "field_bool": True,
        "field_string": "Im a string",
        "field_with_default1": 111,
        "field_repeated_int64": [1, 2, 3],
        "field_enum": "ENUM_VALUE1",
        "field_inner_message": [
            {"field_inner_int64": 101, "field_inner_repeated_int64": [102, 103]},
            {"field_inner_int64": 104, "field_inner_repeated_int64": [105, 106]},
        ],
        "oneof1": 207,
        # JSON doesn't support non-string keys, so the int keys will be converted to strings.
        "field_map1": {"51": "52", "53": "54"},
        "field_map2": {"63": 64, "61": 62},
        "field_map3": {"561": 562, "563": 564},
        "field_map4": {
            "73": {
                "field_inner_int64": 74,
                "field_inner_repeated_int64": [83],
                "field_inner_string": "str2",
            },
            "71": {
                "field_inner_int64": 72,
                "field_inner_repeated_int64": [81, 82],
                "field_inner_string": "str1",
            },
        },
        "[mlflow.ExtensionMessage.field_extended_int64]": "100",
    }

    new_test_message = TestMessage()
    parse_dict(json_dict, new_test_message)
    assert new_test_message == test_message
import pytest
import mock

from mlflow.entities import (Experiment, Run, RunInfo, RunData, RunTag, Metric, Param,
                             ExperimentTag, RunStatus, LifecycleStage, ViewType)

experiment = Experiment(
    experiment_id="1",
    name="experiment_name",
    artifact_location="artifact_location",
    lifecycle_stage=LifecycleStage.ACTIVE,
    tags=[],
)
run_info = RunInfo(
    run_uuid="1",
    run_id="1",
    experiment_id="experiment_id",
    user_id="unknown",
    status=RunStatus.to_string(RunStatus.RUNNING),
    start_time=1,
    end_time=None,
    lifecycle_stage=LifecycleStage.ACTIVE,
    artifact_uri="artifact_uri",
)
run_data = RunData(metrics=[], params=[], tags=[])
run = Run(run_info=run_info, run_data=run_data)
metric = Metric(key="metric1", value=1, timestamp=1, step=1)
param = Param(key="param1", value="val1")
tag = RunTag(key="tag1", value="val1")
experiment_tag = ExperimentTag(key="tag1", value="val1")
class DeployTest(DbxTest):
    @patch("databricks_cli.sdk.service.DbfsService.get_status", return_value=None)
    @patch(
        "databricks_cli.configure.provider.ProfileConfigProvider.get_config",
        return_value=test_dbx_config,
    )
    @patch(
        "databricks_cli.configure.provider.ProfileConfigProvider.get_config",
        return_value=test_dbx_config,
    )
    @patch("databricks_cli.workspace.api.WorkspaceService.mkdirs", return_value=True)
    @patch("mlflow.set_experiment", return_value=None)
    @patch("mlflow.start_run", return_value=run_mock)
    @patch("mlflow.log_artifact", return_value=None)
    @patch("mlflow.set_tags", return_value=None)
    @patch("databricks_cli.configure.config._get_api_client", return_value=None)
    @patch(
        "mlflow.get_experiment_by_name",
        return_value=Experiment("id", None, "location", None, None),
    )
    @patch("mlflow.set_experiment", return_value=None)
    def test_deploy_basic(self, *_):
        with self.project_dir:
            ws_dir = "/Shared/dbx/projects/%s" % self.project_name
            configure_result = invoke_cli_runner(
                configure,
                [
                    "--environment", "test",
                    "--profile", self.profile_name,
                    "--workspace-dir", ws_dir,
                ],
            )
            self.assertEqual(configure_result.exit_code, 0)

            deployment_content = {"test": {"jobs": []}}
            write_json(deployment_content, DEFAULT_DEPLOYMENT_FILE_PATH)

            deploy_result = invoke_cli_runner(deploy, ["--environment", "test"])
            self.assertEqual(deploy_result.exit_code, 0)

    @patch("databricks_cli.sdk.service.DbfsService.get_status", return_value=None)
    @patch(
        "databricks_cli.configure.provider.ProfileConfigProvider.get_config",
        return_value=test_dbx_config,
    )
    @patch("databricks_cli.workspace.api.WorkspaceService.mkdirs", return_value=True)
    @patch("databricks_cli.workspace.api.WorkspaceService.get_status", return_value=True)
    @patch(
        "mlflow.get_experiment_by_name",
        return_value=Experiment("id", None, "location", None, None),
    )
    @patch("mlflow.set_experiment", return_value=None)
    @patch("mlflow.start_run", return_value=run_mock)
    @patch("mlflow.log_artifact", return_value=None)
    @patch("mlflow.set_tags", return_value=None)
    def test_deploy_non_existent_env(self, *_):
        with self.project_dir:
            ws_dir = "/Shared/dbx/projects/%s" % self.project_name
            configure_result = invoke_cli_runner(
                configure,
                [
                    "--environment", "test",
                    "--profile", self.profile_name,
                    "--workspace-dir", ws_dir,
                ],
            )
            self.assertEqual(configure_result.exit_code, 0)

            deployment_content = {"misconfigured-environment": {"dbfs": {}, "jobs": []}}
            write_json(deployment_content, DEFAULT_DEPLOYMENT_FILE_PATH)

            deploy_result = invoke_cli_runner(deploy, ["--environment", "test"],
                                              expected_error=True)
            self.assertIsInstance(deploy_result.exception, NameError)
            self.assertIn("non-existent in the deployment file", str(deploy_result.exception))

    @patch("databricks_cli.sdk.service.DbfsService.get_status", return_value=None)
    @patch(
        "databricks_cli.configure.provider.ProfileConfigProvider.get_config",
        return_value=test_dbx_config,
    )
    @patch("databricks_cli.workspace.api.WorkspaceService.mkdirs", return_value=True)
    @patch("databricks_cli.workspace.api.WorkspaceService.get_status", return_value=True)
    @patch("databricks_cli.jobs.api.JobsService.list_jobs", return_value={"jobs": []})
    @patch("databricks_cli.jobs.api.JobsApi.create_job", return_value={"job_id": "1"})
    @patch(
        "mlflow.get_experiment_by_name",
        return_value=Experiment("id", None, "location", None, None),
    )
    @patch("mlflow.set_experiment", return_value=None)
    @patch("mlflow.start_run", return_value=run_mock)
    @patch("mlflow.log_artifact", return_value=None)
    @patch("mlflow.set_tags", return_value=None)
    def test_deploy_listed_jobs(self, *_):
        with self.project_dir:
            ws_dir = "/Shared/dbx/projects/%s" % self.project_name
            configure_result = invoke_cli_runner(
                configure,
                [
                    "--environment", "test",
                    "--profile", self.profile_name,
                    "--workspace-dir", ws_dir,
                ],
            )
            self.assertEqual(configure_result.exit_code, 0)

            deployment_content = {"test": {"jobs": [{"name": "job-1"}, {"name": "job-2"}]}}
            write_json(deployment_content, DEFAULT_DEPLOYMENT_FILE_PATH)

            deploy_result = invoke_cli_runner(
                deploy, ["--environment", "test", "--jobs", "job-1,job-2"])
            self.assertEqual(deploy_result.exit_code, 0)

    @patch("databricks_cli.sdk.service.DbfsService.get_status", return_value=None)
    @patch(
        "databricks_cli.configure.provider.ProfileConfigProvider.get_config",
        return_value=test_dbx_config,
    )
    @patch("databricks_cli.workspace.api.WorkspaceService.mkdirs", return_value=True)
    @patch("databricks_cli.workspace.api.WorkspaceService.get_status", return_value=True)
    @patch("databricks_cli.jobs.api.JobsService.list_jobs", return_value={"jobs": []})
    @patch("databricks_cli.jobs.api.JobsApi.create_job", return_value={"job_id": "1"})
    @patch(
        "mlflow.get_experiment_by_name",
        return_value=Experiment("id", None, "location", None, None),
    )
    @patch("mlflow.set_experiment", return_value=None)
    @patch("mlflow.start_run", return_value=run_mock)
    @patch("mlflow.log_artifact", return_value=None)
    @patch("mlflow.set_tags", return_value=None)
    def test_deploy_with_requirements(self, *_):
        with self.project_dir:
            ws_dir = "/Shared/dbx/projects/%s" % self.project_name
            configure_result = invoke_cli_runner(
                configure,
                [
                    "--environment", "test",
                    "--profile", self.profile_name,
                    "--workspace-dir", ws_dir,
                ],
            )
            self.assertEqual(configure_result.exit_code, 0)

            deployment_content = {"test": {"jobs": []}}
            write_json(deployment_content, DEFAULT_DEPLOYMENT_FILE_PATH)

            sample_requirements = "\n".join(["pyspark=3.0.0", "xgboost=0.6.0"])
            pathlib.Path("runtime_requirements.txt").write_text(sample_requirements)

            deploy_result = invoke_cli_runner(
                deploy,
                [
                    "--environment", "test",
                    "--requirements-file", "runtime_requirements.txt",
                ],
            )
            self.assertEqual(deploy_result.exit_code, 0)

    def test_update_job_positive(self):
        js = Mock(JobsService)
        _update_job(js, "aa-bbb-ccc-111", {"name": 1})
        self.assertEqual(0, 0)  # dummy test to verify positive case

    def test_update_job_negative(self):
        js = Mock(JobsService)
        js.reset_job.side_effect = Mock(side_effect=HTTPError())
        self.assertRaises(HTTPError, _update_job, js, "aa-bbb-ccc-111", {"name": 1})

    @patch("databricks_cli.sdk.service.DbfsService.get_status", return_value=None)
    @patch(
        "databricks_cli.configure.provider.ProfileConfigProvider.get_config",
        return_value=test_dbx_config,
    )
    @patch("databricks_cli.workspace.api.WorkspaceService.mkdirs", return_value=True)
    @patch("databricks_cli.workspace.api.WorkspaceService.get_status", return_value=True)
    @patch("databricks_cli.jobs.api.JobsService.list_jobs", return_value={"jobs": []})
    @patch("databricks_cli.jobs.api.JobsApi.create_job", return_value={"job_id": "1"})
    @patch(
        "mlflow.get_experiment_by_name",
        return_value=Experiment("id", None, "location", "dbfs:/Shared/dbx/test", None),
    )
    @patch("mlflow.set_experiment", return_value=None)
    @patch("mlflow.start_run", return_value=run_mock)
    @patch("mlflow.log_artifact", return_value=None)
    @patch("mlflow.set_tags", return_value=None)
    def test_write_specs_to_file(self, *_):
        with self.project_dir:
            ws_dir = "/Shared/dbx/projects/%s" % self.project_name
            configure_result = invoke_cli_runner(
                configure,
                [
                    "--environment", "default",
                    "--profile", self.profile_name,
                    "--workspace-dir", ws_dir,
                ],
            )
            self.assertEqual(configure_result.exit_code, 0)

            spec_file = ".dbx/deployment-result.json"
            deploy_result = invoke_cli_runner(
                deploy, ["--environment", "default", "--write-specs-to-file", spec_file])
            self.assertEqual(deploy_result.exit_code, 0)

            spec_result = json.loads(pathlib.Path(spec_file).read_text())
            self.assertIsNotNone(spec_result)

            deploy_overwrite = invoke_cli_runner(
                deploy, ["--environment", "default", "--write-specs-to-file", spec_file]
            )
            self.assertEqual(deploy_overwrite.exit_code, 0)

    @patch("databricks_cli.sdk.api_client.ApiClient.perform_query", return_value=None)
    @patch("databricks_cli.sdk.service.DbfsService.get_status", return_value=None)
    @patch(
        "databricks_cli.configure.provider.ProfileConfigProvider.get_config",
        return_value=test_dbx_config,
    )
    @patch("databricks_cli.workspace.api.WorkspaceService.mkdirs", return_value=True)
    @patch("databricks_cli.workspace.api.WorkspaceService.get_status", return_value=True)
    @patch("databricks_cli.jobs.api.JobsService.list_jobs", return_value={"jobs": []})
    @patch("databricks_cli.jobs.api.JobsApi.create_job", return_value={"job_id": "1"})
    @patch(
        "mlflow.get_experiment_by_name",
        return_value=Experiment("id", None, "location", "dbfs:/Shared/dbx/test", None),
    )
    @patch("mlflow.set_experiment", return_value=None)
    @patch("mlflow.start_run", return_value=run_mock)
    @patch("mlflow.log_artifact", return_value=None)
    @patch("mlflow.set_tags", return_value=None)
    def test_with_permissions(self, *_):
        with self.project_dir:
            ws_dir = "/Shared/dbx/projects/%s" % self.project_name
            configure_result = invoke_cli_runner(
                configure,
                [
                    "--environment", "default",
                    "--profile", self.profile_name,
                    "--workspace-dir", ws_dir,
                ],
            )
            self.assertEqual(configure_result.exit_code, 0)

            deployment_file = pathlib.Path(DEFAULT_DEPLOYMENT_FILE_PATH)
            deploy_content = json.loads(deployment_file.read_text())

            sample_job = deploy_content.get("default").get("jobs")[0]
            sample_job["permissions"] = {
                "access_control_list": [
                    {
                        "user_name": "*****@*****.**",
                        "permission_level": "IS_OWNER",
                    },
                    {"group_name": "some-user-group", "permission_level": "CAN_VIEW"},
                ]
            }

            deployment_file.write_text(json.dumps(deploy_content, indent=4))

            deploy_result = invoke_cli_runner(deploy, ["--environment", "default"])
            self.assertEqual(deploy_result.exit_code, 0)