def test_correct_filtering(filter_string, matching_runs):
    runs = [
        Run(run_info=RunInfo(
                run_uuid="hi", run_id="hi", experiment_id=0, user_id="user-id",
                status=RunStatus.to_string(RunStatus.FAILED),
                start_time=0, end_time=1,
                lifecycle_stage=LifecycleStage.ACTIVE),
            run_data=RunData(
                metrics=[Metric("key1", 121, 1, 0)],
                params=[Param("my_param", "A")],
                tags=[])),
        Run(run_info=RunInfo(
                run_uuid="hi2", run_id="hi2", experiment_id=0, user_id="user-id",
                status=RunStatus.to_string(RunStatus.FINISHED),
                start_time=0, end_time=1,
                lifecycle_stage=LifecycleStage.ACTIVE),
            run_data=RunData(
                metrics=[Metric("key1", 123, 1, 0)],
                params=[Param("my_param", "A")],
                tags=[RunTag("tag1", "C")])),
        Run(run_info=RunInfo(
                run_uuid="hi3", run_id="hi3", experiment_id=1, user_id="user-id",
                status=RunStatus.to_string(RunStatus.FAILED),
                start_time=0, end_time=1,
                lifecycle_stage=LifecycleStage.ACTIVE),
            run_data=RunData(
                metrics=[Metric("key1", 125, 1, 0)],
                params=[Param("my_param", "B")],
                tags=[RunTag("tag1", "D")])),
    ]
    filtered_runs = SearchUtils.filter(runs, filter_string)
    assert set(filtered_runs) == {runs[i] for i in matching_runs}
def test_creation_and_hydration(self):
    (ri1, run_id, experiment_id, user_id, status, start_time, end_time,
     lifecycle_stage, artifact_uri) = self._create()
    self._check(ri1, run_id, experiment_id, user_id, status, start_time,
                end_time, lifecycle_stage, artifact_uri)
    as_dict = {
        "run_uuid": run_id,
        "run_id": run_id,
        "experiment_id": experiment_id,
        "user_id": user_id,
        "status": status,
        "start_time": start_time,
        "end_time": end_time,
        "lifecycle_stage": lifecycle_stage,
        "artifact_uri": artifact_uri,
    }
    self.assertEqual(dict(ri1), as_dict)

    proto = ri1.to_proto()
    ri2 = RunInfo.from_proto(proto)
    self._check(ri2, run_id, experiment_id, user_id, status, start_time,
                end_time, lifecycle_stage, artifact_uri)
    ri3 = RunInfo.from_dictionary(as_dict)
    self._check(ri3, run_id, experiment_id, user_id, status, start_time,
                end_time, lifecycle_stage, artifact_uri)
    # Test that we can add a field to RunInfo and still deserialize it from a dictionary
    dict_copy_0 = as_dict.copy()
    dict_copy_0["my_new_field"] = "new field value"
    ri4 = RunInfo.from_dictionary(dict_copy_0)
    self._check(ri4, run_id, experiment_id, user_id, status, start_time,
                end_time, lifecycle_stage, artifact_uri)
def test_pagination(page_token, max_results, matching_runs, expected_next_page_token):
    runs = [
        Run(run_info=RunInfo(
                run_uuid="0", run_id="0", experiment_id=0, user_id="user-id",
                status=RunStatus.to_string(RunStatus.FAILED),
                start_time=0, end_time=1,
                lifecycle_stage=LifecycleStage.ACTIVE),
            run_data=RunData([], [], [])),
        Run(run_info=RunInfo(
                run_uuid="1", run_id="1", experiment_id=0, user_id="user-id",
                status=RunStatus.to_string(RunStatus.FAILED),
                start_time=0, end_time=1,
                lifecycle_stage=LifecycleStage.ACTIVE),
            run_data=RunData([], [], [])),
        Run(run_info=RunInfo(
                run_uuid="2", run_id="2", experiment_id=0, user_id="user-id",
                status=RunStatus.to_string(RunStatus.FAILED),
                start_time=0, end_time=1,
                lifecycle_stage=LifecycleStage.ACTIVE),
            run_data=RunData([], [], [])),
    ]
    encoded_page_token = None
    if page_token:
        encoded_page_token = base64.b64encode(json.dumps(page_token).encode("utf-8"))
    paginated_runs, next_page_token = SearchUtils.paginate(runs, encoded_page_token, max_results)

    paginated_run_indices = []
    for run in paginated_runs:
        for i, r in enumerate(runs):
            if r == run:
                paginated_run_indices.append(i)
                break
    assert paginated_run_indices == matching_runs

    decoded_next_page_token = None
    if next_page_token:
        decoded_next_page_token = json.loads(base64.b64decode(next_page_token))
    assert decoded_next_page_token == expected_next_page_token
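# A minimal, self-contained sketch of the page-token round trip exercised by
# test_pagination above: tokens are opaque base64-encoded JSON. The
# {"offset": ...} payload and the helper names here are illustrative
# assumptions, not MLflow's public API.
import base64
import json

def _encode_page_token(payload):
    # e.g. {"offset": 2} -> b'eyJvZmZzZXQiOiAyfQ=='
    return base64.b64encode(json.dumps(payload).encode("utf-8"))

def _decode_page_token(token):
    return json.loads(base64.b64decode(token))

assert _decode_page_token(_encode_page_token({"offset": 2})) == {"offset": 2}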
def test_creation_and_hydration(self):
    (ri1, run_uuid, experiment_id, name, source_type, source_name,
     entry_point_name, user_id, status, start_time, end_time, source_version,
     artifact_uri) = self._create()
    self._check(ri1, run_uuid, experiment_id, name, source_type, source_name,
                entry_point_name, user_id, status, start_time, end_time,
                source_version, artifact_uri)
    as_dict = {
        "run_uuid": run_uuid,
        "experiment_id": experiment_id,
        "name": name,
        "source_type": source_type,
        "source_name": source_name,
        "entry_point_name": entry_point_name,
        "user_id": user_id,
        "status": status,
        "start_time": start_time,
        "end_time": end_time,
        "source_version": source_version,
        "artifact_uri": artifact_uri,
    }
    self.assertEqual(dict(ri1), as_dict)

    proto = ri1.to_proto()
    ri2 = RunInfo.from_proto(proto)
    self._check(ri2, run_uuid, experiment_id, name, source_type, source_name,
                entry_point_name, user_id, status, start_time, end_time,
                source_version, artifact_uri)
    ri3 = RunInfo.from_dictionary(as_dict)
    self._check(ri3, run_uuid, experiment_id, name, source_type, source_name,
                entry_point_name, user_id, status, start_time, end_time,
                source_version, artifact_uri)
def test_correct_sorting(order_bys, matching_runs):
    runs = [
        Run(run_info=RunInfo(
                run_uuid="9", run_id="9", experiment_id=0, user_id="user-id",
                status=RunStatus.to_string(RunStatus.FAILED),
                start_time=0, end_time=1,
                lifecycle_stage=LifecycleStage.ACTIVE),
            run_data=RunData(
                metrics=[Metric("key1", 121, 1, 0)],
                params=[Param("my_param", "A")],
                tags=[])),
        Run(run_info=RunInfo(
                run_uuid="8", run_id="8", experiment_id=0, user_id="user-id",
                status=RunStatus.to_string(RunStatus.FINISHED),
                start_time=1, end_time=1,
                lifecycle_stage=LifecycleStage.ACTIVE),
            run_data=RunData(
                metrics=[Metric("key1", 123, 1, 0)],
                params=[Param("my_param", "A")],
                tags=[RunTag("tag1", "C")])),
        Run(run_info=RunInfo(
                run_uuid="7", run_id="7", experiment_id=1, user_id="user-id",
                status=RunStatus.to_string(RunStatus.FAILED),
                start_time=1, end_time=1,
                lifecycle_stage=LifecycleStage.ACTIVE),
            run_data=RunData(
                metrics=[Metric("key1", 125, 1, 0)],
                params=[Param("my_param", "B")],
                tags=[RunTag("tag1", "D")])),
    ]
    sorted_runs = SearchUtils.sort(runs, order_bys)
    sorted_run_indices = []
    for run in sorted_runs:
        for i, r in enumerate(runs):
            if r == run:
                sorted_run_indices.append(i)
                break
    assert sorted_run_indices == matching_runs
def test_create_model_version_run_link_in_notebook_with_default_profile(
        mock_registry_store):
    experiment_id = 'test-exp-id'
    hostname = 'https://workspace.databricks.com/'
    workspace_id = '10002'
    run_id = 'runid'
    workspace_url = construct_run_url(hostname, experiment_id, run_id, workspace_id)
    get_run_mock = mock.MagicMock()
    get_run_mock.return_value = Run(
        RunInfo(run_id, experiment_id, 'userid', 'status', 0, 1, None), None)
    with mock.patch('mlflow.tracking.client.is_in_databricks_notebook',
                    return_value=True), \
            mock.patch('mlflow.tracking.client.get_workspace_info_from_dbutils',
                       return_value=(hostname, workspace_id)):
        client = MlflowClient(tracking_uri='databricks', registry_uri='otherplace')
        client.get_run = get_run_mock
        mock_registry_store.create_model_version.return_value = \
            ModelVersion('name', 1, 0, 1, source='source', run_id=run_id,
                         run_link=workspace_url)
        model_version = client.create_model_version('name', 'source', 'runid')
        assert model_version.run_link == workspace_url
        # verify that the client generated the right URL
        mock_registry_store.create_model_version.assert_called_once_with(
            "name", 'source', 'runid', [], workspace_url)
def test_order_by_metric_with_nans_infs_nones():
    metric_vals_str = ["nan", "inf", "-inf", "-1000", "0", "1000", "None"]
    runs = [
        Run(
            run_info=RunInfo(
                run_id=x, run_uuid=x, experiment_id=0, user_id="user",
                status=RunStatus.to_string(RunStatus.FINISHED),
                start_time=0, end_time=1,
                lifecycle_stage=LifecycleStage.ACTIVE,
            ),
            run_data=RunData(
                metrics=[Metric("x", None if x == "None" else float(x), 1, 0)]),
        )
        for x in metric_vals_str
    ]
    sorted_runs_asc = [x.info.run_id for x in SearchUtils.sort(runs, ["metrics.x asc"])]
    sorted_runs_desc = [x.info.run_id for x in SearchUtils.sort(runs, ["metrics.x desc"])]
    # Valid values sort in the requested direction; NaN always orders after
    # them, and runs with a missing (None) value come last in both directions.
    assert sorted_runs_asc == ["-inf", "-1000", "0", "1000", "inf", "nan", "None"]
    assert sorted_runs_desc == ["inf", "1000", "0", "-1000", "-inf", "nan", "None"]
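# The ordering asserted above (comparable values sorted in the requested
# direction, then NaN, then missing/None last) can be reproduced with a
# composite sort key. This is a hypothetical sketch, not SearchUtils'
# actual implementation:
import math

def _sort_metric_values(values, ascending=True):
    def key(v):
        # Rank 0: comparable numbers; rank 1: NaN; rank 2: missing (None).
        rank = 2 if v is None else 1 if math.isnan(v) else 0
        return (rank, (v if ascending else -v) if rank == 0 else 0)
    return sorted(values, key=key)

print(_sort_metric_values([float("nan"), 1000.0, None, -1000.0, 0.0]))
# -> [-1000.0, 0.0, 1000.0, nan, None]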
def create_run(self, experiment_id, user_id, start_time, tags):
    """
    Create a run under the specified experiment ID, setting the run's status
    to "RUNNING" and the start time to the given value.

    :param experiment_id: String ID of the experiment for this run
    :param user_id: ID of the user launching this run
    :param start_time: Start time of the run, in milliseconds since the UNIX epoch
    :param tags: List of RunTag instances to set on the run
    :return: The created Run object
    """
    if experiment_id is None:
        experiment_id = MLMDStore.DEFAULT_EXPERIMENT_ID
    # Look up the experiment to ensure it exists.
    experiment = self.get_experiment(experiment_id)
    run_uuid = uuid.uuid4().hex
    run_info = RunInfo(run_uuid=run_uuid, run_id=run_uuid,
                       experiment_id=experiment_id,
                       artifact_uri=None,
                       user_id=user_id,
                       status=RunStatus.to_string(RunStatus.RUNNING),
                       start_time=start_time,
                       end_time=None,
                       lifecycle_stage=LifecycleStage.ACTIVE)
    for tag in tags:
        self.set_tag(run_uuid, tag)
    exec_name = run_uuid
    self.mldm_exec = self._get_or_create_run_execution(
        self.mldm_workspace, self.mldm_run, exec_name)
    print("Run " + exec_name + " created")
    return self.get_run(run_id=run_uuid)
def _create():
    run_uuid = str(uuid.uuid4())
    experiment_id = random_int(10, 2000)
    name = random_str(random_int(10, 40))
    source_type = random_int(1, 4)
    source_name = random_str(random_int(100, 300))
    entry_point_name = random_str(random_int(100, 300))
    user_id = random_str(random_int(10, 25))
    status = random_int(1, 5)
    start_time = random_int(1, 10)
    end_time = start_time + random_int(1, 10)
    source_version = random_str(random_int(10, 40))
    tags = [
        RunTag(key=random_str(random_int(1, 5)), value=random_str(random_int(1, 5)))
        for _ in range(2)
    ]
    artifact_uri = random_str(random_int(10, 40))
    ri = RunInfo(run_uuid=run_uuid, experiment_id=experiment_id, name=name,
                 source_type=source_type, source_name=source_name,
                 entry_point_name=entry_point_name, user_id=user_id,
                 status=status, start_time=start_time, end_time=end_time,
                 source_version=source_version, tags=tags,
                 artifact_uri=artifact_uri)
    return (ri, run_uuid, experiment_id, name, source_type, source_name,
            entry_point_name, user_id, status, start_time, end_time,
            source_version, tags, artifact_uri)
def test_get_run(init_store):
    expected_run_info = RunInfo(
        run_uuid="7b2e71956f3d4c08b042624a8d83700d",
        run_id="7b2e71956f3d4c08b042624a8d83700d",
        experiment_id="hTb553MBNoOYfhXjnnQh",
        user_id="1",
        status="RUNNING",
        start_time=1597324762662,
        end_time=None,
        lifecycle_stage="active",
        artifact_uri="artifact_path/7b2e71956f3d4c08b042624a8d83700d/artifacts")
    expected_metrics = [Metric(key="metric0", value=20.0, timestamp=1597324762778, step=2),
                        Metric(key="metric1", value=7.0, timestamp=1597324762890, step=2)]
    expected_params = [Param(key="param0", value="val2"),
                       Param(key="param1", value="Val1"),
                       Param(key="param2", value="Val1"),
                       Param(key="param3", value="valeur4")]
    expected_tags = [RunTag(key="tag0", value="val2"),
                     RunTag(key="tag1", value="test3"),
                     RunTag(key="tag2", value="val2"),
                     RunTag(key="tag3", value="test3")]
    expected_run_data = RunData(metrics=expected_metrics,
                                params=expected_params,
                                tags=expected_tags)
    run = init_store.get_run(expected_run_info._run_id)
    assert run._info == expected_run_info
    for i, metric in enumerate(run._data._metric_objs):
        assert metric.__dict__ == expected_run_data._metric_objs[i].__dict__
    assert run._data._params == expected_run_data._params
    assert run._data._tags == expected_run_data._tags
def create_run(self, experiment_id, user_id, run_name, source_type, source_name,
               entry_point_name, start_time, source_version, tags):
    """
    Creates a run with the specified attributes.
    """
    if self.get_experiment(experiment_id) is None:
        raise Exception(
            "Could not create run under experiment with ID %s - no such experiment "
            "exists." % experiment_id)
    run_uuid = uuid.uuid4().hex
    artifact_uri = self._get_artifact_dir(experiment_id, run_uuid)
    num_runs = len(self._list_run_uuids(experiment_id))
    run_info = RunInfo(run_uuid=run_uuid, experiment_id=experiment_id,
                       name="Run %s" % num_runs, artifact_uri=artifact_uri,
                       source_type=source_type, source_name=source_name,
                       entry_point_name=entry_point_name, user_id=user_id,
                       status=RunStatus.RUNNING, start_time=start_time,
                       end_time=None, source_version=source_version, tags=tags)
    # Persist run metadata and create directories for logging metrics,
    # parameters, and artifacts.
    run_dir = self._get_run_dir(run_info.experiment_id, run_info.run_uuid)
    mkdir(run_dir)
    write_yaml(run_dir, FileStore.META_DATA_FILE_NAME, dict(run_info))
    mkdir(run_dir, FileStore.METRICS_FOLDER_NAME)
    mkdir(run_dir, FileStore.PARAMS_FOLDER_NAME)
    mkdir(run_dir, FileStore.ARTIFACTS_FOLDER_NAME)
    return Run(run_info=run_info, run_data=None)
def _create():
    run_id = str(uuid.uuid4())
    experiment_id = str(random_int(10, 2000))
    user_id = random_str(random_int(10, 25))
    status = RunStatus.to_string(random.choice(RunStatus.all_status()))
    start_time = random_int(1, 10)
    end_time = start_time + random_int(1, 10)
    lifecycle_stage = LifecycleStage.ACTIVE
    artifact_uri = random_str(random_int(10, 40))
    ri = RunInfo(
        run_uuid=run_id,
        run_id=run_id,
        experiment_id=experiment_id,
        user_id=user_id,
        status=status,
        start_time=start_time,
        end_time=end_time,
        lifecycle_stage=lifecycle_stage,
        artifact_uri=artifact_uri,
    )
    return (
        ri,
        run_id,
        experiment_id,
        user_id,
        status,
        start_time,
        end_time,
        lifecycle_stage,
        artifact_uri,
    )
def _create():
    run_id = str(uuid.uuid4())
    experiment_id = str(random_int(10, 2000))
    name = random_str(random_int(10, 40))
    source_type = random_int(1, 4)
    source_name = random_str(random_int(100, 300))
    entry_point_name = random_str(random_int(100, 300))
    user_id = random_str(random_int(10, 25))
    status = random_int(1, 5)
    start_time = random_int(1, 10)
    end_time = start_time + random_int(1, 10)
    source_version = random_str(random_int(10, 40))
    lifecycle_stage = LifecycleStage.ACTIVE
    artifact_uri = random_str(random_int(10, 40))
    ri = RunInfo(run_uuid=run_id, run_id=run_id, experiment_id=experiment_id,
                 name=name, source_type=source_type, source_name=source_name,
                 entry_point_name=entry_point_name, user_id=user_id,
                 status=status, start_time=start_time, end_time=end_time,
                 source_version=source_version, lifecycle_stage=lifecycle_stage,
                 artifact_uri=artifact_uri)
    return (ri, run_id, experiment_id, name, source_type, source_name,
            entry_point_name, user_id, status, start_time, end_time,
            source_version, lifecycle_stage, artifact_uri)
def test_create_model_version_run_link_with_configured_profile(mock_registry_store):
    experiment_id = "test-exp-id"
    hostname = "https://workspace.databricks.com/"
    workspace_id = "10002"
    run_id = "runid"
    workspace_url = construct_run_url(hostname, experiment_id, run_id, workspace_id)
    get_run_mock = mock.MagicMock()
    get_run_mock.return_value = Run(
        RunInfo(run_id, experiment_id, "userid", "status", 0, 1, None), None)
    with mock.patch(
        "mlflow.tracking.client.is_in_databricks_notebook", return_value=False
    ), mock.patch(
        "mlflow.tracking.client.get_workspace_info_from_databricks_secrets",
        return_value=(hostname, workspace_id),
    ):
        client = MlflowClient(tracking_uri="databricks", registry_uri="otherplace")
        client.get_run = get_run_mock
        mock_registry_store.create_model_version.return_value = ModelVersion(
            "name", 1, 0, 1, source="source", run_id=run_id, run_link=workspace_url)
        model_version = client.create_model_version("name", "source", "runid")
        assert model_version.run_link == workspace_url
        # verify that the client generated the right URL
        mock_registry_store.create_model_version.assert_called_once_with(
            "name", "source", "runid", [], workspace_url, None)
def test_string_repr(self):
    run_info = RunInfo(run_uuid="hi", run_id="hi", experiment_id=0,
                       name="name", source_type=SourceType.PROJECT,
                       source_name="source-name",
                       entry_point_name="entry-point-name",
                       user_id="user-id", status=RunStatus.FAILED,
                       start_time=0, end_time=1, source_version="version",
                       lifecycle_stage=LifecycleStage.ACTIVE)
    metrics = [Metric(key="key-%s" % i, value=i, timestamp=0, step=i) for i in range(3)]
    run_data = RunData(metrics=metrics, params=[], tags=[])
    run1 = Run(run_info, run_data)
    expected = (
        "<Run: data=<RunData: metrics={'key-0': 0, 'key-1': 1, 'key-2': 2}, "
        "params={}, tags={}>, info=<RunInfo: artifact_uri=None, end_time=1, "
        "entry_point_name='entry-point-name', experiment_id=0, "
        "lifecycle_stage='active', name='name', run_id='hi', run_uuid='hi', "
        "source_name='source-name', source_type=3, source_version='version', "
        "start_time=0, status=4, user_id='user-id'>>")
    assert str(run1) == expected
def update_run_info(self, run_id, run_status, end_time):
    """
    Updates the metadata of the specified run.
    """
    req_body = message_to_json(
        UpdateRun(run_uuid=run_id, run_id=run_id,
                  status=run_status, end_time=end_time))
    response_proto = self._call_endpoint(UpdateRun, req_body)
    return RunInfo.from_proto(response_proto.run_info)
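# For reference, the request built by update_run_info above is the JSON
# rendering of an UpdateRun proto that carries both the legacy run_uuid field
# and the newer run_id field with the same value. Roughly equivalent payload
# (field values are hypothetical, and proto JSON details such as
# int64-as-string are glossed over):
import json

example_req_body = json.dumps({
    "run_uuid": "abc123",
    "run_id": "abc123",
    "status": "FINISHED",
    "end_time": 1597324762999,
})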
def create_run(self, experiment_id, user_id, start_time, tags):
    """
    Creates a run with the specified attributes.
    """
    experiment_id = FileStore.DEFAULT_EXPERIMENT_ID if experiment_id is None else experiment_id
    experiment = self.get_experiment(experiment_id)
    if experiment is None:
        raise MlflowException(
            "Could not create run under experiment with ID %s - no such experiment "
            "exists." % experiment_id,
            databricks_pb2.RESOURCE_DOES_NOT_EXIST)
    if experiment.lifecycle_stage != LifecycleStage.ACTIVE:
        raise MlflowException(
            "Could not create run under non-active experiment with ID "
            "%s." % experiment_id,
            databricks_pb2.INVALID_STATE)
    run_uuid = uuid.uuid4().hex
    artifact_uri = self._get_artifact_dir(experiment_id, run_uuid)
    run_info = RunInfo(run_uuid=run_uuid, run_id=run_uuid,
                       experiment_id=experiment_id, artifact_uri=artifact_uri,
                       user_id=user_id,
                       status=RunStatus.to_string(RunStatus.RUNNING),
                       start_time=start_time, end_time=None,
                       lifecycle_stage=LifecycleStage.ACTIVE)
    # Persist run metadata and create directories for logging metrics,
    # parameters, and artifacts.
    run_dir = self._get_run_dir(run_info.experiment_id, run_info.run_id)
    mkdir(run_dir)
    run_info_dict = _make_persisted_run_info_dict(run_info)
    write_yaml(run_dir, FileStore.META_DATA_FILE_NAME, run_info_dict)
    mkdir(run_dir, FileStore.METRICS_FOLDER_NAME)
    mkdir(run_dir, FileStore.PARAMS_FOLDER_NAME)
    mkdir(run_dir, FileStore.ARTIFACTS_FOLDER_NAME)
    for tag in tags:
        self.set_tag(run_uuid, tag)
    return self.get_run(run_id=run_uuid)
def to_mlflow_entity(self): """ Convert DB model to corresponding MLflow entity. :return: :py:class:`mlflow.entities.Run`. """ run_info = RunInfo( run_uuid=self.run_uuid, run_id=self.run_uuid, experiment_id=str(self.experiment_id), user_id=self.user_id, status=self.status, start_time=self.start_time, end_time=self.end_time, lifecycle_stage=self.lifecycle_stage, artifact_uri=self.artifact_uri, ) run_data = RunData( metrics=[m.to_mlflow_entity() for m in self.latest_metrics], params=[p.to_mlflow_entity() for p in self.params], tags=[t.to_mlflow_entity() for t in self.tags], ) return Run(run_info=run_info, run_data=run_data)
def mlflow_run(
    name=RUN_NAME,
    status="RUNNING",
    end_time=None,
    lifecycle_stage=LifecycleStage.ACTIVE,
    name_tag=RunTag(MLFLOW_RUN_NAME, RUN_NAME),
    parent_run_id_tag=RunTag(MLFLOW_PARENT_RUN_ID, PARENT_RUN_UUID_HEX_STR),
):
    tags = [MLFLOW_TAG]
    if name_tag is not None:
        tags.append(name_tag)
    if parent_run_id_tag is not None:
        tags.append(parent_run_id_tag)
    data = RunData(params=[MLFLOW_PARAM], metrics=[MLFLOW_METRIC], tags=tags)
    info = RunInfo(
        run_uuid=RUN_UUID_HEX_STR,
        experiment_id=str(EXPERIMENT_ID),
        user_id="",
        status=status,
        start_time=RUN_STARTED_AT_MILLISECONDS,
        end_time=end_time,
        lifecycle_stage=lifecycle_stage,
        artifact_uri=ARTIFACT_LOCATION,
        run_id=RUN_UUID_HEX_STR,
    )
    return Run(info, data)
def create_run(
    run_id="",
    exp_id="",
    uid="",
    start=0,
    end=0,
    metrics=None,
    params=None,
    tags=None,
    status=RunStatus.FINISHED,
    a_uri=None,
):
    return Run(
        RunInfo(
            run_uuid=run_id,
            run_id=run_id,
            experiment_id=exp_id,
            user_id=uid,
            status=status,
            start_time=start,
            end_time=end,
            lifecycle_stage=LifecycleStage.ACTIVE,
            artifact_uri=a_uri,
        ),
        RunData(metrics=metrics, params=params, tags=tags),
    )
def to_mlflow_entity(self): """ Convert DB model to corresponding MLflow entity. :return: :py:class:`mlflow.entities.Run`. """ run_info = RunInfo(run_uuid=self.run_uuid, run_id=self.run_uuid, experiment_id=str(self.experiment_id), user_id=self.user_id, status=self.status, start_time=self.start_time, end_time=self.end_time, lifecycle_stage=self.lifecycle_stage, artifact_uri=self.artifact_uri) # only get latest recorded metrics per key all_metrics = [m.to_mlflow_entity() for m in self.metrics] metrics = {} for m in all_metrics: existing_metric = metrics.get(m.key) if (existing_metric is None)\ or ((m.step, m.timestamp, m.value) >= (existing_metric.step, existing_metric.timestamp, existing_metric.value)): metrics[m.key] = m run_data = RunData(metrics=list(metrics.values()), params=[p.to_mlflow_entity() for p in self.params], tags=[t.to_mlflow_entity() for t in self.tags]) return Run(run_info=run_info, run_data=run_data)
def _read_persisted_run_info_dict(run_info_dict):
    dict_copy = run_info_dict.copy()
    if 'lifecycle_stage' not in dict_copy:
        dict_copy['lifecycle_stage'] = LifecycleStage.ACTIVE
    # 'experiment_id' was changed from int to string, so we must cast to string
    # when reading legacy run_infos
    if isinstance(dict_copy["experiment_id"], int):
        dict_copy["experiment_id"] = str(dict_copy["experiment_id"])
    return RunInfo.from_dictionary(dict_copy)
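# A quick illustration of how the reader above normalizes a legacy dict: an
# integer experiment_id is cast to a string and a missing lifecycle_stage
# defaults to active. Field values here are hypothetical.
legacy_dict = {
    "run_uuid": "abc123", "run_id": "abc123", "experiment_id": 0,
    "user_id": "user-id", "status": "FINISHED",
    "start_time": 0, "end_time": 1, "artifact_uri": None,
}
info = _read_persisted_run_info_dict(legacy_dict)
assert info.experiment_id == "0"
assert info.lifecycle_stage == LifecycleStage.ACTIVE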
def faculty_run_to_mlflow_run(faculty_run):
    lifecycle_stage = (
        LifecycleStage.ACTIVE
        if faculty_run.deleted_at is None
        else LifecycleStage.DELETED
    )
    start_time = _datetime_to_mlflow_timestamp(faculty_run.started_at)
    end_time = (
        _datetime_to_mlflow_timestamp(faculty_run.ended_at)
        if faculty_run.ended_at is not None
        else None
    )

    tag_dict = {tag.key: tag.value for tag in faculty_run.tags}
    extra_mlflow_tags = []

    # Set run name tag if set as attribute but not already a tag
    if MLFLOW_RUN_NAME not in tag_dict and faculty_run.name:
        extra_mlflow_tags.append(RunTag(MLFLOW_RUN_NAME, faculty_run.name))

    # Set parent run ID tag if set as attribute but not already a tag
    if (MLFLOW_PARENT_RUN_ID not in tag_dict
            and faculty_run.parent_run_id is not None):
        extra_mlflow_tags.append(
            RunTag(MLFLOW_PARENT_RUN_ID, faculty_run.parent_run_id.hex)
        )

    run_info = RunInfo(
        run_uuid=faculty_run.id.hex,
        experiment_id=str(faculty_run.experiment_id),
        user_id="",
        status=_FACULTY_TO_MLFLOW_RUN_STATUS_MAP[faculty_run.status],
        start_time=start_time,
        end_time=end_time,
        lifecycle_stage=lifecycle_stage,
        artifact_uri=faculty_run.artifact_location,
        run_id=faculty_run.id.hex,
    )
    run_data = RunData(
        params=[faculty_param_to_mlflow_param(param) for param in faculty_run.params],
        metrics=[faculty_metric_to_mlflow_metric(metric) for metric in faculty_run.metrics],
        tags=[faculty_tag_to_mlflow_tag(tag) for tag in faculty_run.tags]
        + extra_mlflow_tags,
    )
    return Run(run_info, run_data)
def _hit_to_mlflow_run_info(self, hit: Any) -> RunInfo:
    return RunInfo(
        run_uuid=hit.run_id,
        run_id=hit.run_id,
        experiment_id=str(hit.experiment_id),
        user_id=hit.user_id,
        status=hit.status,
        start_time=hit.start_time,
        end_time=getattr(hit, 'end_time', None),
        lifecycle_stage=getattr(hit, 'lifecycle_stage', None),
        artifact_uri=getattr(hit, 'artifact_uri', None))
def test_bad_comparators(entity_type, bad_comparators, key, entity_value):
    run = Run(
        run_info=RunInfo(
            run_uuid="hi", run_id="hi", experiment_id=0, user_id="user-id",
            status=RunStatus.to_string(RunStatus.FAILED),
            start_time=0, end_time=1,
            lifecycle_stage=LifecycleStage.ACTIVE),
        run_data=RunData(metrics=[], params=[], tags=[]))
    for bad_comparator in bad_comparators:
        bad_filter = "{entity_type}.{key} {comparator} {value}".format(
            entity_type=entity_type, key=key,
            comparator=bad_comparator, value=entity_value)
        with pytest.raises(MlflowException) as e:
            SearchUtils.filter([run], bad_filter)
        assert "Invalid comparator" in str(e.value.message)
def _read_persisted_run_info_dict(run_info_dict):
    dict_copy = run_info_dict.copy()
    if 'lifecycle_stage' not in dict_copy:
        dict_copy['lifecycle_stage'] = LifecycleStage.ACTIVE
    # 'status' is stored as an integer enum in the meta file, but
    # RunInfo.status is a string, so convert it before hydrating RunInfo.
    # If no 'status' value was recorded, default to 'RUNNING'.
    dict_copy['status'] = RunStatus.to_string(run_info_dict.get('status', RunStatus.RUNNING))
    # 'experiment_id' was changed from int to string, so we must cast to string
    # when reading legacy run_infos
    if isinstance(dict_copy["experiment_id"], int):
        dict_copy["experiment_id"] = str(dict_copy["experiment_id"])
    return RunInfo.from_dictionary(dict_copy)
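# Unlike the earlier variant, this reader also maps the persisted integer
# status enum to its string name before hydration; for example (4 is
# RunStatus.FAILED, as the string-repr test above shows):
assert RunStatus.to_string(4) == "FAILED"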
def to_mlflow_entity(self) -> Run:
    run_info = RunInfo(run_uuid=self.meta.id,
                       run_id=self.meta.id,
                       experiment_id=str(self.experiment_id),
                       user_id=self.user_id,
                       status=self.status,
                       start_time=self.start_time,
                       end_time=self.end_time,
                       lifecycle_stage=self.lifecycle_stage,
                       artifact_uri=self.artifact_uri)
    run_data = RunData(
        metrics=[m.to_mlflow_entity() for m in self.latest_metrics],
        params=[p.to_mlflow_entity() for p in self.params],
        tags=[t.to_mlflow_entity() for t in self.tags])
    return Run(run_info=run_info, run_data=run_data)
def create_run(self, experiment_id, user_id, run_name, source_type, source_name,
               entry_point_name, start_time, source_version, tags, parent_run_id):
    """
    Creates a run with the specified attributes.
    """
    experiment = self.get_experiment(experiment_id)
    if experiment is None:
        raise MlflowException(
            "Could not create run under experiment with ID %s - no such experiment "
            "exists." % experiment_id,
            databricks_pb2.RESOURCE_DOES_NOT_EXIST)
    if experiment.lifecycle_stage != LifecycleStage.ACTIVE:
        raise MlflowException(
            "Could not create run under non-active experiment with ID "
            "%s." % experiment_id,
            databricks_pb2.INVALID_STATE)
    run_uuid = uuid.uuid4().hex
    artifact_uri = self._get_artifact_dir(experiment_id, run_uuid)
    run_info = RunInfo(run_uuid=run_uuid, experiment_id=experiment_id,
                       name="", artifact_uri=artifact_uri,
                       source_type=source_type, source_name=source_name,
                       entry_point_name=entry_point_name, user_id=user_id,
                       status=RunStatus.RUNNING, start_time=start_time,
                       end_time=None, source_version=source_version,
                       lifecycle_stage=LifecycleStage.ACTIVE)
    # Persist run metadata and create directories for logging metrics,
    # parameters, and artifacts.
    run_dir = self._get_run_dir(run_info.experiment_id, run_info.run_uuid)
    mkdir(run_dir)
    write_yaml(run_dir, FileStore.META_DATA_FILE_NAME,
               _make_persisted_run_info_dict(run_info))
    mkdir(run_dir, FileStore.METRICS_FOLDER_NAME)
    mkdir(run_dir, FileStore.PARAMS_FOLDER_NAME)
    mkdir(run_dir, FileStore.ARTIFACTS_FOLDER_NAME)
    for tag in tags:
        self.set_tag(run_uuid, tag)
    if parent_run_id:
        self.set_tag(run_uuid, RunTag(key=MLFLOW_PARENT_RUN_ID, value=parent_run_id))
    if run_name:
        self.set_tag(run_uuid, RunTag(key=MLFLOW_RUN_NAME, value=run_name))
    return Run(run_info=run_info, run_data=None)
def test_filter_runs_by_start_time():
    runs = [
        Run(
            run_info=RunInfo(
                run_uuid=run_id,
                run_id=run_id,
                experiment_id=0,
                user_id="user-id",
                status=RunStatus.to_string(RunStatus.FINISHED),
                start_time=idx,
                end_time=1,
                lifecycle_stage=LifecycleStage.ACTIVE,
            ),
            run_data=RunData(),
        )
        for idx, run_id in enumerate(["a", "b", "c"])
    ]
    assert SearchUtils.filter(runs, "attribute.start_time >= 0") == runs
    assert SearchUtils.filter(runs, "attribute.start_time > 1") == runs[2:]
    assert SearchUtils.filter(runs, "attribute.start_time = 2") == runs[2:]
def test_get_artifact_repo(artifact_uri, databricks_uri, uri_for_repo):
    with mock.patch(
        "mlflow.tracking._tracking_service.client.TrackingServiceClient.get_run",
        return_value=Run(
            RunInfo("uuid", "expr_id", "userid", "status", 0, 10, "active",
                    artifact_uri=artifact_uri),
            None,
        ),
    ), mock.patch(
        "mlflow.tracking._tracking_service.client.get_artifact_repository",
        return_value=None,
    ) as get_repo_mock:
        client = TrackingServiceClient(databricks_uri)
        client._get_artifact_repo("some-run-id")
        get_repo_mock.assert_called_once_with(uri_for_repo)