def _create_entity(base, model):
    # create a dict of kwargs properties for the entity and return the initialized entity
    config = {}
    for k in base._properties():
        # check if it's an mlflow entity and build it
        obj = getattr(model, k)
        if isinstance(model, SqlRun):
            if base is RunData:
                # Run data contains lists for metrics, params and tags,
                # so obj will be a list and we need to convert those items
                if k == 'metrics':
                    # only keep the latest recorded metric per key
                    metrics = {}
                    for o in obj:
                        if o.key not in metrics or o.timestamp > metrics.get(o.key).timestamp:
                            metrics[o.key] = Metric(o.key, o.value, o.timestamp)
                    obj = metrics.values()
                elif k == 'params':
                    obj = [Param(o.key, o.value) for o in obj]
                elif k == 'tags':
                    obj = [RunTag(o.key, o.value) for o in obj]
            elif base is RunInfo:
                if k == 'source_type':
                    obj = SourceType.from_string(obj)
                elif k == "status":
                    obj = RunStatus.from_string(obj)
        config[k] = obj
    return base(**config)
def test_client_create_run_overrides(mock_store):
    experiment_id = mock.Mock()
    user_id = mock.Mock()
    run_name = mock.Mock()
    start_time = mock.Mock()
    tags = {
        MLFLOW_PARENT_RUN_ID: mock.Mock(),
        MLFLOW_SOURCE_TYPE: SourceType.to_string(SourceType.JOB),
        MLFLOW_SOURCE_NAME: mock.Mock(),
        MLFLOW_PROJECT_ENTRY_POINT: mock.Mock(),
        MLFLOW_GIT_COMMIT: mock.Mock(),
        "other-key": "other-value",
    }
    MlflowClient().create_run(experiment_id, user_id, run_name, start_time, tags)
    mock_store.create_run.assert_called_once_with(
        experiment_id=experiment_id,
        user_id=user_id,
        run_name=run_name,
        start_time=start_time,
        tags=[RunTag(key, value) for key, value in tags.items()],
        parent_run_id=tags[MLFLOW_PARENT_RUN_ID],
        source_type=SourceType.JOB,
        source_name=tags[MLFLOW_SOURCE_NAME],
        entry_point_name=tags[MLFLOW_PROJECT_ENTRY_POINT],
        source_version=tags[MLFLOW_GIT_COMMIT])
def create_run(self, experiment_id, user_id, run_name, source_type, source_name,
               entry_point_name, start_time, source_version, tags, parent_run_id):
    """
    Create a run under the specified experiment ID, setting the run's status to "RUNNING"
    and the start time to the current time.

    :param experiment_id: ID of the experiment for this run
    :param user_id: ID of the user launching this run
    :param source_type: Enum (integer) describing the source of the run

    :return: The created Run object
    """
    tag_protos = [tag.to_proto() for tag in tags]
    req_body = message_to_json(CreateRun(
        experiment_id=experiment_id, user_id=user_id, run_name="",
        source_type=source_type, source_name=source_name,
        entry_point_name=entry_point_name, start_time=start_time,
        source_version=source_version, tags=tag_protos, parent_run_id=parent_run_id))
    response_proto = self._call_endpoint(CreateRun, req_body)
    run = Run.from_proto(response_proto.run)
    if run_name:
        # TODO: optimization: This is making 2 calls to backend store. Include with above call.
        self.set_tag(run.info.run_uuid, RunTag(key=MLFLOW_RUN_NAME, value=run_name))
    return run
def create_run(self, experiment_id, user_id=None, run_name=None, source_type=None,
               source_name=None, entry_point_name=None, start_time=None,
               source_version=None, tags=None):
    """Creates a new :py:class:`mlflow.entities.Run` object, which can be associated with
    metrics, parameters, artifacts, etc.
    Unlike :py:func:`mlflow.projects.run`, does not actually run code, just creates objects.
    Unlike :py:func:`mlflow.start_run`, this does not change the "active run" used by
    :py:func:`mlflow.log_param` and friends.

    :param user_id: If not provided, we will use the current user as a default.
    :param start_time: If not provided, we will use the current timestamp.
    :param tags: A dictionary of key-value pairs which will be converted into RunTag objects.
    :return: :py:class:`mlflow.entities.Run` which was created
    """
    tags = tags if tags else {}
    return self.store.create_run(
        experiment_id=experiment_id,
        user_id=user_id if user_id is not None else _get_user_id(),
        run_name=run_name,
        source_type=source_type,
        source_name=source_name,
        entry_point_name=entry_point_name,
        start_time=start_time or int(time.time() * 1000),
        source_version=source_version,
        tags=[RunTag(key, value) for (key, value) in iteritems(tags)],
    )
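# --- Usage sketch (assumption, not from the source) for the client create_run above ---
# Assumes a configured tracking store and an existing experiment; the experiment ID,
# source name, and tag keys/values are placeholders. The tags dict is turned into
# RunTag entities by create_run itself.
from mlflow.tracking import MlflowClient

client = MlflowClient()
experiment_id = "0"  # placeholder; substitute a real experiment ID
run = client.create_run(
    experiment_id=experiment_id,
    source_name="train.py",
    tags={"team": "ml-platform", "trigger": "nightly"},
)
print(run.info.run_uuid)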
def _create():
    run_uuid = str(uuid.uuid4())
    experiment_id = random_int(10, 2000)
    name = random_str(random_int(10, 40))
    source_type = random_int(1, 4)
    source_name = random_str(random_int(100, 300))
    entry_point_name = random_str(random_int(100, 300))
    user_id = random_str(random_int(10, 25))
    status = random_int(1, 5)
    start_time = random_int(1, 10)
    end_time = start_time + random_int(1, 10)
    source_version = random_str(random_int(10, 40))
    tags = [RunTag(key=random_str(random_int(1, 5)), value=random_str(random_int(1, 5)))
            for _ in range(2)]
    artifact_uri = random_str(random_int(10, 40))
    ri = RunInfo(run_uuid=run_uuid, experiment_id=experiment_id, name=name,
                 source_type=source_type, source_name=source_name,
                 entry_point_name=entry_point_name, user_id=user_id, status=status,
                 start_time=start_time, end_time=end_time, source_version=source_version,
                 tags=tags, artifact_uri=artifact_uri)
    return (ri, run_uuid, experiment_id, name, source_type, source_name, entry_point_name,
            user_id, status, start_time, end_time, source_version, tags, artifact_uri)
def test_log_batch(tracking_uri_mock):
    expected_metrics = {"metric-key0": 1.0, "metric-key1": 4.0}
    expected_params = {"param-key0": "param-val0", "param-key1": "param-val1"}
    exact_expected_tags = {"tag-key0": "tag-val0", "tag-key1": "tag-val1"}
    approx_expected_tags = set([MLFLOW_SOURCE_NAME, MLFLOW_SOURCE_TYPE])

    t = int(time.time())
    metrics = [Metric(key=key, value=value, timestamp=t)
               for key, value in expected_metrics.items()]
    params = [Param(key=key, value=value) for key, value in expected_params.items()]
    tags = [RunTag(key=key, value=value) for key, value in exact_expected_tags.items()]

    active_run = start_run()
    run_uuid = active_run.info.run_uuid
    with active_run:
        mlflow.tracking.MlflowClient().log_batch(run_id=run_uuid, metrics=metrics,
                                                 params=params, tags=tags)
    finished_run = tracking.MlflowClient().get_run(run_uuid)
    # Validate metrics
    assert len(finished_run.data.metrics) == 2
    for key, value in finished_run.data.metrics.items():
        assert expected_metrics[key] == value
    # Validate tags (for automatically-set tags)
    assert len(finished_run.data.tags) == len(exact_expected_tags) + len(approx_expected_tags)
    for tag_key, tag_value in finished_run.data.tags.items():
        if tag_key in approx_expected_tags:
            pass
        else:
            assert exact_expected_tags[tag_key] == tag_value
    # Validate params
    assert finished_run.data.params == expected_params
def create_run(self, experiment_id, start_time=None, tags=None):
    """
    Create a :py:class:`mlflow.entities.Run` object that can be associated with
    metrics, parameters, artifacts, etc.
    Unlike :py:func:`mlflow.projects.run`, creates objects but does not run code.
    Unlike :py:func:`mlflow.start_run`, does not change the "active run" used by
    :py:func:`mlflow.log_param`.

    :param experiment_id: The ID of the experiment to create a run in.
    :param start_time: If not provided, use the current timestamp.
    :param tags: A dictionary of key-value pairs that are converted into
                 :py:class:`mlflow.entities.RunTag` objects.
    :return: :py:class:`mlflow.entities.Run` that was created.
    """
    tags = tags if tags else {}

    # Extract user from tags
    # This logic is temporary; the user_id attribute of runs is deprecated and will be removed
    # in a later release.
    user_id = tags.get(MLFLOW_USER, "unknown")

    return self.store.create_run(
        experiment_id=experiment_id,
        user_id=user_id,
        start_time=start_time or int(time.time() * 1000),
        tags=[RunTag(key, value) for (key, value) in tags.items()],
    )
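# --- Usage sketch (assumption) for the newer create_run signature above ---
# Shows how a caller can pass the user through tags, matching the MLFLOW_USER
# extraction logic in the method. The experiment ID is a placeholder.
from mlflow.tracking import MlflowClient
from mlflow.utils.mlflow_tags import MLFLOW_USER

client = MlflowClient()
run = client.create_run(
    experiment_id="0",  # placeholder
    tags={MLFLOW_USER: "alice", "purpose": "smoke-test"},
)
assert run.info.user_id == "alice"  # user_id is derived from the MLFLOW_USER tag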
def _log_dataset_tag(self, client, run_id, model_uuid):
    """
    Log dataset metadata as a tag "mlflow.datasets". If the tag already exists, append the
    current dataset metadata to the existing tag content.
    """
    existing_dataset_metadata_str = client.get_run(run_id).data.tags.get(
        "mlflow.datasets", "[]"
    )
    dataset_metadata_list = json.loads(existing_dataset_metadata_str)

    for metadata in dataset_metadata_list:
        if (
            metadata["hash"] == self.hash
            and metadata["name"] == self.name
            and metadata["model"] == model_uuid
        ):
            break
    else:
        dataset_metadata_list.append({**self._metadata, "model": model_uuid})

    dataset_metadata_str = json.dumps(dataset_metadata_list, separators=(",", ":"))
    client.log_batch(
        run_id,
        tags=[RunTag("mlflow.datasets", dataset_metadata_str)],
    )
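# --- Illustration (assumption) of the JSON shape kept in the "mlflow.datasets" tag ---
# Field values are made up; "hash", "name", and "model" mirror the keys the method
# above uses to detect duplicates before appending a new entry.
import json

existing = [{"name": "train.csv", "hash": "abc123", "model": "model-uuid-1"}]
new_entry = {"name": "eval.csv", "hash": "def456", "model": "model-uuid-1"}
if not any(
    m["hash"] == new_entry["hash"]
    and m["name"] == new_entry["name"]
    and m["model"] == new_entry["model"]
    for m in existing
):
    existing.append(new_entry)
print(json.dumps(existing, separators=(",", ":")))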
def _create():
    metrics = [Metric(random_str(10), random_int(0, 1000),
                      int(time.time()) + random_int(-1e4, 1e4))]
    params = [Param(random_str(10), random_str(random_int(10, 35))) for _ in range(10)]  # noqa
    tags = [RunTag(random_str(10), random_str(random_int(10, 35))) for _ in range(10)]  # noqa
    rd = RunData(metrics=metrics, params=params, tags=tags)
    return rd, metrics, params, tags
def test_validate_batch_log_limits():
    too_many_metrics = [Metric("metric-key-%s" % i, 1, 0) for i in range(1001)]
    too_many_params = [Param("param-key-%s" % i, "b") for i in range(101)]
    too_many_tags = [RunTag("tag-key-%s" % i, "b") for i in range(101)]

    good_kwargs = {"metrics": [], "params": [], "tags": []}
    bad_kwargs = {
        "metrics": [too_many_metrics],
        "params": [too_many_params],
        "tags": [too_many_tags],
    }
    for arg_name, arg_values in bad_kwargs.items():
        for arg_value in arg_values:
            final_kwargs = copy.deepcopy(good_kwargs)
            final_kwargs[arg_name] = arg_value
            with pytest.raises(MlflowException):
                _validate_batch_log_limits(**final_kwargs)
    # Test the case where there are too many entities in aggregate
    with pytest.raises(MlflowException):
        _validate_batch_log_limits(too_many_metrics[:900], too_many_params[:51],
                                   too_many_tags[:50])
    # Test that we don't reject entities within the limit
    _validate_batch_log_limits(too_many_metrics[:1000], [], [])
    _validate_batch_log_limits([], too_many_params[:100], [])
    _validate_batch_log_limits([], [], too_many_tags[:100])
def to_mlflow_entity(self):
    """
    Convert DB model to corresponding MLflow entity.

    :return: :py:class:`mlflow.entities.RunTag`.
    """
    return RunTag(key=self.key, value=self.value)
def set_tags(tags):
    """
    Log a batch of tags for the current run. If no run is active, this method will create a
    new active run.

    :param tags: Dictionary of tag_name: String -> value: (String, but will be string-ified
                 if not)
    :returns: None

    .. code-block:: python
        :caption: Example

        import mlflow

        tags = {"engineering": "ML Platform",
                "release.candidate": "RC1",
                "release.version": "2.2.0"}

        # Set a batch of tags
        with mlflow.start_run():
            mlflow.set_tags(tags)
    """
    run_id = _get_or_start_run().info.run_id
    tags_arr = [RunTag(key, str(value)) for key, value in tags.items()]
    MlflowClient().log_batch(run_id=run_id, metrics=[], params=[], tags=tags_arr)
def import_run_data(self, run_dct, run_id, src_user_id):
    from mlflow.entities import Metric, Param, RunTag

    now = round(time.time())
    params = [Param(k, v) for k, v in run_dct["params"].items()]
    metrics = [
        Metric(k, v, now, 0) for k, v in run_dct["metrics"].items()
    ]  # TODO: missing timestamp and step semantics?

    tags = run_dct["tags"]
    if not self.import_mlflow_tags:  # remove mlflow tags
        tags = {k: v for k, v in tags.items() if not k.startswith(utils.TAG_PREFIX_MLFLOW)}
    if not self.import_metadata_tags:  # remove mlflow_export_import tags
        tags = {k: v for k, v in tags.items() if not k.startswith(utils.TAG_PREFIX_METADATA)}
    tags = utils.create_mlflow_tags_for_databricks_import(
        tags
    )  # remove "mlflow" tags that cannot be imported into Databricks
    tags = [RunTag(k, str(v)) for k, v in tags.items()]
    # self.dump_tags(tags, "1")  # debug

    if not self.in_databricks:
        utils.set_dst_user_id(tags, src_user_id, self.use_src_user_id)
    # self.dump_tags(tags, "2")  # debug
    self.client.log_batch(run_id, metrics, params, tags)
def set_tag(self, run_id, key, value):
    """
    Set a tag on the run with the specified ID. The value is converted to a string.
    """
    _validate_tag_name(key)
    tag = RunTag(key, str(value))
    self.store.set_tag(run_id, tag)
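# --- Usage sketch (assumption) for set_tag above ---
# Non-string values are stringified before being wrapped in a RunTag; the run ID
# and tag key here are placeholders.
from mlflow.tracking import MlflowClient

client = MlflowClient()
client.set_tag("0123456789abcdef", "release.version", 2.2)  # stored as the string "2.2"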
def test_log_batch_allows_tag_overwrite(self):
    fs = FileStore(self.test_root)
    run = self._create_run(fs)
    fs.log_batch(run.info.run_id, metrics=[], params=[], tags=[RunTag("t-key", "val")])
    fs.log_batch(run.info.run_id, metrics=[], params=[], tags=[RunTag("t-key", "newval")])
    self._verify_logged(fs, run.info.run_id, metrics=[], params=[],
                        tags=[RunTag("t-key", "newval")])
def test_weird_tag_names(self):
    WEIRD_TAG_NAME = "this is/a weird/but valid tag"
    fs = FileStore(self.test_root)
    run_id = self.exp_data[FileStore.DEFAULT_EXPERIMENT_ID]["runs"][0]
    fs.set_tag(run_id, RunTag(WEIRD_TAG_NAME, "Muhahaha!"))
    run = fs.get_run(run_id)
    assert run.data.tags[WEIRD_TAG_NAME] == "Muhahaha!"
def test_unicode_tag(self):
    fs = FileStore(self.test_root)
    run_id = self.exp_data[FileStore.DEFAULT_EXPERIMENT_ID]["runs"][0]
    value = u"𝐼 𝓈𝑜𝓁𝑒𝓂𝓃𝓁𝓎 𝓈𝓌𝑒𝒶𝓇 𝓉𝒽𝒶𝓉 𝐼 𝒶𝓂 𝓊𝓅 𝓉𝑜 𝓃𝑜 𝑔𝑜𝑜𝒹"
    fs.set_tag(run_id, RunTag("message", value))
    tags = fs.get_run(run_id).data.tags
    assert tags["message"] == value
def test_log_batch_internal_error(self):
    # Verify that internal errors during log_batch result in MlflowExceptions
    fs = FileStore(self.test_root)
    run = self._create_run(fs)

    def _raise_exception_fn(*args, **kwargs):  # pylint: disable=unused-argument
        raise Exception("Some internal error")

    with mock.patch("mlflow.store.file_store.FileStore.log_metric") as log_metric_mock, \
            mock.patch("mlflow.store.file_store.FileStore.log_param") as log_param_mock, \
            mock.patch("mlflow.store.file_store.FileStore.set_tag") as set_tag_mock:
        log_metric_mock.side_effect = _raise_exception_fn
        log_param_mock.side_effect = _raise_exception_fn
        set_tag_mock.side_effect = _raise_exception_fn
        for kwargs in [{"metrics": [Metric("a", 3, 1, 0)]},
                       {"params": [Param("b", "c")]},
                       {"tags": [RunTag("c", "d")]}]:
            log_batch_kwargs = {"metrics": [], "params": [], "tags": []}
            log_batch_kwargs.update(kwargs)
            print(log_batch_kwargs)
            with self.assertRaises(MlflowException) as e:
                fs.log_batch(run.info.run_id, **log_batch_kwargs)
            self.assertIn(str(e.exception.message), "Some internal error")
            assert e.exception.error_code == ErrorCode.Name(INTERNAL_ERROR)
def test_log_batch_tags_idempotency(self):
    fs = FileStore(self.test_root)
    run = self._create_run(fs)
    fs.log_batch(run.info.run_id, metrics=[], params=[], tags=[RunTag("t-key", "t-val")])
    fs.log_batch(run.info.run_id, metrics=[], params=[], tags=[RunTag("t-key", "t-val")])
    self._verify_logged(fs, run.info.run_id, metrics=[], params=[],
                        tags=[RunTag("t-key", "t-val")])
def record_logged_model(self, run_id, mlflow_model):
    from mlflow.models import Model

    if not isinstance(mlflow_model, Model):
        raise TypeError(
            "Argument 'mlflow_model' should be mlflow.models.Model, got '{}'".format(
                type(mlflow_model)
            )
        )
    _validate_run_id(run_id)
    run_info = self._get_run_info(run_id)
    check_run_is_active(run_info)
    model_dict = mlflow_model.to_dict()
    run_info = self._get_run_info(run_id)
    path = self._get_tag_path(run_info.experiment_id, run_info.run_id, MLFLOW_LOGGED_MODELS)
    if os.path.exists(path):
        with open(path, "r") as f:
            model_list = json.loads(f.read())
    else:
        model_list = []
    tag = RunTag(MLFLOW_LOGGED_MODELS, json.dumps(model_list + [model_dict]))
    try:
        self._set_run_tag(run_info, tag)
    except Exception as e:
        raise MlflowException(e, INTERNAL_ERROR)
def test_search_runs_data():
    runs = [
        create_run(
            metrics=[Metric("mse", 0.2, 0, 0)],
            params=[Param("param", "value")],
            tags=[RunTag("tag", "value")],
            start=1564675200000,
            end=1564683035000,
        ),
        create_run(
            metrics=[Metric("mse", 0.6, 0, 0), Metric("loss", 1.2, 0, 5)],
            params=[Param("param2", "val"), Param("k", "v")],
            tags=[RunTag("tag2", "v2")],
            start=1564765200000,
            end=1564783200000,
        ),
    ]
    with mock.patch("mlflow.tracking.fluent._paginate", return_value=runs):
        pdf = search_runs()
        data = {
            "status": [RunStatus.FINISHED] * 2,
            "artifact_uri": [None] * 2,
            "run_id": [""] * 2,
            "experiment_id": [""] * 2,
            "metrics.mse": [0.2, 0.6],
            "metrics.loss": [np.nan, 1.2],
            "params.param": ["value", None],
            "params.param2": [None, "val"],
            "params.k": [None, "v"],
            "tags.tag": ["value", None],
            "tags.tag2": [None, "v2"],
            "start_time": [
                pd.to_datetime(1564675200000, unit="ms", utc=True),
                pd.to_datetime(1564765200000, unit="ms", utc=True),
            ],
            "end_time": [
                pd.to_datetime(1564683035000, unit="ms", utc=True),
                pd.to_datetime(1564783200000, unit="ms", utc=True),
            ],
        }
        expected_df = pd.DataFrame(data)
        pd.testing.assert_frame_equal(pdf, expected_df, check_like=True, check_frame_type=False)
def create_run(self, experiment_id, user_id, run_name, source_type, source_name,
               entry_point_name, start_time, source_version, tags, parent_run_id):
    """
    Creates a run with the specified attributes.
    """
    experiment = self.get_experiment(experiment_id)
    if experiment is None:
        raise MlflowException(
            "Could not create run under experiment with ID %s - no such experiment "
            "exists." % experiment_id,
            databricks_pb2.RESOURCE_DOES_NOT_EXIST)
    if experiment.lifecycle_stage != LifecycleStage.ACTIVE:
        raise MlflowException(
            "Could not create run under non-active experiment with ID "
            "%s." % experiment_id,
            databricks_pb2.INVALID_STATE)
    run_uuid = uuid.uuid4().hex
    artifact_uri = self._get_artifact_dir(experiment_id, run_uuid)
    run_info = RunInfo(run_uuid=run_uuid, experiment_id=experiment_id, name="",
                       artifact_uri=artifact_uri, source_type=source_type,
                       source_name=source_name, entry_point_name=entry_point_name,
                       user_id=user_id, status=RunStatus.RUNNING, start_time=start_time,
                       end_time=None, source_version=source_version,
                       lifecycle_stage=LifecycleStage.ACTIVE)
    # Persist run metadata and create directories for logging metrics, parameters, artifacts
    run_dir = self._get_run_dir(run_info.experiment_id, run_info.run_uuid)
    mkdir(run_dir)
    write_yaml(run_dir, FileStore.META_DATA_FILE_NAME, _make_persisted_run_info_dict(run_info))
    mkdir(run_dir, FileStore.METRICS_FOLDER_NAME)
    mkdir(run_dir, FileStore.PARAMS_FOLDER_NAME)
    mkdir(run_dir, FileStore.ARTIFACTS_FOLDER_NAME)
    for tag in tags:
        self.set_tag(run_uuid, tag)
    if parent_run_id:
        self.set_tag(run_uuid, RunTag(key=MLFLOW_PARENT_RUN_ID, value=parent_run_id))
    if run_name:
        self.set_tag(run_uuid, RunTag(key=MLFLOW_RUN_NAME, value=run_name))
    return Run(run_info=run_info, run_data=None)
def test_log_batch_handler_success(mock_get_request_message, mock_get_request_json, tmpdir):
    # Test success cases for the LogBatch API
    def _test_log_batch_helper_success(
            metric_entities, param_entities, tag_entities,
            expected_metrics=None, expected_params=None, expected_tags=None):
        """
        Simulates a LogBatch API request using the provided metrics/params/tags, asserting
        that it succeeds & that the backing store contains either the set of expected
        metrics/params/tags (if provided) or, by default, the metrics/params/tags used in
        the API request.
        """
        with mlflow.start_run() as active_run:
            run_id = active_run.info.run_uuid
            mock_get_request_message.return_value = LogBatch(
                run_id=run_id,
                metrics=[m.to_proto() for m in metric_entities],
                params=[p.to_proto() for p in param_entities],
                tags=[t.to_proto() for t in tag_entities])
            response = _log_batch()
            assert response.status_code == 200
            json_response = json.loads(response.get_data())
            assert json_response == {}
            _assert_logged_entities(
                run_id, expected_metrics or metric_entities,
                expected_params or param_entities, expected_tags or tag_entities)

    store = FileStore(tmpdir.strpath)
    mock_get_request_json.return_value = "{}"  # Mock request JSON so it passes length validation
    server_patch = mock.patch('mlflow.server.handlers._get_store', return_value=store)
    client_patch = mock.patch('mlflow.tracking.utils._get_store', return_value=store)
    with server_patch, client_patch:
        mlflow.set_experiment("log-batch-experiment")
        # Log an empty payload
        _test_log_batch_helper_success([], [], [])
        # Log multiple metrics/params/tags
        _test_log_batch_helper_success(
            metric_entities=[Metric(key="m-key", value=3.2 * i, timestamp=i) for i in range(3)],
            param_entities=[Param(key="p-key-%s" % i, value="p-val-%s" % i) for i in range(4)],
            tag_entities=[RunTag(key="t-key-%s" % i, value="t-val-%s" % i) for i in range(5)])
        # Log metrics with the same key
        _test_log_batch_helper_success(
            metric_entities=[Metric(key="m-key", value=3.2 * i, timestamp=3) for i in range(3)],
            param_entities=[], tag_entities=[])
        # Log tags with the same key, verify the last one gets written
        same_key_tags = [RunTag(key="t-key", value="t-val-%s" % i) for i in range(5)]
        _test_log_batch_helper_success(
            metric_entities=[], param_entities=[], tag_entities=same_key_tags,
            expected_tags=[same_key_tags[-1]])
def test_requestor(self, request):
    response = mock.MagicMock()
    response.status_code = 200
    response.text = '{}'
    request.return_value = response

    creds = MlflowHostCreds('https://hello')
    store = RestStore(lambda: creds)

    with mock.patch('mlflow.store.rest_store.http_request_safe') as mock_http:
        store.log_param("some_uuid", Param("k1", "v1"))
        body = message_to_json(LogParam(run_uuid="some_uuid", key="k1", value="v1"))
        self._verify_requests(mock_http, creds, "runs/log-parameter", "POST", body)

    with mock.patch('mlflow.store.rest_store.http_request_safe') as mock_http:
        store.set_tag("some_uuid", RunTag("t1", "abcd" * 1000))
        body = message_to_json(SetTag(run_uuid="some_uuid", key="t1", value="abcd" * 1000))
        self._verify_requests(mock_http, creds, "runs/set-tag", "POST", body)

    with mock.patch('mlflow.store.rest_store.http_request_safe') as mock_http:
        store.log_metric("u2", Metric("m1", 0.87, 12345))
        body = message_to_json(LogMetric(run_uuid="u2", key="m1", value=0.87, timestamp=12345))
        self._verify_requests(mock_http, creds, "runs/log-metric", "POST", body)

    with mock.patch('mlflow.store.rest_store.http_request_safe') as mock_http:
        store.delete_run("u25")
        self._verify_requests(mock_http, creds, "runs/delete", "POST",
                              message_to_json(DeleteRun(run_id="u25")))

    with mock.patch('mlflow.store.rest_store.http_request_safe') as mock_http:
        store.restore_run("u76")
        self._verify_requests(mock_http, creds, "runs/restore", "POST",
                              message_to_json(RestoreRun(run_id="u76")))

    with mock.patch('mlflow.store.rest_store.http_request_safe') as mock_http:
        store.delete_experiment(0)
        self._verify_requests(mock_http, creds, "experiments/delete", "POST",
                              message_to_json(DeleteExperiment(experiment_id=0)))

    with mock.patch('mlflow.store.rest_store.http_request_safe') as mock_http:
        store.restore_experiment(0)
        self._verify_requests(mock_http, creds, "experiments/restore", "POST",
                              message_to_json(RestoreExperiment(experiment_id=0)))
def _set_tag():
    request_message = _get_request_message(SetTag())
    tag = RunTag(request_message.key, request_message.value)
    _get_store().set_tag(request_message.run_uuid, tag)
    response_message = SetTag.Response()
    response = Response(mimetype='application/json')
    response.set_data(message_to_json(response_message))
    return response
def test_search_runs_data():
    import numpy as np
    import pandas as pd

    runs = [
        create_run(
            metrics=[Metric("mse", 0.2, 0, 0)],
            params=[Param("param", "value")],
            tags=[RunTag("tag", "value")],
            start=1564675200000,
            end=1564683035000,
        ),
        create_run(
            metrics=[Metric("mse", 0.6, 0, 0), Metric("loss", 1.2, 0, 5)],
            params=[Param("param2", "val"), Param("k", "v")],
            tags=[RunTag("tag2", "v2")],
            start=1564765200000,
            end=1564783200000,
        ),
    ]
    with mock.patch("mlflow.tracking.fluent._paginate", return_value=runs):
        pdf = search_runs()
        data = {
            "status": [RunStatus.FINISHED] * 2,
            "artifact_uri": [None] * 2,
            "run_id": [""] * 2,
            "experiment_id": [""] * 2,
            "metrics.mse": [0.2, 0.6],
            "metrics.loss": [np.nan, 1.2],
            "params.param": ["value", None],
            "params.param2": [None, "val"],
            "params.k": [None, "v"],
            "tags.tag": ["value", None],
            "tags.tag2": [None, "v2"],
            "start_time": [
                pd.to_datetime(1564675200000, unit="ms", utc=True),
                pd.to_datetime(1564765200000, unit="ms", utc=True),
            ],
            "end_time": [
                pd.to_datetime(1564683035000, unit="ms", utc=True),
                pd.to_datetime(1564783200000, unit="ms", utc=True),
            ],
        }
        validate_search_runs(pdf, data, "pandas")
def faculty_run_to_mlflow_run(faculty_run):
    lifecycle_stage = (LifecycleStage.ACTIVE
                       if faculty_run.deleted_at is None
                       else LifecycleStage.DELETED)
    start_time = _datetime_to_mlflow_timestamp(faculty_run.started_at)
    end_time = (_datetime_to_mlflow_timestamp(faculty_run.ended_at)
                if faculty_run.ended_at is not None else None)

    tag_dict = {tag.key: tag.value for tag in faculty_run.tags}

    extra_mlflow_tags = []
    # Set run name tag if set as attribute but not already a tag
    if MLFLOW_RUN_NAME not in tag_dict and faculty_run.name:
        extra_mlflow_tags.append(RunTag(MLFLOW_RUN_NAME, faculty_run.name))
    # Set parent run ID tag if set as attribute but not already a tag
    if (MLFLOW_PARENT_RUN_ID not in tag_dict
            and faculty_run.parent_run_id is not None):
        extra_mlflow_tags.append(
            RunTag(MLFLOW_PARENT_RUN_ID, faculty_run.parent_run_id.hex))

    run_info = RunInfo(
        run_uuid=faculty_run.id.hex,
        experiment_id=str(faculty_run.experiment_id),
        user_id="",
        status=_FACULTY_TO_MLFLOW_RUN_STATUS_MAP[faculty_run.status],
        start_time=start_time,
        end_time=end_time,
        lifecycle_stage=lifecycle_stage,
        artifact_uri=faculty_run.artifact_location,
        run_id=faculty_run.id.hex,
    )
    run_data = RunData(
        params=[faculty_param_to_mlflow_param(param) for param in faculty_run.params],
        metrics=[faculty_metric_to_mlflow_metric(metric) for metric in faculty_run.metrics],
        tags=[faculty_tag_to_mlflow_tag(tag) for tag in faculty_run.tags] + extra_mlflow_tags,
    )
    run = Run(run_info, run_data)
    return run
def test_weird_tag_names(self):
    WEIRD_TAG_NAME = "this is/a weird/but valid tag"
    fs = FileStore(self.test_root)
    run_uuid = self.exp_data[0]["runs"][0]
    fs.set_tag(run_uuid, RunTag(WEIRD_TAG_NAME, "Muhahaha!"))
    tag = fs.get_run(run_uuid).data.tags[0]
    assert tag.key == WEIRD_TAG_NAME
    assert tag.value == "Muhahaha!"
def test_unicode_tag(self):
    fs = FileStore(self.test_root)
    run_uuid = self.exp_data[0]["runs"][0]
    value = u"𝐼 𝓈𝑜𝓁𝑒𝓂𝓃𝓁𝓎 𝓈𝓌𝑒𝒶𝓇 𝓉𝒽𝒶𝓉 𝐼 𝒶𝓂 𝓊𝓅 𝓉𝑜 𝓃𝑜 𝑔𝑜𝑜𝒹"
    fs.set_tag(run_uuid, RunTag("message", value))
    tag = fs.get_run(run_uuid).data.tags[0]
    assert tag.key == "message"
    assert tag.value == value
def create_tags_for_mlflow_tags(tags_dct, import_mlflow_tags):
    from mlflow.entities import RunTag

    tags = []
    for k, v in tags_dct.items():
        if not import_mlflow_tags and k.startswith("mlflow."):
            k = PREFIX_SRC_RUN + "." + k
        tags.append(RunTag(k, str(v)))
    return tags
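# --- Illustration (assumption) of the prefixing behaviour of create_tags_for_mlflow_tags ---
# With import_mlflow_tags=False, reserved "mlflow." keys are re-prefixed with
# PREFIX_SRC_RUN (whatever constant the surrounding module defines) so they can be
# imported without colliding with system tags; other keys pass through unchanged.
tags = create_tags_for_mlflow_tags(
    {"mlflow.runName": "baseline", "team": "platform"},
    import_mlflow_tags=False,
)
for t in tags:
    print(t.key, "=", t.value)
# Expected output (with the module's PREFIX_SRC_RUN substituted):
#   <PREFIX_SRC_RUN>.mlflow.runName = baseline
#   team = platform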