def test_log_batch(mlflow_client, backend_store_uri):
    experiment_id = mlflow_client.create_experiment('Batch em up')
    created_run = mlflow_client.create_run(experiment_id)
    run_id = created_run.info.run_id
    mlflow_client.log_batch(
        run_id=run_id,
        metrics=[Metric("metric", 123.456, 789, 3)],
        params=[Param("param", "value")],
        tags=[RunTag("taggity", "do-dah")])
    run = mlflow_client.get_run(run_id)
    assert run.data.metrics.get('metric') == 123.456
    assert run.data.params.get('param') == 'value'
    assert run.data.tags.get('taggity') == 'do-dah'
    metric_history = mlflow_client.get_metric_history(run_id, "metric")
    assert len(metric_history) == 1
    metric = metric_history[0]
    assert metric.key == "metric"
    assert metric.value == 123.456
    assert metric.timestamp == 789
    assert metric.step == 3
def test_log_batch(tracking_uri_mock):
    expected_metrics = {"metric-key0": 1.0, "metric-key1": 4.0}
    expected_params = {"param-key0": "param-val0", "param-key1": "param-val1"}
    exact_expected_tags = {"tag-key0": "tag-val0", "tag-key1": "tag-val1"}
    approx_expected_tags = set([MLFLOW_SOURCE_NAME, MLFLOW_SOURCE_TYPE])
    t = int(time.time())
    metrics = [
        Metric(key=key, value=value, timestamp=t)
        for key, value in expected_metrics.items()
    ]
    params = [
        Param(key=key, value=value) for key, value in expected_params.items()
    ]
    tags = [
        RunTag(key=key, value=value)
        for key, value in exact_expected_tags.items()
    ]
    active_run = start_run()
    run_uuid = active_run.info.run_uuid
    with active_run:
        mlflow.tracking.MlflowClient().log_batch(
            run_id=run_uuid, metrics=metrics, params=params, tags=tags)
    finished_run = tracking.MlflowClient().get_run(run_uuid)
    # Validate metrics
    assert len(finished_run.data.metrics) == 2
    for metric in finished_run.data.metrics:
        assert expected_metrics[metric.key] == metric.value
    # Validate tags (for automatically-set tags)
    assert len(finished_run.data.tags) == len(exact_expected_tags) + len(approx_expected_tags)
    for tag in finished_run.data.tags:
        if tag.key in approx_expected_tags:
            pass
        else:
            assert exact_expected_tags[tag.key] == tag.value
    # Validate params
    assert len(finished_run.data.params) == 2
    for param in finished_run.data.params:
        assert expected_params[param.key] == param.value
def _log_batch():
    _validate_batch_log_api_req(_get_request_json())
    request_message = _get_request_message(LogBatch())
    metrics = [
        Metric.from_proto(proto_metric) for proto_metric in request_message.metrics
    ]
    params = [
        Param.from_proto(proto_param) for proto_param in request_message.params
    ]
    tags = [RunTag.from_proto(proto_tag) for proto_tag in request_message.tags]
    _get_tracking_store().log_batch(
        run_id=request_message.run_id, metrics=metrics, params=params, tags=tags)
    response_message = LogBatch.Response()
    response = Response(mimetype='application/json')
    response.set_data(message_to_json(response_message))
    return response
def test_list_all_columns_big(init_store):
    new_tags = []
    new_tags_key = []
    for i in range(100):
        new_tags.append(RunTag(f'my_tag{i}', f'val{i}'))
        new_tags_key.append(f'my_tag{i}')
    init_store.log_batch("7b2e71956f3d4c08b042624a8d83700d",
                         metrics=[], params=[], tags=new_tags)
    new_tags_key.sort()
    # Wait for Elasticsearch refresh for search
    time.sleep(2)
    expected_columns = Columns(
        metrics=["big_metric", "inf_metric", "metric0", "metric1", "metric_batch1",
                 "metric_batch2", "nan_metric", "negative_inf_metric", "new_metric"],
        params=["new_param", "param0", "param1", "param2", "param3",
                "param_batch1", "param_batch2"],
        tags=[*new_tags_key, *["new_tag", "tag0", "tag1", "tag2", "tag3",
                               "tag_batch1", "tag_batch2"]])
    actual_columns = init_store.list_all_columns("hTb553MBNoOYfhXjnnQh", ViewType.ACTIVE_ONLY)
    assert expected_columns.__dict__ == actual_columns.__dict__
def _create_entity(base, model):
    # create dict of kwargs properties for entity and return the initialized entity
    config = {}
    for k in base._properties():
        # check if its mlflow entity and build it
        obj = getattr(model, k)
        if isinstance(model, SqlRun):
            if base is RunData:
                # Run data contains lists for metrics, params and tags,
                # so obj will be a list and we need to convert those items
                if k == 'metrics':
                    # only get latest recorded metrics per key
                    metrics = {}
                    for o in obj:
                        existing_metric = metrics.get(o.key)
                        if (existing_metric is None) \
                                or ((o.step, o.timestamp, o.value) >=
                                    (existing_metric.step, existing_metric.timestamp,
                                     existing_metric.value)):
                            metrics[o.key] = Metric(o.key, o.value, o.timestamp, o.step)
                    obj = list(metrics.values())
                elif k == 'params':
                    obj = [Param(o.key, o.value) for o in obj]
                elif k == 'tags':
                    obj = [RunTag(o.key, o.value) for o in obj]
            elif base is RunInfo:
                if k == 'source_type':
                    obj = SourceType.from_string(obj)
                elif k == "status":
                    obj = RunStatus.from_string(obj)
                elif k == "experiment_id":
                    obj = str(obj)
        # Our data model defines experiment_ids as ints, but the in-memory representation was
        # changed to be a string in time for 1.0.
        if isinstance(model, SqlExperiment) and k == "experiment_id":
            obj = str(obj)
        config[k] = obj
    return base(**config)
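# Illustrative sketch (not mlflow source) of the "latest metric per key" selection used
# above, with a hypothetical Row namedtuple standing in for SqlMetric rows.
from collections import namedtuple

Row = namedtuple("Row", ["key", "value", "timestamp", "step"])  # assumed stand-in for SqlMetric

rows = [
    Row("loss", 0.9, 100, 0),
    Row("loss", 0.5, 200, 1),
    Row("acc", 0.7, 100, 0),
]

latest = {}
for r in rows:
    current = latest.get(r.key)
    # keep the row with the largest (step, timestamp, value), mirroring the tie-break above
    if current is None or (r.step, r.timestamp, r.value) >= (current.step, current.timestamp, current.value):
        latest[r.key] = r

assert latest["loss"].value == 0.5  # the later step wins for "loss"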
def _create_run():
    request_message = _get_request_message(CreateRun())
    tags = [RunTag(tag.key, tag.value) for tag in request_message.tags]
    run = _get_store().create_run(
        experiment_id=request_message.experiment_id,
        user_id=request_message.user_id,
        run_name=request_message.run_name,
        source_type=request_message.source_type,
        source_name=request_message.source_name,
        entry_point_name=request_message.entry_point_name,
        start_time=request_message.start_time,
        source_version=request_message.source_version,
        tags=tags)
    response_message = CreateRun.Response()
    response_message.run.MergeFrom(run.to_proto())
    response = Response(mimetype='application/json')
    response.set_data(message_to_json(response_message))
    return response
def test_faculty_run_to_mlflow_run_name_backwards_compatability(
    faculty_attribute, faculty_tag_value, mlflow_attribute, mlflow_tag_value
):
    """Test logic setting run name tag when not available."""
    if faculty_tag_value is None:
        extra_tags = []
    else:
        extra_tags = [FacultyTag(MLFLOW_RUN_NAME, faculty_tag_value)]
    faculty_run = FACULTY_RUN._replace(
        name=faculty_attribute, tags=FACULTY_RUN.tags + extra_tags
    )
    if mlflow_tag_value is None:
        name_tag = None
    else:
        name_tag = RunTag(MLFLOW_RUN_NAME, mlflow_tag_value)
    expected_run = mlflow_run(name=mlflow_attribute, name_tag=name_tag)
    assert run_equals(faculty_run_to_mlflow_run(faculty_run), expected_run)
def test_faculty_run_to_mlflow_run_parent_run_id_backwards_compatability(
    faculty_attribute, faculty_tag_value, mlflow_tag_value
):
    """Test logic setting parent run ID tag when not available."""
    if faculty_tag_value is None:
        extra_tags = []
    else:
        extra_tags = [FacultyTag(MLFLOW_PARENT_RUN_ID, faculty_tag_value)]
    faculty_run = FACULTY_RUN._replace(
        parent_run_id=faculty_attribute, tags=FACULTY_RUN.tags + extra_tags
    )
    if mlflow_tag_value is None:
        parent_run_id_tag = None
    else:
        parent_run_id_tag = RunTag(MLFLOW_PARENT_RUN_ID, mlflow_tag_value)
    expected_run = mlflow_run(parent_run_id_tag=parent_run_id_tag)
    assert run_equals(faculty_run_to_mlflow_run(faculty_run), expected_run)
def test_log_batch(tracking_uri_mock, tmpdir):
    expected_metrics = {"metric-key0": 1.0, "metric-key1": 4.0}
    expected_params = {"param-key0": "param-val0", "param-key1": "param-val1"}
    exact_expected_tags = {"tag-key0": "tag-val0", "tag-key1": "tag-val1"}
    approx_expected_tags = set([MLFLOW_SOURCE_NAME, MLFLOW_SOURCE_TYPE])
    t = int(time.time())
    sorted_expected_metrics = sorted(expected_metrics.items(), key=lambda kv: kv[0])
    metrics = [Metric(key=key, value=value, timestamp=t, step=i)
               for i, (key, value) in enumerate(sorted_expected_metrics)]
    params = [Param(key=key, value=value) for key, value in expected_params.items()]
    tags = [RunTag(key=key, value=value) for key, value in exact_expected_tags.items()]
    with start_run() as active_run:
        run_id = active_run.info.run_id
        mlflow.tracking.MlflowClient().log_batch(
            run_id=run_id, metrics=metrics, params=params, tags=tags)
    client = tracking.MlflowClient()
    finished_run = client.get_run(run_id)
    # Validate metrics
    assert len(finished_run.data.metrics) == 2
    for key, value in finished_run.data.metrics.items():
        assert expected_metrics[key] == value
    metric_history0 = client.get_metric_history(run_id, "metric-key0")
    assert set([(m.value, m.timestamp, m.step) for m in metric_history0]) == set([
        (1.0, t, 0),
    ])
    metric_history1 = client.get_metric_history(run_id, "metric-key1")
    assert set([(m.value, m.timestamp, m.step) for m in metric_history1]) == set([
        (4.0, t, 1),
    ])
    # Validate tags (for automatically-set tags)
    assert len(finished_run.data.tags) == len(exact_expected_tags) + len(approx_expected_tags)
    for tag_key, tag_value in finished_run.data.tags.items():
        if tag_key in approx_expected_tags:
            pass
        else:
            assert exact_expected_tags[tag_key] == tag_value
    # Validate params
    assert finished_run.data.params == expected_params
def test_log_batch(mlflow_client):
    experiment_id = mlflow_client.create_experiment('Batch em up')
    created_run = mlflow_client.create_run(experiment_id)
    run_id = created_run.info.run_uuid
    # TODO(sid): pass and assert on step
    mlflow_client.log_batch(run_id=run_id,
                            metrics=[Metric("metric", 123.456, 789, 0)],
                            params=[Param("param", "value")],
                            tags=[RunTag("taggity", "do-dah")])
    run = mlflow_client.get_run(run_id)
    assert run.data.metrics.get('metric') == 123.456
    assert run.data.params.get('param') == 'value'
    assert run.data.tags.get('taggity') == 'do-dah'
    # TODO(sid): replace this with mlflow_client.get_metric_history
    fs = FileStore(server_root_dir)
    metric_history = fs.get_metric_history(run_id, "metric")
    assert len(metric_history) == 1
    metric = metric_history[0]
    assert metric.key == "metric"
    assert metric.value == 123.456
    assert metric.timestamp == 789
def test_log_batch_internal_error(self):
    # Verify that internal errors during the DB save step for log_batch result in
    # MlflowExceptions
    run = self._run_factory()

    def _raise_exception_fn(*args, **kwargs):  # pylint: disable=unused-argument
        raise Exception("Some internal error")

    with mock.patch("mlflow.store.sqlalchemy_store.SqlAlchemyStore.log_metric") as metric_mock, \
            mock.patch("mlflow.store.sqlalchemy_store.SqlAlchemyStore.log_param") as param_mock, \
            mock.patch("mlflow.store.sqlalchemy_store.SqlAlchemyStore.set_tag") as tags_mock:
        metric_mock.side_effect = _raise_exception_fn
        param_mock.side_effect = _raise_exception_fn
        tags_mock.side_effect = _raise_exception_fn
        for kwargs in [{"metrics": [Metric("a", 3, 1)]}, {"params": [Param("b", "c")]},
                       {"tags": [RunTag("c", "d")]}]:
            log_batch_kwargs = {"metrics": [], "params": [], "tags": []}
            log_batch_kwargs.update(kwargs)
            with self.assertRaises(MlflowException) as e:
                self.store.log_batch(run.info.run_uuid, **log_batch_kwargs)
            self.assertIn(str(e.exception.message), "Some internal error")
def _create_entity(base, model):
    # create dict of kwargs properties for entity and return the initialized entity
    config = {}
    for k in base._properties():
        # check if its mlflow entity and build it
        obj = getattr(model, k)
        # Run data contains lists for metrics, params and tags,
        # so obj will be a list and we need to convert those items
        if k == 'metrics':
            obj = [Metric(o.key, o.value, o.timestamp) for o in obj]
        if k == 'params':
            obj = [Param(o.key, o.value) for o in obj]
        if k == 'tags':
            obj = [RunTag(o.key, o.value) for o in obj]
        config[k] = obj
    return base(**config)
def _generate_run(self, i, runs_dict):
    """
    Generate a run object and save it to runs_dict keyed by run_id.
    Most of the data just depends on i, and some data are hard-coded for simplicity.
    """
    key = f"key{i}"
    value = f"value{i}"
    start_time = 123456 * i
    end_time = start_time + (1000 * i)
    run_id = f"run_id_{i}"
    metrics = [Metric(key, value, start_time, "stage")]
    params = [Param(key, value)]
    tags = [RunTag(key, value)]
    run_info = RunInfo(run_id, "experiment_id", "user_id", "status",
                       start_time, end_time, "lifecycle_stage")
    run = Run(run_info=run_info,
              run_data=RunData(metrics=metrics, params=params, tags=tags))
    runs_dict[run_id] = run
    return run
def test_create_run_returns_expected_run_data(self):
    fs = FileStore(self.test_root)
    no_tags_run = fs.create_run(
        experiment_id=FileStore.DEFAULT_EXPERIMENT_ID,
        user_id='user',
        start_time=0,
        tags=[])
    assert isinstance(no_tags_run.data, RunData)
    assert len(no_tags_run.data.tags) == 0

    tags_dict = {
        "my_first_tag": "first",
        "my-second-tag": "2nd",
    }
    tags_entities = [
        RunTag(key, value) for key, value in tags_dict.items()
    ]
    tags_run = fs.create_run(
        experiment_id=FileStore.DEFAULT_EXPERIMENT_ID,
        user_id='user',
        start_time=0,
        tags=tags_entities)
    assert isinstance(tags_run.data, RunData)
    assert tags_run.data.tags == tags_dict
def _log_dataset_tag(self, client, run_id):
    """
    Log dataset metadata as the "mlflow.datasets" tag. If the tag already exists,
    append the current dataset metadata to the existing tag content.
    """
    existing_dataset_metadata_str = client.get_run(run_id).data.tags.get(
        "mlflow.datasets", "[]")
    dataset_metadata_list = json.loads(existing_dataset_metadata_str)
    for metadata in dataset_metadata_list:
        if metadata["hash"] == self.hash and metadata["name"] == self._user_specified_name:
            break
    else:
        dataset_metadata_list.append(self._metadata)
    dataset_metadata_str = json.dumps(dataset_metadata_list, separators=(",", ":"))
    client.log_batch(
        run_id,
        tags=[RunTag("mlflow.datasets", dataset_metadata_str)],
    )
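# Hedged sketch (assumed names and values, not library code) of how the "mlflow.datasets"
# tag accumulates entries: a new entry is appended only when no existing entry has the
# same hash and name.
import json

existing = '[{"name":"train.csv","hash":"abc123"}]'   # hypothetical current tag value
new_entry = {"name": "eval.csv", "hash": "def456"}    # hypothetical new dataset metadata

entries = json.loads(existing or "[]")
if not any(e["hash"] == new_entry["hash"] and e["name"] == new_entry["name"] for e in entries):
    entries.append(new_entry)

updated_tag_value = json.dumps(entries, separators=(",", ":"))
print(updated_tag_value)  # both entries, compactly serialized as the new tag value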
def _create():
    metrics = [
        Metric(random_str(10), random_int(0, 1000),
               int(time.time() + random_int(-1e4, 1e4)))
        for _ in range(100)
    ]
    params = [
        Param(random_str(10), random_str(random_int(10, 35))) for _ in range(10)
    ]  # noqa
    tags = [
        RunTag(random_str(10), random_str(random_int(10, 35))) for _ in range(10)
    ]  # noqa
    rd = RunData()
    for p in params:
        rd._add_param(p)
    for m in metrics:
        rd._add_metric(m)
    for t in tags:
        rd._add_tag(t)
    return rd, metrics, params, tags
def create_run(self, experiment_id, user_id=None, run_name=None, source_type=None,
               source_name=None, entry_point_name=None, start_time=None,
               source_version=None, tags=None, parent_run_id=None):
    """
    Create a :py:class:`mlflow.entities.Run` object that can be associated with
    metrics, parameters, artifacts, etc.
    Unlike :py:func:`mlflow.projects.run`, creates objects but does not run code.
    Unlike :py:func:`mlflow.start_run`, does not change the "active run" used by
    :py:func:`mlflow.log_param`.

    :param user_id: If not provided, use the current user as a default.
    :param start_time: If not provided, use the current timestamp.
    :param tags: A dictionary of key-value pairs that are converted into
                 :py:class:`mlflow.entities.RunTag` objects.
    :return: :py:class:`mlflow.entities.Run` that was created.
    """
    tags = tags if tags else {}
    return self.store.create_run(
        experiment_id=experiment_id,
        user_id=user_id if user_id is not None else _get_user_id(),
        run_name=run_name,
        source_type=source_type if source_type is not None else SourceType.LOCAL,
        source_name=source_name if source_name is not None else "Python Application",
        entry_point_name=entry_point_name,
        start_time=start_time or int(time.time() * 1000),
        source_version=source_version,
        tags=[RunTag(key, value) for (key, value) in iteritems(tags)],
        parent_run_id=parent_run_id,
    )
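# Hedged usage sketch for the client method above (illustrative experiment ID and tag
# values): plain key/value pairs are converted into RunTag entities before the run is
# created, and no code is executed for the run.
from mlflow.tracking import MlflowClient

client = MlflowClient()
run = client.create_run(
    experiment_id="0",  # assumed default experiment
    tags={"team": "data-science", "job": "nightly"},  # converted to RunTag objects
)
print(run.info.run_uuid)  # ID of the newly created run, in RUNNING status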
def test_log_batch_internal_error(self):
    # Verify that internal errors during log_batch result in MlflowExceptions
    fs = FileStore(self.test_root)
    run = self._create_run(fs)

    def _raise_exception_fn(*args, **kwargs):  # pylint: disable=unused-argument
        raise Exception("Some internal error")

    with mock.patch("mlflow.store.file_store.FileStore.log_metric") as log_metric_mock, \
            mock.patch("mlflow.store.file_store.FileStore.log_param") as log_param_mock, \
            mock.patch("mlflow.store.file_store.FileStore.set_tag") as set_tag_mock:
        log_metric_mock.side_effect = _raise_exception_fn
        log_param_mock.side_effect = _raise_exception_fn
        set_tag_mock.side_effect = _raise_exception_fn
        for kwargs in [{"metrics": [Metric("a", 3, 1)]}, {"params": [Param("b", "c")]},
                       {"tags": [RunTag("c", "d")]}]:
            log_batch_kwargs = {"metrics": [], "params": [], "tags": []}
            log_batch_kwargs.update(kwargs)
            print(log_batch_kwargs)
            with self.assertRaises(MlflowException) as e:
                fs.log_batch(run.info.run_uuid, **log_batch_kwargs)
            self.assertIn(str(e.exception.message), "Some internal error")
            assert e.exception.error_code == ErrorCode.Name(INTERNAL_ERROR)
def create_run(self, experiment_id, user_id, run_name, source_type, source_name,
               entry_point_name, start_time, source_version, tags, parent_run_id):
    """
    Creates a run under the specified experiment ID, setting the run's status to
    "RUNNING" and the start time to the current time.

    :param experiment_id: ID of the experiment for this run
    :param user_id: ID of the user launching this run
    :param source_type: Enum (integer) describing the source of the run
    :return: The created Run object
    """
    tag_protos = [tag.to_proto() for tag in tags]
    req_body = message_to_json(CreateRun(
        experiment_id=experiment_id, user_id=user_id, run_name="",
        source_type=source_type, source_name=source_name,
        entry_point_name=entry_point_name, start_time=start_time,
        source_version=source_version, tags=tag_protos,
        parent_run_id=parent_run_id))
    response_proto = self._call_endpoint(CreateRun, req_body)
    run = Run.from_proto(response_proto.run)
    if run_name:
        self.set_tag(run.info.run_uuid, RunTag(key=MLFLOW_RUN_NAME, value=run_name))
    return run
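# Hedged sketch of the entity/proto round trip that the REST store relies on; it only
# illustrates to_proto/from_proto for RunTag, not the HTTP call itself.
from mlflow.entities import RunTag

tag = RunTag("mlflow.runName", "my-run")  # illustrative key/value
proto = tag.to_proto()                    # protobuf message embedded in the CreateRun body
roundtrip = RunTag.from_proto(proto)      # reconstructed entity on the receiving side

assert roundtrip.key == tag.key and roundtrip.value == tag.value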
def create_run(self, experiment_id, user_id, run_name, source_type, source_name,
               entry_point_name, start_time, source_version, tags):
    """
    Creates a run with the specified attributes.
    """
    if self.get_experiment(experiment_id) is None:
        raise Exception(
            "Could not create run under experiment with ID %s - no such experiment "
            "exists." % experiment_id)
    run_uuid = uuid.uuid4().hex
    artifact_uri = self._get_artifact_dir(experiment_id, run_uuid)
    run_info = RunInfo(run_uuid=run_uuid, experiment_id=experiment_id, name="",
                       artifact_uri=artifact_uri, source_type=source_type,
                       source_name=source_name, entry_point_name=entry_point_name,
                       user_id=user_id, status=RunStatus.RUNNING,
                       start_time=start_time, end_time=None,
                       source_version=source_version,
                       lifecycle_stage=RunInfo.ACTIVE_LIFECYCLE)
    # Persist run metadata and create directories for logging metrics, parameters, artifacts
    run_dir = self._get_run_dir(run_info.experiment_id, run_info.run_uuid)
    mkdir(run_dir)
    write_yaml(run_dir, FileStore.META_DATA_FILE_NAME, _make_persisted_run_info_dict(run_info))
    mkdir(run_dir, FileStore.METRICS_FOLDER_NAME)
    mkdir(run_dir, FileStore.PARAMS_FOLDER_NAME)
    mkdir(run_dir, FileStore.ARTIFACTS_FOLDER_NAME)
    for tag in tags:
        self.set_tag(run_uuid, tag)
    if run_name:
        self.set_tag(run_uuid, RunTag(key=MLFLOW_RUN_NAME, value=run_name))
    return Run(run_info=run_info, run_data=None)
def import_run_data(self, run_dct, run_id, src_user_id):
    from mlflow.entities import Metric, Param, RunTag
    now = round(time.time())
    params = [Param(k, v) for k, v in run_dct['params'].items()]
    metrics = [
        Metric(k, v, now, 0) for k, v in run_dct['metrics'].items()
    ]  # TODO: missing timestamp and step semantics?
    tags = run_dct['tags']
    if not self.import_mlflow_tags:  # remove mlflow tags
        keys = [k for k in tags.keys() if k.startswith("mlflow.")]
        for k in keys:
            tags.pop(k)
    if not self.import_mlflow_tools_tags:  # remove mlflow_tools tags
        keys = [k for k in tags.keys() if k.startswith("mlflow_tools.")]
        for k in keys:
            tags.pop(k)
    tags = [RunTag(k, str(v)) for k, v in tags.items()]
    if not self.in_databricks:
        utils.set_dst_user_id(tags, src_user_id, self.use_src_user_id)
    # self.dump_tags(tags)
    self.client.log_batch(run_id, metrics, params, tags)
def _create_run(self, run):
    args = {"run_name": run.run_name} if run.run_name else {}
    with mlflow.start_run(**args) as active_run:
        params = [Param(k, v) for k, v in run.params.items()]
        metrics = [
            Metric(k, v, int(time.time()), 0) for k, v in run.metrics.items()
        ]  # TODO: timestamp and step?
        tags = [RunTag(k, v) for k, v in run.tags.items()]
        mlflow.tracking.MlflowClient().log_batch(active_run.info.run_id, metrics, params, tags)
        if self.do_tag:
            mlflow.set_tag("replayed", "true")
        for m in run.models:
            if m.model:
                m.log_model_func(m.model, m.model_name)
        for a in run.artifacts:
            if a.artifact_local_path:
                with open(a.artifact_local_path, "wb") as f:
                    f.write(a.artifact_bytes)
                mlflow.log_artifact(a.artifact_local_path, a.artifact_path)
        return active_run.info.run_id
def record_logged_model(self, run_id, mlflow_model):
    if not isinstance(mlflow_model, Model):
        raise TypeError(
            "Argument 'mlflow_model' should be mlflow.models.Model, got '{}'"
            .format(type(mlflow_model)))
    _validate_run_id(run_id)
    run_info = self._get_run_info(run_id)
    check_run_is_active(run_info)
    model_dict = mlflow_model.to_dict()
    run_info = self._get_run_info(run_id)
    path = self._get_tag_path(run_info.experiment_id, run_info.run_id, MLFLOW_LOGGED_MODELS)
    if os.path.exists(path):
        with open(path, "r") as f:
            model_list = json.loads(f.read())
    else:
        model_list = []
    tag = RunTag(MLFLOW_LOGGED_MODELS, json.dumps(model_list + [model_dict]))
    try:
        self._set_run_tag(run_info, tag)
    except Exception as e:
        raise MlflowException(e, INTERNAL_ERROR)
def test_search_tags(self):
    fs = FileStore(self.test_root)
    experiment_id = self.experiments[0]
    r1 = fs.create_run(
        experiment_id, 'user', 'name', 'source_type', 'source_name',
        'entry_point_name', 0, None, [], None).info.run_uuid
    r2 = fs.create_run(
        experiment_id, 'user', 'name', 'source_type', 'source_name',
        'entry_point_name', 0, None, [], None).info.run_uuid
    fs.set_tag(r1, RunTag('generic_tag', 'p_val'))
    fs.set_tag(r2, RunTag('generic_tag', 'p_val'))
    fs.set_tag(r1, RunTag('generic_2', 'some value'))
    fs.set_tag(r2, RunTag('generic_2', 'another value'))
    fs.set_tag(r1, RunTag('p_a', 'abc'))
    fs.set_tag(r2, RunTag('p_b', 'ABC'))
    # test search returns both runs
    six.assertCountEqual(self, [r1, r2],
                         self._search(fs, experiment_id,
                                      filter_str="tags.generic_tag = 'p_val'"))
    # test search returns appropriate run (same key different values per run)
    six.assertCountEqual(self, [r1],
                         self._search(fs, experiment_id,
                                      filter_str="tags.generic_2 = 'some value'"))
    six.assertCountEqual(self, [r2],
                         self._search(fs, experiment_id,
                                      filter_str="tags.generic_2='another value'"))
    six.assertCountEqual(self, [],
                         self._search(fs, experiment_id,
                                      filter_str="tags.generic_tag = 'wrong_val'"))
    six.assertCountEqual(self, [],
                         self._search(fs, experiment_id,
                                      filter_str="tags.generic_tag != 'p_val'"))
    six.assertCountEqual(self, [r1, r2],
                         self._search(fs, experiment_id,
                                      filter_str="tags.generic_tag != 'wrong_val'"))
    six.assertCountEqual(self, [r1, r2],
                         self._search(fs, experiment_id,
                                      filter_str="tags.generic_2 != 'wrong_val'"))
    six.assertCountEqual(self, [r1],
                         self._search(fs, experiment_id, filter_str="tags.p_a = 'abc'"))
    six.assertCountEqual(self, [r2],
                         self._search(fs, experiment_id, filter_str="tags.p_b = 'ABC'"))
def test_search_tags(self):
    fs = self._get_store()
    experiment_id = self.experiments[0]
    r1 = fs.create_run(experiment_id, "user", 0, []).info.run_id
    r2 = fs.create_run(experiment_id, "user", 0, []).info.run_id
    fs.set_tag(r1, RunTag("generic_tag", "p_val"))
    fs.set_tag(r2, RunTag("generic_tag", "p_val"))
    fs.set_tag(r1, RunTag("generic_2", "some value"))
    fs.set_tag(r2, RunTag("generic_2", "another value"))
    fs.set_tag(r1, RunTag("p_a", "abc"))
    fs.set_tag(r2, RunTag("p_b", "ABC"))
    # test search returns both runs
    six.assertCountEqual(
        self, [r1, r2],
        self._search(fs, experiment_id, filter_str="tags.generic_tag = 'p_val'"))
    # test search returns appropriate run (same key different values per run)
    six.assertCountEqual(
        self, [r1],
        self._search(fs, experiment_id, filter_str="tags.generic_2 = 'some value'"))
    six.assertCountEqual(
        self, [r2],
        self._search(fs, experiment_id, filter_str="tags.generic_2='another value'"))
    six.assertCountEqual(
        self, [],
        self._search(fs, experiment_id, filter_str="tags.generic_tag = 'wrong_val'"))
    six.assertCountEqual(
        self, [],
        self._search(fs, experiment_id, filter_str="tags.generic_tag != 'p_val'"))
    six.assertCountEqual(
        self, [r1, r2],
        self._search(fs, experiment_id, filter_str="tags.generic_tag != 'wrong_val'"),
    )
    six.assertCountEqual(
        self, [r1, r2],
        self._search(fs, experiment_id, filter_str="tags.generic_2 != 'wrong_val'"),
    )
    six.assertCountEqual(
        self, [r1],
        self._search(fs, experiment_id, filter_str="tags.p_a = 'abc'"))
    six.assertCountEqual(
        self, [r2],
        self._search(fs, experiment_id, filter_str="tags.p_b = 'ABC'"))
def _get_tag_from_file(parent_path, tag_name):
    _validate_tag_name(tag_name)
    tag_data = read_file(parent_path, tag_name)
    return RunTag(tag_name, tag_data)
def test_requestor(self, request):
    response = mock.MagicMock
    response.status_code = 200
    response.text = '{}'
    request.return_value = response
    creds = MlflowHostCreds('https://hello')
    store = RestStore(lambda: creds)

    user_name = "mock user"
    source_name = "rest test"

    source_name_patch = mock.patch(
        "mlflow.tracking.context.default_context._get_source_name",
        return_value=source_name)
    source_type_patch = mock.patch(
        "mlflow.tracking.context.default_context._get_source_type",
        return_value=SourceType.LOCAL)

    with mock.patch('mlflow.store.rest_store.http_request') as mock_http, \
            mock.patch('mlflow.tracking.utils._get_store', return_value=store), \
            mock.patch('mlflow.tracking.context.default_context._get_user',
                       return_value=user_name), \
            mock.patch('time.time', return_value=13579), \
            source_name_patch, source_type_patch:
        with mlflow.start_run(experiment_id="43"):
            cr_body = message_to_json(
                CreateRun(experiment_id="43", user_id=user_name, start_time=13579000,
                          tags=[ProtoRunTag(key='mlflow.source.name', value=source_name),
                                ProtoRunTag(key='mlflow.source.type', value='LOCAL'),
                                ProtoRunTag(key='mlflow.user', value=user_name)]))
            expected_kwargs = self._args(creds, "runs/create", "POST", cr_body)
            assert mock_http.call_count == 1
            actual_kwargs = mock_http.call_args[1]
            # Test the passed tag values separately from the rest of the request
            # Tag order is inconsistent on Python 2 and 3, but the order does not matter
            expected_tags = expected_kwargs['json'].pop('tags')
            actual_tags = actual_kwargs['json'].pop('tags')
            assert (sorted(expected_tags, key=lambda t: t['key']) ==
                    sorted(actual_tags, key=lambda t: t['key']))
            assert expected_kwargs == actual_kwargs

    with mock.patch('mlflow.store.rest_store.http_request') as mock_http:
        store.log_param("some_uuid", Param("k1", "v1"))
        body = message_to_json(
            LogParam(run_uuid="some_uuid", run_id="some_uuid", key="k1", value="v1"))
        self._verify_requests(mock_http, creds, "runs/log-parameter", "POST", body)

    with mock.patch('mlflow.store.rest_store.http_request') as mock_http:
        store.set_tag("some_uuid", RunTag("t1", "abcd" * 1000))
        body = message_to_json(
            SetTag(run_uuid="some_uuid", run_id="some_uuid", key="t1", value="abcd" * 1000))
        self._verify_requests(mock_http, creds, "runs/set-tag", "POST", body)

    with mock.patch('mlflow.store.rest_store.http_request') as mock_http:
        store.delete_tag("some_uuid", "t1")
        body = message_to_json(DeleteTag(run_id="some_uuid", key="t1"))
        self._verify_requests(mock_http, creds, "runs/delete-tag", "POST", body)

    with mock.patch('mlflow.store.rest_store.http_request') as mock_http:
        store.log_metric("u2", Metric("m1", 0.87, 12345, 3))
        body = message_to_json(
            LogMetric(run_uuid="u2", run_id="u2", key="m1", value=0.87,
                      timestamp=12345, step=3))
        self._verify_requests(mock_http, creds, "runs/log-metric", "POST", body)

    with mock.patch('mlflow.store.rest_store.http_request') as mock_http:
        metrics = [
            Metric("m1", 0.87, 12345, 0),
            Metric("m2", 0.49, 12345, -1),
            Metric("m3", 0.58, 12345, 2)
        ]
        params = [Param("p1", "p1val"), Param("p2", "p2val")]
        tags = [RunTag("t1", "t1val"), RunTag("t2", "t2val")]
        store.log_batch(run_id="u2", metrics=metrics, params=params, tags=tags)
        metric_protos = [metric.to_proto() for metric in metrics]
        param_protos = [param.to_proto() for param in params]
        tag_protos = [tag.to_proto() for tag in tags]
        body = message_to_json(
            LogBatch(run_id="u2", metrics=metric_protos, params=param_protos,
                     tags=tag_protos))
        self._verify_requests(mock_http, creds, "runs/log-batch", "POST", body)

    with mock.patch('mlflow.store.rest_store.http_request') as mock_http:
        store.delete_run("u25")
        self._verify_requests(mock_http, creds, "runs/delete", "POST",
                              message_to_json(DeleteRun(run_id="u25")))

    with mock.patch('mlflow.store.rest_store.http_request') as mock_http:
        store.restore_run("u76")
        self._verify_requests(mock_http, creds, "runs/restore", "POST",
                              message_to_json(RestoreRun(run_id="u76")))

    with mock.patch('mlflow.store.rest_store.http_request') as mock_http:
        store.delete_experiment("0")
        self._verify_requests(
            mock_http, creds, "experiments/delete", "POST",
            message_to_json(DeleteExperiment(experiment_id="0")))

    with mock.patch('mlflow.store.rest_store.http_request') as mock_http:
        store.restore_experiment("0")
        self._verify_requests(
            mock_http, creds, "experiments/restore", "POST",
            message_to_json(RestoreExperiment(experiment_id="0")))

    with mock.patch('mlflow.store.rest_store.http_request') as mock_http:
        response = mock.MagicMock
        response.text = '{"runs": ["1a", "2b", "3c"], "next_page_token": "67890fghij"}'
        mock_http.return_value = response
        result = store.search_runs(["0", "1"], "params.p1 = 'a'", ViewType.ACTIVE_ONLY,
                                   max_results=10, order_by=["a"], page_token="12345abcde")
        expected_message = SearchRuns(experiment_ids=["0", "1"], filter="params.p1 = 'a'",
                                      run_view_type=ViewType.to_proto(ViewType.ACTIVE_ONLY),
                                      max_results=10, order_by=["a"],
                                      page_token="12345abcde")
        self._verify_requests(mock_http, creds, "runs/search", "POST",
                              message_to_json(expected_message))
        assert result.token == "67890fghij"
def test_log_batch_allows_tag_overwrite_single_req(self):
    fs = FileStore(self.test_root)
    run = self._create_run(fs)
    tags = [RunTag("t-key", "val"), RunTag("t-key", "newval")]
    fs.log_batch(run.info.run_uuid, metrics=[], params=[], tags=tags)
    self._verify_logged(fs, run.info.run_uuid, metrics=[], params=[], tags=[tags[-1]])
def faculty_tag_to_mlflow_tag(faculty_tag):
    return RunTag(key=faculty_tag.key, value=faculty_tag.value)
def set_tag(self, run_id, key, value):
    """Sets a tag on the given run id. Value will be converted to a string."""
    _validate_tag_name(key)
    tag = RunTag(key, str(value))
    self.store.set_tag(run_id, tag)
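# Hedged usage sketch (illustrative run and value, not library source): non-string tag
# values are coerced to strings by the client method above before reaching the store.
from mlflow.tracking import MlflowClient

client = MlflowClient()
run = client.create_run(experiment_id="0")       # assumed default experiment
client.set_tag(run.info.run_uuid, "epochs", 10)  # persisted as the string "10"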