def test_log_batch(tracking_uri_mock):
    expected_metrics = {"metric-key0": 1.0, "metric-key1": 4.0}
    expected_params = {"param-key0": "param-val0", "param-key1": "param-val1"}
    exact_expected_tags = {"tag-key0": "tag-val0", "tag-key1": "tag-val1"}
    approx_expected_tags = set([MLFLOW_SOURCE_NAME, MLFLOW_SOURCE_TYPE])

    t = int(time.time())
    metrics = [Metric(key=key, value=value, timestamp=t)
               for key, value in expected_metrics.items()]
    params = [Param(key=key, value=value) for key, value in expected_params.items()]
    tags = [RunTag(key=key, value=value) for key, value in exact_expected_tags.items()]

    active_run = start_run()
    run_uuid = active_run.info.run_uuid
    with active_run:
        mlflow.tracking.MlflowClient().log_batch(run_id=run_uuid, metrics=metrics,
                                                 params=params, tags=tags)
    finished_run = tracking.MlflowClient().get_run(run_uuid)
    # Validate metrics
    assert len(finished_run.data.metrics) == 2
    for metric in finished_run.data.metrics:
        assert expected_metrics[metric.key] == metric.value
    # Validate tags (for automatically-set tags)
    assert len(finished_run.data.tags) == len(exact_expected_tags) + len(approx_expected_tags)
    for tag in finished_run.data.tags:
        if tag.key in approx_expected_tags:
            pass
        else:
            assert exact_expected_tags[tag.key] == tag.value
    # Validate params
    assert len(finished_run.data.params) == 2
    for param in finished_run.data.params:
        assert expected_params[param.key] == param.value
def test_log_batch(mlflow_client, backend_store_uri):
    experiment_id = mlflow_client.create_experiment('Batch em up')
    created_run = mlflow_client.create_run(experiment_id)
    run_id = created_run.info.run_id
    mlflow_client.log_batch(
        run_id=run_id,
        metrics=[Metric("metric", 123.456, 789, 3)],
        params=[Param("param", "value")],
        tags=[RunTag("taggity", "do-dah")])
    run = mlflow_client.get_run(run_id)
    assert run.data.metrics.get('metric') == 123.456
    assert run.data.params.get('param') == 'value'
    assert run.data.tags.get('taggity') == 'do-dah'
    metric_history = mlflow_client.get_metric_history(run_id, "metric")
    assert len(metric_history) == 1
    metric = metric_history[0]
    assert metric.key == "metric"
    assert metric.value == 123.456
    assert metric.timestamp == 789
    assert metric.step == 3
def _create_entity(base, model):
    # create dict of kwargs properties for entity and return the initialized entity
    config = {}
    for k in base._properties():
        # check if its mlflow entity and build it
        obj = getattr(model, k)

        if isinstance(model, SqlRun):
            if base is RunData:
                # Run data contains list for metrics, params and tags
                # so obj will be a list so we need to convert those items
                if k == 'metrics':
                    # only get latest recorded metrics per key
                    metrics = {}
                    for o in obj:
                        existing_metric = metrics.get(o.key)
                        if (existing_metric is None) \
                                or ((o.step, o.timestamp, o.value) >=
                                    (existing_metric.step, existing_metric.timestamp,
                                     existing_metric.value)):
                            metrics[o.key] = Metric(o.key, o.value, o.timestamp, o.step)
                    obj = list(metrics.values())
                elif k == 'params':
                    obj = [Param(o.key, o.value) for o in obj]
                elif k == 'tags':
                    obj = [RunTag(o.key, o.value) for o in obj]
            elif base is RunInfo:
                if k == 'source_type':
                    obj = SourceType.from_string(obj)
                elif k == "status":
                    obj = RunStatus.from_string(obj)
                elif k == "experiment_id":
                    obj = str(obj)

        # Our data model defines experiment_ids as ints, but the in-memory representation was
        # changed to be a string in time for 1.0.
        if isinstance(model, SqlExperiment) and k == "experiment_id":
            obj = str(obj)

        config[k] = obj
    return base(**config)
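# Hedged illustration (not part of the store code above): a standalone sketch of how the
# (step, timestamp, value) tuple comparison keeps only the latest recorded metric per key.
# The helper name and the sample history below are assumptions for demonstration only.
from mlflow.entities import Metric


def latest_metric_per_key(metric_history):
    latest = {}
    for m in metric_history:
        existing = latest.get(m.key)
        if existing is None or (m.step, m.timestamp, m.value) >= (
                existing.step, existing.timestamp, existing.value):
            latest[m.key] = m
    return list(latest.values())


history = [
    Metric("loss", 0.9, 1000, 0),
    Metric("loss", 0.5, 2000, 1),  # later step wins over the earlier recording
    Metric("acc", 0.7, 1500, 0),
]
assert {m.key: m.value for m in latest_metric_per_key(history)} == {"loss": 0.5, "acc": 0.7}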
def _log_batch():
    _validate_batch_log_api_req(_get_request_json())
    request_message = _get_request_message(LogBatch())
    metrics = [Metric.from_proto(proto_metric) for proto_metric in request_message.metrics]
    params = [Param.from_proto(proto_param) for proto_param in request_message.params]
    tags = [RunTag.from_proto(proto_tag) for proto_tag in request_message.tags]
    _get_tracking_store().log_batch(run_id=request_message.run_id, metrics=metrics,
                                    params=params, tags=tags)
    response_message = LogBatch.Response()
    response = Response(mimetype='application/json')
    response.set_data(message_to_json(response_message))
    return response
def test_log_batch(tracking_uri_mock, tmpdir):
    expected_metrics = {"metric-key0": 1.0, "metric-key1": 4.0}
    expected_params = {"param-key0": "param-val0", "param-key1": "param-val1"}
    exact_expected_tags = {"tag-key0": "tag-val0", "tag-key1": "tag-val1"}
    approx_expected_tags = set([MLFLOW_SOURCE_NAME, MLFLOW_SOURCE_TYPE])

    t = int(time.time())
    sorted_expected_metrics = sorted(expected_metrics.items(), key=lambda kv: kv[0])
    metrics = [Metric(key=key, value=value, timestamp=t, step=i)
               for i, (key, value) in enumerate(sorted_expected_metrics)]
    params = [Param(key=key, value=value) for key, value in expected_params.items()]
    tags = [RunTag(key=key, value=value) for key, value in exact_expected_tags.items()]

    with start_run() as active_run:
        run_id = active_run.info.run_id
        mlflow.tracking.MlflowClient().log_batch(run_id=run_id, metrics=metrics,
                                                 params=params, tags=tags)
    client = tracking.MlflowClient()
    finished_run = client.get_run(run_id)
    # Validate metrics
    assert len(finished_run.data.metrics) == 2
    for key, value in finished_run.data.metrics.items():
        assert expected_metrics[key] == value
    metric_history0 = client.get_metric_history(run_id, "metric-key0")
    assert set([(m.value, m.timestamp, m.step) for m in metric_history0]) == set([
        (1.0, t, 0),
    ])
    metric_history1 = client.get_metric_history(run_id, "metric-key1")
    assert set([(m.value, m.timestamp, m.step) for m in metric_history1]) == set([
        (4.0, t, 1),
    ])
    # Validate tags (for automatically-set tags)
    assert len(finished_run.data.tags) == len(exact_expected_tags) + len(approx_expected_tags)
    for tag_key, tag_value in finished_run.data.tags.items():
        if tag_key in approx_expected_tags:
            pass
        else:
            assert exact_expected_tags[tag_key] == tag_value
    # Validate params
    assert finished_run.data.params == expected_params
def test_log_batch_internal_error(self):
    # Verify that internal errors during the DB save step for log_batch result in
    # MlflowExceptions
    run = self._run_factory()

    def _raise_exception_fn(*args, **kwargs):  # pylint: disable=unused-argument
        raise Exception("Some internal error")

    with mock.patch("mlflow.store.sqlalchemy_store.SqlAlchemyStore.log_metric") as metric_mock, \
            mock.patch("mlflow.store.sqlalchemy_store.SqlAlchemyStore.log_param") as param_mock, \
            mock.patch("mlflow.store.sqlalchemy_store.SqlAlchemyStore.set_tag") as tags_mock:
        metric_mock.side_effect = _raise_exception_fn
        param_mock.side_effect = _raise_exception_fn
        tags_mock.side_effect = _raise_exception_fn
        for kwargs in [{"metrics": [Metric("a", 3, 1)]}, {"params": [Param("b", "c")]},
                       {"tags": [RunTag("c", "d")]}]:
            log_batch_kwargs = {"metrics": [], "params": [], "tags": []}
            log_batch_kwargs.update(kwargs)
            with self.assertRaises(MlflowException) as e:
                self.store.log_batch(run.info.run_uuid, **log_batch_kwargs)
            self.assertIn(str(e.exception.message), "Some internal error")
def _create_entity(base, model):
    # create dict of kwargs properties for entity and return the initialized entity
    config = {}
    for k in base._properties():
        # check if its mlflow entity and build it
        obj = getattr(model, k)

        # Run data contains list for metrics, params and tags
        # so obj will be a list so we need to convert those items
        if k == 'metrics':
            obj = [Metric(o.key, o.value, o.timestamp) for o in obj]
        if k == 'params':
            obj = [Param(o.key, o.value) for o in obj]
        if k == 'tags':
            obj = [RunTag(o.key, o.value) for o in obj]

        config[k] = obj
    return base(**config)
def _generate_run(self, i, runs_dict):
    """
    Generate a run object and save it to runs_dict keyed by run_id.
    Most of the data just depends on i, and some data are hard-coded for simplicity.
    """
    key = f"key{i}"
    value = f"value{i}"
    start_time = 123456 * i
    end_time = start_time + (1000 * i)
    run_id = f"run_id_{i}"
    metrics = [Metric(key, value, start_time, "stage")]
    params = [Param(key, value)]
    tags = [RunTag(key, value)]
    run_info = RunInfo(run_id, "experiment_id", "user_id", "status",
                       start_time, end_time, "lifecycle_stage")
    run = Run(run_info=run_info,
              run_data=RunData(metrics=metrics, params=params, tags=tags))
    runs_dict[run_id] = run
    return run
def test_log_batch(mlflow_client):
    experiment_id = mlflow_client.create_experiment('Batch em up')
    created_run = mlflow_client.create_run(experiment_id)
    run_id = created_run.info.run_uuid
    # TODO(sid): pass and assert on step
    mlflow_client.log_batch(run_id=run_id,
                            metrics=[Metric("metric", 123.456, 789, 0)],
                            params=[Param("param", "value")],
                            tags=[RunTag("taggity", "do-dah")])
    run = mlflow_client.get_run(run_id)
    assert run.data.metrics.get('metric') == 123.456
    assert run.data.params.get('param') == 'value'
    assert run.data.tags.get('taggity') == 'do-dah'
    # TODO(sid): replace this with mlflow_client.get_metric_history
    fs = FileStore(server_root_dir)
    metric_history = fs.get_metric_history(run_id, "metric")
    assert len(metric_history) == 1
    metric = metric_history[0]
    assert metric.key == "metric"
    assert metric.value == 123.456
    assert metric.timestamp == 789
def _create():
    metrics = [Metric(random_str(10), random_int(0, 1000),
                      int(time.time() + random_int(-1e4, 1e4)))
               for _ in range(100)]
    params = [Param(random_str(10), random_str(random_int(10, 35))) for _ in range(10)]  # noqa
    tags = [RunTag(random_str(10), random_str(random_int(10, 35))) for _ in range(10)]  # noqa
    rd = RunData()
    for p in params:
        rd._add_param(p)
    for m in metrics:
        rd._add_metric(m)
    for t in tags:
        rd._add_tag(t)
    return rd, metrics, params, tags
def test_log_batch_internal_error(self):
    # Verify that internal errors during log_batch result in MlflowExceptions
    fs = FileStore(self.test_root)
    run = self._create_run(fs)

    def _raise_exception_fn(*args, **kwargs):  # pylint: disable=unused-argument
        raise Exception("Some internal error")

    with mock.patch("mlflow.store.file_store.FileStore.log_metric") as log_metric_mock, \
            mock.patch("mlflow.store.file_store.FileStore.log_param") as log_param_mock, \
            mock.patch("mlflow.store.file_store.FileStore.set_tag") as set_tag_mock:
        log_metric_mock.side_effect = _raise_exception_fn
        log_param_mock.side_effect = _raise_exception_fn
        set_tag_mock.side_effect = _raise_exception_fn
        for kwargs in [{"metrics": [Metric("a", 3, 1)]}, {"params": [Param("b", "c")]},
                       {"tags": [RunTag("c", "d")]}]:
            log_batch_kwargs = {"metrics": [], "params": [], "tags": []}
            log_batch_kwargs.update(kwargs)
            print(log_batch_kwargs)
            with self.assertRaises(MlflowException) as e:
                fs.log_batch(run.info.run_uuid, **log_batch_kwargs)
            self.assertIn(str(e.exception.message), "Some internal error")
            assert e.exception.error_code == ErrorCode.Name(INTERNAL_ERROR)
def log_params(params):
    """
    Log a batch of params for the current run. If no run is active, this method will create a
    new active run.

    :param params: Dictionary of param_name: String -> value: (String, but will be string-ified
                   if not)
    :returns: None

    .. code-block:: python
        :caption: Example

        import mlflow

        params = {"learning_rate": 0.01, "n_estimators": 10}

        # Log a batch of parameters
        with mlflow.start_run():
            mlflow.log_params(params)
    """
    run_id = _get_or_start_run().info.run_id
    params_arr = [Param(key, str(value)) for key, value in params.items()]
    MlflowClient().log_batch(run_id=run_id, metrics=[], params=params_arr, tags=[])
def import_run_data(self, run_dct, run_id, src_user_id):
    from mlflow.entities import Metric, Param, RunTag
    now = round(time.time())
    params = [Param(k, v) for k, v in run_dct['params'].items()]
    metrics = [Metric(k, v, now, 0)
               for k, v in run_dct['metrics'].items()]  # TODO: missing timestamp and step semantics?
    tags = run_dct['tags']
    if not self.import_mlflow_tags:  # remove mlflow tags
        keys = [k for k in tags.keys() if k.startswith("mlflow.")]
        for k in keys:
            tags.pop(k)
    if not self.import_mlflow_tools_tags:  # remove mlflow_tools tags
        keys = [k for k in tags.keys() if k.startswith("mlflow_tools.")]
        for k in keys:
            tags.pop(k)
    tags = [RunTag(k, str(v)) for k, v in tags.items()]
    if not self.in_databricks:
        utils.set_dst_user_id(tags, src_user_id, self.use_src_user_id)
    # self.dump_tags(tags)
    self.client.log_batch(run_id, metrics, params, tags)
def _create_run(self, run):
    args = {"run_name": run.run_name} if run.run_name else {}
    with mlflow.start_run(**args) as active_run:
        params = [Param(k, v) for k, v in run.params.items()]
        metrics = [Metric(k, v, int(time.time()), 0)
                   for k, v in run.metrics.items()]  # TODO: timestamp and step?
        tags = [RunTag(k, v) for k, v in run.tags.items()]
        mlflow.tracking.MlflowClient().log_batch(active_run.info.run_id, metrics, params, tags)
        if self.do_tag:
            mlflow.set_tag("replayed", "true")
        for m in run.models:
            if m.model:
                m.log_model_func(m.model, m.model_name)
        for a in run.artifacts:
            if a.artifact_local_path:
                with open(a.artifact_local_path, "wb") as f:
                    f.write(a.artifact_bytes)
                mlflow.log_artifact(a.artifact_local_path, a.artifact_path)
        return active_run.info.run_id
def get_machine_log_items(machine: Machine) -> Tuple[List[Metric], List[Param]]:
    """
    Create flat lists of MLflow logging entities from multilevel dictionary

    For more information, see the mlflow docs:
    https://www.mlflow.org/docs/latest/python_api/mlflow.tracking.html#mlflow.tracking.MlflowClient.log_batch

    Parameters
    ----------
    machine: Machine

    Returns
    -------
    metrics: List[Metric]
        List of MLFlow Metric objects to log.
    params: List[Param]
        List of MLFlow Param objects to log.
    """
    metrics: List[Metric] = list()
    build_metadata = machine.metadata.build_metadata

    # Project/machine parameters
    keys = ["project_name", "name"]
    params = [Param(attr, getattr(machine, attr)) for attr in keys]

    # Dataset parameters
    dataset_keys = [
        "train_start_date",
        "train_end_date",
        "resolution",
        "row_filter",
        "row_filter_buffer_size",
    ]
    params.extend(Param(k, str(getattr(machine.dataset, k))) for k in dataset_keys)

    # Model parameters
    model_keys = ["model_creation_date", "model_builder_version", "model_offset"]
    params.extend(Param(k, str(getattr(build_metadata.model, k))) for k in model_keys)

    # Parse cross-validation split metadata
    splits = build_metadata.model.cross_validation.splits
    params.extend(Param(k, str(v)) for k, v in splits.items())

    # Parse cross-validation metrics
    tag_list = normalize_sensor_tags(machine.dataset.tag_list, asset=machine.dataset.asset)
    scores = build_metadata.model.cross_validation.scores
    keys = sorted(list(scores.keys()))
    subkeys = ["mean", "max", "min", "std"]

    n_folds = len(scores[keys[0]]) - len(subkeys)
    for k in keys:
        # Skip per tag data, produces too many params for MLflow
        if any([t.name in k for t in tag_list]):
            continue
        # Summary stats per metric
        for sk in subkeys:
            metrics.append(Metric(f"{k}-{sk}", scores[k][f"fold-{sk}"], epoch_now(), 0))
        # Append value for each fold with increasing steps
        metrics.extend(
            Metric(k, scores[k][f"fold-{i+1}"], epoch_now(), i) for i in range(n_folds))

    # Parse fit metrics
    try:
        meta_params = build_metadata.model.model_meta["history"]["params"]
    except KeyError:
        logger.debug("Key 'build-metadata.model.history.params' not found in metadata.")
    else:
        metrics.extend(
            Metric(k, float(getattr(build_metadata.model, k)), epoch_now(), 0)
            for k in ["model_training_duration_sec"])
        for m in meta_params["metrics"]:
            data = build_metadata.model.model_meta["history"][m]
            metrics.extend(
                Metric(m, float(x), timestamp=epoch_now(), step=i)
                for i, x in enumerate(data))
        params.extend(
            Param(k, str(meta_params[k])) for k in (p for p in meta_params if p != "metrics"))

    return metrics, params
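# Hedged follow-up sketch (not part of the source above): how the (metrics, params) pair
# returned by get_machine_log_items might be logged in a single batched call. The function
# name log_machine_to_run and the run_id argument are illustrative assumptions.
from mlflow.tracking import MlflowClient


def log_machine_to_run(machine, run_id):
    metrics, params = get_machine_log_items(machine)
    MlflowClient().log_batch(run_id=run_id, metrics=metrics, params=params, tags=[])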
def _get_param_from_file(parent_path, param_name):
    _validate_param_name(param_name)
    value = read_file(parent_path, param_name)
    return Param(param_name, value)
def faculty_param_to_mlflow_param(faculty_param):
    return Param(key=faculty_param.key, value=faculty_param.value)
def to_mlflow_entity(self) -> Param:
    return Param(key=self.key, value=self.value)
def _create_child_runs_for_parameter_search(cv_estimator, parent_run, child_tags=None):
    """
    Creates a collection of child runs for a parameter search training session.
    Runs are reconstructed from the `cv_results_` attribute of the specified trained
    parameter search estimator - `cv_estimator`, which provides relevant performance
    metrics for each point in the parameter search space. One child run is created
    for each point in the parameter search space. For additional information, see
    `https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.GridSearchCV.html`_. # noqa: E501

    :param cv_estimator: The trained parameter search estimator for which to create
                         child runs.
    :param parent_run: A py:class:`mlflow.entities.Run` object referring to the parent
                       parameter search run for which child runs should be created.
    :param child_tags: An optional dictionary of MLflow tag keys and values to log
                       for each child run.
    """
    import pandas as pd

    client = MlflowClient()
    # Use the start time of the parent parameter search run as a rough estimate for the
    # start time of child runs, since we cannot precisely determine when each point
    # in the parameter search space was explored
    child_run_start_time = parent_run.info.start_time
    child_run_end_time = int(time.time() * 1000)

    seed_estimator = cv_estimator.estimator
    # In the unlikely case that a seed of a parameter search estimator is,
    # itself, a parameter search estimator, we should avoid logging the untuned
    # parameters of the seeds's seed estimator
    should_log_params_deeply = not _is_parameter_search_estimator(seed_estimator)
    # Each row of `cv_results_` only provides parameters that vary across
    # the user-specified parameter grid. In order to log the complete set
    # of parameters for each child run, we fetch the parameters defined by
    # the seed estimator and update them with parameter subset specified
    # in the result row
    base_params = seed_estimator.get_params(deep=should_log_params_deeply)

    cv_results_df = pd.DataFrame.from_dict(cv_estimator.cv_results_)
    for _, result_row in cv_results_df.iterrows():
        tags_to_log = dict(child_tags) if child_tags else {}
        tags_to_log.update({MLFLOW_PARENT_RUN_ID: parent_run.info.run_id})
        tags_to_log.update(_get_estimator_info_tags(seed_estimator))
        child_run = client.create_run(
            experiment_id=parent_run.info.experiment_id,
            start_time=child_run_start_time,
            tags=tags_to_log,
        )

        from itertools import zip_longest

        params_to_log = dict(base_params)
        params_to_log.update(result_row.get("params", {}))
        param_batches_to_log = _chunk_dict(params_to_log, chunk_size=MAX_PARAMS_TAGS_PER_BATCH)

        # Parameters values are recorded twice in the set of search `cv_results_`:
        # once within a `params` column with dictionary values and once within
        # a separate dataframe column that is created for each parameter. To prevent
        # duplication of parameters, we log the consolidated values from the parameter
        # dictionary column and filter out the other parameter-specific columns with
        # names of the form `param_{param_name}`. Additionally, `cv_results_` produces
        # metrics for each training split, which is fairly verbose; accordingly, we filter
        # out per-split metrics in favor of aggregate metrics (mean, std, etc.)
        excluded_metric_prefixes = ["param", "split"]
        metric_batches_to_log = _chunk_dict(
            {
                key: value
                for key, value in result_row.iteritems()
                if not any([key.startswith(prefix) for prefix in excluded_metric_prefixes])
                and isinstance(value, Number)
            },
            chunk_size=min(MAX_ENTITIES_PER_BATCH - MAX_PARAMS_TAGS_PER_BATCH,
                           MAX_METRICS_PER_BATCH),
        )

        for params_batch, metrics_batch in zip_longest(param_batches_to_log,
                                                       metric_batches_to_log, fillvalue={}):
            # Trim any parameter keys / values and metric keys that exceed the limits
            # imposed by corresponding MLflow Tracking APIs (e.g., LogParam, LogMetric)
            truncated_params_batch = _truncate_dict(params_batch, MAX_ENTITY_KEY_LENGTH,
                                                    MAX_PARAM_VAL_LENGTH)
            truncated_metrics_batch = _truncate_dict(metrics_batch,
                                                     max_key_length=MAX_ENTITY_KEY_LENGTH)
            client.log_batch(
                run_id=child_run.info.run_id,
                params=[
                    Param(str(key), str(value))
                    for key, value in truncated_params_batch.items()
                ],
                metrics=[
                    Metric(key=str(key), value=value, timestamp=child_run_end_time, step=0)
                    for key, value in truncated_metrics_batch.items()
                ],
            )

        client.set_terminated(run_id=child_run.info.run_id, end_time=child_run_end_time)
EXPERIMENT_ID = 12
NAME = "experiment name"
ARTIFACT_LOCATION = "scheme://artifact-location"

METRIC_TIMESTAMP = datetime(2019, 3, 13, 17, 0, 15, tzinfo=UTC)
METRIC_TIMESTAMP_MILLISECONDS = to_timestamp(METRIC_TIMESTAMP) * 1000
FACULTY_METRIC = FacultyMetric(key="metric-key", value="metric-value",
                               timestamp=METRIC_TIMESTAMP, step=0)
MLFLOW_METRIC = Metric("metric-key", "metric-value", METRIC_TIMESTAMP_MILLISECONDS, 0)

FACULTY_PARAM = FacultyParam(key="param-key", value="param-value")
MLFLOW_PARAM = Param("param-key", "param-value")

FACULTY_TAG = FacultyTag(key="tag-key", value="tag-value")
MLFLOW_TAG = RunTag("tag-key", "tag-value")

FACULTY_EXPERIMENT = FacultyExperiment(
    id=EXPERIMENT_ID,
    name=NAME,
    description="not used",
    artifact_location=ARTIFACT_LOCATION,
    created_at=datetime.now(tz=UTC),
    last_updated_at=datetime.now(tz=UTC),
    deleted_at=None,
)

RUN_UUID = uuid4()
def test_log_batch_handler_success(mock_get_request_message, mock_get_request_json, tmpdir):
    # Test success cases for the LogBatch API
    def _test_log_batch_helper_success(metric_entities, param_entities, tag_entities,
                                       expected_metrics=None, expected_params=None,
                                       expected_tags=None):
        """
        Simulates a LogBatch API request using the provided metrics/params/tags, asserting that it
        succeeds & that the backing store contains either the set of expected metrics/params/tags
        (if provided) or, by default, the metrics/params/tags used in the API request.
        """
        with mlflow.start_run() as active_run:
            run_id = active_run.info.run_uuid
            mock_get_request_message.return_value = LogBatch(
                run_id=run_id,
                metrics=[m.to_proto() for m in metric_entities],
                params=[p.to_proto() for p in param_entities],
                tags=[t.to_proto() for t in tag_entities])
            response = _log_batch()
            assert response.status_code == 200
            json_response = json.loads(response.get_data())
            assert json_response == {}
            _assert_logged_entities(run_id, expected_metrics or metric_entities,
                                    expected_params or param_entities,
                                    expected_tags or tag_entities)

    store = FileStore(tmpdir.strpath)
    mock_get_request_json.return_value = "{}"  # Mock request JSON so it passes length validation
    server_patch = mock.patch('mlflow.server.handlers._get_store', return_value=store)
    client_patch = mock.patch('mlflow.tracking.utils._get_store', return_value=store)
    with server_patch, client_patch:
        mlflow.set_experiment("log-batch-experiment")
        # Log an empty payload
        _test_log_batch_helper_success([], [], [])
        # Log multiple metrics/params/tags
        _test_log_batch_helper_success(
            metric_entities=[Metric(key="m-key", value=3.2 * i, timestamp=i) for i in range(3)],
            param_entities=[Param(key="p-key-%s" % i, value="p-val-%s" % i) for i in range(4)],
            tag_entities=[RunTag(key="t-key-%s" % i, value="t-val-%s" % i) for i in range(5)])
        # Log metrics with the same key
        _test_log_batch_helper_success(
            metric_entities=[Metric(key="m-key", value=3.2 * i, timestamp=3) for i in range(3)],
            param_entities=[], tag_entities=[])
        # Log tags with the same key, verify the last one gets written
        same_key_tags = [RunTag(key="t-key", value="t-val-%s" % i) for i in range(5)]
        _test_log_batch_helper_success(metric_entities=[], param_entities=[],
                                       tag_entities=same_key_tags,
                                       expected_tags=[same_key_tags[-1]])
from mlflow.entities import Metric, Param, RunTag

client = MlflowClient()
experiment_id = client.create_experiment("new expenza")
# client.set_experiment(experiment_id)


def print_run_info(r):
    print(f"run_id: {r.info.run_id}")
    print(f"params: {r.data.params}")
    print(f"metrics: {r.data.metrics}")
    print(f"tags: {r.data.tags}")
    print(f"status: {r.info.status}")


# Create Mlflow entities and a run under the default experiment
# (whose id is "0")
timestamp = int(time.time() * 1000)
metrics = [Metric("m", 1.5, timestamp, 1)]
params = [Param("p", "p")]
tags = [RunTag("t", "t")]
experiment_id = "1"
run = client.create_run(experiment_id=experiment_id)

# log entities, terminate the run, and fetch run status
client.log_batch(run.info.run_id, metrics=metrics, params=params, tags=tags)
client.set_terminated(run.info.run_id)
run = client.get_run(run.info.run_id)
print_run_info(run)
def report_params(self, m: Dict[str, Union[str, Dict]]) -> None:
    m = ua.flatten(m)
    self._log_batch(params=[Param(k, str(v)) for k, v in m.items()])
def _list_to_run_param(l):
    return [Param(key=d["key"], value=d["value"]) for d in l]
def test_log_batch_params_idempotency(self):
    run = self._run_factory()
    params = [Param("p-key", "p-val")]
    self.store.log_batch(run.info.run_uuid, metrics=[], params=params, tags=[])
    self.store.log_batch(run.info.run_uuid, metrics=[], params=params, tags=[])
    self._verify_logged(run.info.run_uuid, metrics=[], params=params, tags=[])
run_info = RunInfo(run_uuid="1", run_id="1", experiment_id="experiment_id", user_id="unknown",
                   status=RunStatus.to_string(RunStatus.RUNNING), start_time=1, end_time=None,
                   lifecycle_stage=LifecycleStage.ACTIVE, artifact_uri="artifact_uri")
run_data = RunData(metrics=[], params=[], tags=[])
run = Run(run_info=run_info, run_data=run_data)

metric = Metric(key="metric1", value=1, timestamp=1, step=1)
param = Param(key="param1", value="val1")
tag = RunTag(key="tag1", value="val1")
experiment_tag = ExperimentTag(key="tag1", value="val1")


@mock.patch("mlflow_elasticsearchstore.elasticsearch_store.ElasticsearchStore.list_experiments")
@pytest.mark.usefixtures('create_mlflow_client')
def test_list_experiments(list_experiments_mock, create_mlflow_client):
    create_mlflow_client.list_experiments(ViewType.ACTIVE_ONLY)
    list_experiments_mock.assert_called_once_with(view_type=ViewType.ACTIVE_ONLY)
def test_requestor(self, request):
    response = mock.MagicMock
    response.status_code = 200
    response.text = '{}'
    request.return_value = response

    creds = MlflowHostCreds('https://hello')
    store = RestStore(lambda: creds)

    user_name = "mock user"
    source_name = "rest test"

    source_name_patch = mock.patch(
        "mlflow.tracking.context.default_context._get_source_name", return_value=source_name)
    source_type_patch = mock.patch(
        "mlflow.tracking.context.default_context._get_source_type",
        return_value=SourceType.LOCAL)
    with mock.patch('mlflow.store.rest_store.http_request') as mock_http, \
            mock.patch('mlflow.tracking.utils._get_store', return_value=store), \
            mock.patch('mlflow.tracking.context.default_context._get_user',
                       return_value=user_name), \
            mock.patch('time.time', return_value=13579), \
            source_name_patch, source_type_patch:
        with mlflow.start_run(experiment_id="43"):
            cr_body = message_to_json(CreateRun(
                experiment_id="43", user_id=user_name, start_time=13579000,
                tags=[ProtoRunTag(key='mlflow.source.name', value=source_name),
                      ProtoRunTag(key='mlflow.source.type', value='LOCAL'),
                      ProtoRunTag(key='mlflow.user', value=user_name)]))
            expected_kwargs = self._args(creds, "runs/create", "POST", cr_body)

            assert mock_http.call_count == 1
            actual_kwargs = mock_http.call_args[1]

            # Test the passed tag values separately from the rest of the request
            # Tag order is inconsistent on Python 2 and 3, but the order does not matter
            expected_tags = expected_kwargs['json'].pop('tags')
            actual_tags = actual_kwargs['json'].pop('tags')
            assert (sorted(expected_tags, key=lambda t: t['key']) ==
                    sorted(actual_tags, key=lambda t: t['key']))
            assert expected_kwargs == actual_kwargs

    with mock.patch('mlflow.store.rest_store.http_request') as mock_http:
        store.log_param("some_uuid", Param("k1", "v1"))
        body = message_to_json(
            LogParam(run_uuid="some_uuid", run_id="some_uuid", key="k1", value="v1"))
        self._verify_requests(mock_http, creds, "runs/log-parameter", "POST", body)

    with mock.patch('mlflow.store.rest_store.http_request') as mock_http:
        store.set_tag("some_uuid", RunTag("t1", "abcd" * 1000))
        body = message_to_json(
            SetTag(run_uuid="some_uuid", run_id="some_uuid", key="t1", value="abcd" * 1000))
        self._verify_requests(mock_http, creds, "runs/set-tag", "POST", body)

    with mock.patch('mlflow.store.rest_store.http_request') as mock_http:
        store.delete_tag("some_uuid", "t1")
        body = message_to_json(DeleteTag(run_id="some_uuid", key="t1"))
        self._verify_requests(mock_http, creds, "runs/delete-tag", "POST", body)

    with mock.patch('mlflow.store.rest_store.http_request') as mock_http:
        store.log_metric("u2", Metric("m1", 0.87, 12345, 3))
        body = message_to_json(
            LogMetric(run_uuid="u2", run_id="u2", key="m1", value=0.87, timestamp=12345, step=3))
        self._verify_requests(mock_http, creds, "runs/log-metric", "POST", body)

    with mock.patch('mlflow.store.rest_store.http_request') as mock_http:
        metrics = [Metric("m1", 0.87, 12345, 0), Metric("m2", 0.49, 12345, -1),
                   Metric("m3", 0.58, 12345, 2)]
        params = [Param("p1", "p1val"), Param("p2", "p2val")]
        tags = [RunTag("t1", "t1val"), RunTag("t2", "t2val")]
        store.log_batch(run_id="u2", metrics=metrics, params=params, tags=tags)
        metric_protos = [metric.to_proto() for metric in metrics]
        param_protos = [param.to_proto() for param in params]
        tag_protos = [tag.to_proto() for tag in tags]
        body = message_to_json(LogBatch(run_id="u2", metrics=metric_protos,
                                        params=param_protos, tags=tag_protos))
        self._verify_requests(mock_http, creds, "runs/log-batch", "POST", body)

    with mock.patch('mlflow.store.rest_store.http_request') as mock_http:
        store.delete_run("u25")
        self._verify_requests(mock_http, creds, "runs/delete", "POST",
                              message_to_json(DeleteRun(run_id="u25")))

    with mock.patch('mlflow.store.rest_store.http_request') as mock_http:
        store.restore_run("u76")
        self._verify_requests(mock_http, creds, "runs/restore", "POST",
                              message_to_json(RestoreRun(run_id="u76")))

    with mock.patch('mlflow.store.rest_store.http_request') as mock_http:
        store.delete_experiment("0")
        self._verify_requests(mock_http, creds, "experiments/delete", "POST",
                              message_to_json(DeleteExperiment(experiment_id="0")))

    with mock.patch('mlflow.store.rest_store.http_request') as mock_http:
        store.restore_experiment("0")
        self._verify_requests(mock_http, creds, "experiments/restore", "POST",
                              message_to_json(RestoreExperiment(experiment_id="0")))

    with mock.patch('mlflow.store.rest_store.http_request') as mock_http:
        response = mock.MagicMock
        response.text = '{"runs": ["1a", "2b", "3c"], "next_page_token": "67890fghij"}'
        mock_http.return_value = response
        result = store.search_runs(["0", "1"], "params.p1 = 'a'", ViewType.ACTIVE_ONLY,
                                   max_results=10, order_by=["a"], page_token="12345abcde")
        expected_message = SearchRuns(experiment_ids=["0", "1"], filter="params.p1 = 'a'",
                                      run_view_type=ViewType.to_proto(ViewType.ACTIVE_ONLY),
                                      max_results=10, order_by=["a"], page_token="12345abcde")
        self._verify_requests(mock_http, creds, "runs/search", "POST",
                              message_to_json(expected_message))
        assert result.token == "67890fghij"
def test_requestor(self, request):
    response = mock.MagicMock
    response.status_code = 200
    response.text = '{}'
    request.return_value = response

    creds = MlflowHostCreds('https://hello')
    store = RestStore(lambda: creds)

    user_name = "mock user"
    run_name = "rest run"
    source_name = "rest test"

    with mock.patch('mlflow.store.rest_store.http_request_safe') as mock_http, \
            mock.patch('mlflow.tracking.utils._get_store', return_value=store), \
            mock.patch('mlflow.tracking.client._get_user_id', return_value=user_name), \
            mock.patch('time.time', return_value=13579):
        with mlflow.start_run(experiment_id="43", run_name=run_name, source_name=source_name):
            cr_body = message_to_json(CreateRun(
                experiment_id="43", run_name='', user_id=user_name,
                source_type=SourceType.LOCAL, source_name=source_name, start_time=13579000,
                tags=[ProtoRunTag(key='mlflow.source.name', value=source_name),
                      ProtoRunTag(key='mlflow.source.type', value='LOCAL')]))
            st_body = message_to_json(SetTag(run_uuid='', key='mlflow.runName', value=run_name))
            assert mock_http.call_count == 2
            exp_calls = [("runs/create", "POST", cr_body), ("runs/set-tag", "POST", st_body)]
            self._verify_request_has_calls(mock_http, creds, exp_calls)

    with mock.patch('mlflow.store.rest_store.http_request_safe') as mock_http:
        store.log_param("some_uuid", Param("k1", "v1"))
        body = message_to_json(LogParam(run_uuid="some_uuid", key="k1", value="v1"))
        self._verify_requests(mock_http, creds, "runs/log-parameter", "POST", body)

    with mock.patch('mlflow.store.rest_store.http_request_safe') as mock_http:
        store.set_tag("some_uuid", RunTag("t1", "abcd" * 1000))
        body = message_to_json(SetTag(run_uuid="some_uuid", key="t1", value="abcd" * 1000))
        self._verify_requests(mock_http, creds, "runs/set-tag", "POST", body)

    with mock.patch('mlflow.store.rest_store.http_request_safe') as mock_http:
        store.log_metric("u2", Metric("m1", 0.87, 12345, 3))
        body = message_to_json(
            LogMetric(run_uuid="u2", key="m1", value=0.87, timestamp=12345, step=3))
        self._verify_requests(mock_http, creds, "runs/log-metric", "POST", body)

    with mock.patch('mlflow.store.rest_store.http_request_safe') as mock_http:
        metrics = [Metric("m1", 0.87, 12345, 0), Metric("m2", 0.49, 12345, -1),
                   Metric("m3", 0.58, 12345, 2)]
        params = [Param("p1", "p1val"), Param("p2", "p2val")]
        tags = [RunTag("t1", "t1val"), RunTag("t2", "t2val")]
        store.log_batch(run_id="u2", metrics=metrics, params=params, tags=tags)
        metric_protos = [metric.to_proto() for metric in metrics]
        param_protos = [param.to_proto() for param in params]
        tag_protos = [tag.to_proto() for tag in tags]
        body = message_to_json(LogBatch(run_id="u2", metrics=metric_protos,
                                        params=param_protos, tags=tag_protos))
        self._verify_requests(mock_http, creds, "runs/log-batch", "POST", body)

    with mock.patch('mlflow.store.rest_store.http_request_safe') as mock_http:
        store.delete_run("u25")
        self._verify_requests(mock_http, creds, "runs/delete", "POST",
                              message_to_json(DeleteRun(run_id="u25")))

    with mock.patch('mlflow.store.rest_store.http_request_safe') as mock_http:
        store.restore_run("u76")
        self._verify_requests(mock_http, creds, "runs/restore", "POST",
                              message_to_json(RestoreRun(run_id="u76")))

    with mock.patch('mlflow.store.rest_store.http_request_safe') as mock_http:
        store.delete_experiment("0")
        self._verify_requests(mock_http, creds, "experiments/delete", "POST",
                              message_to_json(DeleteExperiment(experiment_id="0")))

    with mock.patch('mlflow.store.rest_store.http_request_safe') as mock_http:
        store.restore_experiment("0")
        self._verify_requests(mock_http, creds, "experiments/restore", "POST",
                              message_to_json(RestoreExperiment(experiment_id="0")))
def log_param(self, run_id, key, value):
    """Logs a parameter against the given run id. Value will be converted to a string."""
    _validate_param_name(key)
    param = Param(key, str(value))
    self.store.log_param(run_id, param)
def report_params(self, m: Dict[str, Union[str, Dict]]) -> None:
    flat_m = ua.flatten(m)
    self._log_batch(params=[Param(sanitize_key(k), _sanitize_param_value(str(v)))
                            for k, v in flat_m.items()])