def test_client_create_run_overrides(mock_store):
    experiment_id = mock.Mock()
    user = mock.Mock()
    start_time = mock.Mock()
    tags = {
        MLFLOW_USER: user,
        MLFLOW_PARENT_RUN_ID: mock.Mock(),
        MLFLOW_SOURCE_TYPE: SourceType.to_string(SourceType.JOB),
        MLFLOW_SOURCE_NAME: mock.Mock(),
        MLFLOW_PROJECT_ENTRY_POINT: mock.Mock(),
        MLFLOW_GIT_COMMIT: mock.Mock(),
        "other-key": "other-value",
    }
    MlflowClient().create_run(experiment_id, start_time, tags)
    mock_store.create_run.assert_called_once_with(
        experiment_id=experiment_id,
        user_id=user,
        start_time=start_time,
        tags=[RunTag(key, value) for key, value in tags.items()],
    )
    mock_store.reset_mock()
    MlflowClient().create_run(experiment_id, start_time, tags)
    mock_store.create_run.assert_called_once_with(
        experiment_id=experiment_id,
        user_id=user,
        start_time=start_time,
        tags=[RunTag(key, value) for key, value in tags.items()],
    )
def test_validate_batch_log_data():
    metrics_with_bad_key = [
        Metric("good-metric-key", 1.0, 0, 0),
        Metric("super-long-bad-key" * 1000, 4.0, 0, 0),
    ]
    metrics_with_bad_val = [Metric("good-metric-key", "not-a-double-val", 0, 0)]
    metrics_with_bad_ts = [Metric("good-metric-key", 1.0, "not-a-timestamp", 0)]
    metrics_with_neg_ts = [Metric("good-metric-key", 1.0, -123, 0)]
    metrics_with_bad_step = [Metric("good-metric-key", 1.0, 0, "not-a-step")]
    params_with_bad_key = [
        Param("good-param-key", "hi"),
        Param("super-long-bad-key" * 1000, "but-good-val"),
    ]
    params_with_bad_val = [
        Param("good-param-key", "hi"),
        Param("another-good-key", "but-bad-val" * 1000),
    ]
    tags_with_bad_key = [
        RunTag("good-tag-key", "hi"),
        RunTag("super-long-bad-key" * 1000, "but-good-val"),
    ]
    tags_with_bad_val = [
        RunTag("good-tag-key", "hi"),
        RunTag("another-good-key", "but-bad-val" * 1000),
    ]
    bad_kwargs = {
        "metrics": [
            metrics_with_bad_key,
            metrics_with_bad_val,
            metrics_with_bad_ts,
            metrics_with_neg_ts,
            metrics_with_bad_step,
        ],
        "params": [params_with_bad_key, params_with_bad_val],
        "tags": [tags_with_bad_key, tags_with_bad_val],
    }
    good_kwargs = {"metrics": [], "params": [], "tags": []}
    for arg_name, arg_values in bad_kwargs.items():
        for arg_value in arg_values:
            final_kwargs = copy.deepcopy(good_kwargs)
            final_kwargs[arg_name] = arg_value
            with pytest.raises(MlflowException):
                _validate_batch_log_data(**final_kwargs)
    # Test that we don't reject entities within the limit
    _validate_batch_log_data(
        metrics=[Metric("metric-key", 1.0, 0, 0)],
        params=[Param("param-key", "param-val")],
        tags=[RunTag("tag-key", "tag-val")])
def to_mlflow_entity(self):
    """
    Convert DB model to corresponding MLflow entity.

    :return: :py:class:`mlflow.entities.RunTag`.
    """
    return RunTag(key=self.key, value=self.value)
def create_run(self, experiment_id, start_time=None, tags=None):
    """
    Create a :py:class:`mlflow.entities.Run` object that can be associated with
    metrics, parameters, artifacts, etc.
    Unlike :py:func:`mlflow.projects.run`, creates objects but does not run code.
    Unlike :py:func:`mlflow.start_run`, does not change the "active run" used by
    :py:func:`mlflow.log_param`.

    :param experiment_id: The ID of the experiment to create a run in.
    :param start_time: If not provided, use the current timestamp.
    :param tags: A dictionary of key-value pairs that are converted into
                 :py:class:`mlflow.entities.RunTag` objects.
    :return: :py:class:`mlflow.entities.Run` that was created.
    """
    tags = tags if tags else {}

    # Extract user from tags
    # This logic is temporary; the user_id attribute of runs is deprecated and will be
    # removed in a later release.
    user_id = tags.get(MLFLOW_USER, "unknown")

    return self.store.create_run(
        experiment_id=experiment_id,
        user_id=user_id,
        start_time=start_time or int(time.time() * 1000),
        tags=[RunTag(key, value) for (key, value) in iteritems(tags)])
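# Hedged usage sketch for ``create_run`` above (not part of the library source).
# It assumes the standard ``mlflow`` import path and a default local ``./mlruns``
# tracking store; the experiment name and tag values are illustrative only.
from mlflow.tracking import MlflowClient
from mlflow.utils.mlflow_tags import MLFLOW_USER


def _example_create_run():
    client = MlflowClient()
    experiment_id = client.create_experiment("create-run-demo")
    # Tags are plain key/value pairs; MLFLOW_USER overrides the inferred user id,
    # and every other key simply becomes a RunTag on the new run.
    run = client.create_run(experiment_id, tags={MLFLOW_USER: "alice", "team": "search"})
    print(run.info.run_id, run.data.tags)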
def test_validate_batch_log_limits():
    too_many_metrics = [Metric("metric-key-%s" % i, 1, 0, i * 2) for i in range(1001)]
    too_many_params = [Param("param-key-%s" % i, "b") for i in range(101)]
    too_many_tags = [RunTag("tag-key-%s" % i, "b") for i in range(101)]

    good_kwargs = {"metrics": [], "params": [], "tags": []}
    bad_kwargs = {
        "metrics": [too_many_metrics],
        "params": [too_many_params],
        "tags": [too_many_tags],
    }
    for arg_name, arg_values in bad_kwargs.items():
        for arg_value in arg_values:
            final_kwargs = copy.deepcopy(good_kwargs)
            final_kwargs[arg_name] = arg_value
            with pytest.raises(MlflowException):
                _validate_batch_log_limits(**final_kwargs)
    # Test the case where there are too many entities in aggregate
    with pytest.raises(MlflowException):
        _validate_batch_log_limits(too_many_metrics[:900], too_many_params[:51],
                                   too_many_tags[:50])
    # Test that we don't reject entities within the limit
    _validate_batch_log_limits(too_many_metrics[:1000], [], [])
    _validate_batch_log_limits([], too_many_params[:100], [])
    _validate_batch_log_limits([], [], too_many_tags[:100])
def test_record_logged_model(self):
    store = self.get_store()
    run_id = self.create_test_run().info.run_id
    m = Model(artifact_path="model/path", run_id=run_id, flavors={"tf": "flavor body"})
    store.record_logged_model(run_id, m)
    self._verify_logged(
        store, run_id=run_id, params=[], metrics=[],
        tags=[RunTag(MLFLOW_LOGGED_MODELS, json.dumps([m.to_dict()]))])
    m2 = Model(artifact_path="some/other/path", run_id=run_id,
               flavors={"R": {"property": "value"}})
    store.record_logged_model(run_id, m2)
    self._verify_logged(
        store, run_id, params=[], metrics=[],
        tags=[RunTag(MLFLOW_LOGGED_MODELS, json.dumps([m.to_dict(), m2.to_dict()]))])
    m3 = Model(artifact_path="some/other/path2", run_id=run_id,
               flavors={"R2": {"property": "value"}})
    store.record_logged_model(run_id, m3)
    self._verify_logged(
        store, run_id, params=[], metrics=[],
        tags=[RunTag(MLFLOW_LOGGED_MODELS,
                     json.dumps([m.to_dict(), m2.to_dict(), m3.to_dict()]))])
    with self.assertRaises(TypeError):
        store.record_logged_model(run_id, m.to_dict())
def test_search_runs_data():
    runs = [
        create_run(
            metrics=[Metric("mse", 0.2, 0, 0)],
            params=[Param("param", "value")],
            tags=[RunTag("tag", "value")],
            start=1564675200000, end=1564683035000),
        create_run(
            metrics=[Metric("mse", 0.6, 0, 0), Metric("loss", 1.2, 0, 5)],
            params=[Param("param2", "val"), Param("k", "v")],
            tags=[RunTag("tag2", "v2")],
            start=1564765200000, end=1564783200000),
    ]
    with mock.patch('mlflow.tracking.fluent._get_paginated_runs', return_value=runs):
        pdf = search_runs()
        data = {
            'status': [RunStatus.FINISHED] * 2,
            'artifact_uri': [None] * 2,
            'run_id': [''] * 2,
            'experiment_id': [""] * 2,
            'metrics.mse': [0.2, 0.6],
            'metrics.loss': [np.nan, 1.2],
            'params.param': ["value", None],
            'params.param2': [None, "val"],
            'params.k': [None, "v"],
            'tags.tag': ["value", None],
            'tags.tag2': [None, "v2"],
            'start_time': [
                pd.to_datetime(1564675200000, unit="ms", utc=True),
                pd.to_datetime(1564765200000, unit="ms", utc=True),
            ],
            'end_time': [
                pd.to_datetime(1564683035000, unit="ms", utc=True),
                pd.to_datetime(1564783200000, unit="ms", utc=True),
            ],
        }
        expected_df = pd.DataFrame(data)
        pd.testing.assert_frame_equal(pdf, expected_df, check_like=True,
                                      check_frame_type=False)
def _create():
    metrics = [Metric(key=random_str(10), value=random_int(0, 1000),
                      timestamp=int(time.time()) + random_int(-1e4, 1e4),
                      step=random_int())]
    params = [Param(random_str(10), random_str(random_int(10, 35))) for _ in range(10)]  # noqa
    tags = [RunTag(random_str(10), random_str(random_int(10, 35))) for _ in range(10)]  # noqa
    rd = RunData(metrics=metrics, params=params, tags=tags)
    return rd, metrics, params, tags
def _set_tag():
    request_message = _get_request_message(SetTag())
    tag = RunTag(request_message.key, request_message.value)
    run_id = request_message.run_id or request_message.run_uuid
    _get_tracking_store().set_tag(run_id, tag)
    response_message = SetTag.Response()
    response = Response(mimetype='application/json')
    response.set_data(message_to_json(response_message))
    return response
def set_tag(self, run_id, key, value):
    """
    Set a tag on the run with the specified ID. Value is converted to a string.

    :param run_id: String ID of the run.
    :param key: Name of the tag.
    :param value: Tag value (converted to a string).
    """
    _validate_tag_name(key)
    tag = RunTag(key, str(value))
    self.store.set_tag(run_id, tag)
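# Hedged usage sketch for ``set_tag`` above (not part of the library source).
# It assumes a ``client`` and ``run_id`` like those produced in the ``create_run``
# sketch earlier; the tag key "data_version" is illustrative.
def _example_set_tag(client, run_id):
    # Non-string values are passed through str(), so 42 is stored as "42".
    client.set_tag(run_id, "data_version", 42)
    assert client.get_run(run_id).data.tags["data_version"] == "42"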
def set_tags(tags):
    """
    Log a batch of tags for the current run. If no run is active, this method will create
    a new active run.

    :param tags: Dictionary of tag name (string) -> value (string, but will be string-ified
                 if not).
    :returns: None
    """
    run_id = _get_or_start_run().info.run_id
    tags_arr = [RunTag(key, str(value)) for key, value in tags.items()]
    MlflowClient().log_batch(run_id=run_id, metrics=[], params=[], tags=tags_arr)
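# Hedged sketch of the fluent ``set_tags`` API above (not part of the library
# source). It assumes the standard top-level ``mlflow`` import; the tag keys and
# values are illustrative.
import mlflow


def _example_set_tags():
    with mlflow.start_run():
        # Values are string-ified before storage, so the integer 3 becomes "3".
        mlflow.set_tags({"release.version": "2.2.0", "release.candidate": 3})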
def _create_run():
    request_message = _get_request_message(CreateRun())
    tags = [RunTag(tag.key, tag.value) for tag in request_message.tags]
    run = _get_tracking_store().create_run(
        experiment_id=request_message.experiment_id,
        user_id=request_message.user_id,
        start_time=request_message.start_time,
        tags=tags)
    response_message = CreateRun.Response()
    response_message.run.MergeFrom(run.to_proto())
    response = Response(mimetype='application/json')
    response.set_data(message_to_json(response_message))
    return response
def test_log_batch(mlflow_client, backend_store_uri):
    experiment_id = mlflow_client.create_experiment('Batch em up')
    created_run = mlflow_client.create_run(experiment_id)
    run_id = created_run.info.run_id
    mlflow_client.log_batch(
        run_id=run_id,
        metrics=[Metric("metric", 123.456, 789, 3)],
        params=[Param("param", "value")],
        tags=[RunTag("taggity", "do-dah")])
    run = mlflow_client.get_run(run_id)
    assert run.data.metrics.get('metric') == 123.456
    assert run.data.params.get('param') == 'value'
    assert run.data.tags.get('taggity') == 'do-dah'
    metric_history = mlflow_client.get_metric_history(run_id, "metric")
    assert len(metric_history) == 1
    metric = metric_history[0]
    assert metric.key == "metric"
    assert metric.value == 123.456
    assert metric.timestamp == 789
    assert metric.step == 3
def _log_batch():
    _validate_batch_log_api_req(_get_request_json())
    request_message = _get_request_message(LogBatch())
    metrics = [Metric.from_proto(proto_metric) for proto_metric in request_message.metrics]
    params = [Param.from_proto(proto_param) for proto_param in request_message.params]
    tags = [RunTag.from_proto(proto_tag) for proto_tag in request_message.tags]
    _get_tracking_store().log_batch(
        run_id=request_message.run_id, metrics=metrics, params=params, tags=tags)
    response_message = LogBatch.Response()
    response = Response(mimetype='application/json')
    response.set_data(message_to_json(response_message))
    return response
def record_logged_model(self, run_id, mlflow_model):
    if not isinstance(mlflow_model, Model):
        raise TypeError(
            "Argument 'mlflow_model' should be mlflow.models.Model, got '{}'"
            .format(type(mlflow_model)))
    _validate_run_id(run_id)
    run_info = self._get_run_info(run_id)
    check_run_is_active(run_info)
    model_dict = mlflow_model.to_dict()
    path = self._get_tag_path(run_info.experiment_id, run_info.run_id, MLFLOW_LOGGED_MODELS)
    if os.path.exists(path):
        with open(path, "r") as f:
            model_list = json.loads(f.read())
    else:
        model_list = []
    tag = RunTag(MLFLOW_LOGGED_MODELS, json.dumps(model_list + [model_dict]))
    try:
        self._set_run_tag(run_info, tag)
    except Exception as e:
        raise MlflowException(e, INTERNAL_ERROR)
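# Illustrative sketch (not library code) of the tag-accumulation behavior in
# ``record_logged_model`` above: each call appends the new model dict to the JSON
# list stored under the MLFLOW_LOGGED_MODELS tag. The values below are hypothetical.
import json

existing_tag_value = '[{"artifact_path": "model/path"}]'   # previous tag contents
new_model_dict = {"artifact_path": "some/other/path"}      # mlflow_model.to_dict()
updated_tag_value = json.dumps(json.loads(existing_tag_value) + [new_model_dict])
# -> '[{"artifact_path": "model/path"}, {"artifact_path": "some/other/path"}]'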
def _get_tag_from_file(parent_path, tag_name):
    _validate_tag_name(tag_name)
    tag_data = read_file(parent_path, tag_name)
    return RunTag(tag_name, tag_data)
def test_log_batch():
    expected_metrics = {"metric-key0": 1.0, "metric-key1": 4.0}
    expected_params = {"param-key0": "param-val0", "param-key1": "param-val1"}
    exact_expected_tags = {"tag-key0": "tag-val0", "tag-key1": "tag-val1"}
    approx_expected_tags = set([MLFLOW_USER, MLFLOW_SOURCE_NAME, MLFLOW_SOURCE_TYPE])

    t = int(time.time())
    sorted_expected_metrics = sorted(expected_metrics.items(), key=lambda kv: kv[0])
    metrics = [Metric(key=key, value=value, timestamp=t, step=i)
               for i, (key, value) in enumerate(sorted_expected_metrics)]
    params = [Param(key=key, value=value) for key, value in expected_params.items()]
    tags = [RunTag(key=key, value=value) for key, value in exact_expected_tags.items()]

    with start_run() as active_run:
        run_id = active_run.info.run_id
        kiwi.tracking.MlflowClient().log_batch(
            run_id=run_id, metrics=metrics, params=params, tags=tags)
    client = tracking.MlflowClient()
    finished_run = client.get_run(run_id)
    # Validate metrics
    assert len(finished_run.data.metrics) == 2
    for key, value in finished_run.data.metrics.items():
        assert expected_metrics[key] == value
    metric_history0 = client.get_metric_history(run_id, "metric-key0")
    assert set([(m.value, m.timestamp, m.step) for m in metric_history0]) == set([
        (1.0, t, 0),
    ])
    metric_history1 = client.get_metric_history(run_id, "metric-key1")
    assert set([(m.value, m.timestamp, m.step) for m in metric_history1]) == set([
        (4.0, t, 1),
    ])
    # Validate tags (for automatically-set tags)
    assert len(finished_run.data.tags) == len(exact_expected_tags) + len(approx_expected_tags)
    for tag_key, tag_value in finished_run.data.tags.items():
        if tag_key in approx_expected_tags:
            pass
        else:
            assert exact_expected_tags[tag_key] == tag_value
    # Validate params
    assert finished_run.data.params == expected_params
    # Test that log_batch works with fewer params
    new_tags = {"1": "2", "3": "4", "5": "6"}
    tags = [RunTag(key=key, value=value) for key, value in new_tags.items()]
    client.log_batch(run_id=run_id, tags=tags)
    finished_run_2 = client.get_run(run_id)
    # Validate tags (for automatically-set tags)
    assert len(finished_run_2.data.tags) == len(finished_run.data.tags) + 3
    for tag_key, tag_value in finished_run_2.data.tags.items():
        if tag_key in new_tags:
            assert new_tags[tag_key] == tag_value
def test_requestor(self, request):
    response = mock.MagicMock()
    response.status_code = 200
    response.text = '{}'
    request.return_value = response

    creds = MlflowHostCreds('https://hello')
    store = RestStore(lambda: creds)

    user_name = "mock user"
    source_name = "rest test"

    source_name_patch = mock.patch(
        "mlflow.tracking.context.default_context._get_source_name", return_value=source_name
    )
    source_type_patch = mock.patch(
        "mlflow.tracking.context.default_context._get_source_type",
        return_value=SourceType.LOCAL
    )
    with mock.patch('mlflow.utils.rest_utils.http_request') as mock_http, \
            mock.patch('mlflow.tracking._tracking_service.utils._get_store',
                       return_value=store), \
            mock.patch('mlflow.tracking.context.default_context._get_user',
                       return_value=user_name), \
            mock.patch('time.time', return_value=13579), \
            source_name_patch, source_type_patch:
        with kiwi.start_run(experiment_id="43"):
            cr_body = message_to_json(CreateRun(
                experiment_id="43", user_id=user_name, start_time=13579000,
                tags=[ProtoRunTag(key='mlflow.source.name', value=source_name),
                      ProtoRunTag(key='mlflow.source.type', value='LOCAL'),
                      ProtoRunTag(key='mlflow.user', value=user_name)]))
            expected_kwargs = self._args(creds, "runs/create", "POST", cr_body)

            assert mock_http.call_count == 1
            actual_kwargs = mock_http.call_args[1]

            # Test the passed tag values separately from the rest of the request
            # Tag order is inconsistent on Python 2 and 3, but the order does not matter
            expected_tags = expected_kwargs['json'].pop('tags')
            actual_tags = actual_kwargs['json'].pop('tags')
            assert (
                sorted(expected_tags, key=lambda t: t['key']) ==
                sorted(actual_tags, key=lambda t: t['key'])
            )
            assert expected_kwargs == actual_kwargs

    with mock.patch('mlflow.utils.rest_utils.http_request') as mock_http:
        store.log_param("some_uuid", Param("k1", "v1"))
        body = message_to_json(LogParam(
            run_uuid="some_uuid", run_id="some_uuid", key="k1", value="v1"))
        self._verify_requests(mock_http, creds, "runs/log-parameter", "POST", body)

    with mock.patch('mlflow.utils.rest_utils.http_request') as mock_http:
        store.set_experiment_tag("some_id", ExperimentTag("t1", "abcd" * 1000))
        body = message_to_json(SetExperimentTag(
            experiment_id="some_id", key="t1", value="abcd" * 1000))
        self._verify_requests(mock_http, creds, "experiments/set-experiment-tag", "POST", body)

    with mock.patch('mlflow.utils.rest_utils.http_request') as mock_http:
        store.set_tag("some_uuid", RunTag("t1", "abcd" * 1000))
        body = message_to_json(SetTag(
            run_uuid="some_uuid", run_id="some_uuid", key="t1", value="abcd" * 1000))
        self._verify_requests(mock_http, creds, "runs/set-tag", "POST", body)

    with mock.patch('mlflow.utils.rest_utils.http_request') as mock_http:
        store.delete_tag("some_uuid", "t1")
        body = message_to_json(DeleteTag(run_id="some_uuid", key="t1"))
        self._verify_requests(mock_http, creds, "runs/delete-tag", "POST", body)

    with mock.patch('mlflow.utils.rest_utils.http_request') as mock_http:
        store.log_metric("u2", Metric("m1", 0.87, 12345, 3))
        body = message_to_json(LogMetric(
            run_uuid="u2", run_id="u2", key="m1", value=0.87, timestamp=12345, step=3))
        self._verify_requests(mock_http, creds, "runs/log-metric", "POST", body)

    with mock.patch('mlflow.utils.rest_utils.http_request') as mock_http:
        metrics = [Metric("m1", 0.87, 12345, 0), Metric("m2", 0.49, 12345, -1),
                   Metric("m3", 0.58, 12345, 2)]
        params = [Param("p1", "p1val"), Param("p2", "p2val")]
        tags = [RunTag("t1", "t1val"), RunTag("t2", "t2val")]
        store.log_batch(run_id="u2", metrics=metrics, params=params, tags=tags)
        metric_protos = [metric.to_proto() for metric in metrics]
        param_protos = [param.to_proto() for param in params]
        tag_protos = [tag.to_proto() for tag in tags]
        body = message_to_json(LogBatch(run_id="u2", metrics=metric_protos,
                                        params=param_protos, tags=tag_protos))
        self._verify_requests(mock_http, creds, "runs/log-batch", "POST", body)

    with mock.patch('mlflow.utils.rest_utils.http_request') as mock_http:
        store.delete_run("u25")
        self._verify_requests(mock_http, creds, "runs/delete", "POST",
                              message_to_json(DeleteRun(run_id="u25")))

    with mock.patch('mlflow.utils.rest_utils.http_request') as mock_http:
        store.restore_run("u76")
        self._verify_requests(mock_http, creds, "runs/restore", "POST",
                              message_to_json(RestoreRun(run_id="u76")))

    with mock.patch('mlflow.utils.rest_utils.http_request') as mock_http:
        store.delete_experiment("0")
        self._verify_requests(mock_http, creds, "experiments/delete", "POST",
                              message_to_json(DeleteExperiment(experiment_id="0")))

    with mock.patch('mlflow.utils.rest_utils.http_request') as mock_http:
        store.restore_experiment("0")
        self._verify_requests(mock_http, creds, "experiments/restore", "POST",
                              message_to_json(RestoreExperiment(experiment_id="0")))

    with mock.patch('mlflow.utils.rest_utils.http_request') as mock_http:
        response = mock.MagicMock()
        response.text = '{"runs": ["1a", "2b", "3c"], "next_page_token": "67890fghij"}'
        mock_http.return_value = response
        result = store.search_runs(["0", "1"], "params.p1 = 'a'", ViewType.ACTIVE_ONLY,
                                   max_results=10, order_by=["a"], page_token="12345abcde")
        expected_message = SearchRuns(experiment_ids=["0", "1"], filter="params.p1 = 'a'",
                                      run_view_type=ViewType.to_proto(ViewType.ACTIVE_ONLY),
                                      max_results=10, order_by=["a"],
                                      page_token="12345abcde")
        self._verify_requests(mock_http, creds, "runs/search", "POST",
                              message_to_json(expected_message))
        assert result.token == "67890fghij"

    with mock.patch('mlflow.utils.rest_utils.http_request') as mock_http:
        run_id = "run_id"
        m = Model(artifact_path="model/path", run_id="run_id", flavors={"tf": "flavor body"})
        result = store.record_logged_model("run_id", m)
        expected_message = LogModel(run_id=run_id, model_json=m.to_json())
        self._verify_requests(mock_http, creds, "runs/log-model", "POST",
                              message_to_json(expected_message))