def test_log_batch_validates_entity_names_and_values():
    bad_kwargs = {
        "metrics": [
            [Metric(key="../bad/metric/name", value=0.3, timestamp=3, step=0)],
            [Metric(key="ok-name", value="non-numerical-value", timestamp=3, step=0)],
            [Metric(key="ok-name", value=0.3, timestamp="non-numerical-timestamp", step=0)],
        ],
        "params": [[Param(key="../bad/param/name", value="my-val")]],
        "tags": [[RunTag(key="../bad/tag/name", value="my-val")]],
    }
    with start_run() as active_run:
        for kwarg, bad_values in bad_kwargs.items():
            for bad_kwarg_value in bad_values:
                final_kwargs = {
                    "run_id": active_run.info.run_id,
                    "metrics": [],
                    "params": [],
                    "tags": [],
                }
                final_kwargs[kwarg] = bad_kwarg_value
                with pytest.raises(MlflowException) as e:
                    tracking.MlflowClient().log_batch(**final_kwargs)
                assert e.value.error_code == ErrorCode.Name(INVALID_PARAMETER_VALUE)
def test_validate_batch_log_data():
    metrics_with_bad_key = [
        Metric("good-metric-key", 1.0, 0, 0),
        Metric("super-long-bad-key" * 1000, 4.0, 0, 0)
    ]
    metrics_with_bad_val = [Metric("good-metric-key", "not-a-double-val", 0, 0)]
    metrics_with_bad_ts = [Metric("good-metric-key", 1.0, "not-a-timestamp", 0)]
    metrics_with_neg_ts = [Metric("good-metric-key", 1.0, -123, 0)]
    metrics_with_bad_step = [Metric("good-metric-key", 1.0, 0, "not-a-step")]
    params_with_bad_key = [
        Param("good-param-key", "hi"),
        Param("super-long-bad-key" * 1000, "but-good-val")
    ]
    params_with_bad_val = [
        Param("good-param-key", "hi"),
        Param("another-good-key", "but-bad-val" * 1000)
    ]
    tags_with_bad_key = [
        RunTag("good-tag-key", "hi"),
        RunTag("super-long-bad-key" * 1000, "but-good-val")
    ]
    tags_with_bad_val = [
        RunTag("good-tag-key", "hi"),
        RunTag("another-good-key", "but-bad-val" * 1000)
    ]
    bad_kwargs = {
        "metrics": [
            metrics_with_bad_key, metrics_with_bad_val, metrics_with_bad_ts,
            metrics_with_neg_ts, metrics_with_bad_step
        ],
        "params": [params_with_bad_key, params_with_bad_val],
        "tags": [tags_with_bad_key, tags_with_bad_val],
    }
    good_kwargs = {"metrics": [], "params": [], "tags": []}
    for arg_name, arg_values in bad_kwargs.items():
        for arg_value in arg_values:
            final_kwargs = copy.deepcopy(good_kwargs)
            final_kwargs[arg_name] = arg_value
            with pytest.raises(MlflowException):
                _validate_batch_log_data(**final_kwargs)
    # Test that we don't reject entities within the limit
    _validate_batch_log_data(
        metrics=[Metric("metric-key", 1.0, 0, 0)],
        params=[Param("param-key", "param-val")],
        tags=[RunTag("tag-key", "tag-val")])
def to_mlflow_entity(self):
    """
    Convert DB model to corresponding MLflow entity.

    :return: :py:class:`mlflow.entities.Param`.
    """
    return Param(key=self.key, value=self.value)
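# Illustrative sketch only: `_FakeSqlParam` is a hypothetical stand-in for the
# real DB-backed model, shown here to make the one-to-one column-to-entity
# mapping performed by to_mlflow_entity() concrete.
class _FakeSqlParam(object):
    def __init__(self, key, value):
        self.key = key
        self.value = value

    def to_mlflow_entity(self):
        # Each stored column maps directly onto a field of the MLflow entity.
        return Param(key=self.key, value=self.value)

# e.g. _FakeSqlParam("lr", "0.01").to_mlflow_entity() yields Param("lr", "0.01")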
def test_validate_batch_log_limits():
    too_many_metrics = [Metric("metric-key-%s" % i, 1, 0, i * 2) for i in range(1001)]
    too_many_params = [Param("param-key-%s" % i, "b") for i in range(101)]
    too_many_tags = [RunTag("tag-key-%s" % i, "b") for i in range(101)]
    good_kwargs = {"metrics": [], "params": [], "tags": []}
    bad_kwargs = {
        "metrics": [too_many_metrics],
        "params": [too_many_params],
        "tags": [too_many_tags],
    }
    for arg_name, arg_values in bad_kwargs.items():
        for arg_value in arg_values:
            final_kwargs = copy.deepcopy(good_kwargs)
            final_kwargs[arg_name] = arg_value
            with pytest.raises(MlflowException):
                _validate_batch_log_limits(**final_kwargs)
    # Test the case where there are too many entities in aggregate
    with pytest.raises(MlflowException):
        _validate_batch_log_limits(too_many_metrics[:900], too_many_params[:51],
                                   too_many_tags[:50])
    # Test that we don't reject entities within the limit
    _validate_batch_log_limits(too_many_metrics[:1000], [], [])
    _validate_batch_log_limits([], too_many_params[:100], [])
    _validate_batch_log_limits([], [], too_many_tags[:100])
def log_param(self, run_id, key, value):
    """
    Log a parameter against the run ID. Value is converted to a string.
    """
    _validate_param_name(key)
    param = Param(key, str(value))
    self.store.log_param(run_id, param)
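# Hedged usage sketch for the client-level log_param above: `_demo_log_param`
# is illustrative only (not part of the module) and assumes a client instance
# plus an existing run id; it shows that non-string values are string-ified
# before being stored.
def _demo_log_param(client, run_id):
    client.log_param(run_id, "n_estimators", 100)  # stored as the string "100"
    client.log_param(run_id, "solver", "lbfgs")    # already a string, stored unchanged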
def test_search_runs_data():
    runs = [
        create_run(
            metrics=[Metric("mse", 0.2, 0, 0)],
            params=[Param("param", "value")],
            tags=[RunTag("tag", "value")],
            start=1564675200000,
            end=1564683035000),
        create_run(
            metrics=[Metric("mse", 0.6, 0, 0), Metric("loss", 1.2, 0, 5)],
            params=[Param("param2", "val"), Param("k", "v")],
            tags=[RunTag("tag2", "v2")],
            start=1564765200000,
            end=1564783200000)
    ]
    with mock.patch('mlflow.tracking.fluent._get_paginated_runs', return_value=runs):
        pdf = search_runs()
        data = {
            'status': [RunStatus.FINISHED] * 2,
            'artifact_uri': [None] * 2,
            'run_id': [''] * 2,
            'experiment_id': [""] * 2,
            'metrics.mse': [0.2, 0.6],
            'metrics.loss': [np.nan, 1.2],
            'params.param': ["value", None],
            'params.param2': [None, "val"],
            'params.k': [None, "v"],
            'tags.tag': ["value", None],
            'tags.tag2': [None, "v2"],
            'start_time': [
                pd.to_datetime(1564675200000, unit="ms", utc=True),
                pd.to_datetime(1564765200000, unit="ms", utc=True)
            ],
            'end_time': [
                pd.to_datetime(1564683035000, unit="ms", utc=True),
                pd.to_datetime(1564783200000, unit="ms", utc=True)
            ]
        }
        expected_df = pd.DataFrame(data)
        pd.testing.assert_frame_equal(pdf, expected_df, check_like=True,
                                      check_frame_type=False)
def _create():
    metrics = [Metric(key=random_str(10),
                      value=random_int(0, 1000),
                      timestamp=int(time.time()) + random_int(-1e4, 1e4),
                      step=random_int())]
    params = [Param(random_str(10), random_str(random_int(10, 35))) for _ in range(10)]  # noqa
    tags = [RunTag(random_str(10), random_str(random_int(10, 35))) for _ in range(10)]  # noqa
    rd = RunData(metrics=metrics, params=params, tags=tags)
    return rd, metrics, params, tags
def _log_param():
    request_message = _get_request_message(LogParam())
    param = Param(request_message.key, request_message.value)
    run_id = request_message.run_id or request_message.run_uuid
    _get_tracking_store().log_param(run_id, param)
    response_message = LogParam.Response()
    response = Response(mimetype='application/json')
    response.set_data(message_to_json(response_message))
    return response
def log_params(params):
    """
    Log a batch of params for the current run. If no run is active, this method will create a
    new active run.

    :param params: Dictionary of param_name: String -> value: (String, but will be string-ified
                   if not)
    :returns: None
    """
    run_id = _get_or_start_run().info.run_id
    params_arr = [Param(key, str(value)) for key, value in params.items()]
    MlflowClient().log_batch(run_id=run_id, metrics=[], params=params_arr, tags=[])
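# Hedged usage sketch for the fluent log_params above: `_demo_log_params` is
# illustrative only and assumes an active (or to-be-created) run; non-string
# values are string-ified before a single log_batch call is issued.
def _demo_log_params():
    log_params({
        "learning_rate": 0.01,  # stored as "0.01"
        "epochs": 10,           # stored as "10"
        "optimizer": "adam",    # already a string, stored as-is
    })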
def test_log_batch(mlflow_client, backend_store_uri):
    experiment_id = mlflow_client.create_experiment('Batch em up')
    created_run = mlflow_client.create_run(experiment_id)
    run_id = created_run.info.run_id
    mlflow_client.log_batch(
        run_id=run_id,
        metrics=[Metric("metric", 123.456, 789, 3)],
        params=[Param("param", "value")],
        tags=[RunTag("taggity", "do-dah")])
    run = mlflow_client.get_run(run_id)
    assert run.data.metrics.get('metric') == 123.456
    assert run.data.params.get('param') == 'value'
    assert run.data.tags.get('taggity') == 'do-dah'
    metric_history = mlflow_client.get_metric_history(run_id, "metric")
    assert len(metric_history) == 1
    metric = metric_history[0]
    assert metric.key == "metric"
    assert metric.value == 123.456
    assert metric.timestamp == 789
    assert metric.step == 3
def _log_batch():
    _validate_batch_log_api_req(_get_request_json())
    request_message = _get_request_message(LogBatch())
    metrics = [Metric.from_proto(proto_metric) for proto_metric in request_message.metrics]
    params = [Param.from_proto(proto_param) for proto_param in request_message.params]
    tags = [RunTag.from_proto(proto_tag) for proto_tag in request_message.tags]
    _get_tracking_store().log_batch(
        run_id=request_message.run_id, metrics=metrics, params=params, tags=tags)
    response_message = LogBatch.Response()
    response = Response(mimetype='application/json')
    response.set_data(message_to_json(response_message))
    return response
def _get_param_from_file(parent_path, param_name):
    _validate_param_name(param_name)
    value = read_file(parent_path, param_name)
    return Param(param_name, value)
def test_log_batch():
    expected_metrics = {"metric-key0": 1.0, "metric-key1": 4.0}
    expected_params = {"param-key0": "param-val0", "param-key1": "param-val1"}
    exact_expected_tags = {"tag-key0": "tag-val0", "tag-key1": "tag-val1"}
    approx_expected_tags = set([MLFLOW_USER, MLFLOW_SOURCE_NAME, MLFLOW_SOURCE_TYPE])

    t = int(time.time())
    sorted_expected_metrics = sorted(expected_metrics.items(), key=lambda kv: kv[0])
    metrics = [
        Metric(key=key, value=value, timestamp=t, step=i)
        for i, (key, value) in enumerate(sorted_expected_metrics)
    ]
    params = [Param(key=key, value=value) for key, value in expected_params.items()]
    tags = [RunTag(key=key, value=value) for key, value in exact_expected_tags.items()]

    with start_run() as active_run:
        run_id = active_run.info.run_id
        kiwi.tracking.MlflowClient().log_batch(
            run_id=run_id, metrics=metrics, params=params, tags=tags)
    client = tracking.MlflowClient()
    finished_run = client.get_run(run_id)
    # Validate metrics
    assert len(finished_run.data.metrics) == 2
    for key, value in finished_run.data.metrics.items():
        assert expected_metrics[key] == value
    metric_history0 = client.get_metric_history(run_id, "metric-key0")
    assert set([(m.value, m.timestamp, m.step) for m in metric_history0]) == set([
        (1.0, t, 0),
    ])
    metric_history1 = client.get_metric_history(run_id, "metric-key1")
    assert set([(m.value, m.timestamp, m.step) for m in metric_history1]) == set([
        (4.0, t, 1),
    ])
    # Validate tags (for automatically-set tags)
    assert len(finished_run.data.tags) == len(exact_expected_tags) + len(approx_expected_tags)
    for tag_key, tag_value in finished_run.data.tags.items():
        if tag_key in approx_expected_tags:
            pass
        else:
            assert exact_expected_tags[tag_key] == tag_value
    # Validate params
    assert finished_run.data.params == expected_params
    # test that log_batch works with fewer params
    new_tags = {"1": "2", "3": "4", "5": "6"}
    tags = [RunTag(key=key, value=value) for key, value in new_tags.items()]
    client.log_batch(run_id=run_id, tags=tags)
    finished_run_2 = client.get_run(run_id)
    # Validate tags (for automatically-set tags)
    assert len(finished_run_2.data.tags) == len(finished_run.data.tags) + 3
    for tag_key, tag_value in finished_run_2.data.tags.items():
        if tag_key in new_tags:
            assert new_tags[tag_key] == tag_value
def test_spark_integration():
    key = SparkMLParam(Identifiable(), "name", "doc")
    value = 123
    param = Param(key, value)
    assert param.key == "name"
    assert param.value == "123"
def _create_run(uri, experiment_id, work_dir, version, entry_point, parameters):
    """
    Create a ``Run`` against the current MLflow tracking server, logging metadata (e.g. the URI,
    entry point, and parameters of the project) about the run.

    Return an ``ActiveRun`` that can be used to report additional data about the run
    (metrics/params) to the tracking server.
    """
    if _is_local_uri(uri):
        source_name = tracking._tracking_service.utils._get_git_url_if_present(_expand_uri(uri))
    else:
        source_name = _expand_uri(uri)
    source_version = _get_git_commit(work_dir)
    git_diff = _get_git_diff(work_dir)
    existing_run = fluent.active_run()
    if existing_run:
        parent_run_id = existing_run.info.run_id
    else:
        parent_run_id = None

    tags = {
        MLFLOW_USER: _get_user(),
        MLFLOW_SOURCE_NAME: source_name,
        MLFLOW_SOURCE_TYPE: SourceType.to_string(SourceType.PROJECT),
        MLFLOW_PROJECT_ENTRY_POINT: entry_point
    }
    # System tags - all include a string header, thus none of them is empty
    tags[KIWI_SYSTEM_HW_CPU] = _get_cpu_info()
    tags[KIWI_SYSTEM_HW_MEMORY] = _get_mem_info()
    tags[KIWI_SYSTEM_HW_DISK] = _get_disk_info()
    tags[KIWI_SYSTEM_HW_GPU] = _get_gpu_info()
    tags[KIWI_SYSTEM_OS] = _get_os_info()

    if source_version is not None:
        tags[MLFLOW_GIT_COMMIT] = source_version
        tags[MLFLOW_GIT_DIFF] = git_diff
    if parent_run_id is not None:
        tags[MLFLOW_PARENT_RUN_ID] = parent_run_id

    repo_url = _get_git_repo_url(work_dir)
    if repo_url is not None:
        tags[MLFLOW_GIT_REPO_URL] = repo_url
        tags[LEGACY_MLFLOW_GIT_REPO_URL] = repo_url

    # Add branch name tag if a branch is specified through --version
    if _is_valid_branch_name(work_dir, version):
        tags[MLFLOW_GIT_BRANCH] = version
        tags[LEGACY_MLFLOW_GIT_BRANCH_NAME] = version

    active_run = tracking.MlflowClient().create_run(experiment_id=experiment_id, tags=tags)

    project = _project_spec.load_project(work_dir)
    # Consolidate parameters for logging.
    # `storage_dir` is `None` since we want to log the actual path, not the downloaded local path.
    entry_point_obj = project.get_entry_point(entry_point)
    final_params, extra_params = entry_point_obj.compute_parameters(parameters, storage_dir=None)
    params_list = [
        Param(key, value)
        for key, value in list(final_params.items()) + list(extra_params.items())
    ]
    tracking.MlflowClient().log_batch(active_run.info.run_id, params=params_list)
    return active_run
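# Hedged sketch of the parameter-consolidation step performed at the end of
# _create_run above: `_demo_consolidate_params` is illustrative only and uses
# two plain dicts in place of the entry point's computed parameters; it shows
# how both dicts are flattened into one list of Param entities and logged with
# a single log_batch call.
def _demo_consolidate_params(client, run_id):
    final_params = {"alpha": "0.5"}
    extra_params = {"data_path": "s3://bucket/input.parquet"}
    params_list = [
        Param(key, value)
        for key, value in list(final_params.items()) + list(extra_params.items())
    ]
    client.log_batch(run_id, params=params_list)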
def test_requestor(self, request):
    # NOTE: attributes are set on the MagicMock class itself here, so every
    # MagicMock produced by the patches below reports status_code 200 and
    # text '{}' unless explicitly overridden.
    response = mock.MagicMock
    response.status_code = 200
    response.text = '{}'
    request.return_value = response

    creds = MlflowHostCreds('https://hello')
    store = RestStore(lambda: creds)

    user_name = "mock user"
    source_name = "rest test"

    source_name_patch = mock.patch(
        "mlflow.tracking.context.default_context._get_source_name", return_value=source_name
    )
    source_type_patch = mock.patch(
        "mlflow.tracking.context.default_context._get_source_type",
        return_value=SourceType.LOCAL
    )
    with mock.patch('mlflow.utils.rest_utils.http_request') as mock_http, \
            mock.patch('mlflow.tracking._tracking_service.utils._get_store',
                       return_value=store), \
            mock.patch('mlflow.tracking.context.default_context._get_user',
                       return_value=user_name), \
            mock.patch('time.time', return_value=13579), \
            source_name_patch, source_type_patch:
        with kiwi.start_run(experiment_id="43"):
            cr_body = message_to_json(CreateRun(
                experiment_id="43", user_id=user_name, start_time=13579000,
                tags=[ProtoRunTag(key='mlflow.source.name', value=source_name),
                      ProtoRunTag(key='mlflow.source.type', value='LOCAL'),
                      ProtoRunTag(key='mlflow.user', value=user_name)]))
            expected_kwargs = self._args(creds, "runs/create", "POST", cr_body)

            assert mock_http.call_count == 1
            actual_kwargs = mock_http.call_args[1]

            # Test the passed tag values separately from the rest of the request
            # Tag order is inconsistent on Python 2 and 3, but the order does not matter
            expected_tags = expected_kwargs['json'].pop('tags')
            actual_tags = actual_kwargs['json'].pop('tags')
            assert (
                sorted(expected_tags, key=lambda t: t['key']) ==
                sorted(actual_tags, key=lambda t: t['key'])
            )
            assert expected_kwargs == actual_kwargs

    with mock.patch('mlflow.utils.rest_utils.http_request') as mock_http:
        store.log_param("some_uuid", Param("k1", "v1"))
        body = message_to_json(LogParam(
            run_uuid="some_uuid", run_id="some_uuid", key="k1", value="v1"))
        self._verify_requests(mock_http, creds, "runs/log-parameter", "POST", body)

    with mock.patch('mlflow.utils.rest_utils.http_request') as mock_http:
        store.set_experiment_tag("some_id", ExperimentTag("t1", "abcd" * 1000))
        body = message_to_json(SetExperimentTag(
            experiment_id="some_id", key="t1", value="abcd" * 1000))
        self._verify_requests(mock_http, creds, "experiments/set-experiment-tag", "POST", body)

    with mock.patch('mlflow.utils.rest_utils.http_request') as mock_http:
        store.set_tag("some_uuid", RunTag("t1", "abcd" * 1000))
        body = message_to_json(SetTag(
            run_uuid="some_uuid", run_id="some_uuid", key="t1", value="abcd" * 1000))
        self._verify_requests(mock_http, creds, "runs/set-tag", "POST", body)

    with mock.patch('mlflow.utils.rest_utils.http_request') as mock_http:
        store.delete_tag("some_uuid", "t1")
        body = message_to_json(DeleteTag(run_id="some_uuid", key="t1"))
        self._verify_requests(mock_http, creds, "runs/delete-tag", "POST", body)

    with mock.patch('mlflow.utils.rest_utils.http_request') as mock_http:
        store.log_metric("u2", Metric("m1", 0.87, 12345, 3))
        body = message_to_json(LogMetric(
            run_uuid="u2", run_id="u2", key="m1", value=0.87, timestamp=12345, step=3))
        self._verify_requests(mock_http, creds, "runs/log-metric", "POST", body)

    with mock.patch('mlflow.utils.rest_utils.http_request') as mock_http:
        metrics = [Metric("m1", 0.87, 12345, 0),
                   Metric("m2", 0.49, 12345, -1),
                   Metric("m3", 0.58, 12345, 2)]
        params = [Param("p1", "p1val"), Param("p2", "p2val")]
        tags = [RunTag("t1", "t1val"), RunTag("t2", "t2val")]
        store.log_batch(run_id="u2", metrics=metrics, params=params, tags=tags)
        metric_protos = [metric.to_proto() for metric in metrics]
        param_protos = [param.to_proto() for param in params]
        tag_protos = [tag.to_proto() for tag in tags]
        body = message_to_json(LogBatch(run_id="u2",
                                        metrics=metric_protos,
                                        params=param_protos,
                                        tags=tag_protos))
        self._verify_requests(mock_http, creds, "runs/log-batch", "POST", body)

    with mock.patch('mlflow.utils.rest_utils.http_request') as mock_http:
        store.delete_run("u25")
        self._verify_requests(mock_http, creds, "runs/delete", "POST",
                              message_to_json(DeleteRun(run_id="u25")))

    with mock.patch('mlflow.utils.rest_utils.http_request') as mock_http:
        store.restore_run("u76")
        self._verify_requests(mock_http, creds, "runs/restore", "POST",
                              message_to_json(RestoreRun(run_id="u76")))

    with mock.patch('mlflow.utils.rest_utils.http_request') as mock_http:
        store.delete_experiment("0")
        self._verify_requests(mock_http, creds, "experiments/delete", "POST",
                              message_to_json(DeleteExperiment(experiment_id="0")))

    with mock.patch('mlflow.utils.rest_utils.http_request') as mock_http:
        store.restore_experiment("0")
        self._verify_requests(mock_http, creds, "experiments/restore", "POST",
                              message_to_json(RestoreExperiment(experiment_id="0")))

    with mock.patch('mlflow.utils.rest_utils.http_request') as mock_http:
        response = mock.MagicMock
        response.text = '{"runs": ["1a", "2b", "3c"], "next_page_token": "67890fghij"}'
        mock_http.return_value = response
        result = store.search_runs(["0", "1"], "params.p1 = 'a'", ViewType.ACTIVE_ONLY,
                                   max_results=10, order_by=["a"], page_token="12345abcde")
        expected_message = SearchRuns(experiment_ids=["0", "1"], filter="params.p1 = 'a'",
                                      run_view_type=ViewType.to_proto(ViewType.ACTIVE_ONLY),
                                      max_results=10, order_by=["a"], page_token="12345abcde")
        self._verify_requests(mock_http, creds, "runs/search", "POST",
                              message_to_json(expected_message))
        assert result.token == "67890fghij"

    with mock.patch('mlflow.utils.rest_utils.http_request') as mock_http:
        run_id = "run_id"
        m = Model(artifact_path="model/path", run_id="run_id", flavors={"tf": "flavor body"})
        result = store.record_logged_model("run_id", m)
        expected_message = LogModel(run_id=run_id, model_json=m.to_json())
        self._verify_requests(mock_http, creds, "runs/log-model", "POST",
                              message_to_json(expected_message))