Example #1
def test_log_batch_validates_entity_names_and_values():
    bad_kwargs = {
        "metrics": [
            [Metric(key="../bad/metric/name", value=0.3, timestamp=3, step=0)],
            [
                Metric(key="ok-name",
                       value="non-numerical-value",
                       timestamp=3,
                       step=0)
            ],
            [
                Metric(key="ok-name",
                       value=0.3,
                       timestamp="non-numerical-timestamp",
                       step=0)
            ],
        ],
        "params": [[Param(key="../bad/param/name", value="my-val")]],
        "tags": [[Param(key="../bad/tag/name", value="my-val")]],
    }
    with start_run() as active_run:
        for kwarg, bad_values in bad_kwargs.items():
            for bad_kwarg_value in bad_values:
                final_kwargs = {
                    "run_id": active_run.info.run_id,
                    "metrics": [],
                    "params": [],
                    "tags": [],
                }
                final_kwargs[kwarg] = bad_kwarg_value
                with pytest.raises(MlflowException) as e:
                    tracking.MlflowClient().log_batch(**final_kwargs)
                assert e.value.error_code == ErrorCode.Name(
                    INVALID_PARAMETER_VALUE)
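Names like "../bad/metric/name" are rejected by MLflow's internal validators, which disallow path-traversal segments in entity keys. A minimal sketch exercising one of those helpers directly (_validate_metric_name is a private helper, used here purely for illustration):

from mlflow.exceptions import MlflowException
from mlflow.utils.validation import _validate_metric_name

try:
    _validate_metric_name("../bad/metric/name")
except MlflowException as e:
    # Path-like segments in a metric key trigger INVALID_PARAMETER_VALUE
    print(e.error_code)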
Example #2
def test_validate_batch_log_data():
    metrics_with_bad_key = [
        Metric("good-metric-key", 1.0, 0, 0),
        Metric("super-long-bad-key" * 1000, 4.0, 0, 0)
    ]
    metrics_with_bad_val = [
        Metric("good-metric-key", "not-a-double-val", 0, 0)
    ]
    metrics_with_bad_ts = [
        Metric("good-metric-key", 1.0, "not-a-timestamp", 0)
    ]
    metrics_with_neg_ts = [Metric("good-metric-key", 1.0, -123, 0)]
    metrics_with_bad_step = [Metric("good-metric-key", 1.0, 0, "not-a-step")]
    params_with_bad_key = [
        Param("good-param-key", "hi"),
        Param("super-long-bad-key" * 1000, "but-good-val")
    ]
    params_with_bad_val = [
        Param("good-param-key", "hi"),
        Param("another-good-key", "but-bad-val" * 1000)
    ]
    tags_with_bad_key = [
        RunTag("good-tag-key", "hi"),
        RunTag("super-long-bad-key" * 1000, "but-good-val")
    ]
    tags_with_bad_val = [
        RunTag("good-tag-key", "hi"),
        RunTag("another-good-key", "but-bad-val" * 1000)
    ]
    bad_kwargs = {
        "metrics": [
            metrics_with_bad_key, metrics_with_bad_val, metrics_with_bad_ts,
            metrics_with_neg_ts, metrics_with_bad_step
        ],
        "params": [params_with_bad_key, params_with_bad_val],
        "tags": [tags_with_bad_key, tags_with_bad_val],
    }
    good_kwargs = {"metrics": [], "params": [], "tags": []}
    for arg_name, arg_values in bad_kwargs.items():
        for arg_value in arg_values:
            final_kwargs = copy.deepcopy(good_kwargs)
            final_kwargs[arg_name] = arg_value
            with pytest.raises(MlflowException):
                _validate_batch_log_data(**final_kwargs)
    # Test that we don't reject entities within the limit
    _validate_batch_log_data(metrics=[Metric("metric-key", 1.0, 0, 0)],
                             params=[Param("param-key", "param-val")],
                             tags=[RunTag("tag-key", "tag-val")])
Example #3
    def to_mlflow_entity(self):
        """
        Convert DB model to corresponding MLflow entity.

        :return: :py:class:`mlflow.entities.Param`.
        """
        return Param(key=self.key, value=self.value)
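A hedged, self-contained sketch of this pattern in use; FakeSqlParam below is a hypothetical stand-in for the SQLAlchemy-backed model, not the real class:

from dataclasses import dataclass
from mlflow.entities import Param

@dataclass
class FakeSqlParam:
    # Hypothetical stand-in for the DB model whose to_mlflow_entity() is shown above
    key: str
    value: str

    def to_mlflow_entity(self):
        return Param(key=self.key, value=self.value)

row = FakeSqlParam(key="alpha", value="0.5")
entity = row.to_mlflow_entity()
assert (entity.key, entity.value) == ("alpha", "0.5")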
Example #4
def test_validate_batch_log_limits():
    too_many_metrics = [
        Metric("metric-key-%s" % i, 1, 0, i * 2) for i in range(1001)
    ]
    too_many_params = [Param("param-key-%s" % i, "b") for i in range(101)]
    too_many_tags = [RunTag("tag-key-%s" % i, "b") for i in range(101)]

    good_kwargs = {"metrics": [], "params": [], "tags": []}
    bad_kwargs = {
        "metrics": [too_many_metrics],
        "params": [too_many_params],
        "tags": [too_many_tags],
    }
    for arg_name, arg_values in bad_kwargs.items():
        for arg_value in arg_values:
            final_kwargs = copy.deepcopy(good_kwargs)
            final_kwargs[arg_name] = arg_value
            with pytest.raises(MlflowException):
                _validate_batch_log_limits(**final_kwargs)
    # Test the case where there are too many entities in aggregate
    with pytest.raises(MlflowException):
        _validate_batch_log_limits(too_many_metrics[:900],
                                   too_many_params[:51], too_many_tags[:50])
    # Test that we don't reject entities within the limit
    _validate_batch_log_limits(too_many_metrics[:1000], [], [])
    _validate_batch_log_limits([], too_many_params[:100], [])
    _validate_batch_log_limits([], [], too_many_tags[:100])
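The limits exercised above imply at most 1000 metrics, 100 params, 100 tags, and 1000 entities in aggregate per batch. A minimal client-side sketch of chunking a large parameter dict so each log_batch call stays under the param limit (the constant is restated by hand here as an assumption rather than imported):

from mlflow.entities import Param
from mlflow.tracking import MlflowClient

PARAMS_PER_BATCH = 100  # assumed to match the server-side limit tested above

def log_params_in_chunks(client, run_id, params_dict):
    # Convert to Param entities, then issue log_batch calls of <= 100 params each
    entities = [Param(k, str(v)) for k, v in params_dict.items()]
    for i in range(0, len(entities), PARAMS_PER_BATCH):
        client.log_batch(run_id=run_id, params=entities[i:i + PARAMS_PER_BATCH])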
Example #5
    def log_param(self, run_id, key, value):
        """
        Log a parameter against the run ID. Value is converted to a string.
        """
        _validate_param_name(key)
        param = Param(key, str(value))
        self.store.log_param(run_id, param)
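A short usage sketch against this client method; because the value is string-ified, numeric params come back as strings:

import mlflow
from mlflow.tracking import MlflowClient

client = MlflowClient()
with mlflow.start_run() as run:
    client.log_param(run.info.run_id, "alpha", 0.5)
assert client.get_run(run.info.run_id).data.params["alpha"] == "0.5"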
Example #6
def test_search_runs_data():
    runs = [
        create_run(metrics=[Metric("mse", 0.2, 0, 0)],
                   params=[Param("param", "value")],
                   tags=[RunTag("tag", "value")],
                   start=1564675200000,
                   end=1564683035000),
        create_run(
            metrics=[Metric("mse", 0.6, 0, 0),
                     Metric("loss", 1.2, 0, 5)],
            params=[Param("param2", "val"),
                    Param("k", "v")],
            tags=[RunTag("tag2", "v2")],
            start=1564765200000,
            end=1564783200000)
    ]
    with mock.patch('mlflow.tracking.fluent._get_paginated_runs',
                    return_value=runs):
        pdf = search_runs()
        data = {
            'status': [RunStatus.FINISHED] * 2,
            'artifact_uri': [None] * 2,
            'run_id': [''] * 2,
            'experiment_id': [""] * 2,
            'metrics.mse': [0.2, 0.6],
            'metrics.loss': [np.nan, 1.2],
            'params.param': ["value", None],
            'params.param2': [None, "val"],
            'params.k': [None, "v"],
            'tags.tag': ["value", None],
            'tags.tag2': [None, "v2"],
            'start_time': [
                pd.to_datetime(1564675200000, unit="ms", utc=True),
                pd.to_datetime(1564765200000, unit="ms", utc=True)
            ],
            'end_time': [
                pd.to_datetime(1564683035000, unit="ms", utc=True),
                pd.to_datetime(1564783200000, unit="ms", utc=True)
            ]
        }
        expected_df = pd.DataFrame(data)
        pd.testing.assert_frame_equal(pdf,
                                      expected_df,
                                      check_like=True,
                                      check_frame_type=False)
Example #7
def _create():
    metrics = [Metric(key=random_str(10),
                      value=random_int(0, 1000),
                      timestamp=int(time.time()) + random_int(-1e4, 1e4),
                      step=random_int())]
    params = [Param(random_str(10), random_str(random_int(10, 35))) for _ in range(10)]  # noqa
    tags = [RunTag(random_str(10), random_str(random_int(10, 35))) for _ in range(10)]  # noqa
    rd = RunData(metrics=metrics, params=params, tags=tags)
    return rd, metrics, params, tags
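random_str and random_int are the project's test helpers and are not shown here; a rough sketch of what they might look like (assumed implementations, not the originals):

import random
import string

def random_int(lo=0, hi=100):
    # Bounds arrive as floats at some call sites above (e.g. -1e4), so coerce
    return random.randint(int(lo), int(hi))

def random_str(length=10):
    return "".join(random.choice(string.ascii_lowercase) for _ in range(length))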
Example #8
def _log_param():
    request_message = _get_request_message(LogParam())
    param = Param(request_message.key, request_message.value)
    run_id = request_message.run_id or request_message.run_uuid
    _get_tracking_store().log_param(run_id, param)
    response_message = LogParam.Response()
    response = Response(mimetype='application/json')
    response.set_data(message_to_json(response_message))
    return response
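This Flask handler serves the runs/log-parameter REST endpoint. A hedged sketch of the raw HTTP call a client could make against it (assuming a tracking server at localhost:5000; note the handler also accepts the legacy run_uuid field):

import requests

resp = requests.post(
    "http://localhost:5000/api/2.0/mlflow/runs/log-parameter",
    json={"run_id": "some-run-id", "key": "alpha", "value": "0.5"},
)
resp.raise_for_status()  # the handler responds with an empty JSON message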
Example #9
def log_params(params):
    """
    Log a batch of params for the current run. If no run is active, this method will create a
    new active run.

    :param params: Dictionary of param_name: String -> value: (String, but will be string-ified if
                   not)
    :returns: None
    """
    run_id = _get_or_start_run().info.run_id
    params_arr = [Param(key, str(value)) for key, value in params.items()]
    MlflowClient().log_batch(run_id=run_id,
                             metrics=[],
                             params=params_arr,
                             tags=[])
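A short usage example of this fluent API; each dict entry becomes one Param, and non-string values are string-ified:

import mlflow

with mlflow.start_run():
    mlflow.log_params({"lr": 0.01, "epochs": 10})  # stored as "0.01" and "10"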
Example #10
def test_log_batch(mlflow_client, backend_store_uri):
    experiment_id = mlflow_client.create_experiment('Batch em up')
    created_run = mlflow_client.create_run(experiment_id)
    run_id = created_run.info.run_id
    mlflow_client.log_batch(run_id=run_id,
                            metrics=[Metric("metric", 123.456, 789, 3)],
                            params=[Param("param", "value")],
                            tags=[RunTag("taggity", "do-dah")])
    run = mlflow_client.get_run(run_id)
    assert run.data.metrics.get('metric') == 123.456
    assert run.data.params.get('param') == 'value'
    assert run.data.tags.get('taggity') == 'do-dah'
    metric_history = mlflow_client.get_metric_history(run_id, "metric")
    assert len(metric_history) == 1
    metric = metric_history[0]
    assert metric.key == "metric"
    assert metric.value == 123.456
    assert metric.timestamp == 789
    assert metric.step == 3
Example #11
def _log_batch():
    _validate_batch_log_api_req(_get_request_json())
    request_message = _get_request_message(LogBatch())
    metrics = [
        Metric.from_proto(proto_metric)
        for proto_metric in request_message.metrics
    ]
    params = [
        Param.from_proto(proto_param) for proto_param in request_message.params
    ]
    tags = [RunTag.from_proto(proto_tag) for proto_tag in request_message.tags]
    _get_tracking_store().log_batch(run_id=request_message.run_id,
                                    metrics=metrics,
                                    params=params,
                                    tags=tags)
    response_message = LogBatch.Response()
    response = Response(mimetype='application/json')
    response.set_data(message_to_json(response_message))
    return response
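The handler depends on each entity's proto round trip. A minimal sketch of that round trip for a Param, using the same to_proto/from_proto conversions as above:

from mlflow.entities import Param

proto = Param("alpha", "0.5").to_proto()
restored = Param.from_proto(proto)
assert (restored.key, restored.value) == ("alpha", "0.5")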
Example #12
def _get_param_from_file(parent_path, param_name):
    _validate_param_name(param_name)
    value = read_file(parent_path, param_name)
    return Param(param_name, value)
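The helper assumes a FileStore-style layout: one file per param, named after the key, with the value as the file's contents. A rough stand-in for read_file under that assumption (the real helper may differ, e.g. in whitespace handling):

import os

def read_file_sketch(parent_path, file_name):
    # One file per param key; the file body is the param value
    with open(os.path.join(parent_path, file_name)) as f:
        return f.read()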
Example #13
def test_log_batch():
    expected_metrics = {"metric-key0": 1.0, "metric-key1": 4.0}
    expected_params = {"param-key0": "param-val0", "param-key1": "param-val1"}
    exact_expected_tags = {"tag-key0": "tag-val0", "tag-key1": "tag-val1"}
    approx_expected_tags = set(
        [MLFLOW_USER, MLFLOW_SOURCE_NAME, MLFLOW_SOURCE_TYPE])

    t = int(time.time())
    sorted_expected_metrics = sorted(expected_metrics.items(),
                                     key=lambda kv: kv[0])
    metrics = [
        Metric(key=key, value=value, timestamp=t, step=i)
        for i, (key, value) in enumerate(sorted_expected_metrics)
    ]
    params = [
        Param(key=key, value=value) for key, value in expected_params.items()
    ]
    tags = [
        RunTag(key=key, value=value)
        for key, value in exact_expected_tags.items()
    ]

    with start_run() as active_run:
        run_id = active_run.info.run_id
        tracking.MlflowClient().log_batch(run_id=run_id,
                                          metrics=metrics,
                                          params=params,
                                          tags=tags)
    client = tracking.MlflowClient()
    finished_run = client.get_run(run_id)
    # Validate metrics
    assert len(finished_run.data.metrics) == 2
    for key, value in finished_run.data.metrics.items():
        assert expected_metrics[key] == value
    metric_history0 = client.get_metric_history(run_id, "metric-key0")
    assert set([(m.value, m.timestamp, m.step)
                for m in metric_history0]) == set([
                    (1.0, t, 0),
                ])
    metric_history1 = client.get_metric_history(run_id, "metric-key1")
    assert set([(m.value, m.timestamp, m.step)
                for m in metric_history1]) == set([
                    (4.0, t, 1),
                ])

    # Validate tags (for automatically-set tags)
    assert len(finished_run.data.tags
               ) == len(exact_expected_tags) + len(approx_expected_tags)
    for tag_key, tag_value in finished_run.data.tags.items():
        if tag_key in approx_expected_tags:
            pass
        else:
            assert exact_expected_tags[tag_key] == tag_value
    # Validate params
    assert finished_run.data.params == expected_params
    # test that log_batch works with fewer params
    new_tags = {"1": "2", "3": "4", "5": "6"}
    tags = [RunTag(key=key, value=value) for key, value in new_tags.items()]
    client.log_batch(run_id=run_id, tags=tags)
    finished_run_2 = client.get_run(run_id)
    # Validate tags (for automatically-set tags)
    assert len(finished_run_2.data.tags) == len(finished_run.data.tags) + 3
    for tag_key, tag_value in finished_run_2.data.tags.items():
        if tag_key in new_tags:
            assert new_tags[tag_key] == tag_value
Example #14
def test_spark_integration():
    key = SparkMLParam(Identifiable(), "name", "doc")
    value = 123
    param = Param(key, value)
    assert param.key == "name"
    assert param.value == "123"
Example #15
def _create_run(uri, experiment_id, work_dir, version, entry_point,
                parameters):
    """
    Create a ``Run`` against the current MLflow tracking server, logging metadata (e.g. the URI,
    entry point, and parameters of the project) about the run. Return an ``ActiveRun`` that can be
    used to report additional data about the run (metrics/params) to the tracking server.
    """
    if _is_local_uri(uri):
        source_name = tracking._tracking_service.utils._get_git_url_if_present(
            _expand_uri(uri))
    else:
        source_name = _expand_uri(uri)
    source_version = _get_git_commit(work_dir)
    git_diff = _get_git_diff(work_dir)
    existing_run = fluent.active_run()
    if existing_run:
        parent_run_id = existing_run.info.run_id
    else:
        parent_run_id = None

    tags = {
        MLFLOW_USER: _get_user(),
        MLFLOW_SOURCE_NAME: source_name,
        MLFLOW_SOURCE_TYPE: SourceType.to_string(SourceType.PROJECT),
        MLFLOW_PROJECT_ENTRY_POINT: entry_point
    }

    # System tags - all include a string header, thus none of them is empty
    tags[KIWI_SYSTEM_HW_CPU] = _get_cpu_info()
    tags[KIWI_SYSTEM_HW_MEMORY] = _get_mem_info()
    tags[KIWI_SYSTEM_HW_DISK] = _get_disk_info()
    tags[KIWI_SYSTEM_HW_GPU] = _get_gpu_info()
    tags[KIWI_SYSTEM_OS] = _get_os_info()

    if source_version is not None:
        tags[MLFLOW_GIT_COMMIT] = source_version
        tags[MLFLOW_GIT_DIFF] = git_diff
    if parent_run_id is not None:
        tags[MLFLOW_PARENT_RUN_ID] = parent_run_id

    repo_url = _get_git_repo_url(work_dir)
    if repo_url is not None:
        tags[MLFLOW_GIT_REPO_URL] = repo_url
        tags[LEGACY_MLFLOW_GIT_REPO_URL] = repo_url

    # Add branch name tag if a branch is specified through --version
    if _is_valid_branch_name(work_dir, version):
        tags[MLFLOW_GIT_BRANCH] = version
        tags[LEGACY_MLFLOW_GIT_BRANCH_NAME] = version
    active_run = tracking.MlflowClient().create_run(
        experiment_id=experiment_id, tags=tags)

    project = _project_spec.load_project(work_dir)
    # Consolidate parameters for logging.
    # `storage_dir` is `None` since we want to log actual path not downloaded local path
    entry_point_obj = project.get_entry_point(entry_point)
    final_params, extra_params = entry_point_obj.compute_parameters(
        parameters, storage_dir=None)
    params_list = [
        Param(key, value) for key, value in list(final_params.items()) +
        list(extra_params.items())
    ]
    tracking.MlflowClient().log_batch(active_run.info.run_id,
                                      params=params_list)
    return active_run
Example #16
    def test_requestor(self, request):
        response = mock.MagicMock()
        response.status_code = 200
        response.text = '{}'
        request.return_value = response

        creds = MlflowHostCreds('https://hello')
        store = RestStore(lambda: creds)

        user_name = "mock user"
        source_name = "rest test"

        source_name_patch = mock.patch(
            "mlflow.tracking.context.default_context._get_source_name", return_value=source_name
        )
        source_type_patch = mock.patch(
            "mlflow.tracking.context.default_context._get_source_type",
            return_value=SourceType.LOCAL
        )
        with mock.patch('mlflow.utils.rest_utils.http_request') as mock_http, \
                mock.patch('mlflow.tracking._tracking_service.utils._get_store',
                           return_value=store), \
                mock.patch('mlflow.tracking.context.default_context._get_user',
                           return_value=user_name), \
                mock.patch('time.time', return_value=13579), \
                source_name_patch, source_type_patch:
            with mlflow.start_run(experiment_id="43"):
                cr_body = message_to_json(CreateRun(experiment_id="43",
                                                    user_id=user_name, start_time=13579000,
                                                    tags=[ProtoRunTag(key='mlflow.source.name',
                                                                      value=source_name),
                                                          ProtoRunTag(key='mlflow.source.type',
                                                                      value='LOCAL'),
                                                          ProtoRunTag(key='mlflow.user',
                                                                      value=user_name)]))
                expected_kwargs = self._args(creds, "runs/create", "POST", cr_body)

                assert mock_http.call_count == 1
                actual_kwargs = mock_http.call_args[1]

                # Test the passed tag values separately from the rest of the request
                # Tag order is inconsistent on Python 2 and 3, but the order does not matter
                expected_tags = expected_kwargs['json'].pop('tags')
                actual_tags = actual_kwargs['json'].pop('tags')
                assert (
                    sorted(expected_tags, key=lambda t: t['key']) ==
                    sorted(actual_tags, key=lambda t: t['key'])
                )
                assert expected_kwargs == actual_kwargs

        with mock.patch('mlflow.utils.rest_utils.http_request') as mock_http:
            store.log_param("some_uuid", Param("k1", "v1"))
            body = message_to_json(LogParam(
                run_uuid="some_uuid", run_id="some_uuid", key="k1", value="v1"))
            self._verify_requests(mock_http, creds,
                                  "runs/log-parameter", "POST", body)

        with mock.patch('mlflow.utils.rest_utils.http_request') as mock_http:
            store.set_experiment_tag("some_id", ExperimentTag("t1", "abcd"*1000))
            body = message_to_json(SetExperimentTag(
                experiment_id="some_id",
                key="t1",
                value="abcd"*1000))
            self._verify_requests(mock_http, creds,
                                  "experiments/set-experiment-tag", "POST", body)

        with mock.patch('mlflow.utils.rest_utils.http_request') as mock_http:
            store.set_tag("some_uuid", RunTag("t1", "abcd"*1000))
            body = message_to_json(SetTag(
                run_uuid="some_uuid", run_id="some_uuid", key="t1", value="abcd"*1000))
            self._verify_requests(mock_http, creds,
                                  "runs/set-tag", "POST", body)

        with mock.patch('mlflow.utils.rest_utils.http_request') as mock_http:
            store.delete_tag("some_uuid", "t1")
            body = message_to_json(DeleteTag(run_id="some_uuid", key="t1"))
            self._verify_requests(mock_http, creds,
                                  "runs/delete-tag", "POST", body)

        with mock.patch('mlflow.utils.rest_utils.http_request') as mock_http:
            store.log_metric("u2", Metric("m1", 0.87, 12345, 3))
            body = message_to_json(LogMetric(
                run_uuid="u2", run_id="u2", key="m1", value=0.87, timestamp=12345, step=3))
            self._verify_requests(mock_http, creds,
                                  "runs/log-metric", "POST", body)

        with mock.patch('mlflow.utils.rest_utils.http_request') as mock_http:
            metrics = [Metric("m1", 0.87, 12345, 0), Metric("m2", 0.49, 12345, -1),
                       Metric("m3", 0.58, 12345, 2)]
            params = [Param("p1", "p1val"), Param("p2", "p2val")]
            tags = [RunTag("t1", "t1val"), RunTag("t2", "t2val")]
            store.log_batch(run_id="u2", metrics=metrics, params=params, tags=tags)
            metric_protos = [metric.to_proto() for metric in metrics]
            param_protos = [param.to_proto() for param in params]
            tag_protos = [tag.to_proto() for tag in tags]
            body = message_to_json(LogBatch(run_id="u2", metrics=metric_protos,
                                            params=param_protos, tags=tag_protos))
            self._verify_requests(mock_http, creds,
                                  "runs/log-batch", "POST", body)

        with mock.patch('mlflow.utils.rest_utils.http_request') as mock_http:
            store.delete_run("u25")
            self._verify_requests(mock_http, creds,
                                  "runs/delete", "POST",
                                  message_to_json(DeleteRun(run_id="u25")))

        with mock.patch('mlflow.utils.rest_utils.http_request') as mock_http:
            store.restore_run("u76")
            self._verify_requests(mock_http, creds,
                                  "runs/restore", "POST",
                                  message_to_json(RestoreRun(run_id="u76")))

        with mock.patch('mlflow.utils.rest_utils.http_request') as mock_http:
            store.delete_experiment("0")
            self._verify_requests(mock_http, creds,
                                  "experiments/delete", "POST",
                                  message_to_json(DeleteExperiment(experiment_id="0")))

        with mock.patch('mlflow.utils.rest_utils.http_request') as mock_http:
            store.restore_experiment("0")
            self._verify_requests(mock_http, creds,
                                  "experiments/restore", "POST",
                                  message_to_json(RestoreExperiment(experiment_id="0")))

        with mock.patch('mlflow.utils.rest_utils.http_request') as mock_http:
            response = mock.MagicMock()
            response.text = '{"runs": ["1a", "2b", "3c"], "next_page_token": "67890fghij"}'
            mock_http.return_value = response
            result = store.search_runs(["0", "1"], "params.p1 = 'a'", ViewType.ACTIVE_ONLY,
                                       max_results=10, order_by=["a"], page_token="12345abcde")

            expected_message = SearchRuns(experiment_ids=["0", "1"], filter="params.p1 = 'a'",
                                          run_view_type=ViewType.to_proto(ViewType.ACTIVE_ONLY),
                                          max_results=10, order_by=["a"], page_token="12345abcde")
            self._verify_requests(mock_http, creds,
                                  "runs/search", "POST",
                                  message_to_json(expected_message))
            assert result.token == "67890fghij"

        with mock.patch('mlflow.utils.rest_utils.http_request') as mock_http:
            run_id = "run_id"
            m = Model(artifact_path="model/path", run_id="run_id", flavors={"tf": "flavor body"})
            result = store.record_logged_model("run_id", m)
            expected_message = LogModel(run_id=run_id,
                                        model_json=m.to_json())
            self._verify_requests(mock_http, creds,
                                  "runs/log-model", "POST",
                                  message_to_json(expected_message))