Example #1
def test_client_create_run_overrides(mock_store):
    experiment_id = mock.Mock()
    user = mock.Mock()
    start_time = mock.Mock()
    tags = {
        MLFLOW_USER: user,
        MLFLOW_PARENT_RUN_ID: mock.Mock(),
        MLFLOW_SOURCE_TYPE: SourceType.to_string(SourceType.JOB),
        MLFLOW_SOURCE_NAME: mock.Mock(),
        MLFLOW_PROJECT_ENTRY_POINT: mock.Mock(),
        MLFLOW_GIT_COMMIT: mock.Mock(),
        "other-key": "other-value"
    }

    MlflowClient().create_run(experiment_id, start_time, tags)

    mock_store.create_run.assert_called_once_with(
        experiment_id=experiment_id,
        user_id=user,
        start_time=start_time,
        tags=[RunTag(key, value) for key, value in tags.items()],
    )
    mock_store.reset_mock()
    MlflowClient().create_run(experiment_id, start_time, tags)
    mock_store.create_run.assert_called_once_with(
        experiment_id=experiment_id,
        user_id=user,
        start_time=start_time,
        tags=[RunTag(key, value) for key, value in tags.items()])
Example #2
def test_validate_batch_log_data():
    metrics_with_bad_key = [
        Metric("good-metric-key", 1.0, 0, 0),
        Metric("super-long-bad-key" * 1000, 4.0, 0, 0)
    ]
    metrics_with_bad_val = [
        Metric("good-metric-key", "not-a-double-val", 0, 0)
    ]
    metrics_with_bad_ts = [
        Metric("good-metric-key", 1.0, "not-a-timestamp", 0)
    ]
    metrics_with_neg_ts = [Metric("good-metric-key", 1.0, -123, 0)]
    metrics_with_bad_step = [Metric("good-metric-key", 1.0, 0, "not-a-step")]
    params_with_bad_key = [
        Param("good-param-key", "hi"),
        Param("super-long-bad-key" * 1000, "but-good-val")
    ]
    params_with_bad_val = [
        Param("good-param-key", "hi"),
        Param("another-good-key", "but-bad-val" * 1000)
    ]
    tags_with_bad_key = [
        RunTag("good-tag-key", "hi"),
        RunTag("super-long-bad-key" * 1000, "but-good-val")
    ]
    tags_with_bad_val = [
        RunTag("good-tag-key", "hi"),
        RunTag("another-good-key", "but-bad-val" * 1000)
    ]
    bad_kwargs = {
        "metrics": [
            metrics_with_bad_key, metrics_with_bad_val, metrics_with_bad_ts,
            metrics_with_neg_ts, metrics_with_bad_step
        ],
        "params": [params_with_bad_key, params_with_bad_val],
        "tags": [tags_with_bad_key, tags_with_bad_val],
    }
    good_kwargs = {"metrics": [], "params": [], "tags": []}
    for arg_name, arg_values in bad_kwargs.items():
        for arg_value in arg_values:
            final_kwargs = copy.deepcopy(good_kwargs)
            final_kwargs[arg_name] = arg_value
            with pytest.raises(MlflowException):
                _validate_batch_log_data(**final_kwargs)
    # Test that we don't reject entities within the limit
    _validate_batch_log_data(metrics=[Metric("metric-key", 1.0, 0, 0)],
                             params=[Param("param-key", "param-val")],
                             tags=[RunTag("tag-key", "tag-val")])
Example #3
    def to_mlflow_entity(self):
        """
        Convert DB model to corresponding MLflow entity.

        :return: :py:class:`mlflow.entities.RunTag`.
        """
        return RunTag(key=self.key, value=self.value)
Example #4
    def create_run(self, experiment_id, start_time=None, tags=None):
        """
        Create a :py:class:`mlflow.entities.Run` object that can be associated with
        metrics, parameters, artifacts, etc.
        Unlike :py:func:`mlflow.projects.run`, creates objects but does not run code.
        Unlike :py:func:`mlflow.start_run`, does not change the "active run" used by
        :py:func:`mlflow.log_param`.

        :param experiment_id: The ID of the experiment to create a run in.
        :param start_time: If not provided, use the current timestamp.
        :param tags: A dictionary of key-value pairs that are converted into
                     :py:class:`mlflow.entities.RunTag` objects.
        :return: :py:class:`mlflow.entities.Run` that was created.
        """

        tags = tags if tags else {}

        # Extract user from tags
        # This logic is temporary; the user_id attribute of runs is deprecated and will be removed
        # in a later release.
        user_id = tags.get(MLFLOW_USER, "unknown")

        return self.store.create_run(
            experiment_id=experiment_id,
            user_id=user_id,
            start_time=start_time or int(time.time() * 1000),
            tags=[RunTag(key, value) for (key, value) in iteritems(tags)])
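For orientation, a minimal usage sketch of this client method; the experiment ID, user name, and tag value below are illustrative assumptions, not taken from the examples above.

from mlflow.tracking import MlflowClient
from mlflow.utils.mlflow_tags import MLFLOW_USER

# Hypothetical call: create a run in experiment "0" with an explicit user tag.
# start_time is omitted, so create_run fills in the current timestamp.
client = MlflowClient()
run = client.create_run(experiment_id="0", tags={MLFLOW_USER: "alice"})
print(run.info.run_id, run.data.tags.get(MLFLOW_USER))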
Example #5
def test_validate_batch_log_limits():
    too_many_metrics = [
        Metric("metric-key-%s" % i, 1, 0, i * 2) for i in range(1001)
    ]
    too_many_params = [Param("param-key-%s" % i, "b") for i in range(101)]
    too_many_tags = [RunTag("tag-key-%s" % i, "b") for i in range(101)]

    good_kwargs = {"metrics": [], "params": [], "tags": []}
    bad_kwargs = {
        "metrics": [too_many_metrics],
        "params": [too_many_params],
        "tags": [too_many_tags],
    }
    for arg_name, arg_values in bad_kwargs.items():
        for arg_value in arg_values:
            final_kwargs = copy.deepcopy(good_kwargs)
            final_kwargs[arg_name] = arg_value
            with pytest.raises(MlflowException):
                _validate_batch_log_limits(**final_kwargs)
    # Test the case where there are too many entities in aggregate
    with pytest.raises(MlflowException):
        _validate_batch_log_limits(too_many_metrics[:900],
                                   too_many_params[:51], too_many_tags[:50])
    # Test that we don't reject entities within the limit
    _validate_batch_log_limits(too_many_metrics[:1000], [], [])
    _validate_batch_log_limits([], too_many_params[:100], [])
    _validate_batch_log_limits([], [], too_many_tags[:100])
Example #6
    def test_record_logged_model(self):
        store = self.get_store()
        run_id = self.create_test_run().info.run_id
        m = Model(artifact_path="model/path", run_id=run_id, flavors={"tf": "flavor body"})
        store.record_logged_model(run_id, m)
        self._verify_logged(
            store, run_id=run_id, params=[], metrics=[],
            tags=[RunTag(MLFLOW_LOGGED_MODELS, json.dumps([m.to_dict()]))])
        m2 = Model(artifact_path="some/other/path", run_id=run_id,
                   flavors={"R": {"property": "value"}})
        store.record_logged_model(run_id, m2)
        self._verify_logged(
            store, run_id, params=[], metrics=[],
            tags=[RunTag(MLFLOW_LOGGED_MODELS,
                         json.dumps([m.to_dict(), m2.to_dict()]))])
        m3 = Model(artifact_path="some/other/path2", run_id=run_id,
                   flavors={"R2": {"property": "value"}})
        store.record_logged_model(run_id, m3)
        self._verify_logged(
            store, run_id, params=[], metrics=[],
            tags=[RunTag(MLFLOW_LOGGED_MODELS,
                         json.dumps([m.to_dict(), m2.to_dict(), m3.to_dict()]))])
        with self.assertRaises(TypeError):
            store.record_logged_model(run_id, m.to_dict())
Example #7
def test_search_runs_data():
    runs = [
        create_run(metrics=[Metric("mse", 0.2, 0, 0)],
                   params=[Param("param", "value")],
                   tags=[RunTag("tag", "value")],
                   start=1564675200000,
                   end=1564683035000),
        create_run(
            metrics=[Metric("mse", 0.6, 0, 0),
                     Metric("loss", 1.2, 0, 5)],
            params=[Param("param2", "val"),
                    Param("k", "v")],
            tags=[RunTag("tag2", "v2")],
            start=1564765200000,
            end=1564783200000)
    ]
    with mock.patch('mlflow.tracking.fluent._get_paginated_runs',
                    return_value=runs):
        pdf = search_runs()
        data = {
            'status': [RunStatus.FINISHED] * 2,
            'artifact_uri': [None] * 2,
            'run_id': [''] * 2,
            'experiment_id': [""] * 2,
            'metrics.mse': [0.2, 0.6],
            'metrics.loss': [np.nan, 1.2],
            'params.param': ["value", None],
            'params.param2': [None, "val"],
            'params.k': [None, "v"],
            'tags.tag': ["value", None],
            'tags.tag2': [None, "v2"],
            'start_time': [
                pd.to_datetime(1564675200000, unit="ms", utc=True),
                pd.to_datetime(1564765200000, unit="ms", utc=True)
            ],
            'end_time': [
                pd.to_datetime(1564683035000, unit="ms", utc=True),
                pd.to_datetime(1564783200000, unit="ms", utc=True)
            ]
        }
        expected_df = pd.DataFrame(data)
        pd.testing.assert_frame_equal(pdf,
                                      expected_df,
                                      check_like=True,
                                      check_frame_type=False)
Example #8
def _create():
    metrics = [Metric(key=random_str(10),
                      value=random_int(0, 1000),
                      timestamp=int(time.time()) + random_int(-1e4, 1e4),
                      step=random_int())]
    params = [Param(random_str(10), random_str(random_int(10, 35))) for _ in range(10)]  # noqa
    tags = [RunTag(random_str(10), random_str(random_int(10, 35))) for _ in range(10)]  # noqa
    rd = RunData(metrics=metrics, params=params, tags=tags)
    return rd, metrics, params, tags
Example #9
def _set_tag():
    request_message = _get_request_message(SetTag())
    tag = RunTag(request_message.key, request_message.value)
    run_id = request_message.run_id or request_message.run_uuid
    _get_tracking_store().set_tag(run_id, tag)
    response_message = SetTag.Response()
    response = Response(mimetype='application/json')
    response.set_data(message_to_json(response_message))
    return response
Example #10
    def set_tag(self, run_id, key, value):
        """
        Set a tag on the run with the specified ID. Value is converted to a string.

        :param run_id: String ID of the run.
        :param key: Name of the tag.
        :param value: Tag value (converted to a string)
        """
        _validate_tag_name(key)
        tag = RunTag(key, str(value))
        self.store.set_tag(run_id, tag)
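A short illustrative sketch of calling the method above; the experiment ID and tag key are assumptions made up for this example.

from mlflow.tracking import MlflowClient

client = MlflowClient()
run_id = client.create_run(experiment_id="0").info.run_id  # assumes experiment "0" exists
# Non-string values are stringified by set_tag, so this is stored as the string "12".
client.set_tag(run_id, "num_layers", 12)
print(client.get_run(run_id).data.tags["num_layers"])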
Example #11
def set_tags(tags):
    """
    Log a batch of tags for the current run. If no run is active, this method will create a
    new active run.

    :param tags: Dictionary of tag names (strings) to tag values. Values that are not
                 strings are converted to strings.
    :returns: None
    """
    run_id = _get_or_start_run().info.run_id
    tags_arr = [RunTag(key, str(value)) for key, value in tags.items()]
    MlflowClient().log_batch(run_id=run_id,
                             metrics=[],
                             params=[],
                             tags=tags_arr)
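A minimal fluent-API sketch of the batch tagging described above; the tag names and values are made up for illustration.

import mlflow

with mlflow.start_run():
    # Non-string values (True, 3) are converted with str() before being logged.
    mlflow.set_tags({"team": "platform", "release.candidate": True, "version": 3})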
Example #12
def _create_run():
    request_message = _get_request_message(CreateRun())

    tags = [RunTag(tag.key, tag.value) for tag in request_message.tags]
    run = _get_tracking_store().create_run(
        experiment_id=request_message.experiment_id,
        user_id=request_message.user_id,
        start_time=request_message.start_time,
        tags=tags)

    response_message = CreateRun.Response()
    response_message.run.MergeFrom(run.to_proto())
    response = Response(mimetype='application/json')
    response.set_data(message_to_json(response_message))
    return response
Example #13
def test_log_batch(mlflow_client, backend_store_uri):
    experiment_id = mlflow_client.create_experiment('Batch em up')
    created_run = mlflow_client.create_run(experiment_id)
    run_id = created_run.info.run_id
    mlflow_client.log_batch(run_id=run_id,
                            metrics=[Metric("metric", 123.456, 789, 3)],
                            params=[Param("param", "value")],
                            tags=[RunTag("taggity", "do-dah")])
    run = mlflow_client.get_run(run_id)
    assert run.data.metrics.get('metric') == 123.456
    assert run.data.params.get('param') == 'value'
    assert run.data.tags.get('taggity') == 'do-dah'
    metric_history = mlflow_client.get_metric_history(run_id, "metric")
    assert len(metric_history) == 1
    metric = metric_history[0]
    assert metric.key == "metric"
    assert metric.value == 123.456
    assert metric.timestamp == 789
    assert metric.step == 3
Example #14
def _log_batch():
    _validate_batch_log_api_req(_get_request_json())
    request_message = _get_request_message(LogBatch())
    metrics = [
        Metric.from_proto(proto_metric)
        for proto_metric in request_message.metrics
    ]
    params = [
        Param.from_proto(proto_param) for proto_param in request_message.params
    ]
    tags = [RunTag.from_proto(proto_tag) for proto_tag in request_message.tags]
    _get_tracking_store().log_batch(run_id=request_message.run_id,
                                    metrics=metrics,
                                    params=params,
                                    tags=tags)
    response_message = LogBatch.Response()
    response = Response(mimetype='application/json')
    response.set_data(message_to_json(response_message))
    return response
Example #15
    def record_logged_model(self, run_id, mlflow_model):
        if not isinstance(mlflow_model, Model):
            raise TypeError(
                "Argument 'mlflow_model' should be mlflow.models.Model, got '{}'"
                .format(type(mlflow_model)))
        _validate_run_id(run_id)
        run_info = self._get_run_info(run_id)
        check_run_is_active(run_info)
        model_dict = mlflow_model.to_dict()
        run_info = self._get_run_info(run_id)
        path = self._get_tag_path(run_info.experiment_id, run_info.run_id,
                                  MLFLOW_LOGGED_MODELS)
        if os.path.exists(path):
            with open(path, "r") as f:
                model_list = json.loads(f.read())
        else:
            model_list = []
        tag = RunTag(MLFLOW_LOGGED_MODELS,
                     json.dumps(model_list + [model_dict]))

        try:
            self._set_run_tag(run_info, tag)
        except Exception as e:
            raise MlflowException(e, INTERNAL_ERROR)
Example #16
def _get_tag_from_file(parent_path, tag_name):
    _validate_tag_name(tag_name)
    tag_data = read_file(parent_path, tag_name)
    return RunTag(tag_name, tag_data)
Example #17
def test_log_batch():
    expected_metrics = {"metric-key0": 1.0, "metric-key1": 4.0}
    expected_params = {"param-key0": "param-val0", "param-key1": "param-val1"}
    exact_expected_tags = {"tag-key0": "tag-val0", "tag-key1": "tag-val1"}
    approx_expected_tags = set(
        [MLFLOW_USER, MLFLOW_SOURCE_NAME, MLFLOW_SOURCE_TYPE])

    t = int(time.time())
    sorted_expected_metrics = sorted(expected_metrics.items(),
                                     key=lambda kv: kv[0])
    metrics = [
        Metric(key=key, value=value, timestamp=t, step=i)
        for i, (key, value) in enumerate(sorted_expected_metrics)
    ]
    params = [
        Param(key=key, value=value) for key, value in expected_params.items()
    ]
    tags = [
        RunTag(key=key, value=value)
        for key, value in exact_expected_tags.items()
    ]

    with start_run() as active_run:
        run_id = active_run.info.run_id
        tracking.MlflowClient().log_batch(
            run_id=run_id, metrics=metrics, params=params, tags=tags)
    client = tracking.MlflowClient()
    finished_run = client.get_run(run_id)
    # Validate metrics
    assert len(finished_run.data.metrics) == 2
    for key, value in finished_run.data.metrics.items():
        assert expected_metrics[key] == value
    metric_history0 = client.get_metric_history(run_id, "metric-key0")
    assert set([(m.value, m.timestamp, m.step)
                for m in metric_history0]) == set([
                    (1.0, t, 0),
                ])
    metric_history1 = client.get_metric_history(run_id, "metric-key1")
    assert set([(m.value, m.timestamp, m.step)
                for m in metric_history1]) == set([
                    (4.0, t, 1),
                ])

    # Validate tags (for automatically-set tags)
    assert (len(finished_run.data.tags) ==
            len(exact_expected_tags) + len(approx_expected_tags))
    for tag_key, tag_value in finished_run.data.tags.items():
        if tag_key in approx_expected_tags:
            pass
        else:
            assert exact_expected_tags[tag_key] == tag_value
    # Validate params
    assert finished_run.data.params == expected_params
    # test that log_batch works with fewer params
    new_tags = {"1": "2", "3": "4", "5": "6"}
    tags = [RunTag(key=key, value=value) for key, value in new_tags.items()]
    client.log_batch(run_id=run_id, tags=tags)
    finished_run_2 = client.get_run(run_id)
    # Validate tags (for automatically-set tags)
    assert len(finished_run_2.data.tags) == len(finished_run.data.tags) + 3
    for tag_key, tag_value in finished_run_2.data.tags.items():
        if tag_key in new_tags:
            assert new_tags[tag_key] == tag_value
Example #18
    def test_requestor(self, request):
        response = mock.MagicMock()
        response.status_code = 200
        response.text = '{}'
        request.return_value = response

        creds = MlflowHostCreds('https://hello')
        store = RestStore(lambda: creds)

        user_name = "mock user"
        source_name = "rest test"

        source_name_patch = mock.patch(
            "mlflow.tracking.context.default_context._get_source_name", return_value=source_name
        )
        source_type_patch = mock.patch(
            "mlflow.tracking.context.default_context._get_source_type",
            return_value=SourceType.LOCAL
        )
        with mock.patch('mlflow.utils.rest_utils.http_request') as mock_http, \
                mock.patch('mlflow.tracking._tracking_service.utils._get_store',
                           return_value=store), \
                mock.patch('mlflow.tracking.context.default_context._get_user',
                           return_value=user_name), \
                mock.patch('time.time', return_value=13579), \
                source_name_patch, source_type_patch:
            with mlflow.start_run(experiment_id="43"):
                cr_body = message_to_json(CreateRun(experiment_id="43",
                                                    user_id=user_name, start_time=13579000,
                                                    tags=[ProtoRunTag(key='mlflow.source.name',
                                                                      value=source_name),
                                                          ProtoRunTag(key='mlflow.source.type',
                                                                      value='LOCAL'),
                                                          ProtoRunTag(key='mlflow.user',
                                                                      value=user_name)]))
                expected_kwargs = self._args(creds, "runs/create", "POST", cr_body)

                assert mock_http.call_count == 1
                actual_kwargs = mock_http.call_args[1]

                # Test the passed tag values separately from the rest of the request
                # Tag order is inconsistent on Python 2 and 3, but the order does not matter
                expected_tags = expected_kwargs['json'].pop('tags')
                actual_tags = actual_kwargs['json'].pop('tags')
                assert (
                    sorted(expected_tags, key=lambda t: t['key']) ==
                    sorted(actual_tags, key=lambda t: t['key'])
                )
                assert expected_kwargs == actual_kwargs

        with mock.patch('mlflow.utils.rest_utils.http_request') as mock_http:
            store.log_param("some_uuid", Param("k1", "v1"))
            body = message_to_json(LogParam(
                run_uuid="some_uuid", run_id="some_uuid", key="k1", value="v1"))
            self._verify_requests(mock_http, creds,
                                  "runs/log-parameter", "POST", body)

        with mock.patch('mlflow.utils.rest_utils.http_request') as mock_http:
            store.set_experiment_tag("some_id", ExperimentTag("t1", "abcd"*1000))
            body = message_to_json(SetExperimentTag(
                experiment_id="some_id",
                key="t1",
                value="abcd"*1000))
            self._verify_requests(mock_http, creds,
                                  "experiments/set-experiment-tag", "POST", body)

        with mock.patch('mlflow.utils.rest_utils.http_request') as mock_http:
            store.set_tag("some_uuid", RunTag("t1", "abcd"*1000))
            body = message_to_json(SetTag(
                run_uuid="some_uuid", run_id="some_uuid", key="t1", value="abcd"*1000))
            self._verify_requests(mock_http, creds,
                                  "runs/set-tag", "POST", body)

        with mock.patch('mlflow.utils.rest_utils.http_request') as mock_http:
            store.delete_tag("some_uuid", "t1")
            body = message_to_json(DeleteTag(run_id="some_uuid", key="t1"))
            self._verify_requests(mock_http, creds,
                                  "runs/delete-tag", "POST", body)

        with mock.patch('mlflow.utils.rest_utils.http_request') as mock_http:
            store.log_metric("u2", Metric("m1", 0.87, 12345, 3))
            body = message_to_json(LogMetric(
                run_uuid="u2", run_id="u2", key="m1", value=0.87, timestamp=12345, step=3))
            self._verify_requests(mock_http, creds,
                                  "runs/log-metric", "POST", body)

        with mock.patch('mlflow.utils.rest_utils.http_request') as mock_http:
            metrics = [Metric("m1", 0.87, 12345, 0), Metric("m2", 0.49, 12345, -1),
                       Metric("m3", 0.58, 12345, 2)]
            params = [Param("p1", "p1val"), Param("p2", "p2val")]
            tags = [RunTag("t1", "t1val"), RunTag("t2", "t2val")]
            store.log_batch(run_id="u2", metrics=metrics, params=params, tags=tags)
            metric_protos = [metric.to_proto() for metric in metrics]
            param_protos = [param.to_proto() for param in params]
            tag_protos = [tag.to_proto() for tag in tags]
            body = message_to_json(LogBatch(run_id="u2", metrics=metric_protos,
                                            params=param_protos, tags=tag_protos))
            self._verify_requests(mock_http, creds,
                                  "runs/log-batch", "POST", body)

        with mock.patch('mlflow.utils.rest_utils.http_request') as mock_http:
            store.delete_run("u25")
            self._verify_requests(mock_http, creds,
                                  "runs/delete", "POST",
                                  message_to_json(DeleteRun(run_id="u25")))

        with mock.patch('mlflow.utils.rest_utils.http_request') as mock_http:
            store.restore_run("u76")
            self._verify_requests(mock_http, creds,
                                  "runs/restore", "POST",
                                  message_to_json(RestoreRun(run_id="u76")))

        with mock.patch('mlflow.utils.rest_utils.http_request') as mock_http:
            store.delete_experiment("0")
            self._verify_requests(mock_http, creds,
                                  "experiments/delete", "POST",
                                  message_to_json(DeleteExperiment(experiment_id="0")))

        with mock.patch('mlflow.utils.rest_utils.http_request') as mock_http:
            store.restore_experiment("0")
            self._verify_requests(mock_http, creds,
                                  "experiments/restore", "POST",
                                  message_to_json(RestoreExperiment(experiment_id="0")))

        with mock.patch('mlflow.utils.rest_utils.http_request') as mock_http:
            response = mock.MagicMock()
            response.text = '{"runs": ["1a", "2b", "3c"], "next_page_token": "67890fghij"}'
            mock_http.return_value = response
            result = store.search_runs(["0", "1"], "params.p1 = 'a'", ViewType.ACTIVE_ONLY,
                                       max_results=10, order_by=["a"], page_token="12345abcde")

            expected_message = SearchRuns(experiment_ids=["0", "1"], filter="params.p1 = 'a'",
                                          run_view_type=ViewType.to_proto(ViewType.ACTIVE_ONLY),
                                          max_results=10, order_by=["a"], page_token="12345abcde")
            self._verify_requests(mock_http, creds,
                                  "runs/search", "POST",
                                  message_to_json(expected_message))
            assert result.token == "67890fghij"

        with mock.patch('mlflow.utils.rest_utils.http_request') as mock_http:
            run_id = "run_id"
            m = Model(artifact_path="model/path", run_id="run_id", flavors={"tf": "flavor body"})
            result = store.record_logged_model("run_id", m)
            expected_message = LogModel(run_id=run_id,
                                        model_json=m.to_json())
            self._verify_requests(mock_http, creds,
                                  "runs/log-model", "POST",
                                  message_to_json(expected_message))