Example #1
def test_log_batch(tracking_uri_mock):
    expected_metrics = {"metric-key0": 1.0, "metric-key1": 4.0}
    expected_params = {"param-key0": "param-val0", "param-key1": "param-val1"}
    exact_expected_tags = {"tag-key0": "tag-val0", "tag-key1": "tag-val1"}
    approx_expected_tags = set([MLFLOW_SOURCE_NAME, MLFLOW_SOURCE_TYPE])

    t = int(time.time())
    metrics = [
        Metric(key=key, value=value, timestamp=t)
        for key, value in expected_metrics.items()
    ]
    params = [
        Param(key=key, value=value) for key, value in expected_params.items()
    ]
    tags = [
        RunTag(key=key, value=value)
        for key, value in exact_expected_tags.items()
    ]

    active_run = start_run()
    run_uuid = active_run.info.run_uuid
    with active_run:
        mlflow.tracking.MlflowClient().log_batch(run_id=run_uuid,
                                                 metrics=metrics,
                                                 params=params,
                                                 tags=tags)
    finished_run = tracking.MlflowClient().get_run(run_uuid)
    # Validate metrics
    assert len(finished_run.data.metrics) == 2
    for metric in finished_run.data.metrics:
        assert expected_metrics[metric.key] == metric.value
    # Validate tags (for automatically-set tags)
    assert len(finished_run.data.tags) == len(exact_expected_tags) + len(approx_expected_tags)
    for tag in finished_run.data.tags:
        if tag.key in approx_expected_tags:
            pass
        else:
            assert exact_expected_tags[tag.key] == tag.value
    # Validate params
    assert len(finished_run.data.params) == 2
    for param in finished_run.data.params:
        assert expected_params[param.key] == param.value
Example #2
def test_log_batch(mlflow_client, backend_store_uri):
    experiment_id = mlflow_client.create_experiment('Batch em up')
    created_run = mlflow_client.create_run(experiment_id)
    run_id = created_run.info.run_id
    mlflow_client.log_batch(
        run_id=run_id,
        metrics=[Metric("metric", 123.456, 789, 3)],
        params=[Param("param", "value")],
        tags=[RunTag("taggity", "do-dah")])
    run = mlflow_client.get_run(run_id)
    assert run.data.metrics.get('metric') == 123.456
    assert run.data.params.get('param') == 'value'
    assert run.data.tags.get('taggity') == 'do-dah'
    metric_history = mlflow_client.get_metric_history(run_id, "metric")
    assert len(metric_history) == 1
    metric = metric_history[0]
    assert metric.key == "metric"
    assert metric.value == 123.456
    assert metric.timestamp == 789
    assert metric.step == 3
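Note: the examples on this page construct the MLflow entities positionally. A minimal, self-contained sketch of the same log_batch pattern, assuming a default local tracking setup (the key names here are just illustrative):

import time

import mlflow
from mlflow.entities import Metric, Param, RunTag
from mlflow.tracking import MlflowClient

with mlflow.start_run() as run:
    ts = int(time.time() * 1000)  # MLflow metric timestamps are in milliseconds
    MlflowClient().log_batch(
        run_id=run.info.run_id,
        metrics=[Metric("metric", 123.456, ts, 0)],  # key, value, timestamp, step
        params=[Param("param", "value")],            # key, value
        tags=[RunTag("taggity", "do-dah")],          # key, value
    )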
Example #3
def _create_entity(base, model):

    # create dict of kwargs properties for entity and return the initialized entity
    config = {}
    for k in base._properties():
        # check if it's an mlflow entity and build it
        obj = getattr(model, k)

        if isinstance(model, SqlRun):
            if base is RunData:
                # RunData contains lists for metrics, params and tags,
                # so obj will be a list and we need to convert each item
                if k == 'metrics':
                    # only get latest recorded metrics per key
                    metrics = {}
                    for o in obj:
                        existing_metric = metrics.get(o.key)
                        if (existing_metric is None)\
                            or ((o.step, o.timestamp, o.value) >=
                                (existing_metric.step, existing_metric.timestamp,
                                 existing_metric.value)):
                            metrics[o.key] = Metric(o.key, o.value,
                                                    o.timestamp, o.step)
                    obj = list(metrics.values())
                elif k == 'params':
                    obj = [Param(o.key, o.value) for o in obj]
                elif k == 'tags':
                    obj = [RunTag(o.key, o.value) for o in obj]
            elif base is RunInfo:
                if k == 'source_type':
                    obj = SourceType.from_string(obj)
                elif k == "status":
                    obj = RunStatus.from_string(obj)
                elif k == "experiment_id":
                    obj = str(obj)

        # Our data model defines experiment_ids as ints, but the in-memory representation was
        # changed to be a string in time for 1.0.
        if isinstance(model, SqlExperiment) and k == "experiment_id":
            obj = str(obj)

        config[k] = obj
    return base(**config)
Example #4
def _log_batch():
    _validate_batch_log_api_req(_get_request_json())
    request_message = _get_request_message(LogBatch())
    metrics = [
        Metric.from_proto(proto_metric)
        for proto_metric in request_message.metrics
    ]
    params = [
        Param.from_proto(proto_param) for proto_param in request_message.params
    ]
    tags = [RunTag.from_proto(proto_tag) for proto_tag in request_message.tags]
    _get_tracking_store().log_batch(run_id=request_message.run_id,
                                    metrics=metrics,
                                    params=params,
                                    tags=tags)
    response_message = LogBatch.Response()
    response = Response(mimetype='application/json')
    response.set_data(message_to_json(response_message))
    return response
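For context, this handler backs the runs/log-batch REST endpoint of the tracking server. A hedged client-side sketch of the request it serves, assuming a server at http://localhost:5000 and an existing run ID (both placeholders), with field names following the documented LogBatch payload:

import time

import requests

payload = {
    "run_id": "<existing-run-id>",
    "metrics": [{"key": "loss", "value": 0.42, "timestamp": int(time.time() * 1000), "step": 0}],
    "params": [{"key": "lr", "value": "0.01"}],
    "tags": [{"key": "stage", "value": "dev"}],
}
resp = requests.post("http://localhost:5000/api/2.0/mlflow/runs/log-batch", json=payload)
resp.raise_for_status()  # on success the handler above returns an empty JSON object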
Example #5
def test_log_batch(tracking_uri_mock, tmpdir):
    expected_metrics = {"metric-key0": 1.0, "metric-key1": 4.0}
    expected_params = {"param-key0": "param-val0", "param-key1": "param-val1"}
    exact_expected_tags = {"tag-key0": "tag-val0", "tag-key1": "tag-val1"}
    approx_expected_tags = set([MLFLOW_SOURCE_NAME, MLFLOW_SOURCE_TYPE])

    t = int(time.time())
    sorted_expected_metrics = sorted(expected_metrics.items(), key=lambda kv: kv[0])
    metrics = [Metric(key=key, value=value, timestamp=t, step=i)
               for i, (key, value) in enumerate(sorted_expected_metrics)]
    params = [Param(key=key, value=value) for key, value in expected_params.items()]
    tags = [RunTag(key=key, value=value) for key, value in exact_expected_tags.items()]

    with start_run() as active_run:
        run_id = active_run.info.run_id
        mlflow.tracking.MlflowClient().log_batch(run_id=run_id, metrics=metrics, params=params,
                                                 tags=tags)
    client = tracking.MlflowClient()
    finished_run = client.get_run(run_id)
    # Validate metrics
    assert len(finished_run.data.metrics) == 2
    for key, value in finished_run.data.metrics.items():
        assert expected_metrics[key] == value
    metric_history0 = client.get_metric_history(run_id, "metric-key0")
    assert set([(m.value, m.timestamp, m.step) for m in metric_history0]) == set([
        (1.0, t, 0),
    ])
    metric_history1 = client.get_metric_history(run_id, "metric-key1")
    assert set([(m.value, m.timestamp, m.step) for m in metric_history1]) == set([
        (4.0, t, 1),
    ])

    # Validate tags (for automatically-set tags)
    assert len(finished_run.data.tags) == len(exact_expected_tags) + len(approx_expected_tags)
    for tag_key, tag_value in finished_run.data.tags.items():
        if tag_key in approx_expected_tags:
            pass
        else:
            assert exact_expected_tags[tag_key] == tag_value
    # Validate params
    assert finished_run.data.params == expected_params
Example #6
    def test_log_batch_internal_error(self):
        # Verify that internal errors during the DB save step for log_batch result in
        # MlflowExceptions
        run = self._run_factory()

        def _raise_exception_fn(*args, **kwargs):  # pylint: disable=unused-argument
            raise Exception("Some internal error")
        with mock.patch("mlflow.store.sqlalchemy_store.SqlAlchemyStore.log_metric") as metric_mock,\
                mock.patch(
                    "mlflow.store.sqlalchemy_store.SqlAlchemyStore.log_param") as param_mock,\
                mock.patch("mlflow.store.sqlalchemy_store.SqlAlchemyStore.set_tag") as tags_mock:
            metric_mock.side_effect = _raise_exception_fn
            param_mock.side_effect = _raise_exception_fn
            tags_mock.side_effect = _raise_exception_fn
            for kwargs in [{"metrics": [Metric("a", 3, 1)]}, {"params": [Param("b", "c")]},
                           {"tags": [RunTag("c", "d")]}]:
                log_batch_kwargs = {"metrics": [], "params": [], "tags": []}
                log_batch_kwargs.update(kwargs)
                with self.assertRaises(MlflowException) as e:
                    self.store.log_batch(run.info.run_uuid, **log_batch_kwargs)
                self.assertIn("Some internal error", str(e.exception.message))
Example #7
def _create_entity(base, model):

    # create dict of kwargs properties for entity and return the initialized entity
    config = {}
    for k in base._properties():
        # check if it's an mlflow entity and build it
        obj = getattr(model, k)

        # RunData contains lists for metrics, params and tags,
        # so obj will be a list and we need to convert each item
        if k == 'metrics':
            obj = [Metric(o.key, o.value, o.timestamp) for o in obj]

        if k == 'params':
            obj = [Param(o.key, o.value) for o in obj]

        if k == 'tags':
            obj = [RunTag(o.key, o.value) for o in obj]

        config[k] = obj
    return base(**config)
Example #8
    def _generate_run(self, i, runs_dict):
        """
        Generate a run object and save to runs_dict keyed by run_id.
        Most of the data just depends on i, and some data are hard-coded for simplicity.
        """
        key = f"key{i}"
        value = f"value{i}"
        start_time = 123456 * i
        end_time = start_time + (1000 * i)
        run_id = f"run_id_{i}"

        metrics = [Metric(key, value, start_time, "stage")]
        params = [Param(key, value)]
        tags = [RunTag(key, value)]
        run_info = RunInfo(run_id, "experiment_id", "user_id", "status",
                           start_time, end_time, "lifecycle_stage")
        run = Run(run_info=run_info,
                  run_data=RunData(metrics=metrics, params=params, tags=tags))
        runs_dict[run_id] = run
        return run
Example #9
def test_log_batch(mlflow_client):
    experiment_id = mlflow_client.create_experiment('Batch em up')
    created_run = mlflow_client.create_run(experiment_id)
    run_id = created_run.info.run_uuid
    # TODO(sid): pass and assert on step
    mlflow_client.log_batch(run_id=run_id,
                            metrics=[Metric("metric", 123.456, 789, 0)],
                            params=[Param("param", "value")],
                            tags=[RunTag("taggity", "do-dah")])
    run = mlflow_client.get_run(run_id)
    assert run.data.metrics.get('metric') == 123.456
    assert run.data.params.get('param') == 'value'
    assert run.data.tags.get('taggity') == 'do-dah'
    # TODO(sid): replace this with mlflow_client.get_metric_history
    fs = FileStore(server_root_dir)
    metric_history = fs.get_metric_history(run_id, "metric")
    assert len(metric_history) == 1
    metric = metric_history[0]
    assert metric.key == "metric"
    assert metric.value == 123.456
    assert metric.timestamp == 789
Example #10
 def _create():
     metrics = [
         Metric(random_str(10), random_int(0, 1000),
                int(time.time() + random_int(-1e4, 1e4)))
         for _ in range(100)
     ]
     params = [
         Param(random_str(10), random_str(random_int(10, 35)))
         for _ in range(10)
     ]  # noqa
     tags = [
         RunTag(random_str(10), random_str(random_int(10, 35)))
         for _ in range(10)
     ]  # noqa
     rd = RunData()
     for p in params:
         rd._add_param(p)
     for m in metrics:
         rd._add_metric(m)
     for t in tags:
         rd._add_tag(t)
     return rd, metrics, params, tags
Example #11
    def test_log_batch_internal_error(self):
        # Verify that internal errors during log_batch result in MlflowExceptions
        fs = FileStore(self.test_root)
        run = self._create_run(fs)

        def _raise_exception_fn(*args, **kwargs):  # pylint: disable=unused-argument
            raise Exception("Some internal error")
        with mock.patch("mlflow.store.file_store.FileStore.log_metric") as log_metric_mock, \
                mock.patch("mlflow.store.file_store.FileStore.log_param") as log_param_mock, \
                mock.patch("mlflow.store.file_store.FileStore.set_tag") as set_tag_mock:
            log_metric_mock.side_effect = _raise_exception_fn
            log_param_mock.side_effect = _raise_exception_fn
            set_tag_mock.side_effect = _raise_exception_fn
            for kwargs in [{"metrics": [Metric("a", 3, 1)]}, {"params": [Param("b", "c")]},
                           {"tags": [RunTag("c", "d")]}]:
                log_batch_kwargs = {"metrics": [], "params": [], "tags": []}
                log_batch_kwargs.update(kwargs)
                print(log_batch_kwargs)
                with self.assertRaises(MlflowException) as e:
                    fs.log_batch(run.info.run_uuid, **log_batch_kwargs)
                self.assertIn("Some internal error", str(e.exception.message))
                assert e.exception.error_code == ErrorCode.Name(INTERNAL_ERROR)
Example #12
def log_params(params):
    """
    Log a batch of params for the current run. If no run is active, this method will create a
    new active run.

    :param params: Dictionary of param_name (String) -> value (String, but will be string-ified
                   if not)
    :returns: None

    .. code-block:: python
        :caption: Example

        import mlflow

        params = {"learning_rate": 0.01, "n_estimators": 10}

        # Log a batch of parameters
        with mlflow.start_run():
            mlflow.log_params(params)
    """
    run_id = _get_or_start_run().info.run_id
    params_arr = [Param(key, str(value)) for key, value in params.items()]
    MlflowClient().log_batch(run_id=run_id, metrics=[], params=params_arr, tags=[])
Example #13
    def import_run_data(self, run_dct, run_id, src_user_id):
        from mlflow.entities import Metric, Param, RunTag
        now = round(time.time())
        params = [Param(k, v) for k, v in run_dct['params'].items()]
        metrics = [
            Metric(k, v, now, 0) for k, v in run_dct['metrics'].items()
        ]  # TODO: missing timestamp and step semantics?

        tags = run_dct['tags']
        if not self.import_mlflow_tags:  # remove mlflow tags
            keys = [k for k in tags.keys() if k.startswith("mlflow.")]
            for k in keys:
                tags.pop(k)
        if not self.import_mlflow_tools_tags:  # remove mlflow_tools tags
            keys = [k for k in tags.keys() if k.startswith("mlflow_tools.")]
            for k in keys:
                tags.pop(k)
        tags = [RunTag(k, str(v)) for k, v in tags.items()]

        if not self.in_databricks:
            utils.set_dst_user_id(tags, src_user_id, self.use_src_user_id)
        #self.dump_tags(tags)
        self.client.log_batch(run_id, metrics, params, tags)
Example #14
 def _create_run(self, run):
     args = {"run_name": run.run_name} if run.run_name else {}
     with mlflow.start_run(**args) as active_run:
         params = [Param(k, v) for k, v in run.params.items()]
         metrics = [
             Metric(k, v, int(time.time()), 0)
             for k, v in run.metrics.items()
         ]  # TODO: timestamp and step?
         tags = [RunTag(k, v) for k, v in run.tags.items()]
         mlflow.tracking.MlflowClient().log_batch(active_run.info.run_id,
                                                  metrics, params, tags)
         if self.do_tag:
             mlflow.set_tag("replayed", "true")
         for m in run.models:
             if m.model:
                 m.log_model_func(m.model, m.model_name)
         for a in run.artifacts:
             if a.artifact_local_path:
                 with open(a.artifact_local_path, "wb") as f:
                     f.write(a.artifact_bytes)
                 mlflow.log_artifact(a.artifact_local_path, a.artifact_path)
         return active_run.info.run_id
Example #15
def get_machine_log_items(
        machine: Machine) -> Tuple[List[Metric], List[Param]]:
    """
    Create flat lists of MLflow logging entities from multilevel dictionary

    For more information, see the mlflow docs:
    https://www.mlflow.org/docs/latest/python_api/mlflow.tracking.html#mlflow.tracking.MlflowClient.log_batch

    Parameters
    ----------
    machine: Machine

    Returns
    -------
    metrics: List[Metric]
        List of MLFlow Metric objects to log.
    params: List[Param]
        List of MLFlow Param objects to log.
    """

    metrics: List[Metric] = list()
    build_metadata = machine.metadata.build_metadata

    # Project/machine parameters
    keys = ["project_name", "name"]
    params = [Param(attr, getattr(machine, attr)) for attr in keys]

    # Dataset parameters
    dataset_keys = [
        "train_start_date",
        "train_end_date",
        "resolution",
        "row_filter",
        "row_filter_buffer_size",
    ]
    params.extend(
        Param(k, str(getattr(machine.dataset, k))) for k in dataset_keys)

    # Model parameters
    model_keys = [
        "model_creation_date", "model_builder_version", "model_offset"
    ]
    params.extend(
        Param(k, str(getattr(build_metadata.model, k))) for k in model_keys)

    # Parse cross-validation split metadata
    splits = build_metadata.model.cross_validation.splits
    params.extend(Param(k, str(v)) for k, v in splits.items())

    # Parse cross-validation metrics

    tag_list = normalize_sensor_tags(machine.dataset.tag_list,
                                     asset=machine.dataset.asset)
    scores = build_metadata.model.cross_validation.scores

    keys = sorted(list(scores.keys()))
    subkeys = ["mean", "max", "min", "std"]

    n_folds = len(scores[keys[0]]) - len(subkeys)
    for k in keys:
        # Skip per tag data, produces too many params for MLflow
        if any([t.name in k for t in tag_list]):
            continue

        # Summary stats per metric
        for sk in subkeys:
            metrics.append(
                Metric(f"{k}-{sk}", scores[k][f"fold-{sk}"], epoch_now(), 0))
        # Append value for each fold with increasing steps
        metrics.extend(
            Metric(k, scores[k][f"fold-{i+1}"], epoch_now(), i)
            for i in range(n_folds))

    # Parse fit metrics
    try:
        meta_params = build_metadata.model.model_meta["history"]["params"]
    except KeyError:
        logger.debug(
            "Key 'build-metadata.model.history.params' not found in metadata."
        )
    else:
        metrics.extend(
            Metric(k, float(getattr(build_metadata.model, k)), epoch_now(), 0)
            for k in ["model_training_duration_sec"])
        for m in meta_params["metrics"]:
            data = build_metadata.model.model_meta["history"][m]
            metrics.extend(
                Metric(m, float(x), timestamp=epoch_now(), step=i)
                for i, x in enumerate(data))
        params.extend(
            Param(k, str(meta_params[k]))
            for k in (p for p in meta_params if p != "metrics"))

    return metrics, params
Example #16
 def _get_param_from_file(parent_path, param_name):
     _validate_param_name(param_name)
     value = read_file(parent_path, param_name)
     return Param(param_name, value)
Example #17
def faculty_param_to_mlflow_param(faculty_param):
    return Param(key=faculty_param.key, value=faculty_param.value)
Example #18
 def to_mlflow_entity(self) -> Param:
     return Param(key=self.key, value=self.value)
Example #19
def _create_child_runs_for_parameter_search(cv_estimator,
                                            parent_run,
                                            child_tags=None):
    """
    Creates a collection of child runs for a parameter search training session.
    Runs are reconstructed from the `cv_results_` attribute of the specified trained
    parameter search estimator - `cv_estimator`, which provides relevant performance
    metrics for each point in the parameter search space. One child run is created
    for each point in the parameter search space. For additional information, see
    `https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.GridSearchCV.html`_. # noqa: E501

    :param cv_estimator: The trained parameter search estimator for which to create
                         child runs.
    :param parent_run: A :py:class:`mlflow.entities.Run` object referring to the parent
                       parameter search run for which child runs should be created.
    :param child_tags: An optional dictionary of MLflow tag keys and values to log
                       for each child run.
    """
    import pandas as pd

    client = MlflowClient()
    # Use the start time of the parent parameter search run as a rough estimate for the
    # start time of child runs, since we cannot precisely determine when each point
    # in the parameter search space was explored
    child_run_start_time = parent_run.info.start_time
    child_run_end_time = int(time.time() * 1000)

    seed_estimator = cv_estimator.estimator
    # In the unlikely case that a seed of a parameter search estimator is,
    # itself, a parameter search estimator, we should avoid logging the untuned
    # parameters of the seed's seed estimator
    should_log_params_deeply = not _is_parameter_search_estimator(
        seed_estimator)
    # Each row of `cv_results_` only provides parameters that vary across
    # the user-specified parameter grid. In order to log the complete set
    # of parameters for each child run, we fetch the parameters defined by
    # the seed estimator and update them with parameter subset specified
    # in the result row
    base_params = seed_estimator.get_params(deep=should_log_params_deeply)

    cv_results_df = pd.DataFrame.from_dict(cv_estimator.cv_results_)
    for _, result_row in cv_results_df.iterrows():
        tags_to_log = dict(child_tags) if child_tags else {}
        tags_to_log.update({MLFLOW_PARENT_RUN_ID: parent_run.info.run_id})
        tags_to_log.update(_get_estimator_info_tags(seed_estimator))
        child_run = client.create_run(
            experiment_id=parent_run.info.experiment_id,
            start_time=child_run_start_time,
            tags=tags_to_log,
        )

        from itertools import zip_longest

        params_to_log = dict(base_params)
        params_to_log.update(result_row.get("params", {}))
        param_batches_to_log = _chunk_dict(
            params_to_log, chunk_size=MAX_PARAMS_TAGS_PER_BATCH)

        # Parameters values are recorded twice in the set of search `cv_results_`:
        # once within a `params` column with dictionary values and once within
        # a separate dataframe column that is created for each parameter. To prevent
        # duplication of parameters, we log the consolidated values from the parameter
        # dictionary column and filter out the other parameter-specific columns with
        # names of the form `param_{param_name}`. Additionally, `cv_results_` produces
        # metrics for each training split, which is fairly verbose; accordingly, we filter
        # out per-split metrics in favor of aggregate metrics (mean, std, etc.)
        excluded_metric_prefixes = ["param", "split"]
        metric_batches_to_log = _chunk_dict(
            {
                key: value
                for key, value in result_row.iteritems() if not any([
                    key.startswith(prefix)
                    for prefix in excluded_metric_prefixes
                ]) and isinstance(value, Number)
            },
            chunk_size=min(MAX_ENTITIES_PER_BATCH - MAX_PARAMS_TAGS_PER_BATCH,
                           MAX_METRICS_PER_BATCH),
        )

        for params_batch, metrics_batch in zip_longest(param_batches_to_log,
                                                       metric_batches_to_log,
                                                       fillvalue={}):
            # Trim any parameter keys / values and metric keys that exceed the limits
            # imposed by corresponding MLflow Tracking APIs (e.g., LogParam, LogMetric)
            truncated_params_batch = _truncate_dict(params_batch,
                                                    MAX_ENTITY_KEY_LENGTH,
                                                    MAX_PARAM_VAL_LENGTH)
            truncated_metrics_batch = _truncate_dict(
                metrics_batch, max_key_length=MAX_ENTITY_KEY_LENGTH)
            client.log_batch(
                run_id=child_run.info.run_id,
                params=[
                    Param(str(key), str(value))
                    for key, value in truncated_params_batch.items()
                ],
                metrics=[
                    Metric(key=str(key),
                           value=value,
                           timestamp=child_run_end_time,
                           step=0)
                    for key, value in truncated_metrics_batch.items()
                ],
            )

        client.set_terminated(run_id=child_run.info.run_id,
                              end_time=child_run_end_time)
Example #20
EXPERIMENT_ID = 12

NAME = "experiment name"
ARTIFACT_LOCATION = "scheme://artifact-location"

METRIC_TIMESTAMP = datetime(2019, 3, 13, 17, 0, 15, tzinfo=UTC)
METRIC_TIMESTAMP_MILLISECONDS = to_timestamp(METRIC_TIMESTAMP) * 1000
FACULTY_METRIC = FacultyMetric(key="metric-key",
                               value="metric-value",
                               timestamp=METRIC_TIMESTAMP,
                               step=0)
MLFLOW_METRIC = Metric("metric-key", "metric-value",
                       METRIC_TIMESTAMP_MILLISECONDS, 0)

FACULTY_PARAM = FacultyParam(key="param-key", value="param-value")
MLFLOW_PARAM = Param("param-key", "param-value")

FACULTY_TAG = FacultyTag(key="tag-key", value="tag-value")
MLFLOW_TAG = RunTag("tag-key", "tag-value")

FACULTY_EXPERIMENT = FacultyExperiment(
    id=EXPERIMENT_ID,
    name=NAME,
    description="not used",
    artifact_location=ARTIFACT_LOCATION,
    created_at=datetime.now(tz=UTC),
    last_updated_at=datetime.now(tz=UTC),
    deleted_at=None,
)

RUN_UUID = uuid4()
Example #21
def test_log_batch_handler_success(mock_get_request_message,
                                   mock_get_request_json, tmpdir):
    # Test success cases for the LogBatch API
    def _test_log_batch_helper_success(metric_entities,
                                       param_entities,
                                       tag_entities,
                                       expected_metrics=None,
                                       expected_params=None,
                                       expected_tags=None):
        """
        Simulates a LogBatch API request using the provided metrics/params/tags, asserting that it
        succeeds & that the backing store contains either the set of expected metrics/params/tags
        (if provided) or, by default, the metrics/params/tags used in the API request.
        """
        with mlflow.start_run() as active_run:
            run_id = active_run.info.run_uuid
            mock_get_request_message.return_value = LogBatch(
                run_id=run_id,
                metrics=[m.to_proto() for m in metric_entities],
                params=[p.to_proto() for p in param_entities],
                tags=[t.to_proto() for t in tag_entities])
            response = _log_batch()
            assert response.status_code == 200
            json_response = json.loads(response.get_data())
            assert json_response == {}
            _assert_logged_entities(run_id,
                                    expected_metrics or metric_entities,
                                    expected_params or param_entities,
                                    expected_tags or tag_entities)

    store = FileStore(tmpdir.strpath)
    mock_get_request_json.return_value = "{}"  # Mock request JSON so it passes length validation
    server_patch = mock.patch('mlflow.server.handlers._get_store',
                              return_value=store)
    client_patch = mock.patch('mlflow.tracking.utils._get_store',
                              return_value=store)
    with server_patch, client_patch:
        mlflow.set_experiment("log-batch-experiment")
        # Log an empty payload
        _test_log_batch_helper_success([], [], [])
        # Log multiple metrics/params/tags
        _test_log_batch_helper_success(
            metric_entities=[Metric(key="m-key", value=3.2 * i, timestamp=i) for i in range(3)],
            param_entities=[Param(key="p-key-%s" % i, value="p-val-%s" % i) for i in range(4)],
            tag_entities=[RunTag(key="t-key-%s" % i, value="t-val-%s" % i) for i in range(5)])
        # Log metrics with the same key
        _test_log_batch_helper_success(
            metric_entities=[Metric(key="m-key", value=3.2 * i, timestamp=3) for i in range(3)],
            param_entities=[],
            tag_entities=[])
        # Log tags with the same key, verify the last one gets written
        same_key_tags = [
            RunTag(key="t-key", value="t-val-%s" % i) for i in range(5)
        ]
        _test_log_batch_helper_success(
            metric_entities=[],
            param_entities=[],
            tag_entities=same_key_tags,
            expected_tags=[same_key_tags[-1]])
Example #22
import time

from mlflow.entities import Metric, Param, RunTag
from mlflow.tracking import MlflowClient

client = MlflowClient()
experiment_id = client.create_experiment("new expenza")
# client.set_experiment(experiment_id)


def print_run_info(r):
    print(f"run_id: {r.info.run_id}")
    print(f"params: {r.data.params}")
    print(f"metrics: {r.data.metrics}")
    print(f"tags: {r.data.tags}")
    print(f"status: {r.info.status}")


# Create MLflow entities and a run under the experiment created above
timestamp = int(time.time() * 1000)
metrics = [Metric("m", 1.5, timestamp, 1)]
params = [Param("p", "p")]
tags = [RunTag("t", "t")]

run = client.create_run(experiment_id=experiment_id)

# log entities, terminate the run, and fetch run status
client.log_batch(run.info.run_id, metrics=metrics, params=params, tags=tags)
client.set_terminated(run.info.run_id)
run = client.get_run(run.info.run_id)
print_run_info(run)
Example #23
 def report_params(self, m: Dict[str, Union[str, Dict]]) -> None:
   m = ua.flatten(m)
   self._log_batch(params=[Param(k, str(v)) for k, v in m.items()])
Example #24
def _list_to_run_param(l):
    return [Param(key=d["key"], value=d["value"]) for d in l]
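A hypothetical call showing the input shape this helper expects, a list of {"key": ..., "value": ...} dicts such as one parsed from JSON (the parameter names below are made up):

params = _list_to_run_param([
    {"key": "learning_rate", "value": "0.01"},
    {"key": "n_estimators", "value": "10"},
])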
Example #25
 def test_log_batch_params_idempotency(self):
     run = self._run_factory()
     params = [Param("p-key", "p-val")]
     self.store.log_batch(run.info.run_uuid, metrics=[], params=params, tags=[])
     self.store.log_batch(run.info.run_uuid, metrics=[], params=params, tags=[])
     self._verify_logged(run.info.run_uuid, metrics=[], params=params, tags=[])
Example #26
run_info = RunInfo(run_uuid="1",
                   run_id="1",
                   experiment_id="experiment_id",
                   user_id="unknown",
                   status=RunStatus.to_string(RunStatus.RUNNING),
                   start_time=1,
                   end_time=None,
                   lifecycle_stage=LifecycleStage.ACTIVE,
                   artifact_uri="artifact_uri")
run_data = RunData(metrics=[], params=[], tags=[])
run = Run(run_info=run_info, run_data=run_data)

metric = Metric(key="metric1", value=1, timestamp=1, step=1)

param = Param(key="param1", value="val1")

tag = RunTag(key="tag1", value="val1")

experiment_tag = ExperimentTag(key="tag1", value="val1")


@mock.patch(
    "mlflow_elasticsearchstore.elasticsearch_store.ElasticsearchStore.list_experiments"
)
@pytest.mark.usefixtures('create_mlflow_client')
def test_list_experiments(list_experiments_mock, create_mlflow_client):
    create_mlflow_client.list_experiments(ViewType.ACTIVE_ONLY)
    list_experiments_mock.assert_called_once_with(
        view_type=ViewType.ACTIVE_ONLY)
Example #27
    def test_requestor(self, request):
        response = mock.MagicMock()
        response.status_code = 200
        response.text = '{}'
        request.return_value = response

        creds = MlflowHostCreds('https://hello')
        store = RestStore(lambda: creds)

        user_name = "mock user"
        source_name = "rest test"

        source_name_patch = mock.patch(
            "mlflow.tracking.context.default_context._get_source_name",
            return_value=source_name)
        source_type_patch = mock.patch(
            "mlflow.tracking.context.default_context._get_source_type",
            return_value=SourceType.LOCAL)
        with mock.patch('mlflow.store.rest_store.http_request') as mock_http, \
                mock.patch('mlflow.tracking.utils._get_store', return_value=store), \
                mock.patch('mlflow.tracking.context.default_context._get_user',
                           return_value=user_name), \
                mock.patch('time.time', return_value=13579), \
                source_name_patch, source_type_patch:
            with mlflow.start_run(experiment_id="43"):
                cr_body = message_to_json(
                    CreateRun(experiment_id="43",
                              user_id=user_name,
                              start_time=13579000,
                              tags=[
                                  ProtoRunTag(key='mlflow.source.name',
                                              value=source_name),
                                  ProtoRunTag(key='mlflow.source.type',
                                              value='LOCAL'),
                                  ProtoRunTag(key='mlflow.user',
                                              value=user_name)
                              ]))
                expected_kwargs = self._args(creds, "runs/create", "POST",
                                             cr_body)

                assert mock_http.call_count == 1
                actual_kwargs = mock_http.call_args[1]

                # Test the passed tag values separately from the rest of the request
                # Tag order is inconsistent on Python 2 and 3, but the order does not matter
                expected_tags = expected_kwargs['json'].pop('tags')
                actual_tags = actual_kwargs['json'].pop('tags')
                assert (sorted(expected_tags,
                               key=lambda t: t['key']) == sorted(
                                   actual_tags, key=lambda t: t['key']))
                assert expected_kwargs == actual_kwargs

        with mock.patch('mlflow.store.rest_store.http_request') as mock_http:
            store.log_param("some_uuid", Param("k1", "v1"))
            body = message_to_json(
                LogParam(run_uuid="some_uuid",
                         run_id="some_uuid",
                         key="k1",
                         value="v1"))
            self._verify_requests(mock_http, creds, "runs/log-parameter",
                                  "POST", body)

        with mock.patch('mlflow.store.rest_store.http_request') as mock_http:
            store.set_tag("some_uuid", RunTag("t1", "abcd" * 1000))
            body = message_to_json(
                SetTag(run_uuid="some_uuid",
                       run_id="some_uuid",
                       key="t1",
                       value="abcd" * 1000))
            self._verify_requests(mock_http, creds, "runs/set-tag", "POST",
                                  body)

        with mock.patch('mlflow.store.rest_store.http_request') as mock_http:
            store.delete_tag("some_uuid", "t1")
            body = message_to_json(DeleteTag(run_id="some_uuid", key="t1"))
            self._verify_requests(mock_http, creds, "runs/delete-tag", "POST",
                                  body)

        with mock.patch('mlflow.store.rest_store.http_request') as mock_http:
            store.log_metric("u2", Metric("m1", 0.87, 12345, 3))
            body = message_to_json(
                LogMetric(run_uuid="u2",
                          run_id="u2",
                          key="m1",
                          value=0.87,
                          timestamp=12345,
                          step=3))
            self._verify_requests(mock_http, creds, "runs/log-metric", "POST",
                                  body)

        with mock.patch('mlflow.store.rest_store.http_request') as mock_http:
            metrics = [
                Metric("m1", 0.87, 12345, 0),
                Metric("m2", 0.49, 12345, -1),
                Metric("m3", 0.58, 12345, 2)
            ]
            params = [Param("p1", "p1val"), Param("p2", "p2val")]
            tags = [RunTag("t1", "t1val"), RunTag("t2", "t2val")]
            store.log_batch(run_id="u2",
                            metrics=metrics,
                            params=params,
                            tags=tags)
            metric_protos = [metric.to_proto() for metric in metrics]
            param_protos = [param.to_proto() for param in params]
            tag_protos = [tag.to_proto() for tag in tags]
            body = message_to_json(
                LogBatch(run_id="u2",
                         metrics=metric_protos,
                         params=param_protos,
                         tags=tag_protos))
            self._verify_requests(mock_http, creds, "runs/log-batch", "POST",
                                  body)

        with mock.patch('mlflow.store.rest_store.http_request') as mock_http:
            store.delete_run("u25")
            self._verify_requests(mock_http, creds, "runs/delete", "POST",
                                  message_to_json(DeleteRun(run_id="u25")))

        with mock.patch('mlflow.store.rest_store.http_request') as mock_http:
            store.restore_run("u76")
            self._verify_requests(mock_http, creds, "runs/restore", "POST",
                                  message_to_json(RestoreRun(run_id="u76")))

        with mock.patch('mlflow.store.rest_store.http_request') as mock_http:
            store.delete_experiment("0")
            self._verify_requests(
                mock_http, creds, "experiments/delete", "POST",
                message_to_json(DeleteExperiment(experiment_id="0")))

        with mock.patch('mlflow.store.rest_store.http_request') as mock_http:
            store.restore_experiment("0")
            self._verify_requests(
                mock_http, creds, "experiments/restore", "POST",
                message_to_json(RestoreExperiment(experiment_id="0")))

        with mock.patch('mlflow.store.rest_store.http_request') as mock_http:
            response = mock.MagicMock()
            response.text = '{"runs": ["1a", "2b", "3c"], "next_page_token": "67890fghij"}'
            mock_http.return_value = response
            result = store.search_runs(["0", "1"],
                                       "params.p1 = 'a'",
                                       ViewType.ACTIVE_ONLY,
                                       max_results=10,
                                       order_by=["a"],
                                       page_token="12345abcde")

            expected_message = SearchRuns(experiment_ids=["0", "1"],
                                          filter="params.p1 = 'a'",
                                          run_view_type=ViewType.to_proto(
                                              ViewType.ACTIVE_ONLY),
                                          max_results=10,
                                          order_by=["a"],
                                          page_token="12345abcde")
            self._verify_requests(mock_http, creds, "runs/search", "POST",
                                  message_to_json(expected_message))
            assert result.token == "67890fghij"
Example #28
    def test_requestor(self, request):
        response = mock.MagicMock()
        response.status_code = 200
        response.text = '{}'
        request.return_value = response

        creds = MlflowHostCreds('https://hello')
        store = RestStore(lambda: creds)

        user_name = "mock user"
        run_name = "rest run"
        source_name = "rest test"

        with mock.patch('mlflow.store.rest_store.http_request_safe') as mock_http, \
                mock.patch('mlflow.tracking.utils._get_store', return_value=store), \
                mock.patch('mlflow.tracking.client._get_user_id', return_value=user_name), \
                mock.patch('time.time', return_value=13579):
            with mlflow.start_run(experiment_id="43",
                                  run_name=run_name,
                                  source_name=source_name):
                cr_body = message_to_json(
                    CreateRun(experiment_id="43",
                              run_name='',
                              user_id=user_name,
                              source_type=SourceType.LOCAL,
                              source_name=source_name,
                              start_time=13579000,
                              tags=[
                                  ProtoRunTag(key='mlflow.source.name',
                                              value=source_name),
                                  ProtoRunTag(key='mlflow.source.type',
                                              value='LOCAL')
                              ]))
                st_body = message_to_json(
                    SetTag(run_uuid='', key='mlflow.runName', value=run_name))
                assert mock_http.call_count == 2
                exp_calls = [("runs/create", "POST", cr_body),
                             ("runs/set-tag", "POST", st_body)]
                self._verify_request_has_calls(mock_http, creds, exp_calls)

        with mock.patch(
                'mlflow.store.rest_store.http_request_safe') as mock_http:
            store.log_param("some_uuid", Param("k1", "v1"))
            body = message_to_json(
                LogParam(run_uuid="some_uuid", key="k1", value="v1"))
            self._verify_requests(mock_http, creds, "runs/log-parameter",
                                  "POST", body)

        with mock.patch(
                'mlflow.store.rest_store.http_request_safe') as mock_http:
            store.set_tag("some_uuid", RunTag("t1", "abcd" * 1000))
            body = message_to_json(
                SetTag(run_uuid="some_uuid", key="t1", value="abcd" * 1000))
            self._verify_requests(mock_http, creds, "runs/set-tag", "POST",
                                  body)

        with mock.patch(
                'mlflow.store.rest_store.http_request_safe') as mock_http:
            store.log_metric("u2", Metric("m1", 0.87, 12345, 3))
            body = message_to_json(
                LogMetric(run_uuid="u2",
                          key="m1",
                          value=0.87,
                          timestamp=12345,
                          step=3))
            self._verify_requests(mock_http, creds, "runs/log-metric", "POST",
                                  body)

        with mock.patch(
                'mlflow.store.rest_store.http_request_safe') as mock_http:
            metrics = [
                Metric("m1", 0.87, 12345, 0),
                Metric("m2", 0.49, 12345, -1),
                Metric("m3", 0.58, 12345, 2)
            ]
            params = [Param("p1", "p1val"), Param("p2", "p2val")]
            tags = [RunTag("t1", "t1val"), RunTag("t2", "t2val")]
            store.log_batch(run_id="u2",
                            metrics=metrics,
                            params=params,
                            tags=tags)
            metric_protos = [metric.to_proto() for metric in metrics]
            param_protos = [param.to_proto() for param in params]
            tag_protos = [tag.to_proto() for tag in tags]
            body = message_to_json(
                LogBatch(run_id="u2",
                         metrics=metric_protos,
                         params=param_protos,
                         tags=tag_protos))
            self._verify_requests(mock_http, creds, "runs/log-batch", "POST",
                                  body)

        with mock.patch(
                'mlflow.store.rest_store.http_request_safe') as mock_http:
            store.delete_run("u25")
            self._verify_requests(mock_http, creds, "runs/delete", "POST",
                                  message_to_json(DeleteRun(run_id="u25")))

        with mock.patch(
                'mlflow.store.rest_store.http_request_safe') as mock_http:
            store.restore_run("u76")
            self._verify_requests(mock_http, creds, "runs/restore", "POST",
                                  message_to_json(RestoreRun(run_id="u76")))

        with mock.patch(
                'mlflow.store.rest_store.http_request_safe') as mock_http:
            store.delete_experiment("0")
            self._verify_requests(
                mock_http, creds, "experiments/delete", "POST",
                message_to_json(DeleteExperiment(experiment_id="0")))

        with mock.patch(
                'mlflow.store.rest_store.http_request_safe') as mock_http:
            store.restore_experiment("0")
            self._verify_requests(
                mock_http, creds, "experiments/restore", "POST",
                message_to_json(RestoreExperiment(experiment_id="0")))
Example #29
 def log_param(self, run_id, key, value):
     """Logs a parameter against the given run id. Value will be converted to a string."""
     _validate_param_name(key)
     param = Param(key, str(value))
     self.store.log_param(run_id, param)
Example #30
 def report_params(self, m: Dict[str, Union[str, Dict]]) -> None:
     flat_m = ua.flatten(m)
     self._log_batch(params=[
         Param(sanitize_key(k), _sanitize_param_value(str(v)))
         for k, v in flat_m.items()
     ])
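The flatten/sanitize helpers above (ua.flatten, sanitize_key, _sanitize_param_value) are project-specific. A rough, standard-library-only sketch of the same idea; the dotted-key convention and the 500-character truncation are assumptions for illustration, not taken from the snippet:

from mlflow.entities import Param


def _flatten(d, parent_key=""):
    # Flatten nested dicts, e.g. {"model": {"lr": 0.01}} -> {"model.lr": 0.01}
    out = {}
    for k, v in d.items():
        key = f"{parent_key}.{k}" if parent_key else str(k)
        if isinstance(v, dict):
            out.update(_flatten(v, key))
        else:
            out[key] = v
    return out


params = [Param(k, str(v)[:500])
          for k, v in _flatten({"model": {"lr": 0.01, "layers": 3}}).items()]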