Example #1
def _create_entity(base, model):

    # create dict of kwargs properties for entity and return the initialized entity
    config = {}
    for k in base._properties():
        # check if it's an MLflow entity and build it
        obj = getattr(model, k)

        if isinstance(model, SqlRun):
            if base is RunData:
                # RunData holds lists for metrics, params, and tags,
                # so obj is a list whose items need to be converted to MLflow entities
                if k == 'metrics':
                    # only get latest recorded metrics per key
                    metrics = {}
                    for o in obj:
                        if o.key not in metrics or o.timestamp > metrics.get(o.key).timestamp:
                            metrics[o.key] = Metric(o.key, o.value, o.timestamp)
                    obj = metrics.values()
                elif k == 'params':
                    obj = [Param(o.key, o.value) for o in obj]
                elif k == 'tags':
                    obj = [RunTag(o.key, o.value) for o in obj]
            elif base is RunInfo:
                if k == 'source_type':
                    obj = SourceType.from_string(obj)
                elif k == "status":
                    obj = RunStatus.from_string(obj)

        config[k] = obj
    return base(**config)
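A tiny sketch of the metric de-duplication rule above (a hypothetical illustration, not part of the store code): for each metric key, only the record with the largest timestamp is kept.

from mlflow.entities import Metric

latest = {}
for m in [Metric("loss", 0.9, 100), Metric("loss", 0.5, 200)]:
    # keep the most recently recorded value per key
    if m.key not in latest or m.timestamp > latest[m.key].timestamp:
        latest[m.key] = m
# latest["loss"].value == 0.5 (the latest recording wins)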
Example #2
def test_client_create_run_overrides(mock_store):

    experiment_id = mock.Mock()
    user_id = mock.Mock()
    run_name = mock.Mock()
    start_time = mock.Mock()
    tags = {
        MLFLOW_PARENT_RUN_ID: mock.Mock(),
        MLFLOW_SOURCE_TYPE: SourceType.to_string(SourceType.JOB),
        MLFLOW_SOURCE_NAME: mock.Mock(),
        MLFLOW_PROJECT_ENTRY_POINT: mock.Mock(),
        MLFLOW_GIT_COMMIT: mock.Mock(),
        "other-key": "other-value"
    }

    MlflowClient().create_run(experiment_id, user_id, run_name, start_time,
                              tags)

    mock_store.create_run.assert_called_once_with(
        experiment_id=experiment_id,
        user_id=user_id,
        run_name=run_name,
        start_time=start_time,
        tags=[RunTag(key, value) for key, value in tags.items()],
        parent_run_id=tags[MLFLOW_PARENT_RUN_ID],
        source_type=SourceType.JOB,
        source_name=tags[MLFLOW_SOURCE_NAME],
        entry_point_name=tags[MLFLOW_PROJECT_ENTRY_POINT],
        source_version=tags[MLFLOW_GIT_COMMIT])
Example #3
    def create_run(self, experiment_id, user_id, run_name, source_type,
                   source_name, entry_point_name, start_time, source_version,
                   tags, parent_run_id):
        """
        Create a run under the specified experiment ID, setting the run's status to "RUNNING"
        and the start time to the current time.

        :param experiment_id: ID of the experiment for this run
        :param user_id: ID of the user launching this run
        :param source_type: Enum (integer) describing the source of the run

        :return: The created Run object
        """
        tag_protos = [tag.to_proto() for tag in tags]
        req_body = message_to_json(
            CreateRun(experiment_id=experiment_id,
                      user_id=user_id,
                      run_name="",
                      source_type=source_type,
                      source_name=source_name,
                      entry_point_name=entry_point_name,
                      start_time=start_time,
                      source_version=source_version,
                      tags=tag_protos,
                      parent_run_id=parent_run_id))
        response_proto = self._call_endpoint(CreateRun, req_body)
        run = Run.from_proto(response_proto.run)
        if run_name:
            # TODO: optimization: This is making 2 calls to backend store. Include with above call.
            self.set_tag(run.info.run_uuid,
                         RunTag(key=MLFLOW_RUN_NAME, value=run_name))
        return run
Example #4
    def create_run(self,
                   experiment_id,
                   user_id=None,
                   run_name=None,
                   source_type=None,
                   source_name=None,
                   entry_point_name=None,
                   start_time=None,
                   source_version=None,
                   tags=None):
        """Creates a new :py:class:`mlflow.entities.Run` object, which can be associated with
        metrics, parameters, artifacts, etc.
        Unlike :py:func:`mlflow.projects.run`, does not actually run code, just creates objects.
        Unlike :py:func:`mlflow.start_run`, this does not change the "active run" used by
        :py:func:`mlflow.log_param` and friends.

        :param user_id: If not provided, we will use the current user as a default.
        :param start_time: If not provided, we will use the current timestamp.
        :param tags: A dictionary of key-value pairs which will be converted into
          RunTag objects.
        :return: :py:class:`mlflow.entities.Run` which was created
        """
        tags = tags if tags else {}
        return self.store.create_run(
            experiment_id=experiment_id,
            user_id=user_id if user_id is not None else _get_user_id(),
            run_name=run_name,
            source_type=source_type,
            source_name=source_name,
            entry_point_name=entry_point_name,
            start_time=start_time or int(time.time() * 1000),
            source_version=source_version,
            tags=[RunTag(key, value) for (key, value) in iteritems(tags)],
        )
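A minimal usage sketch for the method above (hypothetical: assumes `client` is an instance of the class defining this `create_run` and that experiment "0" exists); the tags dictionary is converted into RunTag objects before reaching the store.

run = client.create_run(
    experiment_id="0",
    run_name="baseline",
    tags={"team": "ml-platform", "release.candidate": "RC1"},
)
print(run.info.run_uuid)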
Example #5
 def _create():
     run_uuid = str(uuid.uuid4())
     experiment_id = random_int(10, 2000)
     name = random_str(random_int(10, 40))
     source_type = random_int(1, 4)
     source_name = random_str(random_int(100, 300))
     entry_point_name = random_str(random_int(100, 300))
     user_id = random_str(random_int(10, 25))
     status = random_int(1, 5)
     start_time = random_int(1, 10)
     end_time = start_time + random_int(1, 10)
     source_version = random_str(random_int(10, 40))
     tags = [
         RunTag(key=random_str(random_int(1, 5)),
                value=random_str(random_int(1, 5))) for _ in range(2)
     ]
     artifact_uri = random_str(random_int(10, 40))
     ri = RunInfo(run_uuid=run_uuid,
                  experiment_id=experiment_id,
                  name=name,
                  source_type=source_type,
                  source_name=source_name,
                  entry_point_name=entry_point_name,
                  user_id=user_id,
                  status=status,
                  start_time=start_time,
                  end_time=end_time,
                  source_version=source_version,
                  tags=tags,
                  artifact_uri=artifact_uri)
     return (ri, run_uuid, experiment_id, name, source_type, source_name,
             entry_point_name, user_id, status, start_time, end_time,
             source_version, tags, artifact_uri)
Example #6
def test_log_batch(tracking_uri_mock):
    expected_metrics = {"metric-key0": 1.0, "metric-key1": 4.0}
    expected_params = {"param-key0": "param-val0", "param-key1": "param-val1"}
    exact_expected_tags = {"tag-key0": "tag-val0", "tag-key1": "tag-val1"}
    approx_expected_tags = set([MLFLOW_SOURCE_NAME, MLFLOW_SOURCE_TYPE])

    t = int(time.time())
    metrics = [Metric(key=key, value=value, timestamp=t) for key, value in expected_metrics.items()]
    params = [Param(key=key, value=value) for key, value in expected_params.items()]
    tags = [RunTag(key=key, value=value) for key, value in exact_expected_tags.items()]

    active_run = start_run()
    run_uuid = active_run.info.run_uuid
    with active_run:
        mlflow.tracking.MlflowClient().log_batch(run_id=run_uuid, metrics=metrics, params=params,
                                                 tags=tags)
    finished_run = tracking.MlflowClient().get_run(run_uuid)
    # Validate metrics
    assert len(finished_run.data.metrics) == 2
    for key, value in finished_run.data.metrics.items():
        assert expected_metrics[key] == value
    # Validate tags (for automatically-set tags)
    assert len(finished_run.data.tags) == len(exact_expected_tags) + len(approx_expected_tags)
    for tag_key, tag_value in finished_run.data.tags.items():
        if tag_key in approx_expected_tags:
            pass
        else:
            assert exact_expected_tags[tag_key] == tag_value
    # Validate params
    assert finished_run.data.params == expected_params
Example #7
    def create_run(self, experiment_id, start_time=None, tags=None):
        """
        Create a :py:class:`mlflow.entities.Run` object that can be associated with
        metrics, parameters, artifacts, etc.
        Unlike :py:func:`mlflow.projects.run`, creates objects but does not run code.
        Unlike :py:func:`mlflow.start_run`, does not change the "active run" used by
        :py:func:`mlflow.log_param`.

        :param experiment_id: The ID of the experiment to create a run in.
        :param start_time: If not provided, use the current timestamp.
        :param tags: A dictionary of key-value pairs that are converted into
                     :py:class:`mlflow.entities.RunTag` objects.
        :return: :py:class:`mlflow.entities.Run` that was created.
        """

        tags = tags if tags else {}

        # Extract user from tags
        # This logic is temporary; the user_id attribute of runs is deprecated and will be removed
        # in a later release.
        user_id = tags.get(MLFLOW_USER, "unknown")

        return self.store.create_run(
            experiment_id=experiment_id,
            user_id=user_id,
            start_time=start_time or int(time.time() * 1000),
            tags=[RunTag(key, value) for (key, value) in tags.items()],
        )
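A brief, hypothetical call illustrating the user extraction above (assuming `client` is an instance of the class defining this `create_run`): when the MLFLOW_USER tag is present its value becomes the run's user_id, otherwise "unknown" is used.

from mlflow.utils.mlflow_tags import MLFLOW_USER

run = client.create_run(
    experiment_id="0",
    tags={MLFLOW_USER: "alice", "team": "ml-platform"},
)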
Example #8
    def _log_dataset_tag(self, client, run_id, model_uuid):
        """
        Log dataset metadata as the "mlflow.datasets" tag. If the tag already exists, the
        current dataset metadata is appended to the existing tag content.
        """
        existing_dataset_metadata_str = client.get_run(run_id).data.tags.get(
            "mlflow.datasets", "[]"
        )
        dataset_metadata_list = json.loads(existing_dataset_metadata_str)

        for metadata in dataset_metadata_list:
            if (
                metadata["hash"] == self.hash
                and metadata["name"] == self.name
                and metadata["model"] == model_uuid
            ):
                break
        else:
            dataset_metadata_list.append({**self._metadata, "model": model_uuid})

        dataset_metadata_str = json.dumps(dataset_metadata_list, separators=(",", ":"))
        client.log_batch(
            run_id,
            tags=[RunTag("mlflow.datasets", dataset_metadata_str)],
        )
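A short sketch of reading the tag back (assuming `client` is an MlflowClient and `run_id` refers to a run logged above): the tag value is a JSON-encoded list, so it can be parsed with json.loads.

import json

datasets = json.loads(client.get_run(run_id).data.tags.get("mlflow.datasets", "[]"))
for entry in datasets:
    print(entry["name"], entry["hash"], entry["model"])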
Example #9
 def _create():
     metrics = [Metric(random_str(10),
                       random_int(0, 1000), int(time.time()) + random_int(-1e4, 1e4))]
     params = [Param(random_str(10), random_str(random_int(10, 35))) for _ in range(10)]  # noqa
     tags = [RunTag(random_str(10), random_str(random_int(10, 35))) for _ in range(10)]  # noqa
     rd = RunData(metrics=metrics, params=params, tags=tags)
     return rd, metrics, params, tags
Example #10
def test_validate_batch_log_limits():
    too_many_metrics = [Metric("metric-key-%s" % i, 1, 0) for i in range(1001)]
    too_many_params = [Param("param-key-%s" % i, "b") for i in range(101)]
    too_many_tags = [RunTag("tag-key-%s" % i, "b") for i in range(101)]

    good_kwargs = {"metrics": [], "params": [], "tags": []}
    bad_kwargs = {
        "metrics": [too_many_metrics],
        "params": [too_many_params],
        "tags": [too_many_tags],
    }
    for arg_name, arg_values in bad_kwargs.items():
        for arg_value in arg_values:
            final_kwargs = copy.deepcopy(good_kwargs)
            final_kwargs[arg_name] = arg_value
            with pytest.raises(MlflowException):
                _validate_batch_log_limits(**final_kwargs)
    # Test the case where there are too many entities in aggregate
    with pytest.raises(MlflowException):
        _validate_batch_log_limits(too_many_metrics[:900],
                                   too_many_params[:51], too_many_tags[:50])
    # Test that we don't reject entities within the limit
    _validate_batch_log_limits(too_many_metrics[:1000], [], [])
    _validate_batch_log_limits([], too_many_params[:100], [])
    _validate_batch_log_limits([], [], too_many_tags[:100])
Example #11
    def to_mlflow_entity(self):
        """
        Convert DB model to corresponding MLflow entity.

        :return: :py:class:`mlflow.entities.RunTag`.
        """
        return RunTag(key=self.key, value=self.value)
Example #12
def set_tags(tags):
    """
    Log a batch of tags for the current run. If no run is active, this method will create a
    new active run.

    :param tags: Dictionary mapping tag names (strings) to tag values. Non-string values
                 are string-ified.
    :returns: None

    .. code-block:: python
        :caption: Example

        import mlflow

        tags = {"engineering": "ML Platform",
                "release.candidate": "RC1",
                "release.version": "2.2.0"}

        # Set a batch of tags
        with mlflow.start_run():
            mlflow.set_tags(tags)
    """
    run_id = _get_or_start_run().info.run_id
    tags_arr = [RunTag(key, str(value)) for key, value in tags.items()]
    MlflowClient().log_batch(run_id=run_id, metrics=[], params=[], tags=tags_arr)
Example #13
    def import_run_data(self, run_dct, run_id, src_user_id):
        from mlflow.entities import Metric, Param, RunTag
        now = round(time.time())
        params = [Param(k, v) for k, v in run_dct["params"].items()]
        metrics = [
            Metric(k, v, now, 0) for k, v in run_dct["metrics"].items()
        ]  # TODO: missing timestamp and step semantics?

        tags = run_dct["tags"]
        if not self.import_mlflow_tags:  # remove mlflow tags
            tags = {
                k: v
                for k, v in tags.items()
                if not k.startswith(utils.TAG_PREFIX_MLFLOW)
            }
        if not self.import_metadata_tags:  # remove mlflow_export_import tags
            tags = {
                k: v
                for k, v in tags.items()
                if not k.startswith(utils.TAG_PREFIX_METADATA)
            }
        tags = utils.create_mlflow_tags_for_databricks_import(
            tags
        )  # remove "mlflow" tags that cannot be imported into Databricks

        tags = [RunTag(k, str(v)) for k, v in tags.items()]

        #self.dump_tags(tags,"1") # debug
        if not self.in_databricks:
            utils.set_dst_user_id(tags, src_user_id, self.use_src_user_id)
        #self.dump_tags(tags,"2") # debug
        self.client.log_batch(run_id, metrics, params, tags)
Example #14
 def set_tag(self, run_id, key, value):
     """
     Set a tag on the run ID. Value is converted to a string.
     """
     _validate_tag_name(key)
     tag = RunTag(key, str(value))
     self.store.set_tag(run_id, tag)
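A minimal usage sketch (hypothetical: assumes `client` exposes the set_tag method above and `run_id` refers to an existing run); non-string values are string-ified before being stored.

client.set_tag(run_id, "release.version", 2)  # stored as the string "2"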
Example #15
 def test_log_batch_allows_tag_overwrite(self):
     fs = FileStore(self.test_root)
     run = self._create_run(fs)
     fs.log_batch(run.info.run_id,
                  metrics=[],
                  params=[],
                  tags=[RunTag("t-key", "val")])
     fs.log_batch(run.info.run_id,
                  metrics=[],
                  params=[],
                  tags=[RunTag("t-key", "newval")])
     self._verify_logged(fs,
                         run.info.run_id,
                         metrics=[],
                         params=[],
                         tags=[RunTag("t-key", "newval")])
Example #16
 def test_weird_tag_names(self):
     WEIRD_TAG_NAME = "this is/a weird/but valid tag"
     fs = FileStore(self.test_root)
     run_id = self.exp_data[FileStore.DEFAULT_EXPERIMENT_ID]["runs"][0]
     fs.set_tag(run_id, RunTag(WEIRD_TAG_NAME, "Muhahaha!"))
     run = fs.get_run(run_id)
     assert run.data.tags[WEIRD_TAG_NAME] == "Muhahaha!"
Example #17
 def test_unicode_tag(self):
     fs = FileStore(self.test_root)
     run_id = self.exp_data[FileStore.DEFAULT_EXPERIMENT_ID]["runs"][0]
     value = u"𝐼 𝓈𝑜𝓁𝑒𝓂𝓃𝓁𝓎 𝓈𝓌𝑒𝒶𝓇 𝓉𝒽𝒶𝓉 𝐼 𝒶𝓂 𝓊𝓅 𝓉𝑜 𝓃𝑜 𝑔𝑜𝑜𝒹"
     fs.set_tag(run_id, RunTag("message", value))
     tags = fs.get_run(run_id).data.tags
     assert tags["message"] == value
Example #18
    def test_log_batch_internal_error(self):
        # Verify that internal errors during log_batch result in MlflowExceptions
        fs = FileStore(self.test_root)
        run = self._create_run(fs)

        def _raise_exception_fn(*args, **kwargs):  # pylint: disable=unused-argument
            raise Exception("Some internal error")
        with mock.patch("mlflow.store.file_store.FileStore.log_metric") as log_metric_mock, \
                mock.patch("mlflow.store.file_store.FileStore.log_param") as log_param_mock, \
                mock.patch("mlflow.store.file_store.FileStore.set_tag") as set_tag_mock:
            log_metric_mock.side_effect = _raise_exception_fn
            log_param_mock.side_effect = _raise_exception_fn
            set_tag_mock.side_effect = _raise_exception_fn
            for kwargs in [{
                    "metrics": [Metric("a", 3, 1, 0)]
            }, {
                    "params": [Param("b", "c")]
            }, {
                    "tags": [RunTag("c", "d")]
            }]:
                log_batch_kwargs = {"metrics": [], "params": [], "tags": []}
                log_batch_kwargs.update(kwargs)
                print(log_batch_kwargs)
                with self.assertRaises(MlflowException) as e:
                    fs.log_batch(run.info.run_id, **log_batch_kwargs)
                self.assertIn("Some internal error", str(e.exception.message))
                assert e.exception.error_code == ErrorCode.Name(INTERNAL_ERROR)
Example #19
 def test_log_batch_tags_idempotency(self):
     fs = FileStore(self.test_root)
     run = self._create_run(fs)
     fs.log_batch(run.info.run_id,
                  metrics=[],
                  params=[],
                  tags=[RunTag("t-key", "t-val")])
     fs.log_batch(run.info.run_id,
                  metrics=[],
                  params=[],
                  tags=[RunTag("t-key", "t-val")])
     self._verify_logged(fs,
                         run.info.run_id,
                         metrics=[],
                         params=[],
                         tags=[RunTag("t-key", "t-val")])
Example #20
    def record_logged_model(self, run_id, mlflow_model):
        from mlflow.models import Model

        if not isinstance(mlflow_model, Model):
            raise TypeError(
                "Argument 'mlflow_model' should be mlflow.models.Model, got '{}'".format(
                    type(mlflow_model)
                )
            )
        _validate_run_id(run_id)
        run_info = self._get_run_info(run_id)
        check_run_is_active(run_info)
        model_dict = mlflow_model.to_dict()
        run_info = self._get_run_info(run_id)
        path = self._get_tag_path(run_info.experiment_id, run_info.run_id, MLFLOW_LOGGED_MODELS)
        if os.path.exists(path):
            with open(path, "r") as f:
                model_list = json.loads(f.read())
        else:
            model_list = []
        tag = RunTag(MLFLOW_LOGGED_MODELS, json.dumps(model_list + [model_dict]))

        try:
            self._set_run_tag(run_info, tag)
        except Exception as e:
            raise MlflowException(e, INTERNAL_ERROR)
Example #21
def test_search_runs_data():
    runs = [
        create_run(
            metrics=[Metric("mse", 0.2, 0, 0)],
            params=[Param("param", "value")],
            tags=[RunTag("tag", "value")],
            start=1564675200000,
            end=1564683035000,
        ),
        create_run(
            metrics=[Metric("mse", 0.6, 0, 0),
                     Metric("loss", 1.2, 0, 5)],
            params=[Param("param2", "val"),
                    Param("k", "v")],
            tags=[RunTag("tag2", "v2")],
            start=1564765200000,
            end=1564783200000,
        ),
    ]
    with mock.patch("mlflow.tracking.fluent._paginate", return_value=runs):
        pdf = search_runs()
        data = {
            "status": [RunStatus.FINISHED] * 2,
            "artifact_uri": [None] * 2,
            "run_id": [""] * 2,
            "experiment_id": [""] * 2,
            "metrics.mse": [0.2, 0.6],
            "metrics.loss": [np.nan, 1.2],
            "params.param": ["value", None],
            "params.param2": [None, "val"],
            "params.k": [None, "v"],
            "tags.tag": ["value", None],
            "tags.tag2": [None, "v2"],
            "start_time": [
                pd.to_datetime(1564675200000, unit="ms", utc=True),
                pd.to_datetime(1564765200000, unit="ms", utc=True),
            ],
            "end_time": [
                pd.to_datetime(1564683035000, unit="ms", utc=True),
                pd.to_datetime(1564783200000, unit="ms", utc=True),
            ],
        }
        expected_df = pd.DataFrame(data)
        pd.testing.assert_frame_equal(pdf,
                                      expected_df,
                                      check_like=True,
                                      check_frame_type=False)
Example #22
 def create_run(self, experiment_id, user_id, run_name, source_type,
                source_name, entry_point_name, start_time, source_version,
                tags, parent_run_id):
     """
     Creates a run with the specified attributes.
     """
     experiment = self.get_experiment(experiment_id)
     if experiment is None:
         raise MlflowException(
             "Could not create run under experiment with ID %s - no such experiment "
             "exists." % experiment_id,
             databricks_pb2.RESOURCE_DOES_NOT_EXIST)
     if experiment.lifecycle_stage != LifecycleStage.ACTIVE:
         raise MlflowException(
             "Could not create run under non-active experiment with ID "
             "%s." % experiment_id, databricks_pb2.INVALID_STATE)
     run_uuid = uuid.uuid4().hex
     artifact_uri = self._get_artifact_dir(experiment_id, run_uuid)
     run_info = RunInfo(run_uuid=run_uuid,
                        experiment_id=experiment_id,
                        name="",
                        artifact_uri=artifact_uri,
                        source_type=source_type,
                        source_name=source_name,
                        entry_point_name=entry_point_name,
                        user_id=user_id,
                        status=RunStatus.RUNNING,
                        start_time=start_time,
                        end_time=None,
                        source_version=source_version,
                        lifecycle_stage=LifecycleStage.ACTIVE)
     # Persist run metadata and create directories for logging metrics, parameters, artifacts
     run_dir = self._get_run_dir(run_info.experiment_id, run_info.run_uuid)
     mkdir(run_dir)
     write_yaml(run_dir, FileStore.META_DATA_FILE_NAME,
                _make_persisted_run_info_dict(run_info))
     mkdir(run_dir, FileStore.METRICS_FOLDER_NAME)
     mkdir(run_dir, FileStore.PARAMS_FOLDER_NAME)
     mkdir(run_dir, FileStore.ARTIFACTS_FOLDER_NAME)
     for tag in tags:
         self.set_tag(run_uuid, tag)
     if parent_run_id:
         self.set_tag(run_uuid,
                      RunTag(key=MLFLOW_PARENT_RUN_ID, value=parent_run_id))
     if run_name:
         self.set_tag(run_uuid, RunTag(key=MLFLOW_RUN_NAME, value=run_name))
     return Run(run_info=run_info, run_data=None)
Example #23
def test_log_batch_handler_success(mock_get_request_message, mock_get_request_json, tmpdir):
    # Test success cases for the LogBatch API
    def _test_log_batch_helper_success(
            metric_entities, param_entities, tag_entities,
            expected_metrics=None, expected_params=None, expected_tags=None):
        """
        Simulates a LogBatch API request using the provided metrics/params/tags, asserting that it
        succeeds & that the backing store contains either the set of expected metrics/params/tags
        (if provided) or, by default, the metrics/params/tags used in the API request.
        """
        with mlflow.start_run() as active_run:
            run_id = active_run.info.run_uuid
            mock_get_request_message.return_value = LogBatch(
                run_id=run_id,
                metrics=[m.to_proto() for m in metric_entities],
                params=[p.to_proto() for p in param_entities],
                tags=[t.to_proto() for t in tag_entities])
            response = _log_batch()
            assert response.status_code == 200
            json_response = json.loads(response.get_data())
            assert json_response == {}
            _assert_logged_entities(
                run_id, expected_metrics or metric_entities, expected_params or param_entities,
                expected_tags or tag_entities)

    store = FileStore(tmpdir.strpath)
    mock_get_request_json.return_value = "{}"  # Mock request JSON so it passes length validation
    server_patch = mock.patch('mlflow.server.handlers._get_store', return_value=store)
    client_patch = mock.patch('mlflow.tracking.utils._get_store', return_value=store)
    with server_patch, client_patch:
        mlflow.set_experiment("log-batch-experiment")
        # Log an empty payload
        _test_log_batch_helper_success([], [], [])
        # Log multiple metrics/params/tags
        _test_log_batch_helper_success(
            metric_entities=[Metric(key="m-key", value=3.2 * i, timestamp=i) for i in range(3)],
            param_entities=[Param(key="p-key-%s" % i, value="p-val-%s" % i) for i in range(4)],
            tag_entities=[RunTag(key="t-key-%s" % i, value="t-val-%s" % i) for i in range(5)])
        # Log metrics with the same key
        _test_log_batch_helper_success(
            metric_entities=[Metric(key="m-key", value=3.2 * i, timestamp=3) for i in range(3)],
            param_entities=[], tag_entities=[])
        # Log tags with the same key, verify the last one gets written
        same_key_tags = [RunTag(key="t-key", value="t-val-%s" % i) for i in range(5)]
        _test_log_batch_helper_success(
            metric_entities=[], param_entities=[], tag_entities=same_key_tags,
            expected_tags=[same_key_tags[-1]])
Example #24
    def test_requestor(self, request):
        response = mock.MagicMock()
        response.status_code = 200
        response.text = '{}'
        request.return_value = response

        creds = MlflowHostCreds('https://hello')
        store = RestStore(lambda: creds)

        with mock.patch(
                'mlflow.store.rest_store.http_request_safe') as mock_http:
            store.log_param("some_uuid", Param("k1", "v1"))
            body = message_to_json(
                LogParam(run_uuid="some_uuid", key="k1", value="v1"))
            self._verify_requests(mock_http, creds, "runs/log-parameter",
                                  "POST", body)

        with mock.patch(
                'mlflow.store.rest_store.http_request_safe') as mock_http:
            store.set_tag("some_uuid", RunTag("t1", "abcd" * 1000))
            body = message_to_json(
                SetTag(run_uuid="some_uuid", key="t1", value="abcd" * 1000))
            self._verify_requests(mock_http, creds, "runs/set-tag", "POST",
                                  body)

        with mock.patch(
                'mlflow.store.rest_store.http_request_safe') as mock_http:
            store.log_metric("u2", Metric("m1", 0.87, 12345))
            body = message_to_json(
                LogMetric(run_uuid="u2", key="m1", value=0.87,
                          timestamp=12345))
            self._verify_requests(mock_http, creds, "runs/log-metric", "POST",
                                  body)

        with mock.patch(
                'mlflow.store.rest_store.http_request_safe') as mock_http:
            store.delete_run("u25")
            self._verify_requests(mock_http, creds, "runs/delete", "POST",
                                  message_to_json(DeleteRun(run_id="u25")))

        with mock.patch(
                'mlflow.store.rest_store.http_request_safe') as mock_http:
            store.restore_run("u76")
            self._verify_requests(mock_http, creds, "runs/restore", "POST",
                                  message_to_json(RestoreRun(run_id="u76")))

        with mock.patch(
                'mlflow.store.rest_store.http_request_safe') as mock_http:
            store.delete_experiment(0)
            self._verify_requests(
                mock_http, creds, "experiments/delete", "POST",
                message_to_json(DeleteExperiment(experiment_id=0)))

        with mock.patch(
                'mlflow.store.rest_store.http_request_safe') as mock_http:
            store.restore_experiment(0)
            self._verify_requests(
                mock_http, creds, "experiments/restore", "POST",
                message_to_json(RestoreExperiment(experiment_id=0)))
Example #25
def _set_tag():
    request_message = _get_request_message(SetTag())
    tag = RunTag(request_message.key, request_message.value)
    _get_store().set_tag(request_message.run_uuid, tag)
    response_message = SetTag.Response()
    response = Response(mimetype='application/json')
    response.set_data(message_to_json(response_message))
    return response
Example #26
def test_search_runs_data():
    import numpy as np
    import pandas as pd

    runs = [
        create_run(
            metrics=[Metric("mse", 0.2, 0, 0)],
            params=[Param("param", "value")],
            tags=[RunTag("tag", "value")],
            start=1564675200000,
            end=1564683035000,
        ),
        create_run(
            metrics=[Metric("mse", 0.6, 0, 0),
                     Metric("loss", 1.2, 0, 5)],
            params=[Param("param2", "val"),
                    Param("k", "v")],
            tags=[RunTag("tag2", "v2")],
            start=1564765200000,
            end=1564783200000,
        ),
    ]
    with mock.patch("mlflow.tracking.fluent._paginate", return_value=runs):
        pdf = search_runs()
        data = {
            "status": [RunStatus.FINISHED] * 2,
            "artifact_uri": [None] * 2,
            "run_id": [""] * 2,
            "experiment_id": [""] * 2,
            "metrics.mse": [0.2, 0.6],
            "metrics.loss": [np.nan, 1.2],
            "params.param": ["value", None],
            "params.param2": [None, "val"],
            "params.k": [None, "v"],
            "tags.tag": ["value", None],
            "tags.tag2": [None, "v2"],
            "start_time": [
                pd.to_datetime(1564675200000, unit="ms", utc=True),
                pd.to_datetime(1564765200000, unit="ms", utc=True),
            ],
            "end_time": [
                pd.to_datetime(1564683035000, unit="ms", utc=True),
                pd.to_datetime(1564783200000, unit="ms", utc=True),
            ],
        }
        validate_search_runs(pdf, data, "pandas")
Example #27
def faculty_run_to_mlflow_run(faculty_run):
    lifecycle_stage = (LifecycleStage.ACTIVE if faculty_run.deleted_at is None
                       else LifecycleStage.DELETED)
    start_time = _datetime_to_mlflow_timestamp(faculty_run.started_at)
    end_time = (_datetime_to_mlflow_timestamp(faculty_run.ended_at)
                if faculty_run.ended_at is not None else None)

    tag_dict = {tag.key: tag.value for tag in faculty_run.tags}

    extra_mlflow_tags = []

    # Set run name tag if set as attribute but not already a tag
    if MLFLOW_RUN_NAME not in tag_dict and faculty_run.name:
        extra_mlflow_tags.append(RunTag(MLFLOW_RUN_NAME, faculty_run.name))

    # Set parent run ID tag if set as attribute but not already a tag
    if (MLFLOW_PARENT_RUN_ID not in tag_dict
            and faculty_run.parent_run_id is not None):
        extra_mlflow_tags.append(
            RunTag(MLFLOW_PARENT_RUN_ID, faculty_run.parent_run_id.hex))

    run_info = RunInfo(
        run_uuid=faculty_run.id.hex,
        experiment_id=str(faculty_run.experiment_id),
        user_id="",
        status=_FACULTY_TO_MLFLOW_RUN_STATUS_MAP[faculty_run.status],
        start_time=start_time,
        end_time=end_time,
        lifecycle_stage=lifecycle_stage,
        artifact_uri=faculty_run.artifact_location,
        run_id=faculty_run.id.hex,
    )
    run_data = RunData(
        params=[
            faculty_param_to_mlflow_param(param)
            for param in faculty_run.params
        ],
        metrics=[
            faculty_metric_to_mlflow_metric(metric)
            for metric in faculty_run.metrics
        ],
        tags=[faculty_tag_to_mlflow_tag(tag)
              for tag in faculty_run.tags] + extra_mlflow_tags,
    )
    run = Run(run_info, run_data)
    return run
Example #28
 def test_weird_tag_names(self):
     WEIRD_TAG_NAME = "this is/a weird/but valid tag"
     fs = FileStore(self.test_root)
     run_uuid = self.exp_data[0]["runs"][0]
     fs.set_tag(run_uuid, RunTag(WEIRD_TAG_NAME, "Muhahaha!"))
     tag = fs.get_run(run_uuid).data.tags[0]
     assert tag.key == WEIRD_TAG_NAME
     assert tag.value == "Muhahaha!"
Example #29
 def test_unicode_tag(self):
     fs = FileStore(self.test_root)
     run_uuid = self.exp_data[0]["runs"][0]
     value = u"𝐼 𝓈𝑜𝓁𝑒𝓂𝓃𝓁𝓎 𝓈𝓌𝑒𝒶𝓇 𝓉𝒽𝒶𝓉 𝐼 𝒶𝓂 𝓊𝓅 𝓉𝑜 𝓃𝑜 𝑔𝑜𝑜𝒹"
     fs.set_tag(run_uuid, RunTag("message", value))
     tag = fs.get_run(run_uuid).data.tags[0]
     assert tag.key == "message"
     assert tag.value == value
Example #30
def create_tags_for_mlflow_tags(tags_dct, import_mlflow_tags):
    from mlflow.entities import RunTag
    tags = []
    for k, v in tags_dct.items():
        if not import_mlflow_tags and k.startswith("mlflow."):
            k = PREFIX_SRC_RUN + "." + k
        tags.append(RunTag(k, str(v)))
    return tags
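A short, hypothetical call to the helper above: with import_mlflow_tags=False, keys starting with "mlflow." are re-prefixed with PREFIX_SRC_RUN (defined elsewhere in that module) instead of being imported as-is.

tags = create_tags_for_mlflow_tags(
    {"mlflow.runName": "baseline", "alpha": "0.1"},
    import_mlflow_tags=False,
)
for tag in tags:
    print(tag.key, tag.value)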