Example #1
def test_bad_comparators(entity_type, bad_comparators, entity_value):
    run = Run(run_info=RunInfo(
        run_uuid="hi", run_id="hi", experiment_id=0,
        user_id="user-id", status=RunStatus.FAILED,
        start_time=0, end_time=1, lifecycle_stage=LifecycleStage.ACTIVE),
        run_data=RunData(metrics=[], params=[], tags=[])
    )
    for bad_comparator in bad_comparators:
        bad_filter = "{entity_type}.abc {comparator} {value}".format(
            entity_type=entity_type, comparator=bad_comparator, value=entity_value)
        sf = SearchFilter(filter_string=bad_filter)
        with pytest.raises(MlflowException) as e:
            sf.filter(run)
        assert "Invalid comparator" in str(e.value.message)
Example #2
def get_run(self, run_id):
    """
    Note: Will get both active and deleted runs.
    """
    _validate_run_id(run_id)
    run_info = self._get_run_info(run_id)
    if run_info is None:
        raise MlflowException(
            "Run '%s' metadata is in invalid state." % run_id,
            databricks_pb2.INVALID_STATE)
    metrics = self.get_all_metrics(run_id)
    params = self.get_all_params(run_id)
    tags = self.get_all_tags(run_id)
    return Run(run_info, RunData(metrics, params, tags))
Example #3
def create_run(self, experiment_id, user_id, run_name, source_type,
               source_name, entry_point_name, start_time, source_version,
               tags, parent_run_id):
    """
    Creates a run with the specified attributes.
    """
    experiment_id = FileStore.DEFAULT_EXPERIMENT_ID if experiment_id is None else experiment_id
    experiment = self.get_experiment(experiment_id)
    if experiment is None:
        raise MlflowException(
            "Could not create run under experiment with ID %s - no such experiment "
            "exists." % experiment_id,
            databricks_pb2.RESOURCE_DOES_NOT_EXIST)
    if experiment.lifecycle_stage != LifecycleStage.ACTIVE:
        raise MlflowException(
            "Could not create run under non-active experiment with ID "
            "%s." % experiment_id, databricks_pb2.INVALID_STATE)
    run_uuid = uuid.uuid4().hex
    artifact_uri = self._get_artifact_dir(experiment_id, run_uuid)
    run_info = RunInfo(run_uuid=run_uuid,
                       experiment_id=experiment_id,
                       name="",
                       artifact_uri=artifact_uri,
                       source_type=source_type,
                       source_name=source_name,
                       entry_point_name=entry_point_name,
                       user_id=user_id,
                       status=RunStatus.RUNNING,
                       start_time=start_time,
                       end_time=None,
                       source_version=source_version,
                       lifecycle_stage=LifecycleStage.ACTIVE)
    # Persist run metadata and create directories for logging metrics, parameters, artifacts
    run_dir = self._get_run_dir(run_info.experiment_id, run_info.run_uuid)
    mkdir(run_dir)
    run_info_dict = _make_persisted_run_info_dict(run_info)
    write_yaml(run_dir, FileStore.META_DATA_FILE_NAME, run_info_dict)
    mkdir(run_dir, FileStore.METRICS_FOLDER_NAME)
    mkdir(run_dir, FileStore.PARAMS_FOLDER_NAME)
    mkdir(run_dir, FileStore.ARTIFACTS_FOLDER_NAME)
    for tag in tags:
        self.set_tag(run_uuid, tag)
    if parent_run_id:
        self.set_tag(run_uuid,
                     RunTag(key=MLFLOW_PARENT_RUN_ID, value=parent_run_id))
    if run_name:
        self.set_tag(run_uuid, RunTag(key=MLFLOW_RUN_NAME, value=run_name))
    return Run(run_info=run_info, run_data=None)
Example #4
def test_string_repr(self):
    run_info = RunInfo(
        run_uuid="hi", experiment_id=0, name="name", source_type=SourceType.PROJECT,
        source_name="source-name", entry_point_name="entry-point-name",
        user_id="user-id", status=RunStatus.FAILED, start_time=0, end_time=1,
        source_version="version", lifecycle_stage=LifecycleStage.ACTIVE)
    metrics = [Metric("key-%s" % i, i, 0) for i in range(3)]
    run_data = RunData(metrics=metrics, params=[], tags=[])
    run1 = Run(run_info, run_data)
    expected = ("<Run: data=<RunData: metrics={'key-0': 0, 'key-1': 1, 'key-2': 2}, "
                "params={}, tags={}>, info=<RunInfo: artifact_uri=None, end_time=1, "
                "entry_point_name='entry-point-name', experiment_id=0, "
                "lifecycle_stage='active', name='name', run_uuid='hi', "
                "source_name='source-name', source_type=3, source_version='version', "
                "start_time=0, status=4, user_id='user-id'>>")
    assert str(run1) == expected
Example #5
    def to_mlflow_entity(self) -> Run:
        run_info = RunInfo(run_uuid=self.meta.id,
                           run_id=self.meta.id,
                           experiment_id=str(self.experiment_id),
                           user_id=self.user_id,
                           status=self.status,
                           start_time=self.start_time,
                           end_time=self.end_time,
                           lifecycle_stage=self.lifecycle_stage,
                           artifact_uri=self.artifact_uri)

        run_data = RunData(
            metrics=[m.to_mlflow_entity() for m in self.latest_metrics],
            params=[p.to_mlflow_entity() for p in self.params],
            tags=[t.to_mlflow_entity() for t in self.tags])
        return Run(run_info=run_info, run_data=run_data)
Example #6
def faculty_run_to_mlflow_run(faculty_run):
    lifecycle_stage = (LifecycleStage.ACTIVE if faculty_run.deleted_at is None
                       else LifecycleStage.DELETED)
    start_time = _datetime_to_mlflow_timestamp(faculty_run.started_at)
    end_time = (_datetime_to_mlflow_timestamp(faculty_run.ended_at)
                if faculty_run.ended_at is not None else None)

    tag_dict = {tag.key: tag.value for tag in faculty_run.tags}

    extra_mlflow_tags = []

    # Set run name tag if set as attribute but not already a tag
    if MLFLOW_RUN_NAME not in tag_dict and faculty_run.name:
        extra_mlflow_tags.append(RunTag(MLFLOW_RUN_NAME, faculty_run.name))

    # Set parent run ID tag if set as attribute but not already a tag
    if (MLFLOW_PARENT_RUN_ID not in tag_dict
            and faculty_run.parent_run_id is not None):
        extra_mlflow_tags.append(
            RunTag(MLFLOW_PARENT_RUN_ID, faculty_run.parent_run_id.hex))

    run_info = RunInfo(
        run_uuid=faculty_run.id.hex,
        experiment_id=str(faculty_run.experiment_id),
        user_id="",
        status=_FACULTY_TO_MLFLOW_RUN_STATUS_MAP[faculty_run.status],
        start_time=start_time,
        end_time=end_time,
        lifecycle_stage=lifecycle_stage,
        artifact_uri=faculty_run.artifact_location,
        run_id=faculty_run.id.hex,
    )
    run_data = RunData(
        params=[
            faculty_param_to_mlflow_param(param)
            for param in faculty_run.params
        ],
        metrics=[
            faculty_metric_to_mlflow_metric(metric)
            for metric in faculty_run.metrics
        ],
        tags=[faculty_tag_to_mlflow_tag(tag)
              for tag in faculty_run.tags] + extra_mlflow_tags,
    )
    run = Run(run_info, run_data)
    return run
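The helper `_datetime_to_mlflow_timestamp` is not included in the snippet. Assuming it follows MLflow's convention of millisecond Unix timestamps, a plausible sketch looks like this (an illustration, not the plugin's actual code):

from datetime import datetime, timezone

def _datetime_to_mlflow_timestamp(dt):
    # Assumption: MLflow start_time/end_time are milliseconds since the
    # Unix epoch, so scale the POSIX timestamp accordingly.
    return int(dt.timestamp() * 1000)

_datetime_to_mlflow_timestamp(datetime(2020, 1, 1, tzinfo=timezone.utc))
# -> 1577836800000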
Example #7
def create_run(run_id="", exp_id="", uid="", start=0, metrics=None, params=None, tags=None,
               status=RunStatus.FINISHED, a_uri=None):
    return Run(
        RunInfo(
            run_uuid=run_id,
            run_id=run_id,
            experiment_id=exp_id,
            user_id=uid,
            status=status,
            start_time=start,
            end_time=0,
            lifecycle_stage=LifecycleStage.ACTIVE,
            artifact_uri=a_uri
        ), RunData(
            metrics=metrics,
            params=params,
            tags=tags
        ))
Example #8
def create_run(self, experiment_id, user_id, run_name, source_type,
               source_name, entry_point_name, start_time, source_version,
               tags):
    """
    Creates a run with the specified attributes.
    """
    experiment = self.get_experiment(experiment_id)
    if experiment is None:
        raise Exception(
            "Could not create run under experiment with ID %s - no such experiment "
            "exists." % experiment_id)
    if experiment.lifecycle_stage != Experiment.ACTIVE_LIFECYCLE:
        raise Exception(
            'Could not create run under non-active experiment with ID '
            '%s.' % experiment_id)
    run_uuid = uuid.uuid4().hex
    artifact_uri = self._get_artifact_dir(experiment_id, run_uuid)
    run_info = RunInfo(run_uuid=run_uuid,
                       experiment_id=experiment_id,
                       name="",
                       artifact_uri=artifact_uri,
                       source_type=source_type,
                       source_name=source_name,
                       entry_point_name=entry_point_name,
                       user_id=user_id,
                       status=RunStatus.RUNNING,
                       start_time=start_time,
                       end_time=None,
                       source_version=source_version,
                       lifecycle_stage=RunInfo.ACTIVE_LIFECYCLE)
    # Persist run metadata and create directories for logging metrics, parameters, artifacts
    run_dir = self._get_run_dir(run_info.experiment_id, run_info.run_uuid)
    mkdir(run_dir)
    write_yaml(run_dir, FileStore.META_DATA_FILE_NAME,
               _make_persisted_run_info_dict(run_info))
    mkdir(run_dir, FileStore.METRICS_FOLDER_NAME)
    mkdir(run_dir, FileStore.PARAMS_FOLDER_NAME)
    mkdir(run_dir, FileStore.ARTIFACTS_FOLDER_NAME)
    for tag in tags:
        self.set_tag(run_uuid, tag)
    if run_name:
        self.set_tag(run_uuid, RunTag(key=MLFLOW_RUN_NAME, value=run_name))
    return Run(run_info=run_info, run_data=None)
Example #9
def test_filter_runs_by_start_time():
    runs = [
        Run(
            run_info=RunInfo(
                run_uuid=run_id,
                run_id=run_id,
                experiment_id=0,
                user_id="user-id",
                status=RunStatus.to_string(RunStatus.FINISHED),
                start_time=idx,
                end_time=1,
                lifecycle_stage=LifecycleStage.ACTIVE,
            ),
            run_data=RunData(),
        ) for idx, run_id in enumerate(["a", "b", "c"])
    ]
    assert SearchUtils.filter(runs, "attribute.start_time >= 0") == runs
    assert SearchUtils.filter(runs, "attribute.start_time > 1") == runs[2:]
    assert SearchUtils.filter(runs, "attribute.start_time = 2") == runs[2:]
Example #10
    def test_creation_and_hydration(self):
        run_data, metrics, params, tags = TestRunData._create()
        (run_info, run_id, experiment_id, name, source_type, source_name,
         entry_point_name, user_id, status, start_time, end_time,
         source_version, lifecycle_stage,
         artifact_uri) = TestRunInfo._create()

        run1 = Run(run_info, run_data)

        self._check_run(run1, run_info, metrics, params, tags)

        as_dict = {
            "info": {
                "run_uuid": run_id,
                "run_id": run_id,
                "experiment_id": experiment_id,
                "name": name,
                "source_type": source_type,
                "source_name": source_name,
                "entry_point_name": entry_point_name,
                "user_id": user_id,
                "status": status,
                "start_time": start_time,
                "end_time": end_time,
                "source_version": source_version,
                "lifecycle_stage": lifecycle_stage,
                "artifact_uri": artifact_uri,
            },
            "data": {
                "metrics": {m.key: m.value
                            for m in metrics},
                "params": {p.key: p.value
                           for p in params},
                "tags": {t.key: t.value
                         for t in tags}
            }
        }
        self.assertEqual(run1.to_dictionary(), as_dict)

        proto = run1.to_proto()
        run2 = Run.from_proto(proto)
        self._check_run(run2, run_info, metrics, params, tags)
Example #11
def test_get_artifact_repo(artifact_uri, databricks_uri, uri_for_repo):
    with mock.patch(
            "mlflow.tracking._tracking_service.client.TrackingServiceClient.get_run",
            return_value=Run(
                RunInfo("uuid",
                        "expr_id",
                        "userid",
                        "status",
                        0,
                        10,
                        "active",
                        artifact_uri=artifact_uri),
                None,
            ),
    ), mock.patch(
            "mlflow.tracking._tracking_service.client.get_artifact_repository",
            return_value=None) as get_repo_mock:
        client = TrackingServiceClient(databricks_uri)
        client._get_artifact_repo("some-run-id")
        get_repo_mock.assert_called_once_with(uri_for_repo)
Example #12
def test_order_by_metric_with_nans_and_infs():
    metric_vals_str = ["nan", "inf", "-inf", "-1000", "0", "1000"]
    runs = [
        Run(run_info=RunInfo(run_id=x, run_uuid=x, experiment_id=0, user_id="user",
                             status=RunStatus.to_string(RunStatus.FINISHED),
                             start_time=0, end_time=1, lifecycle_stage=LifecycleStage.ACTIVE),
            run_data=RunData(
                metrics=[Metric("x", float(x), 1, 0)])
            ) for x in metric_vals_str
    ]
    sorted_runs_asc = [
        x.info.run_id for x in SearchUtils.sort(runs, ["metrics.x asc"])
    ]
    sorted_runs_desc = [
        x.info.run_id for x in SearchUtils.sort(runs, ["metrics.x desc"])
    ]
    # asc
    assert ["-inf", "-1000", "0", "1000", "inf", "nan"] == sorted_runs_asc
    # desc
    assert ["inf", "1000", "0", "-1000", "-inf", "nan"] == sorted_runs_desc
Example #13
def test_bad_comparators(entity_type, bad_comparators, key, entity_value):
    run = Run(
        run_info=RunInfo(
            run_uuid="hi",
            run_id="hi",
            experiment_id=0,
            user_id="user-id",
            status=RunStatus.to_string(RunStatus.FAILED),
            start_time=0,
            end_time=1,
            lifecycle_stage=LifecycleStage.ACTIVE,
        ),
        run_data=RunData(metrics=[], params=[], tags=[]),
    )
    for bad_comparator in bad_comparators:
        bad_filter = "{entity_type}.{key} {comparator} {value}".format(
            entity_type=entity_type, key=key, comparator=bad_comparator, value=entity_value
        )
        with pytest.raises(MlflowException, match="Invalid comparator"):
            SearchUtils.filter([run], bad_filter)
Example #14
def test_create_model_version_run_link_with_configured_profile(mock_registry_store):
    experiment_id = 'test-exp-id'
    hostname = 'https://workspace.databricks.com/'
    workspace_id = '10002'
    run_id = 'runid'
    workspace_url = construct_run_url(hostname, experiment_id, run_id, workspace_id)
    get_run_mock = mock.MagicMock()
    get_run_mock.return_value = Run(RunInfo(run_id, experiment_id, 'userid', 'status', 0, 1, None),
                                    None)
    with mock.patch('mlflow.tracking.client.is_in_databricks_notebook', return_value=False), mock\
            .patch('mlflow.tracking.client.get_workspace_info_from_databricks_secrets',
                   return_value=(hostname, workspace_id)):
        client = MlflowClient(tracking_uri='databricks', registry_uri='otherplace')
        client.get_run = get_run_mock
        mock_registry_store.create_model_version.return_value = \
            ModelVersion('name', 1, 0, 1, source='source', run_id=run_id, run_link=workspace_url)
        model_version = client.create_model_version('name', 'source', 'runid')
        assert(model_version.run_link == workspace_url)
        # verify that the client generated the right URL
        mock_registry_store.create_model_version.assert_called_once_with(
            "name", 'source', 'runid', [], workspace_url)
Example #15
    def _generate_run(self, i, runs_dict):
        """
        Generate a run object and save to runs_dict keyed by run_id.
        Most of data just depends on i, and some data are hard-coded for simplicityGenerate n number of runs. Most of
        data just depends on n, and some data are hard-coded for simplicity.
        """
        key = f"key{i}"
        value = f"value{i}"
        start_time = 123456 * i
        end_time = start_time + (1000 * i)
        run_id = f"run_id_{i}"

        metrics = [Metric(key, value, start_time, "stage")]
        params = [Param(key, value)]
        tags = [RunTag(key, value)]
        run_info = RunInfo(run_id, "experiment_id", "user_id", "status",
                           start_time, end_time, "lifecycle_stage")
        run = Run(run_info=run_info,
                  run_data=RunData(metrics=metrics, params=params, tags=tags))
        runs_dict[run_id] = run
        return run
Example #16
def test_artifact_repo_is_cached_per_run_id():
    uri = "ftp://*****:*****@host/path"
    with mock.patch(
            "mlflow.tracking._tracking_service.client.TrackingServiceClient.get_run",
            return_value=Run(
                RunInfo("uuid",
                        "expr_id",
                        "userid",
                        "status",
                        0,
                        10,
                        "active",
                        artifact_uri=uri),
                None,
            ),
    ):
        artifact_repo = TrackingServiceClient(
            "some_tracking_uri")._get_artifact_repo("some_run_id")
        another_artifact_repo = TrackingServiceClient(
            "some_tracking_uri")._get_artifact_repo("some_run_id")
        assert artifact_repo is another_artifact_repo
Example #17
def test_string_repr(self):
    run_info = RunInfo(run_uuid="hi",
                       experiment_id=0,
                       name="name",
                       source_type=SourceType.PROJECT,
                       source_name="source-name",
                       entry_point_name="entry-point-name",
                       user_id="user-id",
                       status=RunStatus.FAILED,
                       start_time=0,
                       end_time=1,
                       source_version="version")
    metrics = [Metric("key", i, 0) for i in range(5)]
    run_data = RunData(metrics=metrics, params=[], tags=[])
    run1 = Run(run_info, run_data)
    expected = "<Run: info=<RunInfo: run_uuid='hi', experiment_id=0, name='name', " \
               "source_type=3, source_name='source-name', " \
               "entry_point_name='entry-point-name', user_id='user-id', status=4, " \
               "start_time=0, end_time=1, source_version='version', artifact_uri=None>, " \
               "data=<RunData: metrics=[<Metric: key='key', value=0, timestamp=0>, " \
               "<Metric: key='key', value=1, timestamp=0>, ...], params=[], tags=[]>>"
    assert str(run1) == expected
Example #18
    def get_run(self, run_id):
        """
        Fetch the run from backend store. The resulting :py:class:`Run <mlflow.entities.Run>`
        contains a collection of run metadata - :py:class:`RunInfo <mlflow.entities.RunInfo>`,
        as well as a collection of run parameters, tags, and metrics -
        :py:class:`RunData <mlflow.entities.RunData>`. In the case where multiple metrics with the
        same key are logged for the run, the :py:class:`RunData <mlflow.entities.RunData>` contains
        the value at the latest timestamp for each metric. If there are multiple values with the
        latest timestamp for a given metric, the maximum of these values is returned.

        :param run_id: Unique identifier for the run.

        :return: A single :py:class:`mlflow.entities.Run` object, if the run exists. Otherwise,
                 raises an exception.
        """

        _validate_run_id(run_id)
        run_info = self._get_run_info(run_id)
        metrics = self.get_all_metrics(run_id)
        params = self.get_all_params(run_id)
        tags = self.get_all_tags(run_id)
        return Run(run_info, RunData(metrics, params, tags))
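The tie-breaking rule from the docstring matches what the DB-backed to_mlflow_entity examples further down do: per key, keep the metric that is maximal by the tuple (step, timestamp, value). A standalone sketch of that selection:

def latest_metric_per_key(metrics):
    # Keep, for each key, the metric maximal by (step, timestamp, value):
    # the latest step/timestamp wins, and value breaks remaining ties.
    latest = {}
    for m in metrics:
        current = latest.get(m.key)
        if current is None or (m.step, m.timestamp, m.value) >= (current.step, current.timestamp, current.value):
            latest[m.key] = m
    return list(latest.values())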
Example #19
    def test_creation_and_hydration(self):
        run_data, metrics, params, tags = TestRunData._create()
        (run_info, run_uuid, experiment_id, name, source_type, source_name,
         entry_point_name, user_id, status, start_time, end_time,
         source_version, artifact_uri) = TestRunInfo._create()

        run1 = Run(run_info, run_data)

        self._check_run(run1, run_info, run_data)

        as_dict = {
            "info": {
                "run_uuid": run_uuid,
                "experiment_id": experiment_id,
                "name": name,
                "source_type": source_type,
                "source_name": source_name,
                "entry_point_name": entry_point_name,
                "user_id": user_id,
                "status": status,
                "start_time": start_time,
                "end_time": end_time,
                "source_version": source_version,
                "artifact_uri": artifact_uri,
            },
            "data": {
                "metrics": metrics,
                "params": params,
                "tags": tags
            }
        }
        self.assertEqual(run1.to_dictionary(), as_dict)

        # proto = run1.to_proto()
        # run2 = Run.from_proto(proto)
        # self._check_run(run2, run_info, run_data)

        run3 = Run.from_dictionary(as_dict)
        self._check_run(run3, run_info, run_data)
Example #20
    def to_mlflow_entity(self, session):
        """
        Convert DB model to corresponding MLflow entity.

        :return: :py:class:`mlflow.entities.Run`.
        """
        run_info = RunInfo(run_uuid=self.run_uuid,
                           run_id=self.run_uuid,
                           experiment_id=str(self.experiment_id),
                           user_id=self.user_id,
                           status=self.status,
                           start_time=self.start_time,
                           end_time=self.end_time,
                           lifecycle_stage=self.lifecycle_stage,
                           artifact_uri=self.artifact_uri)

        # only get latest recorded metrics per key
        last_metrics = self.get_last_recorded_metrics(session)

        all_metrics = [
            Metric(key=m[1],
                   value=m[4] if not m[5] else float("nan"),
                   timestamp=m[3],
                   step=m[2]) for m in last_metrics
        ]
        metrics = {}
        for m in all_metrics:
            existing_metric = metrics.get(m.key)
            if (existing_metric is None)\
                or ((m.step, m.timestamp, m.value) >=
                    (existing_metric.step, existing_metric.timestamp,
                        existing_metric.value)):
                metrics[m.key] = m

        run_data = RunData(metrics=list(metrics.values()),
                           params=[p.to_mlflow_entity() for p in self.params],
                           tags=[t.to_mlflow_entity() for t in self.tags])

        return Run(run_info=run_info, run_data=run_data)
Example #21
    def to_mlflow_entity(self):
        """
        Convert DB model to corresponding MLflow entity.

        :return: :py:class:`mlflow.entities.Run`.
        """
        run_info = RunInfo(run_uuid=self.run_uuid,
                           run_id=self.run_uuid,
                           experiment_id=str(self.experiment_id),
                           user_id=self.user_id,
                           status=self.status,
                           start_time=self.start_time,
                           end_time=self.end_time,
                           lifecycle_stage=self.lifecycle_stage,
                           artifact_uri=self.artifact_uri)

        run_data = RunData(
            metrics=[m.to_mlflow_entity() for m in self.latest_metrics],
            params=[p.to_mlflow_entity() for p in self.params],
            tags=[t.to_mlflow_entity() for t in self.tags])

        return Run(run_info=run_info, run_data=run_data)
Example #22
def test_string_repr(self):
    run_info = RunInfo(run_uuid="hi",
                       run_id="hi",
                       experiment_id=0,
                       user_id="user-id",
                       status=RunStatus.FAILED,
                       start_time=0,
                       end_time=1,
                       lifecycle_stage=LifecycleStage.ACTIVE)
    metrics = [
        Metric(key="key-%s" % i, value=i, timestamp=0, step=i)
        for i in range(3)
    ]
    run_data = RunData(metrics=metrics, params=[], tags=[])
    run1 = Run(run_info, run_data)
    expected = (
        "<Run: data=<RunData: metrics={'key-0': 0, 'key-1': 1, 'key-2': 2}, "
        "params={}, tags={}>, info=<RunInfo: artifact_uri=None, end_time=1, "
        "experiment_id=0, "
        "lifecycle_stage='active', run_id='hi', run_uuid='hi', "
        "start_time=0, status=4, user_id='user-id'>>")
    assert str(run1) == expected
Example #23
    def to_mlflow_entity(self):
        """
        Convert DB model to corresponding MLflow entity.

        :return: :py:class:`mlflow.entities.Run`.
        """
        run_info = RunInfo(run_uuid=self.run_uuid,
                           run_id=self.run_uuid,
                           experiment_id=str(self.experiment_id),
                           name=self.name,
                           source_type=SourceType.from_string(
                               self.source_type),
                           source_name=self.source_name,
                           entry_point_name=self.entry_point_name,
                           user_id=self.user_id,
                           status=RunStatus.from_string(self.status),
                           start_time=self.start_time,
                           end_time=self.end_time,
                           source_version=self.source_version,
                           lifecycle_stage=self.lifecycle_stage,
                           artifact_uri=self.artifact_uri)

        # only get latest recorded metrics per key
        all_metrics = [m.to_mlflow_entity() for m in self.metrics]
        metrics = {}
        for m in all_metrics:
            existing_metric = metrics.get(m.key)
            if (existing_metric is None)\
                or ((m.step, m.timestamp, m.value) >=
                    (existing_metric.step, existing_metric.timestamp,
                        existing_metric.value)):
                metrics[m.key] = m

        run_data = RunData(metrics=list(metrics.values()),
                           params=[p.to_mlflow_entity() for p in self.params],
                           tags=[t.to_mlflow_entity() for t in self.tags])

        return Run(run_info=run_info, run_data=run_data)
Example #24
def create_run(self, experiment_id, user_id, run_name, source_type,
               source_name, entry_point_name, start_time, source_version,
               tags):
    """
    Creates a run with the specified attributes.
    """
    if self.get_experiment(experiment_id) is None:
        raise Exception(
            "Could not create run under experiment with ID %s - no such experiment "
            "exists." % experiment_id)
    run_uuid = uuid.uuid4().hex
    artifact_uri = self._get_artifact_dir(experiment_id, run_uuid)
    num_runs = len(self._list_run_uuids(experiment_id))
    run_info = RunInfo(run_uuid=run_uuid,
                       experiment_id=experiment_id,
                       name="Run %s" % num_runs,
                       artifact_uri=artifact_uri,
                       source_type=source_type,
                       source_name=source_name,
                       entry_point_name=entry_point_name,
                       user_id=user_id,
                       status=RunStatus.RUNNING,
                       start_time=start_time,
                       end_time=None,
                       source_version=source_version)
    # Persist run metadata and create directories for logging metrics, parameters, artifacts
    run_dir = self._get_run_dir(run_info.experiment_id, run_info.run_uuid)
    mkdir(run_dir)
    write_yaml(run_dir, FileStore.META_DATA_FILE_NAME,
               self._make_run_info_dict(run_info))
    mkdir(run_dir, FileStore.METRICS_FOLDER_NAME)
    mkdir(run_dir, FileStore.PARAMS_FOLDER_NAME)
    mkdir(run_dir, FileStore.ARTIFACTS_FOLDER_NAME)
    for tag in tags:
        self.set_tag(run_uuid, tag)
    return Run(run_info=run_info, run_data=None)
Example #25
    def _search_runs(
        self,
        experiment_ids,
        filter_string,
        run_view_type,
        max_results,
        order_by,
        page_token,
    ):
        if max_results > SEARCH_MAX_RESULTS_THRESHOLD:
            raise MlflowException(
                "Invalid value for request parameter max_results. It must be at "
                "most {}, but got value {}".format(
                    SEARCH_MAX_RESULTS_THRESHOLD, max_results),
                INVALID_PARAMETER_VALUE,
            )
        runs = []
        for experiment_id in experiment_ids:
            run_ids = self._list_runs_ids(experiment_id, run_view_type)
            run_infos = [
                _dict_to_run_info(r) for r in self._get_run_list(run_ids)
            ]
            for run_info in run_infos:
                # Load the metrics, params and tags for the run
                run_id = run_info.run_id
                metrics = self.get_all_metrics(run_id)
                params = self.get_all_params(run_id)
                tags = self.get_all_tags(run_id)
                run = Run(run_info, RunData(metrics, params, tags))
                runs.append(run)

        filtered = SearchUtils.filter(runs, filter_string)
        sorted_runs = SearchUtils.sort(filtered, order_by)
        runs, next_page_token = SearchUtils.paginate(sorted_runs, page_token,
                                                     max_results)
        return runs, next_page_token
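Callers would typically consume this by threading the returned token back into the next call until it comes back empty (illustrative usage; the `store` object is assumed to be an instance exposing this method):

from mlflow.entities import ViewType

all_runs, token = [], None
while True:
    page, token = store._search_runs(
        experiment_ids=["0"], filter_string="", run_view_type=ViewType.ACTIVE_ONLY,
        max_results=1000, order_by=None, page_token=token)
    all_runs.extend(page)
    if not token:
        break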
Example #26
def list_runs(self, experiment_id):
    """:return: list of :py:class:`mlflow.entities.Run` (with only RunInfo filled)"""
    run_infos = self.store.list_run_infos(experiment_id)
    return [Run(run_info.run_uuid, run_info) for run_info in run_infos]
Example #27
def _get_run_from_info(self, run_info):
    metrics = self._get_all_metrics(run_info)
    params = self._get_all_params(run_info)
    tags = self._get_all_tags(run_info)
    return Run(run_info, RunData(metrics, params, tags))
Example #28
experiment = Experiment(experiment_id="1",
                        name="experiment_name",
                        artifact_location="artifact_location",
                        lifecycle_stage=LifecycleStage.ACTIVE,
                        tags=[])
run_info = RunInfo(run_uuid="1",
                   run_id="1",
                   experiment_id="experiment_id",
                   user_id="unknown",
                   status=RunStatus.to_string(RunStatus.RUNNING),
                   start_time=1,
                   end_time=None,
                   lifecycle_stage=LifecycleStage.ACTIVE,
                   artifact_uri="artifact_uri")
run_data = RunData(metrics=[], params=[], tags=[])
run = Run(run_info=run_info, run_data=run_data)

metric = Metric(key="metric1", value=1, timestamp=1, step=1)

param = Param(key="param1", value="val1")

tag = RunTag(key="tag1", value="val1")

experiment_tag = ExperimentTag(key="tag1", value="val1")


@mock.patch(
    "mlflow_elasticsearchstore.elasticsearch_store.ElasticsearchStore.list_experiments"
)
@pytest.mark.usefixtures('create_mlflow_client')
def test_list_experiments(list_experiments_mock, create_mlflow_client):
Example #29
def to_mlflow_entity(self):
    # run has diff parameter names in __init__ than in properties_ so we do this manually
    info = _create_entity(RunInfo, self)
    data = _create_entity(RunData, self)
    return Run(run_info=info, run_data=data)
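`_create_entity` is not shown; presumably it maps same-named attributes from the DB model onto the entity constructor, which is why Run, whose constructor names differ from its properties, is assembled manually. A hypothetical sketch of such a helper:

import inspect

def _create_entity(entity_class, model):
    # Hypothetical helper: pull each constructor parameter from the
    # same-named attribute on the DB model.
    params = inspect.signature(entity_class.__init__).parameters
    return entity_class(**{name: getattr(model, name)
                           for name in params if name != "self"})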
Example #30
def test_creating_run_with_absent_info_throws_exception(self):
    run_data = TestRunData._create()[0]
    with pytest.raises(MlflowException) as no_info_exc:
        Run(None, run_data)
    assert "run_info cannot be None" in str(no_info_exc)