Example #1
def test_correct_filtering(filter_string, matching_runs):
    runs = [
        Run(run_info=RunInfo(
            run_uuid="hi", run_id="hi", experiment_id=0,
            user_id="user-id", status=RunStatus.to_string(RunStatus.FAILED),
            start_time=0, end_time=1, lifecycle_stage=LifecycleStage.ACTIVE),
            run_data=RunData(
                metrics=[Metric("key1", 121, 1, 0)],
                params=[Param("my_param", "A")],
                tags=[])),
        Run(run_info=RunInfo(
            run_uuid="hi2", run_id="hi2", experiment_id=0,
            user_id="user-id", status=RunStatus.to_string(RunStatus.FINISHED),
            start_time=0, end_time=1, lifecycle_stage=LifecycleStage.ACTIVE),
            run_data=RunData(
                metrics=[Metric("key1", 123, 1, 0)],
                params=[Param("my_param", "A")],
                tags=[RunTag("tag1", "C")])),
        Run(run_info=RunInfo(
            run_uuid="hi3", run_id="hi3", experiment_id=1,
            user_id="user-id", status=RunStatus.to_string(RunStatus.FAILED),
            start_time=0, end_time=1, lifecycle_stage=LifecycleStage.ACTIVE),
            run_data=RunData(
                metrics=[Metric("key1", 125, 1, 0)],
                params=[Param("my_param", "B")],
                tags=[RunTag("tag1", "D")])),
    ]
    filtered_runs = SearchUtils.filter(runs, filter_string)
    assert set(filtered_runs) == set([runs[i] for i in matching_runs])
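
The pytest parametrization that drives this test is not shown above. A hypothetical decorator, consistent with the three runs defined in the test (key1 = 121, 123, 125; my_param = "A", "A", "B"), might look like this; the cases are illustrative, not the original ones.

# Hypothetical parametrization; indices in matching_runs refer to the runs list above.
@pytest.mark.parametrize("filter_string, matching_runs", [
    ("metrics.key1 > 122", [1, 2]),
    ("params.my_param = 'A'", [0, 1]),
    ("tags.tag1 = 'D'", [2]),
])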
Example #2
    def test_creation_and_hydration(self):
        (ri1, run_id, experiment_id, user_id, status, start_time, end_time, lifecycle_stage,
         artifact_uri) = self._create()
        self._check(ri1, run_id, experiment_id, user_id, status, start_time, end_time,
                    lifecycle_stage, artifact_uri)
        as_dict = {
            "run_uuid": run_id,
            "run_id": run_id,
            "experiment_id": experiment_id,
            "user_id": user_id,
            "status": status,
            "start_time": start_time,
            "end_time": end_time,
            "lifecycle_stage": lifecycle_stage,
            "artifact_uri": artifact_uri
        }
        self.assertEqual(dict(ri1), as_dict)

        proto = ri1.to_proto()
        ri2 = RunInfo.from_proto(proto)
        self._check(ri2, run_id, experiment_id, user_id, status, start_time, end_time,
                    lifecycle_stage, artifact_uri)
        ri3 = RunInfo.from_dictionary(as_dict)
        self._check(ri3, run_id, experiment_id, user_id, status, start_time, end_time,
                    lifecycle_stage, artifact_uri)
        # Test that we can add a field to RunInfo and still deserialize it from a dictionary
        dict_copy_0 = as_dict.copy()
        dict_copy_0["my_new_field"] = "new field value"
        ri4 = RunInfo.from_dictionary(dict_copy_0)
        self._check(ri4, run_id, experiment_id, user_id, status, start_time, end_time,
                    lifecycle_stage, artifact_uri)
Example #3
def test_pagination(page_token, max_results, matching_runs, expected_next_page_token):
    runs = [
        Run(run_info=RunInfo(
            run_uuid="0", run_id="0", experiment_id=0,
            user_id="user-id", status=RunStatus.to_string(RunStatus.FAILED),
            start_time=0, end_time=1, lifecycle_stage=LifecycleStage.ACTIVE),
            run_data=RunData([], [], [])),
        Run(run_info=RunInfo(
            run_uuid="1", run_id="1", experiment_id=0,
            user_id="user-id", status=RunStatus.to_string(RunStatus.FAILED),
            start_time=0, end_time=1, lifecycle_stage=LifecycleStage.ACTIVE),
            run_data=RunData([], [], [])),
        Run(run_info=RunInfo(
            run_uuid="2", run_id="2", experiment_id=0,
            user_id="user-id", status=RunStatus.to_string(RunStatus.FAILED),
            start_time=0, end_time=1, lifecycle_stage=LifecycleStage.ACTIVE),
            run_data=RunData([], [], []))
    ]
    encoded_page_token = None
    if page_token:
        encoded_page_token = base64.b64encode(json.dumps(page_token).encode("utf-8"))
    paginated_runs, next_page_token = SearchUtils.paginate(runs, encoded_page_token, max_results)

    paginated_run_indices = []
    for run in paginated_runs:
        for i, r in enumerate(runs):
            if r == run:
                paginated_run_indices.append(i)
                break
    assert paginated_run_indices == matching_runs

    decoded_next_page_token = None
    if next_page_token:
        decoded_next_page_token = json.loads(base64.b64decode(next_page_token))
    assert decoded_next_page_token == expected_next_page_token
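
As with the filtering test, the parametrization is elided. A hypothetical set of cases, assuming the page token is a base64-encoded JSON object of the form {"offset": <int>} as suggested by the encoding and decoding above:

# Hypothetical cases, not the original ones.
@pytest.mark.parametrize(
    "page_token, max_results, matching_runs, expected_next_page_token",
    [
        (None, 2, [0, 1], {"offset": 2}),   # first page of two runs, one run left over
        ({"offset": 2}, 2, [2], None),      # last page, so no next-page token
    ],
)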
Example #4
    def test_creation_and_hydration(self):
        (ri1, run_uuid, experiment_id, name, source_type, source_name,
         entry_point_name, user_id, status, start_time, end_time,
         source_version, artifact_uri) = self._create()
        self._check(ri1, run_uuid, experiment_id, name, source_type,
                    source_name, entry_point_name, user_id, status, start_time,
                    end_time, source_version, artifact_uri)
        as_dict = {
            "run_uuid": run_uuid,
            "experiment_id": experiment_id,
            "name": name,
            "source_type": source_type,
            "source_name": source_name,
            "entry_point_name": entry_point_name,
            "user_id": user_id,
            "status": status,
            "start_time": start_time,
            "end_time": end_time,
            "source_version": source_version,
            "artifact_uri": artifact_uri,
        }
        self.assertEqual(dict(ri1), as_dict)

        proto = ri1.to_proto()
        ri2 = RunInfo.from_proto(proto)
        self._check(ri2, run_uuid, experiment_id, name, source_type,
                    source_name, entry_point_name, user_id, status, start_time,
                    end_time, source_version, artifact_uri)
        ri3 = RunInfo.from_dictionary(as_dict)
        self._check(ri3, run_uuid, experiment_id, name, source_type,
                    source_name, entry_point_name, user_id, status, start_time,
                    end_time, source_version, artifact_uri)
Example #5
def test_correct_sorting(order_bys, matching_runs):
    runs = [
        Run(run_info=RunInfo(
            run_uuid="9", run_id="9", experiment_id=0,
            user_id="user-id", status=RunStatus.to_string(RunStatus.FAILED),
            start_time=0, end_time=1, lifecycle_stage=LifecycleStage.ACTIVE),
            run_data=RunData(
                metrics=[Metric("key1", 121, 1, 0)],
                params=[Param("my_param", "A")],
                tags=[])),
        Run(run_info=RunInfo(
            run_uuid="8", run_id="8", experiment_id=0,
            user_id="user-id", status=RunStatus.to_string(RunStatus.FINISHED),
            start_time=1, end_time=1, lifecycle_stage=LifecycleStage.ACTIVE),
            run_data=RunData(
                metrics=[Metric("key1", 123, 1, 0)],
                params=[Param("my_param", "A")],
                tags=[RunTag("tag1", "C")])),
        Run(run_info=RunInfo(
            run_uuid="7", run_id="7", experiment_id=1,
            user_id="user-id", status=RunStatus.to_string(RunStatus.FAILED),
            start_time=1, end_time=1, lifecycle_stage=LifecycleStage.ACTIVE),
            run_data=RunData(
                metrics=[Metric("key1", 125, 1, 0)],
                params=[Param("my_param", "B")],
                tags=[RunTag("tag1", "D")])),
    ]
    sorted_runs = SearchUtils.sort(runs, order_bys)
    sorted_run_indices = []
    for run in sorted_runs:
        for i, r in enumerate(runs):
            if r == run:
                sorted_run_indices.append(i)
                break
    assert sorted_run_indices == matching_runs
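
A hypothetical parametrization for this sorting test, consistent with the runs above (key1 = 121, 123, 125; my_param = "A", "A", "B"); the cases are illustrative only.

# Hypothetical order_by cases, not the original ones.
@pytest.mark.parametrize("order_bys, matching_runs", [
    (["metrics.key1 asc"], [0, 1, 2]),
    (["metrics.key1 desc"], [2, 1, 0]),
    (["params.my_param asc", "metrics.key1 desc"], [1, 0, 2]),
])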
Example #6
def test_create_model_version_run_link_in_notebook_with_default_profile(
        mock_registry_store):
    experiment_id = 'test-exp-id'
    hostname = 'https://workspace.databricks.com/'
    workspace_id = '10002'
    run_id = 'runid'
    workspace_url = construct_run_url(hostname, experiment_id, run_id,
                                      workspace_id)
    get_run_mock = mock.MagicMock()
    get_run_mock.return_value = Run(
        RunInfo(run_id, experiment_id, 'userid', 'status', 0, 1, None), None)
    with mock.patch('mlflow.tracking.client.is_in_databricks_notebook',
                    return_value=True), \
            mock.patch('mlflow.tracking.client.get_workspace_info_from_dbutils',
                       return_value=(hostname, workspace_id)):
        client = MlflowClient(tracking_uri='databricks',
                              registry_uri='otherplace')
        client.get_run = get_run_mock
        mock_registry_store.create_model_version.return_value = \
            ModelVersion('name', 1, 0, 1, source='source', run_id=run_id, run_link=workspace_url)
        model_version = client.create_model_version('name', 'source', 'runid')
        assert (model_version.run_link == workspace_url)
        # verify that the client generated the right URL
        mock_registry_store.create_model_version.assert_called_once_with(
            "name", 'source', 'runid', [], workspace_url)
Example #7
def test_order_by_metric_with_nans_infs_nones():
    metric_vals_str = ["nan", "inf", "-inf", "-1000", "0", "1000", "None"]
    runs = [
        Run(
            run_info=RunInfo(
                run_id=x,
                run_uuid=x,
                experiment_id=0,
                user_id="user",
                status=RunStatus.to_string(RunStatus.FINISHED),
                start_time=0,
                end_time=1,
                lifecycle_stage=LifecycleStage.ACTIVE,
            ),
            run_data=RunData(
                metrics=[Metric("x", None if x == "None" else float(x), 1, 0)
                         ]),
        ) for x in metric_vals_str
    ]
    sorted_runs_asc = [
        x.info.run_id for x in SearchUtils.sort(runs, ["metrics.x asc"])
    ]
    sorted_runs_desc = [
        x.info.run_id for x in SearchUtils.sort(runs, ["metrics.x desc"])
    ]
    # asc
    assert ["-inf", "-1000", "0", "1000", "inf", "nan",
            "None"] == sorted_runs_asc
    # desc
    assert ["inf", "1000", "0", "-1000", "-inf", "nan",
            "None"] == sorted_runs_desc
Example #8
    def create_run(self, experiment_id, user_id, start_time, tags):
        """
        Create a run under the specified experiment ID, setting the run's status to "RUNNING"
        and the start time to the current time.

        :param experiment_id: String id of the experiment for this run
        :param user_id: ID of the user launching this run

        :return: The created Run object
        """
        if experiment_id is None:
            experiment_id = MLMDStore.DEFAULT_EXPERIMENT_ID

        experiment = self.get_experiment(experiment_id)
        run_uuid = uuid.uuid4().hex
        run_info = RunInfo(run_uuid=run_uuid,
                           run_id=run_uuid,
                           experiment_id=experiment_id,
                           artifact_uri=None,
                           user_id=user_id,
                           status=RunStatus.to_string(RunStatus.RUNNING),
                           start_time=start_time,
                           end_time=None,
                           lifecycle_stage=LifecycleStage.ACTIVE)

        for tag in tags:
            self.set_tag(run_uuid, tag)
        exec_name = run_uuid
        self.mldm_exec = self._get_or_create_run_execution(
            self.mldm_workspace, self.mldm_run, exec_name)
        print("Run " + exec_name + " created")
        return self.get_run(run_id=run_uuid)
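
A hypothetical call to this method, assuming `store` is an instance of the class above; the tag key and timestamp are illustrative.

# Hypothetical usage sketch.
run = store.create_run(
    experiment_id=None,             # falls back to MLMDStore.DEFAULT_EXPERIMENT_ID
    user_id="alice",
    start_time=1597324762662,       # illustrative timestamp
    tags=[RunTag("mlflow.runName", "baseline")],
)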
Example #9
 def _create():
     run_uuid = str(uuid.uuid4())
     experiment_id = random_int(10, 2000)
     name = random_str(random_int(10, 40))
     source_type = random_int(1, 4)
     source_name = random_str(random_int(100, 300))
     entry_point_name = random_str(random_int(100, 300))
     user_id = random_str(random_int(10, 25))
     status = random_int(1, 5)
     start_time = random_int(1, 10)
     end_time = start_time + random_int(1, 10)
     source_version = random_str(random_int(10, 40))
     tags = [
         RunTag(key=random_str(random_int(1, 5)),
                value=random_str(random_int(1, 5))) for _ in range(2)
     ]
     artifact_uri = random_str(random_int(10, 40))
     ri = RunInfo(run_uuid=run_uuid,
                  experiment_id=experiment_id,
                  name=name,
                  source_type=source_type,
                  source_name=source_name,
                  entry_point_name=entry_point_name,
                  user_id=user_id,
                  status=status,
                  start_time=start_time,
                  end_time=end_time,
                  source_version=source_version,
                  tags=tags,
                  artifact_uri=artifact_uri)
     return (ri, run_uuid, experiment_id, name, source_type, source_name,
             entry_point_name, user_id, status, start_time, end_time,
             source_version, tags, artifact_uri)
Example #10
def test_get_run(init_store):
    expected_run_info = RunInfo(run_uuid="7b2e71956f3d4c08b042624a8d83700d",
                                experiment_id="hTb553MBNoOYfhXjnnQh",
                                user_id="1",
                                status="RUNNING",
                                start_time=1597324762662,
                                end_time=None,
                                lifecycle_stage="active",
                                artifact_uri="artifact_path/7b2e71956f3d4c08b042624a8d83700d"
                                "/artifacts",
                                run_id="7b2e71956f3d4c08b042624a8d83700d")

    expected_metrics = [Metric(key="metric0", value=20.0, timestamp=1597324762778, step=2),
                        Metric(key="metric1", value=7.0, timestamp=1597324762890, step=2)]

    expected_params = [Param(key="param0", value="val2"),
                       Param(key="param1", value="Val1"),
                       Param(key="param2", value="Val1"),
                       Param(key="param3", value="valeur4")]

    expected_tags = [RunTag(key="tag0", value="val2"),
                     RunTag(key="tag1", value="test3"),
                     RunTag(key="tag2", value="val2"),
                     RunTag(key="tag3", value="test3")]

    expected_run_data = RunData(metrics=expected_metrics,
                                params=expected_params, tags=expected_tags)

    run = init_store.get_run(expected_run_info._run_id)
    assert run._info == expected_run_info
    for i, metric in enumerate(run._data._metric_objs):
        assert metric.__dict__ == expected_run_data._metric_objs[i].__dict__
    assert run._data._params == expected_run_data._params
    assert run._data._tags == expected_run_data._tags
Example #11
 def create_run(self, experiment_id, user_id, run_name, source_type,
                source_name, entry_point_name, start_time, source_version,
                tags):
     """
     Creates a run with the specified attributes.
     """
     if self.get_experiment(experiment_id) is None:
         raise Exception(
             "Could not create run under experiment with ID %s - no such experiment "
             "exists." % experiment_id)
     run_uuid = uuid.uuid4().hex
     artifact_uri = self._get_artifact_dir(experiment_id, run_uuid)
     num_runs = len(self._list_run_uuids(experiment_id))
     run_info = RunInfo(run_uuid=run_uuid,
                        experiment_id=experiment_id,
                        name="Run %s" % num_runs,
                        artifact_uri=artifact_uri,
                        source_type=source_type,
                        source_name=source_name,
                        entry_point_name=entry_point_name,
                        user_id=user_id,
                        status=RunStatus.RUNNING,
                        start_time=start_time,
                        end_time=None,
                        source_version=source_version,
                        tags=tags)
     # Persist run metadata and create directories for logging metrics, parameters, artifacts
     run_dir = self._get_run_dir(run_info.experiment_id, run_info.run_uuid)
     mkdir(run_dir)
     write_yaml(run_dir, FileStore.META_DATA_FILE_NAME, dict(run_info))
     mkdir(run_dir, FileStore.METRICS_FOLDER_NAME)
     mkdir(run_dir, FileStore.PARAMS_FOLDER_NAME)
     mkdir(run_dir, FileStore.ARTIFACTS_FOLDER_NAME)
     return Run(run_info=run_info, run_data=None)
Example #12
 def _create():
     run_id = str(uuid.uuid4())
     experiment_id = str(random_int(10, 2000))
     user_id = random_str(random_int(10, 25))
     status = RunStatus.to_string(random.choice(RunStatus.all_status()))
     start_time = random_int(1, 10)
     end_time = start_time + random_int(1, 10)
     lifecycle_stage = LifecycleStage.ACTIVE
     artifact_uri = random_str(random_int(10, 40))
     ri = RunInfo(
         run_uuid=run_id,
         run_id=run_id,
         experiment_id=experiment_id,
         user_id=user_id,
         status=status,
         start_time=start_time,
         end_time=end_time,
         lifecycle_stage=lifecycle_stage,
         artifact_uri=artifact_uri,
     )
     return (
         ri,
         run_id,
         experiment_id,
         user_id,
         status,
         start_time,
         end_time,
         lifecycle_stage,
         artifact_uri,
     )
Example #13
 def _create():
     run_id = str(uuid.uuid4())
     experiment_id = str(random_int(10, 2000))
     name = random_str(random_int(10, 40))
     source_type = random_int(1, 4)
     source_name = random_str(random_int(100, 300))
     entry_point_name = random_str(random_int(100, 300))
     user_id = random_str(random_int(10, 25))
     status = random_int(1, 5)
     start_time = random_int(1, 10)
     end_time = start_time + random_int(1, 10)
     source_version = random_str(random_int(10, 40))
     lifecycle_stage = LifecycleStage.ACTIVE
     artifact_uri = random_str(random_int(10, 40))
     ri = RunInfo(run_uuid=run_id,
                  run_id=run_id,
                  experiment_id=experiment_id,
                  name=name,
                  source_type=source_type,
                  source_name=source_name,
                  entry_point_name=entry_point_name,
                  user_id=user_id,
                  status=status,
                  start_time=start_time,
                  end_time=end_time,
                  source_version=source_version,
                  lifecycle_stage=lifecycle_stage,
                  artifact_uri=artifact_uri)
     return (ri, run_id, experiment_id, name, source_type, source_name,
             entry_point_name, user_id, status, start_time, end_time,
             source_version, lifecycle_stage, artifact_uri)
Example #14
def test_create_model_version_run_link_with_configured_profile(
        mock_registry_store):
    experiment_id = "test-exp-id"
    hostname = "https://workspace.databricks.com/"
    workspace_id = "10002"
    run_id = "runid"
    workspace_url = construct_run_url(hostname, experiment_id, run_id,
                                      workspace_id)
    get_run_mock = mock.MagicMock()
    get_run_mock.return_value = Run(
        RunInfo(run_id, experiment_id, "userid", "status", 0, 1, None), None)
    with mock.patch(
            "mlflow.tracking.client.is_in_databricks_notebook",
            return_value=False
    ), mock.patch(
            "mlflow.tracking.client.get_workspace_info_from_databricks_secrets",
            return_value=(hostname, workspace_id),
    ):
        client = MlflowClient(tracking_uri="databricks",
                              registry_uri="otherplace")
        client.get_run = get_run_mock
        mock_registry_store.create_model_version.return_value = ModelVersion(
            "name",
            1,
            0,
            1,
            source="source",
            run_id=run_id,
            run_link=workspace_url)
        model_version = client.create_model_version("name", "source", "runid")
        assert model_version.run_link == workspace_url
        # verify that the client generated the right URL
        mock_registry_store.create_model_version.assert_called_once_with(
            "name", "source", "runid", [], workspace_url, None)
Example #15
 def test_string_repr(self):
     run_info = RunInfo(run_uuid="hi",
                        run_id="hi",
                        experiment_id=0,
                        name="name",
                        source_type=SourceType.PROJECT,
                        source_name="source-name",
                        entry_point_name="entry-point-name",
                        user_id="user-id",
                        status=RunStatus.FAILED,
                        start_time=0,
                        end_time=1,
                        source_version="version",
                        lifecycle_stage=LifecycleStage.ACTIVE)
     metrics = [
         Metric(key="key-%s" % i, value=i, timestamp=0, step=i)
         for i in range(3)
     ]
     run_data = RunData(metrics=metrics, params=[], tags=[])
     run1 = Run(run_info, run_data)
     expected = (
         "<Run: data=<RunData: metrics={'key-0': 0, 'key-1': 1, 'key-2': 2}, "
         "params={}, tags={}>, info=<RunInfo: artifact_uri=None, end_time=1, "
         "entry_point_name='entry-point-name', experiment_id=0, "
         "lifecycle_stage='active', name='name', run_id='hi', run_uuid='hi', "
         "source_name='source-name', source_type=3, source_version='version', "
         "start_time=0, status=4, user_id='user-id'>>")
     assert str(run1) == expected
Example #16
 def update_run_info(self, run_id, run_status, end_time):
     """ Updates the metadata of the specified run. """
     req_body = message_to_json(
         UpdateRun(run_uuid=run_id, run_id=run_id, status=run_status, end_time=end_time)
     )
     response_proto = self._call_endpoint(UpdateRun, req_body)
     return RunInfo.from_proto(response_proto.run_info)
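
A hypothetical call, assuming `store` is an instance of this REST store; the run ID and end time are illustrative, and MLflow timestamps are epoch milliseconds.

# Hypothetical usage sketch.
updated_info = store.update_run_info(
    run_id="some-run-id",
    run_status=RunStatus.FINISHED,
    end_time=1597324800000,
)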
Example #17
 def create_run(self, experiment_id, user_id, start_time, tags):
     """
     Creates a run with the specified attributes.
     """
     experiment_id = FileStore.DEFAULT_EXPERIMENT_ID if experiment_id is None else experiment_id
     experiment = self.get_experiment(experiment_id)
     if experiment is None:
         raise MlflowException(
             "Could not create run under experiment with ID %s - no such experiment "
             "exists." % experiment_id,
             databricks_pb2.RESOURCE_DOES_NOT_EXIST)
     if experiment.lifecycle_stage != LifecycleStage.ACTIVE:
         raise MlflowException(
             "Could not create run under non-active experiment with ID "
             "%s." % experiment_id,
             databricks_pb2.INVALID_STATE)
     run_uuid = uuid.uuid4().hex
     artifact_uri = self._get_artifact_dir(experiment_id, run_uuid)
     run_info = RunInfo(run_uuid=run_uuid, run_id=run_uuid, experiment_id=experiment_id,
                        artifact_uri=artifact_uri, user_id=user_id,
                        status=RunStatus.to_string(RunStatus.RUNNING),
                        start_time=start_time, end_time=None,
                        lifecycle_stage=LifecycleStage.ACTIVE)
     # Persist run metadata and create directories for logging metrics, parameters, artifacts
     run_dir = self._get_run_dir(run_info.experiment_id, run_info.run_id)
     mkdir(run_dir)
     run_info_dict = _make_persisted_run_info_dict(run_info)
     write_yaml(run_dir, FileStore.META_DATA_FILE_NAME, run_info_dict)
     mkdir(run_dir, FileStore.METRICS_FOLDER_NAME)
     mkdir(run_dir, FileStore.PARAMS_FOLDER_NAME)
     mkdir(run_dir, FileStore.ARTIFACTS_FOLDER_NAME)
     for tag in tags:
         self.set_tag(run_uuid, tag)
     return self.get_run(run_id=run_uuid)
Example #18
    def to_mlflow_entity(self):
        """
        Convert DB model to corresponding MLflow entity.

        :return: :py:class:`mlflow.entities.Run`.
        """
        run_info = RunInfo(
            run_uuid=self.run_uuid,
            run_id=self.run_uuid,
            experiment_id=str(self.experiment_id),
            user_id=self.user_id,
            status=self.status,
            start_time=self.start_time,
            end_time=self.end_time,
            lifecycle_stage=self.lifecycle_stage,
            artifact_uri=self.artifact_uri,
        )

        run_data = RunData(
            metrics=[m.to_mlflow_entity() for m in self.latest_metrics],
            params=[p.to_mlflow_entity() for p in self.params],
            tags=[t.to_mlflow_entity() for t in self.tags],
        )

        return Run(run_info=run_info, run_data=run_data)
Example #19
def mlflow_run(
        name=RUN_NAME,
        status="RUNNING",
        end_time=None,
        lifecycle_stage=LifecycleStage.ACTIVE,
        name_tag=RunTag(MLFLOW_RUN_NAME, RUN_NAME),
        parent_run_id_tag=RunTag(MLFLOW_PARENT_RUN_ID,
                                 PARENT_RUN_UUID_HEX_STR),
):
    tags = [MLFLOW_TAG]
    if name_tag is not None:
        tags.append(name_tag)
    if parent_run_id_tag is not None:
        tags.append(parent_run_id_tag)
    data = RunData(params=[MLFLOW_PARAM], metrics=[MLFLOW_METRIC], tags=tags)
    info = RunInfo(
        run_uuid=RUN_UUID_HEX_STR,
        experiment_id=str(EXPERIMENT_ID),
        user_id="",
        status=status,
        start_time=RUN_STARTED_AT_MILLISECONDS,
        end_time=end_time,
        lifecycle_stage=lifecycle_stage,
        artifact_uri=ARTIFACT_LOCATION,
        run_id=RUN_UUID_HEX_STR,
    )
    return Run(info, data)
Example #20
def create_run(
    run_id="",
    exp_id="",
    uid="",
    start=0,
    end=0,
    metrics=None,
    params=None,
    tags=None,
    status=RunStatus.FINISHED,
    a_uri=None,
):
    return Run(
        RunInfo(
            run_uuid=run_id,
            run_id=run_id,
            experiment_id=exp_id,
            user_id=uid,
            status=status,
            start_time=start,
            end_time=end,
            lifecycle_stage=LifecycleStage.ACTIVE,
            artifact_uri=a_uri,
        ),
        RunData(metrics=metrics, params=params, tags=tags),
    )
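
A hypothetical use of this helper when building fixture runs in a test; the metric, param, and tag values are illustrative.

# Hypothetical usage sketch.
run = create_run(
    run_id="run-1",
    exp_id="0",
    uid="user",
    start=0,
    end=1,
    metrics=[Metric("loss", 0.25, 0, 0)],
    params=[Param("lr", "0.01")],
    tags=[RunTag("mlflow.runName", "baseline")],
)
assert run.info.lifecycle_stage == LifecycleStage.ACTIVE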
Example #21
    def to_mlflow_entity(self):
        """
        Convert DB model to corresponding MLflow entity.

        :return: :py:class:`mlflow.entities.Run`.
        """
        run_info = RunInfo(run_uuid=self.run_uuid,
                           run_id=self.run_uuid,
                           experiment_id=str(self.experiment_id),
                           user_id=self.user_id,
                           status=self.status,
                           start_time=self.start_time,
                           end_time=self.end_time,
                           lifecycle_stage=self.lifecycle_stage,
                           artifact_uri=self.artifact_uri)

        # only get latest recorded metrics per key
        all_metrics = [m.to_mlflow_entity() for m in self.metrics]
        metrics = {}
        for m in all_metrics:
            existing_metric = metrics.get(m.key)
            if (existing_metric is None)\
                or ((m.step, m.timestamp, m.value) >=
                    (existing_metric.step, existing_metric.timestamp,
                        existing_metric.value)):
                metrics[m.key] = m

        run_data = RunData(metrics=list(metrics.values()),
                           params=[p.to_mlflow_entity() for p in self.params],
                           tags=[t.to_mlflow_entity() for t in self.tags])

        return Run(run_info=run_info, run_data=run_data)
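
Unlike the earlier variant that reads a precomputed latest_metrics relationship, this version derives the latest metric per key by keeping the entry with the greatest (step, timestamp, value) tuple.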
Example #22
def _read_persisted_run_info_dict(run_info_dict):
    dict_copy = run_info_dict.copy()
    if 'lifecycle_stage' not in dict_copy:
        dict_copy['lifecycle_stage'] = LifecycleStage.ACTIVE

    # 'experiment_id' was changed from int to string, so we must cast to string
    # when reading legacy run_infos
    if isinstance(dict_copy["experiment_id"], int):
        dict_copy["experiment_id"] = str(dict_copy["experiment_id"])
    return RunInfo.from_dictionary(dict_copy)
Example #23
def faculty_run_to_mlflow_run(faculty_run):
    lifecycle_stage = (
        LifecycleStage.ACTIVE
        if faculty_run.deleted_at is None
        else LifecycleStage.DELETED
    )
    start_time = _datetime_to_mlflow_timestamp(faculty_run.started_at)
    end_time = (
        _datetime_to_mlflow_timestamp(faculty_run.ended_at)
        if faculty_run.ended_at is not None
        else None
    )

    tag_dict = {tag.key: tag.value for tag in faculty_run.tags}

    extra_mlflow_tags = []

    # Set run name tag if set as attribute but not already a tag
    if MLFLOW_RUN_NAME not in tag_dict and faculty_run.name:
        extra_mlflow_tags.append(RunTag(MLFLOW_RUN_NAME, faculty_run.name))

    # Set parent run ID tag if set as attribute but not already a tag
    if (
        MLFLOW_PARENT_RUN_ID not in tag_dict
        and faculty_run.parent_run_id is not None
    ):
        extra_mlflow_tags.append(
            RunTag(MLFLOW_PARENT_RUN_ID, faculty_run.parent_run_id.hex)
        )

    run_info = RunInfo(
        run_uuid=faculty_run.id.hex,
        experiment_id=str(faculty_run.experiment_id),
        user_id="",
        status=_FACULTY_TO_MLFLOW_RUN_STATUS_MAP[faculty_run.status],
        start_time=start_time,
        end_time=end_time,
        lifecycle_stage=lifecycle_stage,
        artifact_uri=faculty_run.artifact_location,
        run_id=faculty_run.id.hex,
    )
    run_data = RunData(
        params=[
            faculty_param_to_mlflow_param(param)
            for param in faculty_run.params
        ],
        metrics=[
            faculty_metric_to_mlflow_metric(metric)
            for metric in faculty_run.metrics
        ],
        tags=[faculty_tag_to_mlflow_tag(tag) for tag in faculty_run.tags]
        + extra_mlflow_tags,
    )
    run = Run(run_info, run_data)
    return run
Example #24
 def _hit_to_mlflow_run_info(self, hit: Any) -> RunInfo:
     return RunInfo(
         run_uuid=hit.run_id,
         run_id=hit.run_id,
         experiment_id=str(hit.experiment_id),
         user_id=hit.user_id,
         status=hit.status,
         start_time=hit.start_time,
         end_time=hit.end_time if hasattr(hit, 'end_time') else None,
         lifecycle_stage=hit.lifecycle_stage if hasattr(
             hit, 'lifecycle_stage') else None,
         artifact_uri=hit.artifact_uri
         if hasattr(hit, 'artifact_uri') else None)
Example #25
def test_bad_comparators(entity_type, bad_comparators, key, entity_value):
    run = Run(run_info=RunInfo(
        run_uuid="hi", run_id="hi", experiment_id=0,
        user_id="user-id", status=RunStatus.to_string(RunStatus.FAILED),
        start_time=0, end_time=1, lifecycle_stage=LifecycleStage.ACTIVE),
        run_data=RunData(metrics=[], params=[], tags=[])
    )
    for bad_comparator in bad_comparators:
        bad_filter = "{entity_type}.{key} {comparator} {value}".format(
            entity_type=entity_type, key=key, comparator=bad_comparator, value=entity_value)
        with pytest.raises(MlflowException) as e:
            SearchUtils.filter([run], bad_filter)
        assert "Invalid comparator" in str(e.value.message)
Example #26
def _read_persisted_run_info_dict(run_info_dict):
    dict_copy = run_info_dict.copy()
    if 'lifecycle_stage' not in dict_copy:
        dict_copy['lifecycle_stage'] = LifecycleStage.ACTIVE
    # 'status' is stored as an integer enum in meta file, but RunInfo.status field is a string.
    # converting to string before hydrating RunInfo.
    # If 'status' value not recorded in files, mark it as 'RUNNING' (default)
    dict_copy['status'] = RunStatus.to_string(run_info_dict.get('status', RunStatus.RUNNING))

    # 'experiment_id' was changed from int to string, so we must cast to string
    # when reading legacy run_infos
    if isinstance(dict_copy["experiment_id"], int):
        dict_copy["experiment_id"] = str(dict_copy["experiment_id"])
    return RunInfo.from_dictionary(dict_copy)
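
The status handling relies on RunStatus.to_string converting the persisted enum value back into its name, and on treating a missing status as RUNNING; a minimal sketch of that normalization, with illustrative values:

# Minimal sketch of the status normalization above.
assert RunStatus.to_string(RunStatus.FINISHED) == "FINISHED"
legacy_dict = {"run_uuid": "abc", "experiment_id": 7}   # no 'status' ever recorded
assert RunStatus.to_string(legacy_dict.get("status", RunStatus.RUNNING)) == "RUNNING"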
Example #27
    def to_mlflow_entity(self) -> Run:
        run_info = RunInfo(run_uuid=self.meta.id,
                           run_id=self.meta.id,
                           experiment_id=str(self.experiment_id),
                           user_id=self.user_id,
                           status=self.status,
                           start_time=self.start_time,
                           end_time=self.end_time,
                           lifecycle_stage=self.lifecycle_stage,
                           artifact_uri=self.artifact_uri)

        run_data = RunData(
            metrics=[m.to_mlflow_entity() for m in self.latest_metrics],
            params=[p.to_mlflow_entity() for p in self.params],
            tags=[t.to_mlflow_entity() for t in self.tags])
        return Run(run_info=run_info, run_data=run_data)
Example #28
 def create_run(self, experiment_id, user_id, run_name, source_type,
                source_name, entry_point_name, start_time, source_version,
                tags, parent_run_id):
     """
     Creates a run with the specified attributes.
     """
     experiment = self.get_experiment(experiment_id)
     if experiment is None:
         raise MlflowException(
             "Could not create run under experiment with ID %s - no such experiment "
             "exists." % experiment_id,
             databricks_pb2.RESOURCE_DOES_NOT_EXIST)
     if experiment.lifecycle_stage != LifecycleStage.ACTIVE:
         raise MlflowException(
             "Could not create run under non-active experiment with ID "
             "%s." % experiment_id, databricks_pb2.INVALID_STATE)
     run_uuid = uuid.uuid4().hex
     artifact_uri = self._get_artifact_dir(experiment_id, run_uuid)
     run_info = RunInfo(run_uuid=run_uuid,
                        experiment_id=experiment_id,
                        name="",
                        artifact_uri=artifact_uri,
                        source_type=source_type,
                        source_name=source_name,
                        entry_point_name=entry_point_name,
                        user_id=user_id,
                        status=RunStatus.RUNNING,
                        start_time=start_time,
                        end_time=None,
                        source_version=source_version,
                        lifecycle_stage=LifecycleStage.ACTIVE)
     # Persist run metadata and create directories for logging metrics, parameters, artifacts
     run_dir = self._get_run_dir(run_info.experiment_id, run_info.run_uuid)
     mkdir(run_dir)
     write_yaml(run_dir, FileStore.META_DATA_FILE_NAME,
                _make_persisted_run_info_dict(run_info))
     mkdir(run_dir, FileStore.METRICS_FOLDER_NAME)
     mkdir(run_dir, FileStore.PARAMS_FOLDER_NAME)
     mkdir(run_dir, FileStore.ARTIFACTS_FOLDER_NAME)
     for tag in tags:
         self.set_tag(run_uuid, tag)
     if parent_run_id:
         self.set_tag(run_uuid,
                      RunTag(key=MLFLOW_PARENT_RUN_ID, value=parent_run_id))
     if run_name:
         self.set_tag(run_uuid, RunTag(key=MLFLOW_RUN_NAME, value=run_name))
     return Run(run_info=run_info, run_data=None)
Example #29
def test_filter_runs_by_start_time():
    runs = [
        Run(
            run_info=RunInfo(
                run_uuid=run_id,
                run_id=run_id,
                experiment_id=0,
                user_id="user-id",
                status=RunStatus.to_string(RunStatus.FINISHED),
                start_time=idx,
                end_time=1,
                lifecycle_stage=LifecycleStage.ACTIVE,
            ),
            run_data=RunData(),
        ) for idx, run_id in enumerate(["a", "b", "c"])
    ]
    assert SearchUtils.filter(runs, "attribute.start_time >= 0") == runs
    assert SearchUtils.filter(runs, "attribute.start_time > 1") == runs[2:]
    assert SearchUtils.filter(runs, "attribute.start_time = 2") == runs[2:]

def test_get_artifact_repo(artifact_uri, databricks_uri, uri_for_repo):
    with mock.patch(
            "mlflow.tracking._tracking_service.client.TrackingServiceClient.get_run",
            return_value=Run(
                RunInfo("uuid",
                        "expr_id",
                        "userid",
                        "status",
                        0,
                        10,
                        "active",
                        artifact_uri=artifact_uri),
                None,
            ),
    ), mock.patch(
            "mlflow.tracking._tracking_service.client.get_artifact_repository",
            return_value=None) as get_repo_mock:
        client = TrackingServiceClient(databricks_uri)
        client._get_artifact_repo("some-run-id")
        get_repo_mock.assert_called_once_with(uri_for_repo)