Ejemplo n.º 1
0
    def test_create_run_with_parent_id(self):
        """Create a run with an explicit parent run ID and verify its info fields and tags."""
        experiment = self._experiment_factory('test_create_run_with_parent_id')
        config = self._get_run_configs('booyya', experiment_id=experiment)

        user_tags = [RunTag('3', '4'), RunTag('1', '2')]
        # All arguments are passed positionally; the last one is the parent run ID.
        run = self.store.create_run(
            config["experiment_id"],
            config["user_id"],
            config["name"],
            SourceType.from_string(config["source_type"]),
            config["source_name"],
            config["entry_point_name"],
            config["start_time"],
            config["source_version"],
            user_tags,
            "parent_uuid_5")

        info = run.info
        # (observed, wanted) pairs, checked in order. The final pair expects four tags:
        # the two user tags plus the run-name and parent-run-ID tags asserted below.
        checks = [
            (info.experiment_id, config["experiment_id"]),
            (info.user_id, config["user_id"]),
            (info.name, 'booyya'),
            (info.source_type, SourceType.from_string(config["source_type"])),
            (info.source_name, config["source_name"]),
            (info.source_version, config["source_version"]),
            (info.entry_point_name, config["entry_point_name"]),
            (info.start_time, config["start_time"]),
            (len(run.data.tags), 4),
        ]
        for observed, wanted in checks:
            self.assertEqual(observed, wanted)

        run_name_tag = models.SqlTag(
            key='mlflow.runName', value='booyya').to_mlflow_entity()
        parent_run_tag = models.SqlTag(
            key='mlflow.parentRunId', value='parent_uuid_5').to_mlflow_entity()
        # The expected order is: user tags first, then the parent-ID tag, then the name tag.
        wanted_tags = user_tags + [parent_run_tag, run_name_tag]
        self.assertListEqual(run.data.tags, wanted_tags)
Ejemplo n.º 2
0
def _create_entity(base, model):

    # create dict of kwargs properties for entity and return the initialized entity
    config = {}
    for k in base._properties():
        # check if its mlflow entity and build it
        obj = getattr(model, k)

        if isinstance(model, SqlRun):
            if base is RunData:
                # Run data contains list for metrics, params and tags
                # so obj will be a list so we need to convert those items
                if k == 'metrics':
                    # only get latest recorded metrics per key
                    metrics = {}
                    for o in obj:
                        existing_metric = metrics.get(o.key)
                        if (existing_metric is None) or (o.timestamp > existing_metric.timestamp)\
                            or (o.timestamp == existing_metric.timestamp
                                and o.value > existing_metric.value):
                            metrics[o.key] = Metric(o.key, o.value, o.timestamp)
                    obj = metrics.values()
                elif k == 'params':
                    obj = [Param(o.key, o.value) for o in obj]
                elif k == 'tags':
                    obj = [RunTag(o.key, o.value) for o in obj]
            elif base is RunInfo:
                if k == 'source_type':
                    obj = SourceType.from_string(obj)
                elif k == "status":
                    obj = RunStatus.from_string(obj)

        config[k] = obj
    return base(**config)
Ejemplo n.º 3
0
    def create_run(self,
                   experiment_id,
                   user_id=None,
                   run_name=None,
                   start_time=None,
                   parent_run_id=None,
                   tags=None):
        """
        Create a :py:class:`mlflow.entities.Run` object that can be associated with
        metrics, parameters, artifacts, etc.
        Unlike :py:func:`mlflow.projects.run`, creates objects but does not run code.
        Unlike :py:func:`mlflow.start_run`, does not change the "active run" used by
        :py:func:`mlflow.log_param`.

        :param experiment_id: ID of the experiment; forwarded unchanged to the store.
        :param user_id: If not provided, use the current user as a default.
        :param run_name: Name of the run; forwarded unchanged to the store.
        :param start_time: If not provided (``None``), use the current timestamp in
                           milliseconds. An explicit ``0`` is passed through as given.
        :param parent_run_id: Optional parent run ID - takes precedence over parent run ID
                              included in the `tags` argument.
        :param tags: A dictionary of key-value pairs that are converted into
                     :py:class:`mlflow.entities.RunTag` objects.
        :return: :py:class:`mlflow.entities.Run` that was created.
        """

        tags = tags if tags else {}

        # Extract run attributes from tags
        # This logic is temporary; by the 1.0 release, this information will only be stored in tags
        # and will not be available as attributes of the run
        if parent_run_id is None:
            final_parent_run_id = tags.get(MLFLOW_PARENT_RUN_ID)
        else:
            final_parent_run_id = parent_run_id
        source_name = tags.get(MLFLOW_SOURCE_NAME, "Python Application")
        source_version = tags.get(MLFLOW_GIT_COMMIT)
        entry_point_name = tags.get(MLFLOW_PROJECT_ENTRY_POINT)

        source_type_string = tags.get(MLFLOW_SOURCE_TYPE)
        if source_type_string is None:
            source_type = SourceType.LOCAL
        else:
            source_type = SourceType.from_string(source_type_string)

        # Compare against None rather than relying on truthiness so that an explicit
        # start_time of 0 is honored, mirroring how user_id is defaulted below.
        if start_time is None:
            start_time = int(time.time() * 1000)

        return self.store.create_run(
            experiment_id=experiment_id,
            user_id=user_id if user_id is not None else _get_user_id(),
            run_name=run_name,
            start_time=start_time,
            tags=[RunTag(key, value) for (key, value) in iteritems(tags)],
            # The below arguments remain set for backwards compatibility:
            parent_run_id=final_parent_run_id,
            source_type=source_type,
            source_name=source_name,
            entry_point_name=entry_point_name,
            source_version=source_version)
Ejemplo n.º 4
0
    def test_create_run(self):
        """Create a run without a parent and verify its info fields and tags."""
        exp_id = self._experiment_factory('test_create_run')
        config = self._get_run_configs('booyya', experiment_id=exp_id)

        user_tags = [RunTag('3', '4'), RunTag('1', '2')]
        # All arguments are passed positionally; the trailing None is the parent run ID.
        run = self.store.create_run(
            config["experiment_id"],
            config["user_id"],
            config["name"],
            SourceType.from_string(config["source_type"]),
            config["source_name"],
            config["entry_point_name"],
            config["start_time"],
            config["source_version"],
            user_tags,
            None)

        info = run.info
        # (observed, wanted) pairs, checked in order. The final pair expects three tags:
        # the two user tags plus the run-name tag asserted below.
        checks = [
            (info.experiment_id, config["experiment_id"]),
            (info.user_id, config["user_id"]),
            (info.name, 'booyya'),
            (info.source_type, SourceType.from_string(config["source_type"])),
            (info.source_name, config["source_name"]),
            (info.source_version, config["source_version"]),
            (info.entry_point_name, config["entry_point_name"]),
            (info.start_time, config["start_time"]),
            (len(run.data.tags), 3),
        ]
        for observed, wanted in checks:
            self.assertEqual(observed, wanted)

        run_name_tag = models.SqlTag(key=MLFLOW_RUN_NAME, value='booyya').to_mlflow_entity()
        wanted_tags = user_tags + [run_name_tag]
        self.assertListEqual(run.data.tags, wanted_tags)
Ejemplo n.º 5
0
def _create_entity(base, model):

    # create dict of kwargs properties for entity and return the initialized entity
    config = {}
    for k in base._properties():
        # check if its mlflow entity and build it
        obj = getattr(model, k)

        if isinstance(model, SqlRun):
            if base is RunData:
                # Run data contains list for metrics, params and tags
                # so obj will be a list so we need to convert those items
                if k == 'metrics':
                    # only get latest recorded metrics per key
                    metrics = {}
                    for o in obj:
                        existing_metric = metrics.get(o.key)
                        if (existing_metric is None)\
                            or ((o.step, o.timestamp, o.value) >=
                                (existing_metric.step, existing_metric.timestamp,
                                 existing_metric.value)):
                            metrics[o.key] = Metric(o.key, o.value,
                                                    o.timestamp, o.step)
                    obj = list(metrics.values())
                elif k == 'params':
                    obj = [Param(o.key, o.value) for o in obj]
                elif k == 'tags':
                    obj = [RunTag(o.key, o.value) for o in obj]
            elif base is RunInfo:
                if k == 'source_type':
                    obj = SourceType.from_string(obj)
                elif k == "status":
                    obj = RunStatus.from_string(obj)
                elif k == "experiment_id":
                    obj = str(obj)

        # Our data model defines experiment_ids as ints, but the in-memory representation was
        # changed to be a string in time for 1.0.
        if isinstance(model, SqlExperiment) and k == "experiment_id":
            obj = str(obj)

        config[k] = obj
    return base(**config)
Ejemplo n.º 6
0
    def to_mlflow_entity(self):
        """
        Translate this DB model into its MLflow entity form.

        The result combines a ``RunInfo`` built from this object's attributes with a
        ``RunData`` holding the converted params and tags and, for each metric key, only
        the metric that ranks highest by ``(step, timestamp, value)``.

        :return: :py:class:`mlflow.entities.Run`.
        """
        info_fields = dict(
            run_uuid=self.run_uuid,
            run_id=self.run_uuid,
            experiment_id=str(self.experiment_id),
            name=self.name,
            source_type=SourceType.from_string(self.source_type),
            source_name=self.source_name,
            entry_point_name=self.entry_point_name,
            user_id=self.user_id,
            status=RunStatus.from_string(self.status),
            start_time=self.start_time,
            end_time=self.end_time,
            source_version=self.source_version,
            lifecycle_stage=self.lifecycle_stage,
            artifact_uri=self.artifact_uri,
        )
        info = RunInfo(**info_fields)

        # Keep a single metric per key: the one with the greatest
        # (step, timestamp, value); on a tie the later entry replaces the earlier one.
        converted = [row.to_mlflow_entity() for row in self.metrics]
        latest_by_key = {}
        for candidate in converted:
            kept = latest_by_key.get(candidate.key)
            if kept is not None and not (
                    (candidate.step, candidate.timestamp, candidate.value) >=
                    (kept.step, kept.timestamp, kept.value)):
                continue
            latest_by_key[candidate.key] = candidate

        latest_metrics = list(latest_by_key.values())
        params = [row.to_mlflow_entity() for row in self.params]
        tags = [row.to_mlflow_entity() for row in self.tags]
        data = RunData(metrics=latest_metrics, params=params, tags=tags)

        return Run(run_info=info, run_data=data)
Ejemplo n.º 7
0
def start_run(run_uuid=None,
              experiment_id=None,
              source_name=None,
              source_version=None,
              entry_point_name=None,
              source_type=None,
              run_name=None,
              nested=False):
    """
    Start a new MLflow run and make it the active run under which metrics and parameters
    are logged. The returned object can be used as a context manager in a ``with`` block;
    otherwise ``end_run()`` must be called to terminate the current run.

    When ``run_uuid`` is passed or the ``MLFLOW_RUN_ID`` environment variable is set,
    ``start_run`` attempts to resume the run with that ID and the other parameters are
    ignored. ``run_uuid`` takes precedence over ``MLFLOW_RUN_ID``.

    :param run_uuid: If specified, get the run with the specified UUID and log parameters
                     and metrics under that run. The run's end time is unset and its status
                     is set to running, but the run's other attributes (``source_version``,
                     ``source_type``, etc.) are not changed.
    :param experiment_id: ID of the experiment under which to create the current run
                          (applicable only when ``run_uuid`` is not specified). If
                          unspecified, a valid experiment is looked up in the following
                          order: activated using ``set_experiment``,
                          ``MLFLOW_EXPERIMENT_ID`` env variable, or the default experiment.
    :param source_name: Name of the source file or URI of the project to be associated
                        with the run. If none provided defaults to the current file.
    :param source_version: Optional Git commit hash to associate with the run.
    :param entry_point_name: Optional name of the entry point for the current run.
    :param source_type: Integer :py:class:`mlflow.entities.SourceType` describing the type
                        of the run ("local", "project", etc.). Defaults to
                        :py:class:`mlflow.entities.SourceType.LOCAL` ("local").
    :param run_name: Name of new run. Used only when ``run_uuid`` is unspecified.
    :param nested: Must be set to ``True`` to start a run while another run is active.
    :return: :py:class:`mlflow.ActiveRun` object that acts as a context manager wrapping
             the run's state.
    """
    has_active_run = len(_active_run_stack) > 0
    if has_active_run and not nested:
        outermost_uuid = _active_run_stack[0].info.run_uuid
        raise Exception(
            "Run with UUID {} is already active. To start a nested "
            "run call start_run with nested=True".format(outermost_uuid))

    # An explicit run_uuid wins over the environment variable.
    resume_run_id = run_uuid or os.environ.get(_RUN_ID_ENV_VAR, None)
    if resume_run_id:
        _validate_run_id(resume_run_id)
        run = MlflowClient().get_run(resume_run_id)
        if run.info.lifecycle_stage == LifecycleStage.DELETED:
            raise MlflowException(
                "Cannot start run with ID {} because it is in the "
                "deleted state.".format(resume_run_id))
    else:
        # A new run is parented to the run currently on top of the stack, if any.
        parent_run_id = _active_run_stack[-1].info.run_uuid if has_active_run else None

        if experiment_id is None:
            exp_id_for_run = _get_experiment_id()
        else:
            exp_id_for_run = experiment_id

        # (tag key, caller-supplied value, optional converter); only values that were
        # actually supplied become tags.
        tag_sources = (
            (MLFLOW_SOURCE_NAME, source_name, None),
            (MLFLOW_SOURCE_TYPE, source_type, SourceType.to_string),
            (MLFLOW_GIT_COMMIT, source_version, None),
            (MLFLOW_PROJECT_ENTRY_POINT, entry_point_name, None),
        )
        explicit_tags = {}
        for tag_key, supplied, convert in tag_sources:
            if supplied is None:
                continue
            explicit_tags[tag_key] = supplied if convert is None else convert(supplied)

        resolved_tags = context.resolve_tags(explicit_tags)

        # The run metadata (source_name, source_version, entry_point_name and source_type)
        # is read back from the resolved tags because it is still stored in RunInfo for
        # backward compatibility.
        # TODO: Remove the four backward-compatibility arguments below with API changes
        #  to create_run.
        create_run = MlflowClient().create_run
        run = create_run(
            experiment_id=exp_id_for_run,
            run_name=run_name,
            # TODO: for backward compatibility. Remove.
            source_name=resolved_tags.get(MLFLOW_SOURCE_NAME),
            # TODO: for backward compatibility. Remove.
            source_version=resolved_tags.get(MLFLOW_GIT_COMMIT),
            # TODO: remove
            entry_point_name=resolved_tags.get(MLFLOW_PROJECT_ENTRY_POINT),
            # TODO: Remove
            source_type=SourceType.from_string(resolved_tags.get(MLFLOW_SOURCE_TYPE)),
            tags=resolved_tags,
            parent_run_id=parent_run_id)

    active_run = ActiveRun(run)
    _active_run_stack.append(active_run)
    return active_run