Example 1
 def _update_status(self):
     api_response = self._kube_api.read_namespaced_job_status(
         name=self._job_name, namespace=self._job_namespace, pretty=True)
     status = api_response.status
     with self._status_lock:
         if RunStatus.is_terminated(self._status):
             return self._status
         if self._status == RunStatus.SCHEDULED:
             if api_response.status.start_time is None:
                 _logger.info("Waiting for Job to start")
             else:
                 _logger.info("Job started.")
                 self._status = RunStatus.RUNNING
         if status.conditions is not None:
             for condition in status.conditions:
                 if condition.status == "True":
                     _logger.info(condition.message)
                     if condition.type == "Failed":
                         self._status = RunStatus.FAILED
                     elif condition.type == "Complete":
                         self._status = RunStatus.FINISHED
     return self._status
Example 2
def test_run(use_start_run):
    submitted_run = mlflow.projects.run(
        TEST_PROJECT_DIR,
        entry_point="test_tracking",
        parameters={"use_start_run": use_start_run},
        use_conda=False,
        experiment_id=FileStore.DEFAULT_EXPERIMENT_ID,
    )
    assert submitted_run.run_id is not None
    # Blocking runs should be finished when they return
    validate_exit_status(submitted_run.get_status(), RunStatus.FINISHED)
    # Test that we can call wait() on a synchronous run & that the run has the correct
    # status after calling wait().
    submitted_run.wait()
    validate_exit_status(submitted_run.get_status(), RunStatus.FINISHED)
    # Validate run contents in the FileStore
    run_id = submitted_run.run_id
    mlflow_service = mlflow.tracking.MlflowClient()

    run_infos = mlflow_service.list_run_infos(
        experiment_id=FileStore.DEFAULT_EXPERIMENT_ID,
        run_view_type=ViewType.ACTIVE_ONLY)
    assert len(run_infos) == 1
    store_run_id = run_infos[0].run_id
    assert run_id == store_run_id
    run = mlflow_service.get_run(run_id)

    assert run.info.status == RunStatus.to_string(RunStatus.FINISHED)

    assert run.data.params == {
        "use_start_run": use_start_run,
    }
    assert run.data.metrics == {"some_key": 3}

    tags = run.data.tags
    assert tags[MLFLOW_USER] == MOCK_USER
    assert "file:" in tags[MLFLOW_SOURCE_NAME]
    assert tags[MLFLOW_SOURCE_TYPE] == SourceType.to_string(SourceType.PROJECT)
    assert tags[MLFLOW_PROJECT_ENTRY_POINT] == "test_tracking"
Example 3
    def to_mlflow_entity(self):
        """
        Convert DB model to corresponding MLflow entity.

        :return: :py:class:`mlflow.entities.Run`.
        """
        run_info = RunInfo(run_uuid=self.run_uuid,
                           run_id=self.run_uuid,
                           experiment_id=str(self.experiment_id),
                           name=self.name,
                           source_type=SourceType.from_string(
                               self.source_type),
                           source_name=self.source_name,
                           entry_point_name=self.entry_point_name,
                           user_id=self.user_id,
                           status=RunStatus.from_string(self.status),
                           start_time=self.start_time,
                           end_time=self.end_time,
                           source_version=self.source_version,
                           lifecycle_stage=self.lifecycle_stage,
                           artifact_uri=self.artifact_uri)

        # only get latest recorded metrics per key
        all_metrics = [m.to_mlflow_entity() for m in self.metrics]
        metrics = {}
        for m in all_metrics:
            existing_metric = metrics.get(m.key)
            if (existing_metric is None)\
                or ((m.step, m.timestamp, m.value) >=
                    (existing_metric.step, existing_metric.timestamp,
                        existing_metric.value)):
                metrics[m.key] = m

        run_data = RunData(metrics=list(metrics.values()),
                           params=[p.to_mlflow_entity() for p in self.params],
                           tags=[t.to_mlflow_entity() for t in self.tags])

        return Run(run_info=run_info, run_data=run_data)
Example 4
    def create_run(self, experiment_id, user_id, run_name, source_type,
                   source_name, entry_point_name, start_time, source_version,
                   tags, parent_run_id):
        experiment = self.get_experiment(experiment_id)

        if experiment.lifecycle_stage != LifecycleStage.ACTIVE:
            raise MlflowException(
                'Experiment id={} must be active'.format(experiment_id),
                INVALID_STATE)

        run_uuid = uuid.uuid4().hex
        artifact_location = build_path(experiment.artifact_location, run_uuid,
                                       SqlAlchemyStore.ARTIFACTS_FOLDER_NAME)
        run = SqlRun(name=run_name or "",
                     artifact_uri=artifact_location,
                     run_uuid=run_uuid,
                     experiment_id=experiment_id,
                     source_type=SourceType.to_string(source_type),
                     source_name=source_name,
                     entry_point_name=entry_point_name,
                     user_id=user_id,
                     status=RunStatus.to_string(RunStatus.RUNNING),
                     start_time=start_time,
                     end_time=None,
                     source_version=source_version,
                     lifecycle_stage=LifecycleStage.ACTIVE)

        for tag in tags:
            run.tags.append(SqlTag(key=tag.key, value=tag.value))
        if parent_run_id:
            run.tags.append(
                SqlTag(key=MLFLOW_PARENT_RUN_ID, value=parent_run_id))
        if run_name:
            run.tags.append(SqlTag(key=MLFLOW_RUN_NAME, value=run_name))

        self._save_to_db([run])

        return run.to_mlflow_entity()
Example 5
    def on_pipeline_error(
        self,
        error: Exception,
        run_params: Dict[str, Any],
        pipeline: Pipeline,
        catalog: DataCatalog,
    ):
        """Hook invoked when the pipeline execution fails.
         All the mlflow runs must be closed to avoid interference with further execution.

        Args:
            error: (Not used) The uncaught exception thrown during the pipeline run.
            run_params: (Not used) The params used to run the pipeline.
                Should be identical to the data logged by Journal with the following schema::

                   {
                     "project_path": str,
                     "env": str,
                     "kedro_version": str,
                     "tags": Optional[List[str]],
                     "from_nodes": Optional[List[str]],
                     "to_nodes": Optional[List[str]],
                     "node_names": Optional[List[str]],
                     "from_inputs": Optional[List[str]],
                     "load_versions": Optional[List[str]],
                     "pipeline_name": str,
                     "extra_params": Optional[Dict[str, Any]]
                   }
            pipeline: (Not used) The ``Pipeline`` that was run.
            catalog: (Not used) The ``DataCatalog`` used during the run.
        """
        if self._is_mlflow_enabled:
            while mlflow.active_run():
                mlflow.end_run(RunStatus.to_string(RunStatus.FAILED))
        else:  # pragma: no cover
            # the catalog is supposed to be reloaded each time with _get_catalog,
            # hence it should not be modified. this is only a safeguard
            switch_catalog_logging(catalog, True)
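The while mlflow.active_run() loop in the hook matters because a failed pipeline can leave nested runs open. A minimal, self-contained sketch of the situation it guards against (the simulated failure and the assertions are illustrative; only start_run, active_run, and end_run come from the examples above):

import mlflow
from mlflow.entities import RunStatus

# Hypothetical scenario: a parent run and a nested child run are both still
# open when the pipeline fails, which is what on_pipeline_error has to clean up.
mlflow.start_run()
mlflow.start_run(nested=True)

# Same loop as the hook: keep ending runs until the active-run stack is empty.
while mlflow.active_run():
    mlflow.end_run(RunStatus.to_string(RunStatus.FAILED))

assert mlflow.active_run() is None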
Example 6
    def create_run(self, experiment_id, user_id, start_time, tags):
        with self.ManagedSessionMaker() as session:
            experiment = self.get_experiment(experiment_id)
            self._check_experiment_is_active(experiment)

            run_id = uuid.uuid4().hex
            artifact_location = append_to_uri_path(experiment.artifact_location, run_id,
                                                   SqlAlchemyStore.ARTIFACTS_FOLDER_NAME)
            run = SqlRun(name="", artifact_uri=artifact_location, run_uuid=run_id,
                         experiment_id=experiment_id,
                         source_type=SourceType.to_string(SourceType.UNKNOWN),
                         source_name="", entry_point_name="",
                         user_id=user_id, status=RunStatus.to_string(RunStatus.RUNNING),
                         start_time=start_time, end_time=None,
                         source_version="", lifecycle_stage=LifecycleStage.ACTIVE)

            tags_dict = {}
            for tag in tags:
                tags_dict[tag.key] = tag.value
            run.tags = [SqlTag(key=key, value=value) for key, value in tags_dict.items()]
            self._save_to_db(objs=run, session=session)

            return run.to_mlflow_entity()
Example 7
def test_bad_comparators(entity_type, bad_comparators, key, entity_value):
    run = Run(
        run_info=RunInfo(
            run_uuid="hi",
            run_id="hi",
            experiment_id=0,
            user_id="user-id",
            status=RunStatus.to_string(RunStatus.FAILED),
            start_time=0,
            end_time=1,
            lifecycle_stage=LifecycleStage.ACTIVE,
        ),
        run_data=RunData(metrics=[], params=[], tags=[]),
    )
    for bad_comparator in bad_comparators:
        bad_filter = "{entity_type}.{key} {comparator} {value}".format(
            entity_type=entity_type,
            key=key,
            comparator=bad_comparator,
            value=entity_value)
        with pytest.raises(MlflowException) as e:
            SearchUtils.filter([run], bad_filter)
        assert "Invalid comparator" in str(e.value.message)
Example 8
def get_infos(run_uuid, store=None):
    from mlflow.entities import RunStatus

    run = get_run(run_uuid, store=store)

    if run.info.end_time is None:
        duration = None

    else:
        duration = run.info.end_time - run.info.start_time

    return {
        ("run", "uuid"): run.info.run_uuid,
        ("run", "experiment_id"): run.info.experiment_id,
        ("run", "status"): RunStatus.to_string(run.info.status),
        ("run", "start_time"): run.info.start_time,
        ("run", "end_time"): run.info.end_time,
        ("run", "duration"): duration,
        **{("metric", m.key): m.value
           for m in get_all_metrics(run_uuid, store=store)},
        **{("param", p.key): p.value
           for p in get_all_params(run_uuid, store=store)},
    }
Example 9
 def create_run(self, experiment_id, user_id, start_time, tags):
     """
     Creates a run with the specified attributes.
     """
     experiment_id = FileStore.DEFAULT_EXPERIMENT_ID if experiment_id is None else experiment_id
     experiment = self.get_experiment(experiment_id)
     if experiment is None:
         raise MlflowException(
             "Could not create run under experiment with ID %s - no such experiment "
             "exists." % experiment_id,
             databricks_pb2.RESOURCE_DOES_NOT_EXIST)
     if experiment.lifecycle_stage != LifecycleStage.ACTIVE:
         raise MlflowException(
             "Could not create run under non-active experiment with ID "
             "%s." % experiment_id, databricks_pb2.INVALID_STATE)
     run_uuid = uuid.uuid4().hex
     artifact_uri = self._get_artifact_dir(experiment_id, run_uuid)
     run_info = RunInfo(run_uuid=run_uuid,
                        run_id=run_uuid,
                        experiment_id=experiment_id,
                        artifact_uri=artifact_uri,
                        user_id=user_id,
                        status=RunStatus.to_string(RunStatus.RUNNING),
                        start_time=start_time,
                        end_time=None,
                        lifecycle_stage=LifecycleStage.ACTIVE)
     # Persist run metadata and create directories for logging metrics, parameters, artifacts
     run_dir = self._get_run_dir(run_info.experiment_id, run_info.run_id)
     mkdir(run_dir)
     run_info_dict = _make_persisted_run_info_dict(run_info)
     write_yaml(run_dir, FileStore.META_DATA_FILE_NAME, run_info_dict)
     mkdir(run_dir, FileStore.METRICS_FOLDER_NAME)
     mkdir(run_dir, FileStore.PARAMS_FOLDER_NAME)
     mkdir(run_dir, FileStore.ARTIFACTS_FOLDER_NAME)
     for tag in tags:
         self.set_tag(run_uuid, tag)
     return self.get_run(run_id=run_uuid)
Example 10
def end_run(status=RunStatus.to_string(RunStatus.FINISHED)):
    """End an active MLflow run (if there is one).

    .. code-block:: python
        :caption: Example

        import mlflow

        # Start run and get status
        mlflow.start_run()
        run = mlflow.active_run()
        print("run_id: {}; status: {}".format(run.info.run_id, run.info.status))

        # End run and get status
        mlflow.end_run()
        run = mlflow.get_run(run.info.run_id)
        print("run_id: {}; status: {}".format(run.info.run_id, run.info.status))
        print("--")

        # Check for any active runs
        print("Active run: {}".format(mlflow.active_run()))

    .. code-block:: text
        :caption: Output

        run_id: b47ee4563368419880b44ad8535f6371; status: RUNNING
        run_id: b47ee4563368419880b44ad8535f6371; status: FINISHED
        --
        Active run: None
    """
    global _active_run_stack
    if len(_active_run_stack) > 0:
        # Clear out the global existing run environment variable as well.
        env.unset_variable(_RUN_ID_ENV_VAR)
        run = _active_run_stack.pop()
        MlflowClient().set_terminated(run.info.run_id, status)
Example 11
def test_order_by_metric_with_nans_and_infs():
    metric_vals_str = ["nan", "inf", "-inf", "-1000", "0", "1000"]
    runs = [
        Run(run_info=RunInfo(run_id=x,
                             run_uuid=x,
                             experiment_id=0,
                             user_id="user",
                             status=RunStatus.to_string(RunStatus.FINISHED),
                             start_time=0,
                             end_time=1,
                             lifecycle_stage=LifecycleStage.ACTIVE),
            run_data=RunData(metrics=[Metric("x", float(x), 1, 0)]))
        for x in metric_vals_str
    ]
    sorted_runs_asc = [
        x.info.run_id for x in SearchUtils.sort(runs, ["metrics.x asc"])
    ]
    sorted_runs_desc = [
        x.info.run_id for x in SearchUtils.sort(runs, ["metrics.x desc"])
    ]
    # asc
    assert ["-inf", "-1000", "0", "1000", "inf", "nan"] == sorted_runs_asc
    # desc
    assert ["inf", "1000", "0", "-1000", "-inf", "nan"] == sorted_runs_desc
Example 12
def test_param_search_estimator(  # pylint: disable=unused-argument
        metric_name, param_search_estimator, spark_session,
        dataset_regression):
    mlflow.pyspark.ml.autolog()
    lr = LinearRegression(solver="l-bfgs", regParam=0.01)
    lrParamMaps = [
        {
            lr.maxIter: 1,
            lr.standardization: False
        },
        {
            lr.maxIter: 200,
            lr.standardization: True
        },
        {
            lr.maxIter: 2,
            lr.standardization: False
        },
    ]
    best_params = {
        "LinearRegression.maxIter": 200,
        "LinearRegression.standardization": True
    }
    eva = RegressionEvaluator(metricName=metric_name)
    estimator = param_search_estimator(estimator=lr,
                                       estimatorParamMaps=lrParamMaps,
                                       evaluator=eva)
    with mlflow.start_run() as run:
        model = estimator.fit(dataset_regression)
        estimator_info = load_json_artifact("estimator_info.json")
        metadata = _gen_estimator_metadata(estimator)
        assert metadata.hierarchy == estimator_info["hierarchy"]

        param_search_estimator_info = estimator_info[
            metadata.uid_to_indexed_name_map[estimator.uid]]
        assert param_search_estimator_info[
            "tuned_estimator_parameter_map"] == _get_instance_param_map_recursively(
                lr, 1, metadata.uid_to_indexed_name_map)
        assert param_search_estimator_info[
            "tuning_parameter_map_list"] == _get_tuning_param_maps(
                estimator, metadata.uid_to_indexed_name_map)

        assert best_params == load_json_artifact("best_parameters.json")

        search_results = load_json_csv("search_results.csv")

    uid_to_indexed_name_map = metadata.uid_to_indexed_name_map
    run_id = run.info.run_id
    run_data = get_run_data(run_id)
    assert run_data.params == truncate_param_dict(
        stringify_dict_values({
            **_get_instance_param_map(estimator, uid_to_indexed_name_map),
            **{f"best_{k}": v
               for k, v in best_params.items()},
        }))
    assert run_data.tags == get_expected_class_tags(estimator)
    assert MODEL_DIR in run_data.artifacts
    loaded_model = load_model_by_run_id(run_id)
    assert loaded_model.stages[0].uid == model.uid
    loaded_best_model = load_model_by_run_id(run_id, "best_model")
    assert loaded_best_model.stages[0].uid == model.bestModel.uid
    assert run_data.artifacts == [
        "best_model",
        "best_parameters.json",
        "estimator_info.json",
        "model",
        "search_results.csv",
    ]

    client = mlflow.tracking.MlflowClient()
    child_runs = client.search_runs(
        run.info.experiment_id,
        "tags.`mlflow.parentRunId` = '{}'".format(run_id))
    assert len(child_runs) == len(search_results)

    for row_index, row in search_results.iterrows():
        row_params = json.loads(row.get("params", "{}"))
        for param_name, param_value in row_params.items():
            assert param_value == row.get(f"param.{param_name}")

        params_search_clause = " and ".join([
            "params.`{}` = '{}'".format(key.split(".")[1], value)
            for key, value in row_params.items()
        ])
        search_filter = "tags.`mlflow.parentRunId` = '{}' and {}".format(
            run_id, params_search_clause)
        child_runs = client.search_runs(run.info.experiment_id, search_filter)
        assert len(child_runs) == 1
        child_run = child_runs[0]
        assert child_run.info.status == RunStatus.to_string(RunStatus.FINISHED)
        run_data = get_run_data(child_run.info.run_id)
        child_estimator = estimator.getEstimator().copy(
            estimator.getEstimatorParamMaps()[row_index])
        assert run_data.tags == get_expected_class_tags(child_estimator)
        assert run_data.params == truncate_param_dict(
            stringify_dict_values({
                **_get_instance_param_map(child_estimator, uid_to_indexed_name_map)
            }))
        assert (child_run.data.tags.get(MLFLOW_AUTOLOGGING) ==
                mlflow.pyspark.ml.AUTOLOGGING_INTEGRATION_NAME)

        metric_name = estimator.getEvaluator().getMetricName()
        if isinstance(estimator, CrossValidator):
            avg_metric_value = model.avgMetrics[row_index]
            avg_metric_name = f"avg_{metric_name}"
        else:
            avg_metric_value = model.validationMetrics[row_index]
            avg_metric_name = metric_name

        assert math.isclose(avg_metric_value,
                            run_data.metrics[avg_metric_name],
                            rel_tol=1e-6)
        assert math.isclose(avg_metric_value,
                            float(row.get(avg_metric_name)),
                            rel_tol=1e-6)

        if isinstance(estimator, CrossValidator) and Version(
                pyspark.__version__) >= Version("3.3"):
            std_metric_name = f"std_{metric_name}"
            std_metric_value = model.stdMetrics[row_index]
            assert math.isclose(std_metric_value,
                                run_data.metrics[std_metric_name],
                                rel_tol=1e-6)
            assert math.isclose(std_metric_value,
                                float(row.get(std_metric_name)),
                                rel_tol=1e-6)
Example 13
 def test_is_terminated(self):
     self.assertTrue(RunStatus.is_terminated(RunStatus.FAILED))
     self.assertTrue(RunStatus.is_terminated(RunStatus.FINISHED))
     self.assertTrue(RunStatus.is_terminated(RunStatus.KILLED))
     self.assertFalse(RunStatus.is_terminated(RunStatus.SCHEDULED))
     self.assertFalse(RunStatus.is_terminated(RunStatus.RUNNING))
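The assertions above pin down which statuses RunStatus.is_terminated treats as terminal: FAILED, FINISHED, and KILLED, but not SCHEDULED or RUNNING. A hedged sketch of that contract, written against a plain set (the TERMINAL name and the is_terminal helper are ours, not MLflow's):

from mlflow.entities import RunStatus

# Illustrative only: the statuses the test above expects to be terminal.
TERMINAL = {RunStatus.FAILED, RunStatus.FINISHED, RunStatus.KILLED}

def is_terminal(status):
    # A run in one of these states can no longer change status.
    return status in TERMINAL

assert all(is_terminal(s) for s in TERMINAL)
assert not is_terminal(RunStatus.SCHEDULED)
assert not is_terminal(RunStatus.RUNNING)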
Example 14
from alembic import op
from packaging.version import Version
from sqlalchemy import CheckConstraint, Enum

from mlflow.entities import RunStatus, ViewType
from mlflow.entities.lifecycle_stage import LifecycleStage
from mlflow.store.tracking.dbmodels.models import SqlRun, SourceTypes

# revision identifiers, used by Alembic.
revision = "cfd24bdc0731"
down_revision = "2b4d017a5e9b"
branch_labels = None
depends_on = None

old_run_statuses = [
    RunStatus.to_string(RunStatus.SCHEDULED),
    RunStatus.to_string(RunStatus.FAILED),
    RunStatus.to_string(RunStatus.FINISHED),
    RunStatus.to_string(RunStatus.RUNNING),
]

new_run_statuses = [*old_run_statuses, RunStatus.to_string(RunStatus.KILLED)]

# Certain SQL backends (e.g., SQLite) do not preserve CHECK constraints during migrations.
# For these backends, CHECK constraints must be specified as table arguments. Here, we define
# the collection of CHECK constraints that should be preserved when performing the migration.
# The "status" constraint is excluded from this set because it is explicitly modified
# within the migration's `upgrade()` routine.
check_constraint_table_args = [
    CheckConstraint(SqlRun.source_type.in_(SourceTypes), name="source_type"),
    CheckConstraint(
Example 15
 def _create_root(self, root):
     self.test_root = os.path.join(root,
                                   "test_file_store_%d" % random_int())
     os.mkdir(self.test_root)
     self.experiments = [str(random_int(100, int(1e9))) for _ in range(3)]
     self.exp_data = {}
     self.run_data = {}
     # Include default experiment
     self.experiments.append(FileStore.DEFAULT_EXPERIMENT_ID)
     for exp in self.experiments:
         # create experiment
         exp_folder = os.path.join(self.test_root, str(exp))
         os.makedirs(exp_folder)
         d = {
             "experiment_id": exp,
             "name": random_str(),
             "artifact_location": exp_folder
         }
         self.exp_data[exp] = d
         write_yaml(exp_folder, FileStore.META_DATA_FILE_NAME, d)
         # add runs
         self.exp_data[exp]["runs"] = []
         for _ in range(2):
             run_id = uuid.uuid4().hex
             self.exp_data[exp]["runs"].append(run_id)
             run_folder = os.path.join(exp_folder, run_id)
             os.makedirs(run_folder)
             run_info = {
                 "run_uuid": run_id,
                 "run_id": run_id,
                 "experiment_id": exp,
                 "user_id": random_str(random_int(10, 25)),
                 "status": random.choice(RunStatus.all_status()),
                 "start_time": random_int(1, 10),
                 "end_time": random_int(20, 30),
                 "tags": [],
                 "artifact_uri": "%s/%s" % (run_folder, FileStore.ARTIFACTS_FOLDER_NAME),
             }
             write_yaml(run_folder, FileStore.META_DATA_FILE_NAME, run_info)
             self.run_data[run_id] = run_info
             # params
             params_folder = os.path.join(run_folder,
                                          FileStore.PARAMS_FOLDER_NAME)
             os.makedirs(params_folder)
             params = {}
             for _ in range(5):
                 param_name = random_str(random_int(4, 12))
                 param_value = random_str(random_int(10, 15))
                 param_file = os.path.join(params_folder, param_name)
                 with open(param_file, 'w') as f:
                     f.write(param_value)
                 params[param_name] = param_value
             self.run_data[run_id]["params"] = params
             # metrics
             metrics_folder = os.path.join(run_folder,
                                           FileStore.METRICS_FOLDER_NAME)
             os.makedirs(metrics_folder)
             metrics = {}
             for _ in range(3):
                 metric_name = random_str(random_int(6, 10))
                 timestamp = int(time.time())
                 metric_file = os.path.join(metrics_folder, metric_name)
                 values = []
                 for _ in range(10):
                     metric_value = random_int(100, 2000)
                     timestamp += random_int(10000, 2000000)
                     values.append((timestamp, metric_value))
                     with open(metric_file, 'a') as f:
                         f.write("%d %d\n" % (timestamp, metric_value))
                 metrics[metric_name] = values
             self.run_data[run_id]["metrics"] = metrics
             # artifacts
             os.makedirs(
                 os.path.join(run_folder, FileStore.ARTIFACTS_FOLDER_NAME))
Example 16
def validate_exit_status(status_str, expected):
    assert RunStatus.from_string(status_str) == expected
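validate_exit_status relies on RunStatus.from_string being the inverse of RunStatus.to_string, the same pairing used throughout the other examples. A small round-trip check, assuming nothing beyond those helpers and all_status() (seen in Examples 15 and 27):

from mlflow.entities import RunStatus

# Round-trip every known status through its string form,
# e.g. RunStatus.FINISHED -> "FINISHED" -> RunStatus.FINISHED.
for status in RunStatus.all_status():
    assert RunStatus.from_string(RunStatus.to_string(status)) == status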
Example 17
def test_start_existing_run_status(empty_active_run_stack):  # pylint: disable=unused-argument
    run_id = mlflow.start_run().info.run_id
    mlflow.end_run()
    assert MlflowClient().get_run(run_id).info.status == RunStatus.to_string(RunStatus.FINISHED)
    restarted_run = mlflow.start_run(run_id)
    assert restarted_run.info.status == RunStatus.to_string(RunStatus.RUNNING)
Example 18
    ViewType,
    ExperimentTag,
)
from mlflow.entities.lifecycle_stage import LifecycleStage
from mlflow.store.db.base_sql_model import Base

SourceTypes = [
    SourceType.to_string(SourceType.NOTEBOOK),
    SourceType.to_string(SourceType.JOB),
    SourceType.to_string(SourceType.LOCAL),
    SourceType.to_string(SourceType.UNKNOWN),
    SourceType.to_string(SourceType.PROJECT),
]

RunStatusTypes = [
    RunStatus.to_string(RunStatus.SCHEDULED),
    RunStatus.to_string(RunStatus.FAILED),
    RunStatus.to_string(RunStatus.FINISHED),
    RunStatus.to_string(RunStatus.RUNNING),
    RunStatus.to_string(RunStatus.KILLED),
]


class SqlExperiment(Base):
    """
    DB model for :py:class:`mlflow.entities.Experiment`. These are recorded in the ``experiments`` table.
    """

    __tablename__ = "experiments"

    experiment_id = Column(Integer, autoincrement=True)
Example 19
 def get_status(self):
     status = self._status
     return status if RunStatus.is_terminated(status) else self._update_status()
Example 20
 def get_status(self):
     return RunStatus.to_string(self._get_status())
Example 21
    def wait(self):
        kube_api = kubernetes.client.BatchV1Api()
        while not RunStatus.is_terminated(self._update_status(kube_api)):
            time.sleep(self.POLL_STATUS_INTERVAL)

        return self._status == RunStatus.FINISHED
Example 22
import mock

from mlflow.entities import (Experiment, Run, RunInfo, RunData, RunTag, Metric,
                             Param, ExperimentTag, RunStatus, LifecycleStage,
                             ViewType)

experiment = Experiment(experiment_id="1",
                        name="experiment_name",
                        artifact_location="artifact_location",
                        lifecycle_stage=LifecycleStage.ACTIVE,
                        tags=[])
run_info = RunInfo(run_uuid="1",
                   run_id="1",
                   experiment_id="experiment_id",
                   user_id="unknown",
                   status=RunStatus.to_string(RunStatus.RUNNING),
                   start_time=1,
                   end_time=None,
                   lifecycle_stage=LifecycleStage.ACTIVE,
                   artifact_uri="artifact_uri")
run_data = RunData(metrics=[], params=[], tags=[])
run = Run(run_info=run_info, run_data=run_data)

metric = Metric(key="metric1", value=1, timestamp=1, step=1)

param = Param(key="param1", value="val1")

tag = RunTag(key="tag1", value="val1")

experiment_tag = ExperimentTag(key="tag1", value="val1")
Example 23
 def get_status(self, databricks_run_id):
     return RunStatus.to_string(self._get_status(databricks_run_id))
Example 24
def test_parameter_search_estimators_produce_expected_outputs(
        cv_class, search_space, backend):
    mlflow.sklearn.autolog(log_input_examples=True, log_model_signatures=True)

    svc = sklearn.svm.SVC()
    cv_model = cv_class(svc, search_space, n_jobs=5, return_train_score=True)
    X, y = get_iris()

    def train_cv_model():
        if backend is None:
            cv_model.fit(X, y)
        else:
            with sklearn.utils.parallel_backend(backend=backend):
                cv_model.fit(X, y)

    with mlflow.start_run() as run:
        train_cv_model()
        run_id = run.info.run_id

    params, metrics, tags, artifacts = get_run_data(run_id)
    expected_cv_params = truncate_dict(
        stringify_dict_values(cv_model.get_params(deep=False)))
    expected_cv_params.update({
        "best_{}".format(param_name): str(param_value)
        for param_name, param_value in cv_model.best_params_.items()
    })
    assert params == expected_cv_params
    assert {
        TRAINING_SCORE: cv_model.score(X, y),
        "best_cv_score": cv_model.best_score_,
    }.items() <= metrics.items()
    assert tags == get_expected_class_tags(cv_model)
    assert MODEL_DIR in artifacts
    assert "best_estimator" in artifacts
    assert "cv_results.csv" in artifacts

    best_estimator = mlflow.sklearn.load_model(
        "runs:/{}/best_estimator".format(run_id))
    assert isinstance(best_estimator, sklearn.svm.SVC)
    cv_model = mlflow.sklearn.load_model("runs:/{}/{}".format(
        run_id, MODEL_DIR))
    assert isinstance(cv_model, cv_class)

    # Ensure that a signature and input example are produced for the best estimator
    best_estimator_conf = get_model_conf(run.info.artifact_uri,
                                         "best_estimator")
    assert best_estimator_conf.signature == infer_signature(
        X, best_estimator.predict(X[:5]))

    best_estimator_path = os.path.join(run.info.artifact_uri, "best_estimator")
    input_example = _read_example(best_estimator_conf, best_estimator_path)
    best_estimator.predict(
        input_example)  # Ensure that input example evaluation succeeds

    client = mlflow.tracking.MlflowClient()
    child_runs = client.search_runs(
        run.info.experiment_id,
        "tags.`mlflow.parentRunId` = '{}'".format(run_id))
    cv_results = pd.DataFrame.from_dict(cv_model.cv_results_)
    # We expect to have created a child run for each point in the parameter search space
    assert len(child_runs) == len(cv_results)

    # Verify that each set of parameter search results has a corresponding MLflow run
    # with the expected data
    for _, result in cv_results.iterrows():
        result_params = result.get("params", {})
        params_search_clause = " and ".join([
            "params.`{}` = '{}'".format(key, value)
            for key, value in result_params.items()
        ])
        search_filter = "tags.`mlflow.parentRunId` = '{}' and {}".format(
            run_id, params_search_clause)
        child_runs = client.search_runs(run.info.experiment_id, search_filter)
        assert len(child_runs) == 1
        child_run = child_runs[0]
        assert child_run.info.status == RunStatus.to_string(RunStatus.FINISHED)
        _, child_metrics, child_tags, _ = get_run_data(child_run.info.run_id)
        assert child_tags == get_expected_class_tags(svc)
        assert "mean_test_score" in child_metrics.keys()
        assert "std_test_score" in child_metrics.keys()
        # Ensure that we do not capture separate metrics for each cross validation split, which
        # would produce very noisy metrics results
        assert len([
            metric for metric in child_metrics.keys()
            if metric.startswith("split")
        ]) == 0
Example 25
class SqlRun(Base):
    """
    DB model for :py:class:`mlflow.entities.Run`. These are recorded in the ``runs`` table.
    """

    __tablename__ = "runs"

    run_uuid = Column(String(32), nullable=False)
    """
    Run UUID: `String` (limit 32 characters). *Primary Key* for ``runs`` table.
    """
    name = Column(String(250))
    """
    Run name: `String` (limit 250 characters).
    """
    source_type = Column(String(20),
                         default=SourceType.to_string(SourceType.LOCAL))
    """
    Source Type: `String` (limit 20 characters). Can be one of ``NOTEBOOK``, ``JOB``, ``PROJECT``,
                 ``LOCAL`` (default), or ``UNKNOWN``.
    """
    source_name = Column(String(500))
    """
    Name of source recording the run: `String` (limit 500 characters).
    """
    entry_point_name = Column(String(50))
    """
    Entry-point name that launched the run: `String` (limit 50 characters).
    """
    user_id = Column(String(256), nullable=True, default=None)
    """
    User ID: `String` (limit 256 characters). Defaults to ``null``.
    """
    status = Column(String(20),
                    default=RunStatus.to_string(RunStatus.SCHEDULED))
    """
    Run Status: `String` (limit 20 characters). Can be one of ``RUNNING``, ``SCHEDULED`` (default),
                ``FINISHED``, ``FAILED``.
    """
    start_time = Column(BigInteger, default=int(time.time()))
    """
    Run start time: `BigInteger`. Defaults to current system time.
    """
    end_time = Column(BigInteger, nullable=True, default=None)
    """
    Run end time: `BigInteger`.
    """
    source_version = Column(String(50))
    """
    Source version: `String` (limit 50 characters).
    """
    lifecycle_stage = Column(String(20), default=LifecycleStage.ACTIVE)
    """
    Lifecycle Stage of run: `String` (limit 20 characters).
                            Can be either ``active`` (default) or ``deleted``.
    """
    artifact_uri = Column(String(200), default=None)
    """
    Default artifact location for this run: `String` (limit 200 characters).
    """
    experiment_id = Column(Integer, ForeignKey("experiments.experiment_id"))
    """
    Experiment ID to which this run belongs: *Foreign Key* into the ``experiments`` table.
    """
    experiment = relationship("SqlExperiment",
                              backref=backref("runs", cascade="all"))
    """
    SQLAlchemy relationship (many:one) with :py:class:`mlflow.store.dbmodels.models.SqlExperiment`.
    """

    __table_args__ = (
        CheckConstraint(source_type.in_(SourceTypes), name="source_type"),
        CheckConstraint(status.in_(RunStatusTypes), name="status"),
        CheckConstraint(
            lifecycle_stage.in_(
                LifecycleStage.view_type_to_stages(ViewType.ALL)),
            name="runs_lifecycle_stage",
        ),
        PrimaryKeyConstraint("run_uuid", name="run_pk"),
    )

    @staticmethod
    def get_attribute_name(mlflow_attribute_name):
        """
        Resolves an MLflow attribute name to a `SqlRun` attribute name.
        """
        # Currently, MLflow Search attributes defined in `SearchUtils.VALID_SEARCH_ATTRIBUTE_KEYS`
        # share the same names as their corresponding `SqlRun` attributes. Therefore, this function
        # returns the same attribute name
        return mlflow_attribute_name

    def to_mlflow_entity(self):
        """
        Convert DB model to corresponding MLflow entity.

        :return: :py:class:`mlflow.entities.Run`.
        """
        run_info = RunInfo(
            run_uuid=self.run_uuid,
            run_id=self.run_uuid,
            experiment_id=str(self.experiment_id),
            user_id=self.user_id,
            status=self.status,
            start_time=self.start_time,
            end_time=self.end_time,
            lifecycle_stage=self.lifecycle_stage,
            artifact_uri=self.artifact_uri,
        )

        run_data = RunData(
            metrics=[m.to_mlflow_entity() for m in self.latest_metrics],
            params=[p.to_mlflow_entity() for p in self.params],
            tags=[t.to_mlflow_entity() for t in self.tags],
        )

        return Run(run_info=run_info, run_data=run_data)
Example 26
class SqlRun(Base):
    """
    DB model for :py:class:`mlflow.entities.Run`. These are recorded in the ``runs`` table.
    """
    __tablename__ = 'runs'

    run_uuid = Column(String(32), nullable=False)
    """
    Run UUID: `String` (limit 32 characters). *Primary Key* for ``runs`` table.
    """
    name = Column(String(250))
    """
    Run name: `String` (limit 250 characters).
    """
    source_type = Column(String(20), default=SourceType.to_string(SourceType.LOCAL))
    """
    Source Type: `String` (limit 20 characters). Can be one of ``NOTEBOOK``, ``JOB``, ``PROJECT``,
                 ``LOCAL`` (default), or ``UNKNOWN``.
    """
    source_name = Column(String(500))
    """
    Name of source recording the run: `String` (limit 500 characters).
    """
    entry_point_name = Column(String(50))
    """
    Entry-point name that launched the run: `String` (limit 50 characters).
    """
    user_id = Column(String(256), nullable=True, default=None)
    """
    User ID: `String` (limit 256 characters). Defaults to ``null``.
    """
    status = Column(String(20), default=RunStatus.to_string(RunStatus.SCHEDULED))
    """
    Run Status: `String` (limit 20 characters). Can be one of ``RUNNING``, ``SCHEDULED`` (default),
                ``FINISHED``, ``FAILED``.
    """
    start_time = Column(BigInteger, default=int(time.time()))
    """
    Run start time: `BigInteger`. Defaults to current system time.
    """
    end_time = Column(BigInteger, nullable=True, default=None)
    """
    Run end time: `BigInteger`.
    """
    source_version = Column(String(50))
    """
    Source version: `String` (limit 50 characters).
    """
    lifecycle_stage = Column(String(20), default=LifecycleStage.ACTIVE)
    """
    Lifecycle Stage of run: `String` (limit 20 characters).
                            Can be either ``active`` (default) or ``deleted``.
    """
    artifact_uri = Column(String(200), default=None)
    """
    Default artifact location for this run: `String` (limit 200 characters).
    """
    experiment_id = Column(Integer, ForeignKey('experiments.experiment_id'))
    """
    Experiment ID to which this run belongs: *Foreign Key* into the ``experiments`` table.
    """
    experiment = relationship('SqlExperiment', backref=backref('runs', cascade='all'))
    """
    SQLAlchemy relationship (many:one) with :py:class:`mlflow.store.dbmodels.models.SqlExperiment`.
    """

    __table_args__ = (
        CheckConstraint(source_type.in_(SourceTypes), name='source_type'),
        CheckConstraint(status.in_(RunStatusTypes), name='status'),
        CheckConstraint(lifecycle_stage.in_(LifecycleStage.view_type_to_stages(ViewType.ALL)),
                        name='lifecycle_stage'),
        PrimaryKeyConstraint('run_uuid', name='run_pk')
    )

    def to_mlflow_entity(self):
        """
        Convert DB model to corresponding MLflow entity.

        :return: :py:class:`mlflow.entities.Run`.
        """
        # run has diff parameter names in __init__ than in properties_ so we do this manually
        info = _create_entity(RunInfo, self)
        data = _create_entity(RunData, self)
        return Run(run_info=info, run_data=data)
Example 27
 def _populate_tables(self,
                      exp_count=3,
                      run_count=2,
                      param_count=5,
                      metric_count=3,
                      values_count=10):
     print("populate tables")
     self.experiments = [
         str(random_int(100, int(1e9))) for _ in range(exp_count)
     ]
     self.exp_data = {}
     self.run_data = {}
     self.experiments.append(TestDynamodbStore.DEFAULT_EXPERIMENT_ID)
     for exp in self.experiments:
         # create experiment
         exp_folder = os.path.join(self.table_prefix, exp)
         d = {
             "experiment_id": exp,
             "name": random_str(),
             "artifact_location": exp_folder,
             "lifecycle_stage": LifecycleStage.ACTIVE,  # Required for tests
         }
         self.exp_data[exp] = d
         self._write_table(DynamodbStore.EXPERIMENT_TABLE, d)
         # add runs
         self.exp_data[exp]["runs"] = []
         for _ in range(run_count):
             run_id = uuid.uuid4().hex
             self.exp_data[exp]["runs"].append(run_id)
             run_folder = os.path.join(exp_folder, run_id)
             run_info = {
                 "run_uuid": run_id,
                 "run_id": run_id,
                 "experiment_id": exp,
                 "user_id": random_str(random_int(10, 25)),
                 "status": random.choice(RunStatus.all_status()),
                 "start_time": random_int(1, 10),
                 "end_time": random_int(20, 30),
                 "tags": [],
                 "artifact_uri": "{}/artifacts".format(run_folder),
                 "lifecycle_stage":
                 LifecycleStage.ACTIVE,  # Required for tests
             }
             self._write_table("run", run_info)
             self.run_data[run_id] = run_info
             # params
             params = {}
             for _ in range(param_count):
                 param_name = random_str(random_int(4, 12))
                 param_value = random_str(random_int(10, 15))
                 self._write_table(
                     "run_param",
                     {
                         "run_id": run_id,
                         "key": param_name,
                         "value": param_value
                     },
                 )
                 params[param_name] = param_value
             self.run_data[run_id]["params"] = params
             # metrics
             metrics = {}
             for _ in range(metric_count):
                 metric_name = random_str(random_int(6, 10))
                 timestamp = int(time.time())
                 values, values_map = [], []
                 for i in range(values_count):
                     metric_value = random_int(i * 100, (i * 1) * 100)
                     timestamp += random_int(i * 1000, (i + 1) * 1000)
                     values.append((timestamp, metric_value))
                     values_map.insert(0, {
                         "timestamp": timestamp,
                         "value": metric_value
                     })
                 self._write_table(
                     "run_metric",
                     {
                         "run_id": run_id,
                         "key": metric_name,
                         "metrics": values_map
                     },
                 )
                 metrics[metric_name] = values
             self.run_data[run_id]["metrics"] = metrics
Example 28
def flownet_ahm_run(x: list, args: argparse.Namespace):
    """
    Run individual ahm using the actual hyperparameter values for the run.

    Args:
        x: Actual values for the hyperparameters.
        args: The argparse namespace given by the user.

    Returns:
        Nothing

    """
    config = create_ahm_config(
        base_config=args.config,
        hyperparameter_values=x,
        update_config=args.update_config,
    )

    mlflow.set_tracking_uri(str(args.output_folder))
    mlflow.set_experiment(f"{config.name}")
    mlflow.start_run(run_name=config.name)

    run_args = copy.deepcopy(args)
    run_args.output_folder = pathlib.Path(
        mlflow.get_artifact_uri().rsplit("artifacts")[0] + "flownet_run")
    try:
        parameters = list_hyperparameters_names(
            yaml.safe_load(args.config.read_text()), [])

        for (parameter, param_value) in zip(parameters, x):
            mlflow.log_param(key=parameter, value=param_value)

        run_flownet_history_matching(config, run_args)

        df_analytics = pd.read_csv(
            (run_args.output_folder / config.ert.analysis[0].outfile
             ).with_suffix(".csv")).drop_duplicates()

        hyperopt_loss = 0.0
        for _, row in df_analytics.iterrows():
            for i, metric in enumerate(df_analytics.columns[2:]):
                key = f"{row[0]}_{metric}"
                mlflow.log_metric(
                    key=key.replace(":", "."),
                    value=row[i + 2],
                    step=row[1],
                )

                if (row[1] == df_analytics["iteration"].max()
                        and row[0] in config.flownet.hyperopt.loss.keys
                        and metric == config.flownet.hyperopt.loss.metric):
                    hyperopt_loss += (
                        row[i + 2] * config.flownet.hyperopt.loss.factors[
                            config.flownet.hyperopt.loss.keys.index(row[0])])

        mlflow.log_metric("hyperopt_loss", value=hyperopt_loss)

        mlflow.end_run(status=RunStatus.to_string(RunStatus.FINISHED))
        return {"loss": hyperopt_loss, "status": STATUS_OK}

    except Exception as exception:  # pylint: disable=broad-except
        print(exception)
        mlflow.end_run(status=RunStatus.to_string(RunStatus.FAILED))
        return {"status": STATUS_FAIL}
Example 29
 def __exit__(self, exc_type, exc_val, exc_tb):
     status = RunStatus.FINISHED if exc_type is None else RunStatus.FAILED
     end_run(RunStatus.to_string(status))
     return exc_type is None
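Because __exit__ returns exc_type is None, an exception raised inside the with block both marks the run FAILED and propagates to the caller. A minimal usage sketch of that behaviour (the ValueError is purely illustrative, and a local file-based tracking store is assumed):

import mlflow
from mlflow.entities import RunStatus

try:
    with mlflow.start_run() as run:
        raise ValueError("simulated failure")  # hypothetical error inside the run
except ValueError:
    pass  # __exit__ re-raises, so the caller still sees the exception

failed = mlflow.get_run(run.info.run_id)
assert failed.info.status == RunStatus.to_string(RunStatus.FAILED)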
Example 30
class SqlRun(Base):
    """
    DB model for :py:class:`mlflow.entities.Run`. These are recorded in the ``runs`` table.
    """
    __tablename__ = 'runs'

    run_uuid = Column(String(32), nullable=False)
    """
    Run UUID: `String` (limit 32 characters). *Primary Key* for ``runs`` table.
    """
    name = Column(String(250))
    """
    Run name: `String` (limit 250 characters).
    """
    source_type = Column(String(20),
                         default=SourceType.to_string(SourceType.LOCAL))
    """
    Source Type: `String` (limit 20 characters). Can be one of ``NOTEBOOK``, ``JOB``, ``PROJECT``,
                 ``LOCAL`` (default), or ``UNKNOWN``.
    """
    source_name = Column(String(500))
    """
    Name of source recording the run: `String` (limit 500 characters).
    """
    entry_point_name = Column(String(50))
    """
    Entry-point name that launched the run: `String` (limit 50 characters).
    """
    user_id = Column(String(256), nullable=True, default=None)
    """
    User ID: `String` (limit 256 characters). Defaults to ``null``.
    """
    status = Column(String(20),
                    default=RunStatus.to_string(RunStatus.SCHEDULED))
    """
    Run Status: `String` (limit 20 characters). Can be one of ``RUNNING``, ``SCHEDULED`` (default),
                ``FINISHED``, ``FAILED``.
    """
    start_time = Column(BigInteger, default=int(time.time()))
    """
    Run start time: `BigInteger`. Defaults to current system time.
    """
    end_time = Column(BigInteger, nullable=True, default=None)
    """
    Run end time: `BigInteger`.
    """
    source_version = Column(String(50))
    """
    Source version: `String` (limit 50 characters).
    """
    lifecycle_stage = Column(String(20), default=LifecycleStage.ACTIVE)
    """
    Lifecycle Stage of run: `String` (limit 20 characters).
                            Can be either ``active`` (default) or ``deleted``.
    """
    artifact_uri = Column(String(200), default=None)
    """
    Default artifact location for this run: `String` (limit 200 characters).
    """
    experiment_id = Column(Integer, ForeignKey('experiments.experiment_id'))
    """
    Experiment ID to which this run belongs: *Foreign Key* into the ``experiments`` table.
    """
    experiment = relationship('SqlExperiment',
                              backref=backref('runs', cascade='all'))
    """
    SQLAlchemy relationship (many:one) with :py:class:`mlflow.store.dbmodels.models.SqlExperiment`.
    """

    __table_args__ = (CheckConstraint(source_type.in_(SourceTypes),
                                      name='source_type'),
                      CheckConstraint(status.in_(RunStatusTypes),
                                      name='status'),
                      CheckConstraint(lifecycle_stage.in_(
                          LifecycleStage.view_type_to_stages(ViewType.ALL)),
                                      name='runs_lifecycle_stage'),
                      PrimaryKeyConstraint('run_uuid', name='run_pk'))

    def to_mlflow_entity(self):
        """
        Convert DB model to corresponding MLflow entity.

        :return: :py:class:`mlflow.entities.Run`.
        """
        run_info = RunInfo(run_uuid=self.run_uuid,
                           run_id=self.run_uuid,
                           experiment_id=str(self.experiment_id),
                           user_id=self.user_id,
                           status=self.status,
                           start_time=self.start_time,
                           end_time=self.end_time,
                           lifecycle_stage=self.lifecycle_stage,
                           artifact_uri=self.artifact_uri)

        # only get latest recorded metrics per key
        all_metrics = [m.to_mlflow_entity() for m in self.metrics]
        metrics = {}
        for m in all_metrics:
            existing_metric = metrics.get(m.key)
            if (existing_metric is None)\
                or ((m.step, m.timestamp, m.value) >=
                    (existing_metric.step, existing_metric.timestamp,
                        existing_metric.value)):
                metrics[m.key] = m

        run_data = RunData(metrics=list(metrics.values()),
                           params=[p.to_mlflow_entity() for p in self.params],
                           tags=[t.to_mlflow_entity() for t in self.tags])

        return Run(run_info=run_info, run_data=run_data)