コード例 #1
0
ファイル: __init__.py プロジェクト: yassineAlouini/mlflow
def _maybe_set_run_terminated(active_run, status):
    """
    If the passed-in active run is defined and still running (i.e. hasn't already been terminated
    within user code), mark it as terminated with the passed-in status.

    :param active_run: Run object whose ``info.run_uuid`` identifies the tracking-server run,
                       or None (in which case this is a no-op).
    :param status: Terminal status string to set, e.g. "FINISHED" or "FAILED".
    """
    if active_run is None:
        return
    run_id = active_run.info.run_uuid
    # Hoist the service lookup: the original fetched the service twice for one update.
    service = tracking.get_service()
    cur_status = service.get_run(run_id).info.status
    if RunStatus.is_terminated(cur_status):
        # Already terminated within user code; don't overwrite the existing status.
        return
    service.set_terminated(run_id, status)
コード例 #2
0
ファイル: databricks.py プロジェクト: yassineAlouini/mlflow
 def _print_description_and_log_tags(self):
     """Print the Databricks job run's status-page URL and record run metadata
     (run URL, Databricks job-run ID, webapp host, and job ID when available)
     as tags on the corresponding MLflow run."""
     eprint(
         "=== Launched MLflow run as Databricks job run with ID %s. Getting run status "
         "page URL... ===" % self._databricks_run_id)
     run_info = self._job_runner.jobs_runs_get(self._databricks_run_id)
     jobs_page_url = run_info["run_page_url"]
     eprint("=== Check the run's status at %s ===" % jobs_page_url)
     host_creds = databricks_utils.get_databricks_host_creds(
         self._job_runner.databricks_profile)
     # Tag the MLflow run with each piece of Databricks metadata.
     unconditional_tags = [
         (MLFLOW_DATABRICKS_RUN_URL, jobs_page_url),
         (MLFLOW_DATABRICKS_SHELL_JOB_RUN_ID, self._databricks_run_id),
         (MLFLOW_DATABRICKS_WEBAPP_URL, host_creds.host),
     ]
     for tag_key, tag_value in unconditional_tags:
         tracking.get_service().set_tag(self._mlflow_run_id, tag_key, tag_value)
     job_id = run_info.get('job_id')
     # In some releases of Databricks we do not return the job ID. We start including it in DB
     # releases 2.80 and above.
     if job_id is not None:
         tracking.get_service().set_tag(self._mlflow_run_id,
                                        MLFLOW_DATABRICKS_SHELL_JOB_ID,
                                        job_id)
コード例 #3
0
def test_log_metric_validation():
    """Logging a non-numeric metric value is rejected: the run ends with no metrics."""
    try:
        tracking.set_tracking_uri(tempfile.mkdtemp())
        run = start_run()
        run_uuid = run.info.run_uuid
        with run:
            mlflow.log_metric("name_1", "apple")
        stored_run = tracking.get_service().get_run(run_uuid)
        assert not stored_run.data.metrics
    finally:
        # Always restore the default tracking URI for subsequent tests.
        tracking.set_tracking_uri(None)
コード例 #4
0
ファイル: __init__.py プロジェクト: lgromanowski/mlflow
def _create_run(uri, experiment_id, work_dir, entry_point, parameters):
    """
    Create a ``Run`` against the current MLflow tracking server, logging metadata (e.g. the URI,
    entry point, and parameters of the project) about the run. Return an ``ActiveRun`` that can be
    used to report additional data about the run (metrics/params) to the tracking server.

    :param uri: Project URI; local URIs are resolved to a Git URL when possible.
    :param experiment_id: ID of the experiment under which to create the run.
    :param work_dir: Local working directory of the fetched project (used for the Git commit).
    :param entry_point: Name of the project entry point being run.
    :param parameters: Dict of run parameters to log, or None.
    """
    if _is_local_uri(uri):
        source_name = tracking.utils._get_git_url_if_present(_expand_uri(uri))
    else:
        source_name = _expand_uri(uri)
    # Hoist the service lookup out of the parameter-logging loop below.
    service = tracking.get_service()
    active_run = service.create_run(
        experiment_id=experiment_id,
        source_name=source_name,
        source_version=_get_git_commit(work_dir),
        entry_point_name=entry_point,
        source_type=SourceType.PROJECT)
    if parameters is not None:
        for key, value in parameters.items():
            service.log_param(active_run.info.run_uuid, key, value)
    return active_run
コード例 #5
0
def test_start_and_end_run():
    """start_run()/end_run() work without a ``with`` block and persist logged metrics."""
    try:
        tracking.set_tracking_uri(tempfile.mkdtemp())
        # Use the start_run() and end_run() APIs without a `with` block, verify they work.
        run = start_run()
        mlflow.log_metric("name_1", 25)
        end_run()
        stored_run = tracking.get_service().get_run(run.info.run_uuid)
        # Validate metrics
        expected_pairs = {"name_1": 25}
        assert len(stored_run.data.metrics) == 1
        for m in stored_run.data.metrics:
            assert expected_pairs[m.key] == m.value
    finally:
        tracking.set_tracking_uri(None)
コード例 #6
0
def test_start_run_context_manager():
    """A run used as a context manager ends FINISHED on success and FAILED on error."""
    try:
        tracking.set_tracking_uri(tempfile.mkdtemp())
        run_one = start_run()
        uuid_one = run_one.info.run_uuid
        with run_one:
            # Check that start_run() causes the run information to be persisted in the store
            persisted = tracking.get_service().get_run(uuid_one)
            assert persisted is not None
            assert persisted.info == run_one.info
        stored_one = tracking.get_service().get_run(uuid_one)
        assert stored_one.info.status == RunStatus.FINISHED
        # Launch a separate run that fails, verify the run status is FAILED and the run UUID is
        # different
        run_two = start_run()
        assert run_two.info.run_uuid != uuid_one
        with pytest.raises(Exception):
            with run_two:
                raise Exception("Failing run!")
        stored_two = tracking.get_service().get_run(run_two.info.run_uuid)
        assert stored_two.info.status == RunStatus.FAILED
    finally:
        tracking.set_tracking_uri(None)
コード例 #7
0
def test_log_metric():
    """Logged metrics (including re-logged and nested keys) persist with latest values."""
    try:
        tracking.set_tracking_uri(tempfile.mkdtemp())
        run = start_run()
        run_uuid = run.info.run_uuid
        with run:
            mlflow.log_metric("name_1", 25)
            mlflow.log_metric("name_2", -3)
            mlflow.log_metric("name_1", 30)
            mlflow.log_metric("nested/nested/name", 40)
        stored_run = tracking.get_service().get_run(run_uuid)
        # Validate metrics: "name_1" keeps only its most recent value.
        expected_pairs = {"name_1": 30, "name_2": -3, "nested/nested/name": 40}
        assert len(stored_run.data.metrics) == 3
        for m in stored_run.data.metrics:
            assert expected_pairs[m.key] == m.value
    finally:
        tracking.set_tracking_uri(None)
コード例 #8
0
ファイル: __init__.py プロジェクト: lgromanowski/mlflow
def _wait_for(submitted_run_obj):
    """Wait on the passed-in submitted run, reporting its status to the tracking server."""
    run_id = submitted_run_obj.run_id
    active_run = None
    # Note: there's a small chance we fail to report the run's status to the tracking server if
    # we're interrupted before we reach the try block below
    try:
        if run_id is not None:
            active_run = tracking.get_service().get_run(run_id)
        succeeded = submitted_run_obj.wait()
        if succeeded:
            eprint("=== Run (ID '%s') succeeded ===" % run_id)
            _maybe_set_run_terminated(active_run, "FINISHED")
        else:
            _maybe_set_run_terminated(active_run, "FAILED")
            raise ExecutionException("Run (ID '%s') failed" % run_id)
    except KeyboardInterrupt:
        # Propagate the interrupt after cancelling the run and marking it FAILED.
        eprint("=== Run (ID '%s') interrupted, cancelling run ===" % run_id)
        submitted_run_obj.cancel()
        _maybe_set_run_terminated(active_run, "FAILED")
        raise
コード例 #9
0
ファイル: __init__.py プロジェクト: lgromanowski/mlflow
def _run(uri, entry_point="main", version=None, parameters=None, experiment_id=None,
         mode=None, cluster_spec=None, git_username=None, git_password=None, use_conda=True,
         storage_dir=None, block=True, run_id=None):
    """
    Helper that delegates to the project-running method corresponding to the passed-in mode.
    Returns a ``SubmittedRun`` corresponding to the project run.

    :param uri: URI of the project to run (local path or remote location).
    :param entry_point: Name of the project entry point to run.
    :param version: Project version to fetch (e.g. a Git ref), or None.
    :param parameters: Dict of entry-point parameters, or None for no parameters.
    :param experiment_id: Experiment under which to create the run; defaults to the
        value returned by ``_get_experiment_id()``.
    :param mode: Execution mode — "local" (or None) or "databricks".
    :param cluster_spec: Databricks cluster spec; only used in "databricks" mode.
    :param git_username, git_password: Credentials for fetching the project.
    :param use_conda: Whether to create/use a conda environment for local runs.
    :param storage_dir: Storage directory passed to the entry-point command.
    :param block: If True, run the local entry point synchronously; otherwise spawn
        a ``mlflow run`` subprocess.
    :param run_id: Existing run ID to reuse; a new run is created when not given.
    :raises ExecutionException: If ``mode`` is not a supported execution mode.
    """
    exp_id = experiment_id or _get_experiment_id()
    parameters = parameters or {}
    work_dir = _fetch_project(uri=uri, force_tempdir=False, version=version,
                              git_username=git_username, git_password=git_password)
    project = _project_spec.load_project(work_dir)
    project.get_entry_point(entry_point)._validate_parameters(parameters)
    # Reuse the tracking-server run when a run ID was supplied; otherwise create one.
    if run_id:
        active_run = tracking.get_service().get_run(run_id)
    else:
        active_run = _create_run(uri, exp_id, work_dir, entry_point, parameters)

    if mode == "databricks":
        # Imported lazily to avoid pulling in Databricks dependencies for local runs.
        from mlflow.projects.databricks import run_databricks
        return run_databricks(
            remote_run=active_run,
            uri=uri, entry_point=entry_point, work_dir=work_dir, parameters=parameters,
            experiment_id=exp_id, cluster_spec=cluster_spec)
    elif mode == "local" or mode is None:
        # Synchronously create a conda environment (even though this may take some time) to avoid
        # failures due to multiple concurrent attempts to create the same conda env.
        conda_env_name = _get_or_create_conda_env(project.conda_env_path) if use_conda else None
        # In blocking mode, run the entry point command in blocking fashion, sending status updates
        # to the tracking server when finished. Note that the run state may not be persisted to the
        # tracking server if interrupted
        if block:
            command = _get_entry_point_command(
                project, entry_point, parameters, conda_env_name, storage_dir)
            return _run_entry_point(command, work_dir, exp_id, run_id=active_run.info.run_uuid)
        # Otherwise, invoke `mlflow run` in a subprocess
        return _invoke_mlflow_run_subprocess(
            work_dir=work_dir, entry_point=entry_point, parameters=parameters, experiment_id=exp_id,
            use_conda=use_conda, storage_dir=storage_dir, run_id=active_run.info.run_uuid)
    # Any other mode value is unsupported.
    supported_modes = ["local", "databricks"]
    raise ExecutionException("Got unsupported execution mode %s. Supported "
                             "values: %s" % (mode, supported_modes))
コード例 #10
0
def run_databricks(uri, entry_point, version, parameters, experiment_id,
                   cluster_spec, git_username, git_password):
    """
    Runs the project at the specified URI on Databricks, returning a `SubmittedRun` that can be
    used to query the run's status or wait for the resulting Databricks Job run to terminate.

    :param uri: URI of the project to run.
    :param entry_point: Name of the project entry point to run.
    :param version: Project version to fetch (e.g. a Git ref), or None.
    :param parameters: Dict of entry-point parameters.
    :param experiment_id: Experiment under which to create the tracking-server run.
    :param cluster_spec: Path to a JSON file describing the Databricks cluster spec.
    :param git_username, git_password: Credentials for fetching the project.
    :raises ValueError: Propagated if the cluster-spec file is not valid JSON.
    """
    tracking_uri = tracking.get_tracking_uri()
    _before_run_validations(tracking_uri, cluster_spec)
    work_dir = _fetch_and_clean_project(uri=uri,
                                        version=version,
                                        git_username=git_username,
                                        git_password=git_password)
    project = _project_spec.load_project(work_dir)
    project.get_entry_point(entry_point)._validate_parameters(parameters)
    dbfs_fuse_uri = _upload_project_to_dbfs(work_dir, experiment_id)
    remote_run = tracking.get_service().create_run(
        experiment_id=experiment_id,
        source_name=_expand_uri(uri),
        source_version=_get_git_commit(work_dir),
        entry_point_name=entry_point,
        source_type=SourceType.PROJECT)
    # Environment variables forwarded to the Databricks job so it reports to our server.
    env_vars = {
        tracking._TRACKING_URI_ENV_VAR: tracking_uri,
        tracking._EXPERIMENT_ID_ENV_VAR: experiment_id,
    }
    run_id = remote_run.info.run_uuid
    eprint("=== Running entry point %s of project %s on Databricks. ===" %
           (entry_point, uri))
    # Launch run on Databricks. Parse the spec into a separate name instead of
    # rebinding `cluster_spec`, so the path parameter is not shadowed by the dict.
    with open(cluster_spec, 'r') as handle:
        try:
            parsed_cluster_spec = json.load(handle)
        except ValueError:
            eprint(
                "Error when attempting to load and parse JSON cluster spec from file "
                "%s. " % cluster_spec)
            raise
    command = _get_databricks_run_cmd(dbfs_fuse_uri, run_id, entry_point,
                                      parameters)
    db_run_id = _run_shell_command_job(uri, command, env_vars, parsed_cluster_spec)
    return DatabricksSubmittedRun(db_run_id, run_id)
コード例 #11
0
def test_log_param():
    """Logged params persist (latest value wins; values stored as strings)."""
    try:
        tracking.set_tracking_uri(tempfile.mkdtemp())
        run = start_run()
        run_uuid = run.info.run_uuid
        with run:
            mlflow.log_param("name_1", "a")
            mlflow.log_param("name_2", "b")
            mlflow.log_param("name_1", "c")
            mlflow.log_param("nested/nested/name", 5)
        stored_run = tracking.get_service().get_run(run_uuid)
        # Validate params: re-logging "name_1" keeps only the latest value, and the
        # numeric value 5 comes back as the string "5".
        expected_pairs = {"name_1": "c", "name_2": "b", "nested/nested/name": "5"}
        assert len(stored_run.data.params) == 3
        for p in stored_run.data.params:
            assert expected_pairs[p.key] == p.value
    finally:
        tracking.set_tracking_uri(None)
コード例 #12
0
def set_tag_mock():
    """Yield a mock of the tracking service's ``set_tag`` method.

    Patches the service accessor used by ``mlflow.projects.databricks`` with a
    Mock that wraps the real service, so calls still go through while being
    recorded for assertions.
    """
    with mock.patch("mlflow.projects.databricks.tracking.get_service") as patched_get_service:
        wrapped_service = mock.Mock(wraps=get_service())
        patched_get_service.return_value = wrapped_service
        yield wrapped_service.set_tag
コード例 #13
0
ファイル: main.py プロジェクト: jmquintana79/utilsDS
"""

import sys
sys.path.append('../')
from models.metrics import metrics_regression

import click
import warnings
import mlflow
import mlflow.sklearn


# # available experiments
from mlflow.tracking import get_service
# get service
# Query the tracking service for all known experiments.
service = get_service()
# returns a list of mlflow.entities.Experiment
experiments = service.list_experiments()
# Collect experiment IDs and per-experiment metadata. Comprehensions replace
# the original append loop, and the unused enumerate index is dropped.
lidexp = [iexp.experiment_id for iexp in experiments]
didexp = {iexp.experiment_id: {'name': iexp.name, 'location': iexp.artifact_location}
          for iexp in experiments}


# # ARGUMENTS: GENERAL
from experiments.arguments import *


# # LOAD DATA