예제 #1
0
def run_databricks(uri, entry_point, version, parameters, experiment_id,
                   cluster_spec, git_username, git_password):
    """
    Launch the project found at ``uri`` as a Databricks job run.

    The project is fetched, its entry point parameters are validated, the project is uploaded
    to DBFS and a tracking run is created before the shell command job is submitted.

    :param uri: URI of the project to run.
    :param entry_point: Name of the entry point to execute.
    :param version: Project version forwarded to the fetch step.
    :param parameters: Parameters validated against, and passed to, the entry point.
    :param experiment_id: ID of the experiment under which the run is created.
    :param cluster_spec: Path to a file containing the JSON cluster specification.
    :param git_username: Git username forwarded to the fetch step.
    :param git_password: Git password forwarded to the fetch step.
    :return: A ``DatabricksSubmittedRun`` built from the Databricks run ID and the tracking
             run ID.
    :raises ValueError: Re-raised (after printing a message) when the cluster spec file does
                        not contain valid JSON.
    """
    tracking_uri = tracking.get_tracking_uri()
    _before_run_validations(tracking_uri, cluster_spec)

    # Fetch the project and make sure the requested entry point accepts the parameters.
    work_dir = _fetch_and_clean_project(
        uri=uri, version=version, git_username=git_username, git_password=git_password)
    entry_point_obj = _project_spec.load_project(work_dir).get_entry_point(entry_point)
    entry_point_obj._validate_parameters(parameters)

    dbfs_fuse_uri = _upload_project_to_dbfs(work_dir, experiment_id)
    remote_run = tracking._create_run(
        experiment_id=experiment_id,
        source_name=_expand_uri(uri),
        source_version=tracking._get_git_commit(work_dir),
        entry_point_name=entry_point,
        source_type=SourceType.PROJECT)

    # Environment handed to the remote job so that it logs to the same run.
    env_vars = {}
    env_vars[tracking._TRACKING_URI_ENV_VAR] = tracking_uri
    env_vars[tracking._EXPERIMENT_ID_ENV_VAR] = experiment_id
    run_id = remote_run.run_info.run_uuid

    eprint("=== Running entry point %s of project %s on Databricks. ===" %
           (entry_point, uri))

    # Parse the cluster spec; the path is kept in its own name so the error message can use it.
    spec_path = cluster_spec
    with open(spec_path, 'r') as spec_file:
        try:
            parsed_spec = json.load(spec_file)
        except ValueError:
            eprint(
                "Error when attempting to load and parse JSON cluster spec from file "
                "%s. " % spec_path)
            raise

    # Launch run on Databricks
    command = _get_databricks_run_cmd(dbfs_fuse_uri, run_id, entry_point, parameters)
    db_run_id = _run_shell_command_job(uri, command, env_vars, parsed_spec)
    return DatabricksSubmittedRun(db_run_id, run_id)
예제 #2
0
def test_load_project(tmpdir, mlproject, conda_env_path, conda_env_contents, mlproject_path):
    """
    Test that we can load a project with various combinations of an MLproject / conda.yaml file

    :param tmpdir: Temporary directory object in which the project files are written
                   (presumably pytest's ``tmpdir`` fixture - confirm against the test setup).
    :param mlproject: Contents of MLproject file. If None, no MLproject file will be written
    :param conda_env_path: Path to conda environment file. If None, no conda environment file will
                           be written.
    :param conda_env_contents: Contents of conda environment file (written if conda_env_path is
                               not None)
    :param mlproject_path: Path, relative to ``tmpdir``, at which the MLproject file is written
                           (only used when ``mlproject`` is truthy).
    """
    if mlproject:
        tmpdir.join(mlproject_path).write(mlproject)
    if conda_env_path:
        tmpdir.join(conda_env_path).write(conda_env_contents)
    project = _project_spec.load_project(tmpdir.strpath)
    assert project._entry_points == {}
    expected_env_path = (
        os.path.abspath(os.path.join(tmpdir.strpath, conda_env_path)) if conda_env_path else None
    )
    assert project.env_config_path == expected_env_path
    if conda_env_path:
        # Read through a context manager so the file handle is closed deterministically
        # instead of being left for the garbage collector.
        with open(project.env_config_path) as env_file:
            assert env_file.read() == conda_env_contents
예제 #3
0
def _run(uri, experiment_id, entry_point="main", version=None, parameters=None,
         backend=None, backend_config=None, use_conda=True,
         storage_dir=None, synchronous=True, run_id=None):
    """
    Helper that delegates to the project-running method corresponding to the passed-in backend.
    Returns a ``SubmittedRun`` corresponding to the project run.

    Before dispatching, the project is fetched and loaded, the entry point parameters are
    validated, a run is resolved (fetched or created), and parameters plus git-related tags
    are logged to that run.

    :param uri: Project URI; passed to ``_fetch_project``, ``_create_run`` and (for the
                Databricks backend) ``run_databricks``.
    :param experiment_id: Experiment ID used when creating a new run and when executing the
                          project.
    :param entry_point: Name of the entry point to run. Defaults to ``"main"``.
    :param version: Project version forwarded to ``_fetch_project``; also logged as the branch
                    tag when ``_is_valid_branch_name`` accepts it.
    :param parameters: Dict of entry point parameters. A falsy value is replaced by ``{}``.
    :param backend: One of ``"local"`` (or ``None``), ``"databricks"`` or ``"kubernetes"``.
    :param backend_config: Passed as ``cluster_spec`` to the Databricks backend and to
                           ``_parse_kubernetes_config`` for the Kubernetes backend. Not read by
                           the local backend.
    :param use_conda: Local backend only: if truthy (and the project defines no ``docker_env``),
                      a conda environment is created/reused and activated for the command.
    :param storage_dir: Forwarded to ``_get_entry_point_command`` and
                        ``_invoke_mlflow_run_subprocess``.
    :param synchronous: Local backend only: if truthy, run the entry point command directly via
                        ``_run_entry_point``; otherwise invoke ``mlflow run`` in a subprocess.
    :param run_id: If truthy, the existing run with this ID is fetched and reused instead of
                   creating a new run.
    :raises ExecutionException: If ``backend`` is not one of the supported values. Note that
                                this is raised only after the run has been resolved and
                                parameters/tags have been logged.
    """

    parameters = parameters or {}
    work_dir = _fetch_project(uri=uri, force_tempdir=False, version=version)
    project = _project_spec.load_project(work_dir)
    _validate_execution_environment(project, backend)
    project.get_entry_point(entry_point)._validate_parameters(parameters)
    # Reuse the caller-supplied run when given, otherwise create a fresh one.
    if run_id:
        active_run = tracking.MlflowClient().get_run(run_id)
    else:
        active_run = _create_run(uri, experiment_id, work_dir, entry_point)

    # Consolidate parameters for logging.
    # `storage_dir` is `None` since we want to log actual path not downloaded local path
    entry_point_obj = project.get_entry_point(entry_point)
    final_params, extra_params = entry_point_obj.compute_parameters(parameters, storage_dir=None)
    for key, value in (list(final_params.items()) + list(extra_params.items())):
        tracking.MlflowClient().log_param(active_run.info.run_id, key, value)

    # The repo URL is written under both the current and the legacy tag key.
    repo_url = _get_git_repo_url(work_dir)
    if repo_url is not None:
        for tag in [MLFLOW_GIT_REPO_URL, LEGACY_MLFLOW_GIT_REPO_URL]:
            tracking.MlflowClient().set_tag(active_run.info.run_id, tag, repo_url)

    # Add branch name tag (current and legacy key) if a branch is specified through `version`
    if _is_valid_branch_name(work_dir, version):
        for tag in [MLFLOW_GIT_BRANCH, LEGACY_MLFLOW_GIT_BRANCH_NAME]:
            tracking.MlflowClient().set_tag(active_run.info.run_id, tag, version)

    if backend == "databricks":
        tracking.MlflowClient().set_tag(active_run.info.run_id, MLFLOW_PROJECT_BACKEND,
                                        "databricks")
        # Imported lazily, only when the Databricks backend is actually requested.
        from mlflow.projects.databricks import run_databricks
        return run_databricks(
            remote_run=active_run,
            uri=uri, entry_point=entry_point, work_dir=work_dir, parameters=parameters,
            experiment_id=experiment_id, cluster_spec=backend_config)

    elif backend == "local" or backend is None:
        # `command` collects the environment-setup prefix (docker or conda) as a list of
        # fragments; they are joined with `command_separator` once the entry point command
        # has been appended.
        command = []
        command_separator = " "
        # If a docker_env attribute is defined in MLproject then it takes precedence over conda yaml
        # environments, so the project will be executed inside a docker container.
        if project.docker_env:
            tracking.MlflowClient().set_tag(active_run.info.run_id, MLFLOW_PROJECT_ENV,
                                            "docker")
            tracking.MlflowClient().set_tag(active_run.info.run_id, MLFLOW_PROJECT_BACKEND,
                                            "local")
            _validate_docker_env(project)
            _validate_docker_installation()
            image = _build_docker_image(work_dir=work_dir,
                                        repository_uri=project.name,
                                        base_image=project.docker_env.get('image'),
                                        run_id=active_run.info.run_id)
            command += _get_docker_command(image=image, active_run=active_run)
        # Synchronously create a conda environment (even though this may take some time)
        # to avoid failures due to multiple concurrent attempts to create the same conda env.
        elif use_conda:
            tracking.MlflowClient().set_tag(active_run.info.run_id, MLFLOW_PROJECT_ENV, "conda")
            tracking.MlflowClient().set_tag(active_run.info.run_id, MLFLOW_PROJECT_BACKEND, "local")
            # Conda activation and the entry point command are chained with `&&`.
            command_separator = " && "
            conda_env_name = _get_or_create_conda_env(project.conda_env_path)
            command += _get_conda_command(conda_env_name)
        # In synchronous mode, run the entry point command in a blocking fashion, sending status
        # updates to the tracking server when finished. Note that the run state may not be
        # persisted to the tracking server if interrupted
        if synchronous:
            command += _get_entry_point_command(project, entry_point, parameters, storage_dir)
            command = command_separator.join(command)
            return _run_entry_point(command, work_dir, experiment_id,
                                    run_id=active_run.info.run_id)
        # Otherwise, invoke `mlflow run` in a subprocess
        # NOTE: the `command` prefix assembled above is not passed to the subprocess helper.
        return _invoke_mlflow_run_subprocess(
            work_dir=work_dir, entry_point=entry_point, parameters=parameters,
            experiment_id=experiment_id,
            use_conda=use_conda, storage_dir=storage_dir, run_id=active_run.info.run_id)
    elif backend == "kubernetes":
        # Imported lazily, only when the Kubernetes backend is actually requested.
        from mlflow.projects import kubernetes as kb
        tracking.MlflowClient().set_tag(active_run.info.run_id, MLFLOW_PROJECT_ENV, "docker")
        tracking.MlflowClient().set_tag(active_run.info.run_id, MLFLOW_PROJECT_BACKEND,
                                        "kubernetes")
        _validate_docker_env(project)
        _validate_docker_installation()
        kube_config = _parse_kubernetes_config(backend_config)
        # Build the image under the repository taken from the Kubernetes config, push it, and
        # submit a job that references the pushed image's first tag and digest.
        image = _build_docker_image(work_dir=work_dir,
                                    repository_uri=kube_config["repository-uri"],
                                    base_image=project.docker_env.get('image'),
                                    run_id=active_run.info.run_id)
        image_digest = kb.push_image_to_registry(image.tags[0])
        submitted_run = kb.run_kubernetes_job(project.name,
                                              active_run,
                                              image.tags[0],
                                              image_digest,
                                              _get_entry_point_command(project, entry_point,
                                                                       parameters, storage_dir),
                                              _get_run_env_vars(
                                                run_id=active_run.info.run_uuid,
                                                experiment_id=active_run.info.experiment_id),
                                              kube_config['kube-context'],
                                              kube_config['kube-job-template'])
        return submitted_run

    # Reached only when `backend` matched none of the branches above.
    supported_backends = ["local", "databricks", "kubernetes"]
    raise ExecutionException("Got unsupported execution mode %s. Supported "
                             "values: %s" % (backend, supported_backends))
예제 #4
0
def load_project():
    """
    Load the example project located at ``TEST_PROJECT_DIR`` for use in tests.

    :return: The object returned by ``_project_spec.load_project`` for that directory.
    """
    example_project = _project_spec.load_project(TEST_PROJECT_DIR)
    return example_project
예제 #5
0
def test_docker_invalid_project_backend_local():
    """Docker env validation must raise ``ExecutionException`` once the project name is cleared."""
    nameless_project = _project_spec.load_project("./examples/docker")
    nameless_project.name = None
    with pytest.raises(ExecutionException):
        mlflow.projects.docker.validate_docker_env(nameless_project)
예제 #6
0
def test_docker_valid_project_backend_local():
    """Docker env validation must not raise for the unmodified example docker project."""
    docker_project = _project_spec.load_project("./examples/docker")
    mlflow.projects.docker.validate_docker_env(docker_project)
예제 #7
0
def test_load_invalid_project(tmpdir, invalid_project_contents, expected_error_msg):
    """
    Loading a project whose MLproject file is invalid must raise ``ExecutionException``.

    :param tmpdir: Temporary directory object in which the MLproject file is written.
    :param invalid_project_contents: Text written to the ``MLproject`` file.
    :param expected_error_msg: Text that must match (as a ``pytest.raises`` pattern) and also
                               appear verbatim in the raised exception's message.
    """
    mlproject_file = tmpdir.join("MLproject")
    mlproject_file.write(invalid_project_contents)
    with pytest.raises(ExecutionException, match=expected_error_msg) as exc_info:
        _project_spec.load_project(tmpdir.strpath)
    raised_message = str(exc_info.value)
    assert expected_error_msg in raised_message
예제 #8
0
def _run(uri,
         entry_point="main",
         version=None,
         parameters=None,
         experiment_id=None,
         mode=None,
         cluster_spec=None,
         git_username=None,
         git_password=None,
         use_conda=True,
         storage_dir=None,
         block=True,
         run_id=None):
    """
    Dispatch a project run to the implementation matching ``mode`` and return the resulting
    ``SubmittedRun``.

    The project is fetched and loaded, its entry point parameters are validated, a run is
    fetched (when ``run_id`` is given) or created, and the computed parameters are logged to
    that run before the mode-specific launch happens.

    :raises ExecutionException: If ``mode`` is neither ``"databricks"``, ``"local"`` nor
                                ``None``. This happens after the run has been resolved and
                                parameters have been logged.
    """
    exp_id = experiment_id if experiment_id else _get_experiment_id()
    if not parameters:
        parameters = {}

    work_dir = _fetch_project(uri=uri, force_tempdir=False, version=version,
                              git_username=git_username, git_password=git_password)
    project = _project_spec.load_project(work_dir)
    project.get_entry_point(entry_point)._validate_parameters(parameters)

    # Reuse the given run if there is one, otherwise start a new one.
    active_run = (tracking.get_service().get_run(run_id) if run_id
                  else _create_run(uri, exp_id, work_dir, entry_point))

    # Consolidate parameters for logging.
    # `storage_dir` is `None` since we want to log actual path not downloaded local path
    resolved_params, passthrough_params = project.get_entry_point(
        entry_point).compute_parameters(parameters, storage_dir=None)
    params_to_log = list(resolved_params.items())
    params_to_log.extend(list(passthrough_params.items()))
    for param_name, param_value in params_to_log:
        tracking.get_service().log_param(active_run.info.run_uuid, param_name, param_value)

    if mode == "databricks":
        from mlflow.projects.databricks import run_databricks
        return run_databricks(remote_run=active_run, uri=uri, entry_point=entry_point,
                              work_dir=work_dir, parameters=parameters,
                              experiment_id=exp_id, cluster_spec=cluster_spec)

    # Anything that is not the local mode (explicit or defaulted) is rejected here.
    if not (mode == "local" or mode is None):
        supported_modes = ["local", "databricks"]
        raise ExecutionException("Got unsupported execution mode %s. Supported "
                                 "values: %s" % (mode, supported_modes))

    # Synchronously create a conda environment (even though this may take some time) to avoid
    # failures due to multiple concurrent attempts to create the same conda env.
    conda_env_name = None
    if use_conda:
        conda_env_name = _get_or_create_conda_env(project.conda_env_path)

    if not block:
        # Non-blocking mode: invoke `mlflow run` in a subprocess.
        return _invoke_mlflow_run_subprocess(work_dir=work_dir, entry_point=entry_point,
                                             parameters=parameters, experiment_id=exp_id,
                                             use_conda=use_conda, storage_dir=storage_dir,
                                             run_id=active_run.info.run_uuid)

    # In blocking mode, run the entry point command in blocking fashion, sending status updates
    # to the tracking server when finished. Note that the run state may not be persisted to the
    # tracking server if interrupted
    command = _get_entry_point_command(project, entry_point, parameters,
                                       conda_env_name, storage_dir)
    return _run_entry_point(command, work_dir, exp_id, run_id=active_run.info.run_uuid)
예제 #9
0
def _run(uri,
         entry_point="main",
         version=None,
         parameters=None,
         experiment_name=None,
         experiment_id=None,
         mode=None,
         cluster_spec=None,
         git_username=None,
         git_password=None,
         use_conda=True,
         storage_dir=None,
         block=True,
         run_id=None):
    """
    Helper that delegates to the project-running method corresponding to the passed-in mode.
    Returns a ``SubmittedRun`` corresponding to the project run.

    :param uri: Project URI; passed to ``_fetch_project``, ``_create_run`` and (in Databricks
                mode) ``run_databricks``.
    :param entry_point: Name of the entry point to run. Defaults to ``"main"``.
    :param version: Project version forwarded to ``_fetch_project``; also logged as the branch
                    name tag when ``_is_valid_branch_name`` accepts it.
    :param parameters: Dict of entry point parameters. A falsy value is replaced by ``{}``.
    :param experiment_name: If truthy, the experiment is looked up by this name and its ID is
                            used; takes precedence over ``experiment_id``.
    :param experiment_id: Experiment ID used when ``experiment_name`` is not given. If falsy,
                          ``_get_experiment_id()`` supplies the ID.
    :param mode: ``"local"`` (or ``None``) or ``"databricks"``.
    :param cluster_spec: Databricks mode only; forwarded to the pre-run validations and to
                         ``run_databricks``.
    :param git_username: Git username forwarded to ``_fetch_project``.
    :param git_password: Git password forwarded to ``_fetch_project``.
    :param use_conda: Local mode only: if truthy (and the project defines no ``docker_env``),
                      a conda environment is created/reused and activated for the command.
    :param storage_dir: Forwarded to ``_get_entry_point_command`` and
                        ``_invoke_mlflow_run_subprocess``.
    :param block: Local mode only: if truthy, run the entry point command directly via
                  ``_run_entry_point``; otherwise invoke ``mlflow run`` in a subprocess.
    :param run_id: If truthy, the existing run with this ID is fetched and reused instead of
                   creating a new run.
    :raises ExecutionException: If ``experiment_name`` does not match an existing experiment,
                                or if ``mode`` is not a supported value (the latter is raised
                                only after the run has been resolved and parameters/tags have
                                been logged).
    """
    if mode == "databricks":
        mlflow.projects.databricks.before_run_validations(
            mlflow.get_tracking_uri(), cluster_spec)

    if experiment_name:
        # `get_experiment_by_name` returns an Experiment entity (or None when no experiment
        # has that name), not an ID, so the ID has to be extracted explicitly.
        experiment = tracking.MlflowClient().get_experiment_by_name(
            experiment_name)
        if experiment is None:
            raise ExecutionException(
                "Could not find experiment with name %s" % experiment_name)
        exp_id = experiment.experiment_id
    else:
        exp_id = experiment_id or _get_experiment_id()
    parameters = parameters or {}
    work_dir = _fetch_project(uri=uri,
                              force_tempdir=False,
                              version=version,
                              git_username=git_username,
                              git_password=git_password)
    project = _project_spec.load_project(work_dir)
    _validate_execution_environment(project, mode)
    project.get_entry_point(entry_point)._validate_parameters(parameters)
    # Reuse the caller-supplied run when given, otherwise create a fresh one.
    if run_id:
        active_run = tracking.MlflowClient().get_run(run_id)
    else:
        active_run = _create_run(uri, exp_id, work_dir, entry_point)

    # Consolidate parameters for logging.
    # `storage_dir` is `None` since we want to log actual path not downloaded local path
    entry_point_obj = project.get_entry_point(entry_point)
    final_params, extra_params = entry_point_obj.compute_parameters(
        parameters, storage_dir=None)
    for key, value in (list(final_params.items()) +
                       list(extra_params.items())):
        tracking.MlflowClient().log_param(active_run.info.run_uuid, key, value)

    repo_url = _get_git_repo_url(work_dir)
    if repo_url is not None:
        tracking.MlflowClient().set_tag(active_run.info.run_uuid,
                                        MLFLOW_GIT_REPO_URL, repo_url)

    # Add branch name tag if a branch is specified through `version`
    if _is_valid_branch_name(work_dir, version):
        tracking.MlflowClient().set_tag(active_run.info.run_uuid,
                                        MLFLOW_GIT_BRANCH_NAME, version)

    if mode == "databricks":
        # Imported lazily, only when Databricks mode is actually requested.
        from mlflow.projects.databricks import run_databricks
        return run_databricks(remote_run=active_run,
                              uri=uri,
                              entry_point=entry_point,
                              work_dir=work_dir,
                              parameters=parameters,
                              experiment_id=exp_id,
                              cluster_spec=cluster_spec)

    elif mode == "local" or mode is None:
        # `command` collects the environment-setup prefix (docker or conda) as a list of
        # fragments; they are joined with `command_separator` once the entry point command
        # has been appended.
        command = []
        command_separator = " "
        # If a docker_env attribute is defined in MLProject then it takes precedence over conda yaml
        # environments, so the project will be executed inside a docker container.
        if project.docker_env:
            tracking.MlflowClient().set_tag(active_run.info.run_uuid,
                                            MLFLOW_ENV, MLFLOW_DOCKER)
            _validate_docker_env(project.docker_env)
            _validate_docker_installation()
            image = _build_docker_image(work_dir=work_dir,
                                        project=project,
                                        active_run=active_run)
            command += _get_docker_command(image=image, active_run=active_run)
        # Synchronously create a conda environment (even though this may take some time)
        # to avoid failures due to multiple concurrent attempts to create the same conda env.
        elif use_conda:
            tracking.MlflowClient().set_tag(active_run.info.run_uuid,
                                            MLFLOW_ENV, MLFLOW_CONDA)
            # Conda activation and the entry point command are chained with `&&`.
            command_separator = " && "
            conda_env_name = _get_or_create_conda_env(project.conda_env_path)
            command += _get_conda_command(conda_env_name)
        # In blocking mode, run the entry point command in blocking fashion, sending status updates
        # to the tracking server when finished. Note that the run state may not be persisted to the
        # tracking server if interrupted
        if block:
            command += _get_entry_point_command(project, entry_point,
                                                parameters, storage_dir)
            command = command_separator.join(command)
            return _run_entry_point(command,
                                    work_dir,
                                    exp_id,
                                    run_id=active_run.info.run_uuid)
        # Otherwise, invoke `mlflow run` in a subprocess
        return _invoke_mlflow_run_subprocess(work_dir=work_dir,
                                             entry_point=entry_point,
                                             parameters=parameters,
                                             experiment_id=exp_id,
                                             use_conda=use_conda,
                                             storage_dir=storage_dir,
                                             run_id=active_run.info.run_uuid)
    # Reached only when `mode` matched none of the branches above.
    supported_modes = ["local", "databricks"]
    raise ExecutionException("Got unsupported execution mode %s. Supported "
                             "values: %s" % (mode, supported_modes))
예제 #10
0
def load_project(work_dir):
    """
    Load the project stored in ``work_dir``.

    :param work_dir: Directory handed to ``_project_spec.load_project``.
    :return: Whatever ``_project_spec.load_project`` returns for that directory.
    """
    loaded_project = _project_spec.load_project(work_dir)
    return loaded_project
예제 #11
0
def fetch_and_validate_project(uri, version, entry_point, parameters):
    """
    Fetch a project and check that ``parameters`` are valid for the given entry point.

    :param uri: Project URI forwarded to ``_fetch_project``.
    :param version: Project version forwarded to ``_fetch_project``.
    :param entry_point: Name of the entry point whose parameters are validated.
    :param parameters: Entry point parameters; a falsy value is treated as an empty dict.
    :return: The working directory returned by ``_fetch_project``.
    """
    if not parameters:
        parameters = {}
    work_dir = _fetch_project(uri=uri, version=version)
    entry_point_obj = _project_spec.load_project(work_dir).get_entry_point(entry_point)
    entry_point_obj._validate_parameters(parameters)
    return work_dir
예제 #12
0
def load_project():
    """
    Load the project located at ``TEST_PROJECT_DIR``.

    :return: Whatever ``_project_spec.load_project`` returns for that directory.
    """
    test_project = _project_spec.load_project(directory=TEST_PROJECT_DIR)
    return test_project