Example #1
def _create_run(uri, experiment_id, work_dir, entry_point):
    """
    Create a ``Run`` against the current MLflow tracking server, logging metadata about the
    run (e.g. the project's URI, entry point, and parameters). Return an ``ActiveRun`` that
    can be used to report additional data about the run (metrics/params) to the tracking
    server.
    """
    if _is_local_uri(uri):
        # For local projects, record the Git remote URL (when the directory is a
        # Git repo) so the tracking server stores a reproducible source name.
        source_name = tracking.utils._get_git_url_if_present(_expand_uri(uri))
    else:
        source_name = _expand_uri(uri)
    active_run = tracking.get_service().create_run(
        experiment_id=experiment_id,
        source_name=source_name,
        source_version=_get_git_commit(work_dir),
        entry_point_name=entry_point,
        source_type=SourceType.PROJECT)
    return active_run
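For context, a minimal sketch of how this helper might be invoked from a project launcher. All argument values below are hypothetical, chosen only for illustration:

# Hypothetical invocation; values are made up for illustration.
active_run = _create_run(
    uri="https://github.com/mlflow/mlflow-example",
    experiment_id="0",
    work_dir="/tmp/checkouts/mlflow-example",  # local copy of the fetched project
    entry_point="main",
)
print(active_run.info.run_uuid)  # ID under which metrics/params will be reported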
Example #2
def _build_docker_image(work_dir, project, active_run):
    """
    Build a Docker image containing the project in `work_dir`, using the base image
    from the project's docker environment, and tag the result with the project name
    specified by `project`.
    """
    if not project.name:
        raise ExecutionException(
            "Project name in MLProject must be specified when using docker "
            "for image tagging.")
    tag_name = "mlflow-{name}-{version}".format(
        name=project.name,  # guaranteed non-empty by the check above
        version=_get_git_commit(work_dir)[:7],
    )
    dockerfile = ("FROM {imagename}\n"
                  "LABEL Name={tag_name}\n"
                  "COPY {build_context_path}/* /mlflow/projects/code/\n"
                  "WORKDIR /mlflow/projects/code/\n").format(
                      imagename=project.docker_env.get('image'),
                      tag_name=tag_name,
                      build_context_path=_PROJECT_TAR_ARCHIVE_NAME)
    build_ctx_path = _create_docker_build_ctx(work_dir, dockerfile)
    with open(build_ctx_path, 'rb') as docker_build_ctx:
        _logger.info("=== Building docker image %s ===", tag_name)
        client = docker.from_env()
        # images.build returns an (Image, build-log generator) tuple; the image
        # object itself is the first element.
        image = client.images.build(tag=tag_name,
                                    forcerm=True,
                                    dockerfile=posixpath.join(
                                        _PROJECT_TAR_ARCHIVE_NAME,
                                        _GENERATED_DOCKERFILE_NAME),
                                    fileobj=docker_build_ctx,
                                    custom_context=True,
                                    encoding="gzip")
    try:
        os.remove(build_ctx_path)
    except Exception:  # pylint: disable=broad-except
        _logger.info("Temporary docker context file %s was not deleted.",
                     build_ctx_path)
    mlflow_client = tracking.MlflowClient()
    mlflow_client.set_tag(active_run.info.run_uuid,
                          MLFLOW_DOCKER_IMAGE_NAME, tag_name)
    mlflow_client.set_tag(active_run.info.run_uuid,
                          MLFLOW_DOCKER_IMAGE_ID, image[0].id)
    return tag_name
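For reference, the custom-context build this function performs can be reproduced directly with the Docker SDK for Python, as a standalone sketch. The archive path, Dockerfile location inside the archive, and tag below are hypothetical:

import docker

client = docker.from_env()
# The tarball is assumed to contain the project files plus a generated Dockerfile.
with open("/tmp/mlflow-project-contents.tar.gz", "rb") as build_ctx:  # hypothetical path
    image, _build_logs = client.images.build(
        fileobj=build_ctx,
        custom_context=True,  # treat fileobj as a full build context, not a Dockerfile
        encoding="gzip",
        dockerfile="mlflow-project-contents/Dockerfile",  # path inside the archive
        tag="mlflow-example-abc1234",
        forcerm=True,  # always remove intermediate containers
    )
print(image.id)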
Example #3
def run_databricks(uri, entry_point, version, parameters, experiment_id,
                   cluster_spec, git_username, git_password):
    """
    Runs the project at the specified URI on Databricks, returning a `SubmittedRun` that can be
    used to query the run's status or wait for the resulting Databricks Job run to terminate.
    """
    tracking_uri = tracking.get_tracking_uri()
    _before_run_validations(tracking_uri, cluster_spec)
    work_dir = _fetch_and_clean_project(uri=uri,
                                        version=version,
                                        git_username=git_username,
                                        git_password=git_password)
    project = _project_spec.load_project(work_dir)
    project.get_entry_point(entry_point)._validate_parameters(parameters)
    dbfs_fuse_uri = _upload_project_to_dbfs(work_dir, experiment_id)
    remote_run = tracking.get_service().create_run(
        experiment_id=experiment_id,
        source_name=_expand_uri(uri),
        source_version=_get_git_commit(work_dir),
        entry_point_name=entry_point,
        source_type=SourceType.PROJECT)
    env_vars = {
        tracking._TRACKING_URI_ENV_VAR: tracking_uri,
        tracking._EXPERIMENT_ID_ENV_VAR: experiment_id,
    }
    run_id = remote_run.info.run_uuid
    eprint("=== Running entry point %s of project %s on Databricks. ===" %
           (entry_point, uri))
    # Launch run on Databricks. Note that ``cluster_spec`` is a path to a JSON file
    # here; after parsing, the name is rebound to the parsed spec dict itself.
    with open(cluster_spec, 'r') as handle:
        try:
            cluster_spec = json.load(handle)
        except ValueError:
            eprint("Error when attempting to load and parse JSON cluster spec "
                   "from file %s" % cluster_spec)
            raise
    command = _get_databricks_run_cmd(dbfs_fuse_uri, run_id, entry_point,
                                      parameters)
    db_run_id = _run_shell_command_job(uri, command, env_vars, cluster_spec)
    return DatabricksSubmittedRun(db_run_id, run_id)
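A hedged sketch of a call into this entry point, assuming a JSON cluster spec file already exists on disk. All argument values here are hypothetical:

submitted_run = run_databricks(
    uri="https://github.com/mlflow/mlflow-example",
    entry_point="main",
    version=None,  # use the project's default branch
    parameters={"alpha": "0.5"},
    experiment_id="0",
    cluster_spec="/path/to/cluster-spec.json",  # hypothetical JSON cluster spec file
    git_username=None,
    git_password=None,
)
submitted_run.wait()  # block until the Databricks job run terminates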