def test_yaml_file_read_will_match_the_json_file_contents(self):
    json_file = format_path("../deployment-configs/01-yaml-test.json")
    yaml_file = format_path("../deployment-configs/01-yaml-test.yaml")

    json_default_env = get_deployment_config(json_file).get_environment("default")
    yaml_default_env = get_deployment_config(yaml_file).get_environment("default")

    assert yaml_default_env == json_default_env
def test_yaml_file_can_be_read(self):
    json_file = format_path("../deployment-configs/01-yaml-test.json")
    yaml_file = format_path("../deployment-configs/01-yaml-test.yaml")

    json_default_envs = get_deployment_config(json_file).get_all_environment_names()
    yaml_default_envs = get_deployment_config(yaml_file).get_all_environment_names()

    assert json_default_envs == yaml_default_envs
def test_yaml_variables_will_result_in_equivalent_output_to_json_file(self):
    json_file = format_path("../deployment-configs/02-yaml-with-vars-test.json")
    yaml_file = format_path("../deployment-configs/02-yaml-with-vars-test.yaml")

    json_default_env = get_deployment_config(json_file).get_environment("default")
    yaml_default_env = get_deployment_config(yaml_file).get_environment("default")

    assert yaml_default_env == json_default_env
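# The three tests above rely on fixture pairs under deployment-configs/ that describe
# the same environments in JSON and in YAML. The strings below are a hypothetical
# sketch of such a pair (job names, fields, and the exact YAML layout are assumptions,
# not copies of the real fixtures); the point is only that both formats must resolve
# to the same "default" environment through get_deployment_config.
import json

EXAMPLE_JSON_CONFIG = json.loads("""
{
    "default": {
        "jobs": [
            {
                "name": "sample-job",
                "spark_python_task": {"python_file": "sample_project/entrypoint.py"}
            }
        ]
    }
}
""")

EXAMPLE_YAML_CONFIG = """
environments:
  default:
    jobs:
      - name: "sample-job"
        spark_python_task:
          python_file: "sample_project/entrypoint.py"
"""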
def execute(
    environment: str,
    cluster_id: str,
    cluster_name: str,
    job: str,
    deployment_file: str,
    requirements_file: str,
    no_package: bool,
    no_rebuild: bool,
):
    api_client = prepare_environment(environment)
    # Resolve the target interactive cluster: either cluster_name or cluster_id may be given.
    cluster_id = _preprocess_cluster_args(api_client, cluster_name, cluster_id)

    dbx_echo(
        f"Executing job: {job} in environment {environment} on cluster {cluster_name} (id: {cluster_id})"
    )

    handle_package(no_rebuild)

    deployment = get_deployment_config(deployment_file).get_environment(environment)
    _verify_deployment(deployment, environment, deployment_file)

    # Locate the requested job definition inside the chosen environment.
    found_jobs = [j for j in deployment["jobs"] if j["name"] == job]

    if not found_jobs:
        raise RuntimeError(
            f"Job {job} was not found in environment jobs, please check the deployment file"
        )

    job_payload = found_jobs[0]

    entrypoint_file = job_payload.get("spark_python_task").get("python_file")

    if not entrypoint_file:
        raise FileNotFoundError(
            f"No entrypoint file provided in job {job}. "
            f"Please add one under spark_python_task.python_file section"
        )

    cluster_service = ClusterService(api_client)

    dbx_echo("Preparing interactive cluster to accept jobs")
    awake_cluster(cluster_service, cluster_id)

    # Create a remote execution context on the cluster.
    v1_client = ApiV1Client(api_client)
    context_id = get_context_id(v1_client, cluster_id, "python")
    file_uploader = FileUploader(api_client)

    with mlflow.start_run() as execution_run:

        artifact_base_uri = execution_run.info.artifact_uri
        localized_base_path = artifact_base_uri.replace("dbfs:/", "/dbfs/")

        # Install requirements on the cluster if a requirements file is present.
        requirements_fp = pathlib.Path(requirements_file)
        if requirements_fp.exists():
            file_uploader.upload_file(requirements_fp)
            localized_requirements_path = f"{localized_base_path}/{str(requirements_fp)}"

            installation_command = f"%pip install -U -r {localized_requirements_path}"

            dbx_echo("Installing provided requirements")
            execute_command(v1_client, cluster_id, context_id, installation_command, verbose=False)
            dbx_echo("Provided requirements installed")
        else:
            dbx_echo(
                f"Requirements file {requirements_fp} is not provided"
                + ", following the execution without any additional packages"
            )

        # Install the project package from /dist unless --no-package was passed.
        if not no_package:
            package_file = get_package_file()

            if not package_file:
                raise FileNotFoundError(
                    "Project package was not found. Please check that /dist directory exists."
                )

            file_uploader.upload_file(package_file)
            localized_package_path = f"{localized_base_path}/{str(package_file.as_posix())}"

            dbx_echo("Installing package")
            installation_command = f"%pip install --force-reinstall {localized_package_path}"
            execute_command(v1_client, cluster_id, context_id, installation_command, verbose=False)
            dbx_echo("Package installation finished")
        else:
            dbx_echo(
                "Package was disabled via --no-package, only the code from entrypoint will be used"
            )

        tags = {"dbx_action_type": "execute", "dbx_environment": environment}
        mlflow.set_tags(tags)

        # Pass job parameters to the remote context by overriding sys.argv there.
        dbx_echo("Processing parameters")
        task_props: List[Any] = job_payload.get("spark_python_task").get("parameters", [])

        if task_props:

            def adjustment_callback(p: Any):
                return _adjust_path(p, artifact_base_uri, file_uploader)

            _walk_content(adjustment_callback, task_props)

            task_props = ["python"] + task_props

            parameters_command = f"""
import sys
sys.argv = {task_props}
"""

            execute_command(v1_client, cluster_id, context_id, parameters_command, verbose=False)

        dbx_echo("Processing parameters - done")

        # Finally, ship the entrypoint file contents to the cluster and run them.
        dbx_echo("Starting entrypoint file execution")
        execute_command(v1_client, cluster_id, context_id, pathlib.Path(entrypoint_file).read_text())
        dbx_echo("Command execution finished")
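# A minimal standalone sketch of the parameter-passing step above: the values from
# spark_python_task.parameters are prepended with "python" and sent to the remote
# context as a sys.argv override, so an argparse-based entrypoint sees them exactly as
# if it had been launched from the command line. The parameter values here are hypothetical.
example_task_props = ["python", "--conf-file", "/dbfs/some/artifact/path/conf.json"]
example_parameters_command = f"""
import sys
sys.argv = {example_task_props}
"""
print(example_parameters_command)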
def deploy(
    deployment_file: str,
    jobs: str,
    requirements_file: str,
    tags: List[str],
    environment: str,
    no_rebuild: bool,
    no_package: bool,
    files_only: bool,
    write_specs_to_file: Optional[str],
    branch_name: Optional[str],
):
    dbx_echo(f"Starting new deployment for environment {environment}")

    api_client = prepare_environment(environment)
    additional_tags = parse_multiple(tags)
    handle_package(no_rebuild)
    package_file = get_package_file()

    if not branch_name:
        branch_name = get_current_branch_name()

    _verify_deployment_file(deployment_file)

    deployment_file_config = get_deployment_config(deployment_file)
    deployment = deployment_file_config.get_environment(environment)

    if not deployment:
        raise NameError(f"""
        Requested environment {environment} is non-existent in the deployment file {deployment_file}.
        Available environments are: {deployment_file_config.get_all_environment_names()}
        """)

    # Optionally limit the deployment to a comma-separated subset of jobs.
    if jobs:
        requested_jobs = jobs.split(",")
    else:
        requested_jobs = None

    requirements_payload = _preprocess_requirements(requirements_file)

    _preprocess_deployment(deployment, requested_jobs)

    _file_uploader = FileUploader(api_client)

    with mlflow.start_run() as deployment_run:

        artifact_base_uri = deployment_run.info.artifact_uri

        if no_package:
            dbx_echo("No package definition will be added into job description")
            package_requirement = []
        else:
            if package_file:
                package_requirement = [{"whl": str(package_file)}]
            else:
                dbx_echo("Package file was not found! Please check your /dist/ folder")
                package_requirement = []

        # Upload local file references and rewrite job definitions against the artifact location.
        _adjust_job_definitions(
            deployment["jobs"],
            artifact_base_uri,
            requirements_payload,
            package_requirement,
            _file_uploader,
            api_client,
        )

        if not files_only:
            dbx_echo("Updating job definitions")
            deployment_data = _create_jobs(deployment["jobs"], api_client)
            _log_dbx_file(deployment_data, "deployments.json")

            # Apply per-job permissions via the permissions endpoint when provided.
            for job_spec in deployment.get("jobs"):
                permissions = job_spec.get("permissions")
                if permissions:
                    job_name = job_spec.get("name")
                    dbx_echo(f"Permission settings are provided for job {job_name}, setting it up")
                    job_id = deployment_data.get(job_spec.get("name"))
                    api_client.perform_query("PUT", f"/permissions/jobs/{job_id}", data=permissions)
                    dbx_echo(f"Permission settings were successfully set for job {job_name}")

            dbx_echo("Updating job definitions - done")

        deployment_tags = {
            "dbx_action_type": "deploy",
            "dbx_environment": environment,
            "dbx_status": "SUCCESS",
        }

        deployment_spec = {environment: deployment}

        deployment_tags.update(additional_tags)

        if branch_name:
            deployment_tags["dbx_branch_name"] = branch_name

        if files_only:
            deployment_tags["dbx_deploy_type"] = "files_only"

        _log_dbx_file(deployment_spec, "deployment-result.json")

        mlflow.set_tags(deployment_tags)
        dbx_echo(f"Deployment for environment {environment} finished successfully")

        if write_specs_to_file:
            dbx_echo("Writing final job specifications into file")
            specs_file = pathlib.Path(write_specs_to_file)

            if specs_file.exists():
                specs_file.unlink()

            specs_file.write_text(json.dumps(deployment_spec, indent=4))
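# A rough sketch of the artifact written when write_specs_to_file is provided: the final
# deployment spec is keyed by environment name and dumped with json.dumps(..., indent=4).
# The job definition and file names below are purely illustrative, not taken from a real deployment.
import json
import pathlib

example_spec = {
    "default": {
        "jobs": [
            {
                "name": "sample-job",
                "spark_python_task": {"python_file": "dbfs:/some/artifact/path/entrypoint.py"},
            }
        ]
    }
}

example_specs_file = pathlib.Path("example-job-specs.json")
example_specs_file.write_text(json.dumps(example_spec, indent=4))
print(json.loads(example_specs_file.read_text())["default"]["jobs"][0]["name"])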