def test_yaml_file_read_will_match_the_json_file_contents(self):
    json_file = format_path("../deployment-configs/01-yaml-test.json")
    yaml_file = format_path("../deployment-configs/01-yaml-test.yaml")

    json_default_env = get_deployment_config(json_file).get_environment("default")
    yaml_default_env = get_deployment_config(yaml_file).get_environment("default")

    assert yaml_default_env == json_default_env
def test_yaml_file_can_be_read(self):
    json_file = format_path("../deployment-configs/01-yaml-test.json")
    yaml_file = format_path("../deployment-configs/01-yaml-test.yaml")

    json_default_envs = get_deployment_config(json_file).get_all_environment_names()
    yaml_default_envs = get_deployment_config(yaml_file).get_all_environment_names()

    assert json_default_envs == yaml_default_envs
def test_yaml_variables_will_result_in_equivalent_output_to_json_file(self):
    json_file = format_path("../deployment-configs/02-yaml-with-vars-test.json")
    yaml_file = format_path("../deployment-configs/02-yaml-with-vars-test.yaml")

    json_default_env = get_deployment_config(json_file).get_environment("default")
    yaml_default_env = get_deployment_config(yaml_file).get_environment("default")

    assert yaml_default_env == json_default_env
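# The three tests above rely on fixture pairs under deployment-configs/ that describe
# the same environments in JSON and in YAML. The strings below are a hypothetical
# sketch of such a pair (job names, fields, and the exact YAML layout are assumptions,
# not copies of the real fixtures); the point is only that both formats must resolve
# to the same "default" environment through get_deployment_config.
import json

EXAMPLE_JSON_CONFIG = json.loads("""
{
    "default": {
        "jobs": [
            {
                "name": "sample-job",
                "spark_python_task": {"python_file": "sample_project/entrypoint.py"}
            }
        ]
    }
}
""")

EXAMPLE_YAML_CONFIG = """
environments:
  default:
    jobs:
      - name: "sample-job"
        spark_python_task:
          python_file: "sample_project/entrypoint.py"
"""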
def execute(
    environment: str,
    cluster_id: str,
    cluster_name: str,
    job: str,
    deployment_file: str,
    requirements_file: str,
    no_package: bool,
    no_rebuild: bool,
):
    api_client = prepare_environment(environment)
    # Resolve the target interactive cluster: either cluster_name or cluster_id may be given.
    cluster_id = _preprocess_cluster_args(api_client, cluster_name, cluster_id)

    dbx_echo(
        f"Executing job: {job} in environment {environment} on cluster {cluster_name} (id: {cluster_id})"
    )

    handle_package(no_rebuild)

    deployment = get_deployment_config(deployment_file).get_environment(environment)
    _verify_deployment(deployment, environment, deployment_file)

    # Locate the requested job definition inside the chosen environment.
    found_jobs = [j for j in deployment["jobs"] if j["name"] == job]

    if not found_jobs:
        raise RuntimeError(
            f"Job {job} was not found in environment jobs, please check the deployment file"
        )

    job_payload = found_jobs[0]

    entrypoint_file = job_payload.get("spark_python_task").get("python_file")

    if not entrypoint_file:
        raise FileNotFoundError(
            f"No entrypoint file provided in job {job}. "
            f"Please add one under spark_python_task.python_file section"
        )

    cluster_service = ClusterService(api_client)

    dbx_echo("Preparing interactive cluster to accept jobs")
    awake_cluster(cluster_service, cluster_id)

    # Create a remote execution context on the cluster.
    v1_client = ApiV1Client(api_client)
    context_id = get_context_id(v1_client, cluster_id, "python")
    file_uploader = FileUploader(api_client)

    with mlflow.start_run() as execution_run:

        artifact_base_uri = execution_run.info.artifact_uri
        localized_base_path = artifact_base_uri.replace("dbfs:/", "/dbfs/")

        # Install requirements on the cluster if a requirements file is present.
        requirements_fp = pathlib.Path(requirements_file)
        if requirements_fp.exists():
            file_uploader.upload_file(requirements_fp)
            localized_requirements_path = f"{localized_base_path}/{str(requirements_fp)}"

            installation_command = f"%pip install -U -r {localized_requirements_path}"

            dbx_echo("Installing provided requirements")
            execute_command(v1_client, cluster_id, context_id, installation_command, verbose=False)
            dbx_echo("Provided requirements installed")
        else:
            dbx_echo(
                f"Requirements file {requirements_fp} is not provided"
                + ", following the execution without any additional packages"
            )

        # Install the project package from /dist unless --no-package was passed.
        if not no_package:
            package_file = get_package_file()

            if not package_file:
                raise FileNotFoundError(
                    "Project package was not found. Please check that /dist directory exists."
                )

            file_uploader.upload_file(package_file)
            localized_package_path = f"{localized_base_path}/{str(package_file.as_posix())}"

            dbx_echo("Installing package")
            installation_command = f"%pip install --force-reinstall {localized_package_path}"
            execute_command(v1_client, cluster_id, context_id, installation_command, verbose=False)
            dbx_echo("Package installation finished")
        else:
            dbx_echo(
                "Package was disabled via --no-package, only the code from entrypoint will be used"
            )

        tags = {"dbx_action_type": "execute", "dbx_environment": environment}
        mlflow.set_tags(tags)

        # Pass job parameters to the remote context by overriding sys.argv there.
        dbx_echo("Processing parameters")
        task_props: List[Any] = job_payload.get("spark_python_task").get("parameters", [])

        if task_props:

            def adjustment_callback(p: Any):
                return _adjust_path(p, artifact_base_uri, file_uploader)

            _walk_content(adjustment_callback, task_props)

            task_props = ["python"] + task_props

            parameters_command = f"""
import sys
sys.argv = {task_props}
"""

            execute_command(v1_client, cluster_id, context_id, parameters_command, verbose=False)

        dbx_echo("Processing parameters - done")

        # Finally, ship the entrypoint file contents to the cluster and run them.
        dbx_echo("Starting entrypoint file execution")
        execute_command(v1_client, cluster_id, context_id, pathlib.Path(entrypoint_file).read_text())
        dbx_echo("Command execution finished")
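# A minimal standalone sketch of the parameter-passing step above: the values from
# spark_python_task.parameters are prepended with "python" and sent to the remote
# context as a sys.argv override, so an argparse-based entrypoint sees them exactly as
# if it had been launched from the command line. The parameter values here are hypothetical.
example_task_props = ["python", "--conf-file", "/dbfs/some/artifact/path/conf.json"]
example_parameters_command = f"""
import sys
sys.argv = {example_task_props}
"""
print(example_parameters_command)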
def deploy(
    deployment_file: str,
    jobs: str,
    requirements_file: str,
    tags: List[str],
    environment: str,
    no_rebuild: bool,
    no_package: bool,
    files_only: bool,
    write_specs_to_file: Optional[str],
    branch_name: Optional[str],
):
    dbx_echo(f"Starting new deployment for environment {environment}")

    api_client = prepare_environment(environment)
    additional_tags = parse_multiple(tags)
    handle_package(no_rebuild)
    package_file = get_package_file()

    if not branch_name:
        branch_name = get_current_branch_name()

    _verify_deployment_file(deployment_file)

    deployment_file_config = get_deployment_config(deployment_file)
    deployment = deployment_file_config.get_environment(environment)

    if not deployment:
        raise NameError(f"""
        Requested environment {environment} is non-existent in the deployment file {deployment_file}.
        Available environments are: {deployment_file_config.get_all_environment_names()}
        """)

    # Optionally limit the deployment to a comma-separated subset of jobs.
    if jobs:
        requested_jobs = jobs.split(",")
    else:
        requested_jobs = None

    requirements_payload = _preprocess_requirements(requirements_file)

    _preprocess_deployment(deployment, requested_jobs)

    _file_uploader = FileUploader(api_client)

    with mlflow.start_run() as deployment_run:

        artifact_base_uri = deployment_run.info.artifact_uri

        if no_package:
            dbx_echo("No package definition will be added into job description")
            package_requirement = []
        else:
            if package_file:
                package_requirement = [{"whl": str(package_file)}]
            else:
                dbx_echo("Package file was not found! Please check your /dist/ folder")
                package_requirement = []

        # Upload local file references and rewrite job definitions against the artifact location.
        _adjust_job_definitions(
            deployment["jobs"],
            artifact_base_uri,
            requirements_payload,
            package_requirement,
            _file_uploader,
            api_client,
        )

        if not files_only:
            dbx_echo("Updating job definitions")
            deployment_data = _create_jobs(deployment["jobs"], api_client)
            _log_dbx_file(deployment_data, "deployments.json")

            # Apply per-job permissions via the permissions endpoint when provided.
            for job_spec in deployment.get("jobs"):
                permissions = job_spec.get("permissions")
                if permissions:
                    job_name = job_spec.get("name")
                    dbx_echo(f"Permission settings are provided for job {job_name}, setting it up")
                    job_id = deployment_data.get(job_spec.get("name"))
                    api_client.perform_query("PUT", f"/permissions/jobs/{job_id}", data=permissions)
                    dbx_echo(f"Permission settings were successfully set for job {job_name}")

            dbx_echo("Updating job definitions - done")

        deployment_tags = {
            "dbx_action_type": "deploy",
            "dbx_environment": environment,
            "dbx_status": "SUCCESS",
        }

        deployment_spec = {environment: deployment}

        deployment_tags.update(additional_tags)

        if branch_name:
            deployment_tags["dbx_branch_name"] = branch_name

        if files_only:
            deployment_tags["dbx_deploy_type"] = "files_only"

        _log_dbx_file(deployment_spec, "deployment-result.json")

        mlflow.set_tags(deployment_tags)
        dbx_echo(f"Deployment for environment {environment} finished successfully")

        if write_specs_to_file:
            dbx_echo("Writing final job specifications into file")
            specs_file = pathlib.Path(write_specs_to_file)

            if specs_file.exists():
                specs_file.unlink()

            specs_file.write_text(json.dumps(deployment_spec, indent=4))
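# A rough sketch of the artifact written when write_specs_to_file is provided: the final
# deployment spec is keyed by environment name and dumped with json.dumps(..., indent=4).
# The job definition and file names below are purely illustrative, not taken from a real deployment.
import json
import pathlib

example_spec = {
    "default": {
        "jobs": [
            {
                "name": "sample-job",
                "spark_python_task": {"python_file": "dbfs:/some/artifact/path/entrypoint.py"},
            }
        ]
    }
}

example_specs_file = pathlib.Path("example-job-specs.json")
example_specs_file.write_text(json.dumps(example_spec, indent=4))
print(json.loads(example_specs_file.read_text())["default"]["jobs"][0]["name"])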