def _adjust_path(candidate, adjustment, file_uploader: FileUploader):
    if isinstance(candidate, str):
        # path is already adjusted or points to another dbfs object - pass it through
        if candidate.startswith("dbfs"):
            return candidate
        else:
            file_path = pathlib.Path(candidate)

            # workaround for pathlib behaviour on Windows (WinError):
            # if we pass an incorrect or unsupported string, for example local[*],
            # .exists() raises an OSError
            try:
                local_file_exists = file_path.exists()
            except OSError:
                local_file_exists = False

            if local_file_exists:
                adjusted_path = "%s/%s" % (adjustment, file_path.as_posix())
                if file_uploader.file_exists(adjusted_path):
                    dbx_echo("File is already stored in the deployment, no action needed")
                else:
                    file_uploader.upload_file(file_path)
                return adjusted_path
            else:
                return candidate
    else:
        return candidate
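# Illustrative-only usage of `_adjust_path`, showing its three outcomes.
# `_StubUploader` is a hypothetical stand-in that mimics only the two
# `FileUploader` methods used above (`file_exists`, `upload_file`);
# the local file name "conf/test.json" is an assumption for the example.
class _StubUploader:
    def __init__(self):
        self.uploaded = []

    def file_exists(self, path: str) -> bool:
        return False  # pretend nothing has been uploaded yet

    def upload_file(self, file_path: pathlib.Path):
        self.uploaded.append(file_path)


def _demo_adjust_path():
    uploader = _StubUploader()

    # dbfs references and non-path strings are passed through unchanged
    assert _adjust_path("dbfs:/mnt/data.json", "dbfs:/base", uploader) == "dbfs:/mnt/data.json"
    assert _adjust_path("local[*]", "dbfs:/base", uploader) == "local[*]"

    # an existing local file is uploaded and rewritten against the artifact base
    # (assumes conf/test.json exists relative to the working directory)
    assert _adjust_path("conf/test.json", "dbfs:/base", uploader) == "dbfs:/base/conf/test.json"
    assert uploader.uploaded  # the stub recorded the upload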
def execute(
    environment: str,
    cluster_id: str,
    cluster_name: str,
    job: str,
    deployment_file: str,
    requirements_file: str,
    no_package: bool,
    no_rebuild: bool,
):
    api_client = prepare_environment(environment)
    cluster_id = _preprocess_cluster_args(api_client, cluster_name, cluster_id)

    dbx_echo(f"Executing job: {job} in environment {environment} on cluster {cluster_name} (id: {cluster_id})")

    handle_package(no_rebuild)

    deployment = get_deployment_config(deployment_file).get_environment(environment)
    _verify_deployment(deployment, environment, deployment_file)

    found_jobs = [j for j in deployment["jobs"] if j["name"] == job]

    if not found_jobs:
        raise RuntimeError(f"Job {job} was not found in environment jobs, please check the deployment file")

    job_payload = found_jobs[0]

    entrypoint_file = job_payload.get("spark_python_task").get("python_file")

    if not entrypoint_file:
        raise FileNotFoundError(
            f"No entrypoint file provided in job {job}. "
            f"Please add one under spark_python_task.python_file section"
        )

    cluster_service = ClusterService(api_client)

    dbx_echo("Preparing interactive cluster to accept jobs")
    awake_cluster(cluster_service, cluster_id)

    v1_client = ApiV1Client(api_client)
    context_id = get_context_id(v1_client, cluster_id, "python")
    file_uploader = FileUploader(api_client)

    with mlflow.start_run() as execution_run:

        artifact_base_uri = execution_run.info.artifact_uri
        localized_base_path = artifact_base_uri.replace("dbfs:/", "/dbfs/")

        # install the requirements file on the cluster via %pip, if one is provided
        requirements_fp = pathlib.Path(requirements_file)
        if requirements_fp.exists():
            file_uploader.upload_file(requirements_fp)
            localized_requirements_path = f"{localized_base_path}/{str(requirements_fp)}"
            installation_command = f"%pip install -U -r {localized_requirements_path}"

            dbx_echo("Installing provided requirements")
            execute_command(v1_client, cluster_id, context_id, installation_command, verbose=False)
            dbx_echo("Provided requirements installed")
        else:
            dbx_echo(
                f"Requirements file {requirements_fp} is not provided"
                + ", following the execution without any additional packages"
            )

        # upload and (re)install the project package unless explicitly disabled
        if not no_package:
            package_file = get_package_file()

            if not package_file:
                raise FileNotFoundError(
                    "Project package was not found. Please check that /dist directory exists."
                )

            file_uploader.upload_file(package_file)
            localized_package_path = f"{localized_base_path}/{str(package_file.as_posix())}"

            dbx_echo("Installing package")
            installation_command = f"%pip install --force-reinstall {localized_package_path}"
            execute_command(v1_client, cluster_id, context_id, installation_command, verbose=False)
            dbx_echo("Package installation finished")
        else:
            dbx_echo("Package was disabled via --no-package, only the code from entrypoint will be used")

        tags = {"dbx_action_type": "execute", "dbx_environment": environment}
        mlflow.set_tags(tags)

        # adjust local file references in the task parameters and expose them as sys.argv
        dbx_echo("Processing parameters")
        task_props: List[Any] = job_payload.get("spark_python_task").get("parameters", [])

        if task_props:

            def adjustment_callback(p: Any):
                return _adjust_path(p, artifact_base_uri, file_uploader)

            _walk_content(adjustment_callback, task_props)

            task_props = ["python"] + task_props

            parameters_command = f"""
import sys
sys.argv = {task_props}
"""

            execute_command(v1_client, cluster_id, context_id, parameters_command, verbose=False)

        dbx_echo("Processing parameters - done")

        dbx_echo("Starting entrypoint file execution")
        execute_command(v1_client, cluster_id, context_id, pathlib.Path(entrypoint_file).read_text())
        dbx_echo("Command execution finished")
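# `_walk_content` is referenced above but not defined in this section.
# A minimal sketch of the assumed behaviour: recursively apply the callback
# to every leaf value of the (possibly nested) parameters structure in place.
# The name `_walk_content_sketch` is used to avoid shadowing the real helper.
def _walk_content_sketch(func, content, parent=None, index=None):
    if isinstance(content, dict):
        for key, item in content.items():
            _walk_content_sketch(func, item, content, key)
    elif isinstance(content, list):
        for idx, item in enumerate(content):
            _walk_content_sketch(func, item, content, idx)
    elif parent is not None:
        # leaf value: replace it with the adjusted version, e.g. an uploaded path
        parent[index] = func(content)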
def execute(
    environment: str,
    cluster_id: str,
    cluster_name: str,
    job: str,
    deployment_file: str,
    requirements_file: str,
    no_rebuild: bool,
):
    api_client = prepare_environment(environment)
    cluster_id = _preprocess_cluster_args(api_client, cluster_name, cluster_id)

    dbx_echo(f"Executing job: {job} with environment: {environment} on cluster: {cluster_id}")

    handle_package(no_rebuild)

    deployment = DeploymentFile(deployment_file).get_environment(environment)

    if not deployment:
        raise NameError(
            f"Environment {environment} is not provided in deployment file {deployment_file}"
            + " please add this environment first"
        )

    env_jobs = deployment.get("jobs")
    if not env_jobs:
        raise RuntimeError(f"No jobs section found in environment {environment}, please check the deployment file")

    found_jobs = [j for j in deployment["jobs"] if j["name"] == job]

    if not found_jobs:
        raise RuntimeError(f"Job {job} was not found in environment jobs, please check the deployment file")

    job_payload = found_jobs[0]

    entrypoint_file = job_payload.get("spark_python_task").get("python_file")

    if not entrypoint_file:
        raise FileNotFoundError(
            f"No entrypoint file provided in job {job}. "
            f"Please add one under spark_python_task.python_file section"
        )

    cluster_service = ClusterService(api_client)

    dbx_echo("Preparing interactive cluster to accept jobs")
    awake_cluster(cluster_service, cluster_id)

    v1_client = ApiV1Client(api_client)
    context_id = get_context_id(v1_client, cluster_id, "python")
    file_uploader = FileUploader(api_client)

    with mlflow.start_run() as execution_run:

        artifact_base_uri = execution_run.info.artifact_uri
        localized_base_path = artifact_base_uri.replace("dbfs:/", "/dbfs/")

        requirements_fp = pathlib.Path(requirements_file)
        if requirements_fp.exists():
            file_uploader.upload_file(requirements_fp)
            localized_requirements_path = f"{localized_base_path}/{str(requirements_fp)}"
            installation_command = f"%pip install -U -r {localized_requirements_path}"

            dbx_echo("Installing provided requirements")
            execute_command(v1_client, cluster_id, context_id, installation_command, verbose=False)
            dbx_echo("Provided requirements installed")
        else:
            dbx_echo(
                f"Requirements file {requirements_fp} is not provided"
                + ", following the execution without any additional packages"
            )

        project_package_path = list(pathlib.Path(".").rglob("dist/*.whl"))

        # Make sure we always take the newest package (i.e. sort by time)
        project_package_path.sort(key=os.path.getctime, reverse=True)
        project_package_path = project_package_path[0]

        file_uploader.upload_file(project_package_path)
        localized_package_path = f"{localized_base_path}/{str(project_package_path.as_posix())}"

        dbx_echo("Installing package")
        installation_command = f"%pip install {localized_package_path} --force-reinstall"
        execute_command(v1_client, cluster_id, context_id, installation_command, verbose=False)
        dbx_echo("Package installation finished")

        tags = {"dbx_action_type": "execute", "dbx_environment": environment}
        mlflow.set_tags(tags)

        dbx_echo("Starting entrypoint file execution")
        execute_command(v1_client, cluster_id, context_id, pathlib.Path(entrypoint_file).read_text())

        dbx_echo("Command execution finished")
def deploy(
    deployment_file: str,
    jobs: str,
    requirements_file: str,
    tags: List[str],
    environment: str,
    no_rebuild: bool,
    no_package: bool,
    files_only: bool,
    write_specs_to_file: Optional[str],
    branch_name: Optional[str],
):
    dbx_echo(f"Starting new deployment for environment {environment}")

    api_client = prepare_environment(environment)
    additional_tags = parse_multiple(tags)
    handle_package(no_rebuild)
    package_file = get_package_file()

    if not branch_name:
        branch_name = get_current_branch_name()

    _verify_deployment_file(deployment_file)

    deployment_file_config = get_deployment_config(deployment_file)
    deployment = deployment_file_config.get_environment(environment)

    if not deployment:
        raise NameError(
            f"""
            Requested environment {environment} is non-existent in the deployment file {deployment_file}.
            Available environments are: {deployment_file_config.get_all_environment_names()}
            """
        )

    if jobs:
        requested_jobs = jobs.split(",")
    else:
        requested_jobs = None

    requirements_payload = _preprocess_requirements(requirements_file)

    _preprocess_deployment(deployment, requested_jobs)

    _file_uploader = FileUploader(api_client)

    with mlflow.start_run() as deployment_run:

        artifact_base_uri = deployment_run.info.artifact_uri

        if no_package:
            dbx_echo("No package definition will be added into job description")
            package_requirement = []
        else:
            if package_file:
                package_requirement = [{"whl": str(package_file)}]
            else:
                dbx_echo("Package file was not found! Please check your /dist/ folder")
                package_requirement = []

        _adjust_job_definitions(
            deployment["jobs"],
            artifact_base_uri,
            requirements_payload,
            package_requirement,
            _file_uploader,
            api_client,
        )

        if not files_only:
            dbx_echo("Updating job definitions")
            deployment_data = _create_jobs(deployment["jobs"], api_client)
            _log_dbx_file(deployment_data, "deployments.json")

            # apply permission settings for jobs that define them
            for job_spec in deployment.get("jobs"):
                permissions = job_spec.get("permissions")
                if permissions:
                    job_name = job_spec.get("name")
                    dbx_echo(f"Permission settings are provided for job {job_name}, setting it up")
                    job_id = deployment_data.get(job_spec.get("name"))
                    api_client.perform_query("PUT", f"/permissions/jobs/{job_id}", data=permissions)
                    dbx_echo(f"Permission settings were successfully set for job {job_name}")

            dbx_echo("Updating job definitions - done")

        deployment_tags = {
            "dbx_action_type": "deploy",
            "dbx_environment": environment,
            "dbx_status": "SUCCESS",
        }

        deployment_spec = {environment: deployment}

        deployment_tags.update(additional_tags)

        if branch_name:
            deployment_tags["dbx_branch_name"] = branch_name

        if files_only:
            deployment_tags["dbx_deploy_type"] = "files_only"

        _log_dbx_file(deployment_spec, "deployment-result.json")

        mlflow.set_tags(deployment_tags)

        dbx_echo(f"Deployment for environment {environment} finished successfully")

        if write_specs_to_file:
            dbx_echo("Writing final job specifications into file")
            specs_file = pathlib.Path(write_specs_to_file)

            if specs_file.exists():
                specs_file.unlink()

            specs_file.write_text(json.dumps(deployment_spec, indent=4))
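# `_log_dbx_file` is used above but not defined in this section.
# A minimal sketch, assuming it serializes the payload to a temporary JSON file
# and attaches it to the active MLflow run as an artifact; any artifact
# sub-folder layout is an implementation detail not shown here.
import tempfile  # json, pathlib and mlflow are already required by the functions above


def _log_dbx_file_sketch(content: dict, file_name: str):
    with tempfile.TemporaryDirectory() as tmp:
        local_path = pathlib.Path(tmp, file_name)
        local_path.write_text(json.dumps(content, indent=4))
        mlflow.log_artifact(str(local_path))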
def deploy(
    deployment_file: str,
    jobs: str,
    requirements_file: str,
    tags: List[str],
    environment: str,
    no_rebuild: bool,
    no_package: bool,
):
    dbx_echo(f"Starting new deployment for environment {environment}")

    api_client = prepare_environment(environment)
    additional_tags = parse_multiple(tags)
    branch_name = get_current_branch_name()
    handle_package(no_rebuild)
    package_file = get_package_file()
    _verify_deployment_file(deployment_file)

    deployment_file_controller = DeploymentFile(deployment_file)
    deployment = deployment_file_controller.get_environment(environment)

    if not deployment:
        raise NameError(
            f"""
            Requested environment {environment} is non-existent in the deployment file {deployment_file}.
            Available environments are: {deployment_file_controller.get_all_environment_names()}
            """
        )

    if jobs:
        requested_jobs = jobs.split(",")
    else:
        requested_jobs = None

    requirements_payload = _preprocess_requirements(requirements_file)

    _preprocess_deployment(deployment, requested_jobs)

    _file_uploader = FileUploader(api_client)

    with mlflow.start_run() as deployment_run:

        artifact_base_uri = deployment_run.info.artifact_uri

        if no_package:
            dbx_echo("No package definition will be added into job description")
            package_requirement = []
        else:
            if package_file:
                package_requirement = [{"whl": str(package_file)}]
            else:
                dbx_echo("Package file was not found! Please check your /dist/ folder")
                package_requirement = []

        _adjust_job_definitions(
            deployment["jobs"],
            artifact_base_uri,
            requirements_payload,
            package_requirement,
            _file_uploader,
        )

        deployment_data = _create_jobs(deployment["jobs"], api_client)
        _log_deployments(deployment_data)

        deployment_tags = {
            "dbx_action_type": "deploy",
            "dbx_environment": environment,
            "dbx_status": "SUCCESS",
        }

        deployment_tags.update(additional_tags)

        if branch_name:
            deployment_tags["dbx_branch_name"] = branch_name

        mlflow.set_tags(deployment_tags)

        dbx_echo(f"Deployment for environment {environment} finished successfully")
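# `parse_multiple` is used in both `deploy` variants above but not defined in
# this section. A minimal sketch, assuming each CLI tag arrives as "key=value"
# and that the result is a dict that can be merged into the MLflow tags:
def parse_multiple_sketch(payload: List[str]) -> dict:
    return dict(item.split("=", 1) for item in payload if item)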