Example #1
def get_context_id(v1_client: ApiV1Client, cluster_id: str, language: str):
    dbx_echo("Preparing execution context")
    lock_context_id = ContextLockFile.get_context()

    if _is_context_available(v1_client, cluster_id, lock_context_id):
        dbx_echo("Existing context is active, using it")
        return lock_context_id
    else:
        dbx_echo("Existing context is not active, creating a new one")
        context_id = create_context(v1_client, cluster_id, language)
        ContextLockFile.set_context(context_id)
        dbx_echo("New context prepared, ready to use it")
        return context_id
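A hedged usage sketch for the helper above (not part of the source): the environment name and cluster id are placeholders, and prepare_environment, ApiV1Client and execute_command are used as they appear in the later examples of this section.
api_client = prepare_environment("default")  # placeholder environment name
v1_client = ApiV1Client(api_client)
context_id = get_context_id(v1_client, cluster_id="0000-000000-sample000", language="python")  # placeholder cluster id
execute_command(v1_client, "0000-000000-sample000", context_id, "print('context is ready')")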
Example #2
    def launch(self):
        dbx_echo(f"Starting deployment to factory {self.factory_name}")
        prepared_activities = []
        for job_spec in self._specs:
            job_name = job_spec.get("name")
            dbx_echo(f"Preparing job {job_name}")
            service_name = self._create_linked_service(job_spec)
            job_activity = self._generate_activity(job_spec, service_name)
            prepared_activities.append(job_activity)
            dbx_echo(f"Preparing job {job_name} - done")
        self._update_pipeline(prepared_activities)
        dbx_echo(
            f"Reflection to factory {self.factory_name} finished successfully")
Example #3
def execute_command(v1_client: ApiV1Client,
                    cluster_id: str,
                    context_id: str,
                    command: str,
                    verbose=True):
    payload = {
        "language": "python",
        "clusterId": cluster_id,
        "contextId": context_id,
        "command": command,
    }
    command_execution_data = v1_client.execute_command(payload)
    command_id = command_execution_data["id"]
    execution_result = wait_for_command_execution(v1_client, cluster_id,
                                                  context_id, command_id)
    if execution_result["status"] == "Cancelled":
        dbx_echo("Command cancelled")
    else:
        final_result = execution_result["results"]["resultType"]
        if final_result == "error":
            dbx_echo("Execution failed, please follow the given error")
            raise RuntimeError(
                f"Command execution failed. "
                f'Cluster error cause: {execution_result["results"]["cause"]}')

        if verbose:
            dbx_echo("Command successfully executed")
            print(execution_result["results"]["data"])

        return execution_result["results"]["data"]
Example #4
def _find_deployment_run(filter_string: str, tags: Dict[str, str],
                         as_run_submit: bool,
                         environment: str) -> Dict[str, Any]:
    runs = mlflow.search_runs(filter_string=filter_string,
                              order_by=["start_time DESC"])

    filter_conditions = []

    if tags:
        dbx_echo("Filtering deployments with set of additional tags")
        for tag_name, tag_value in tags.items():
            tag_column_name = f"tags.{tag_name}"
            if tag_column_name not in runs.columns:
                raise Exception(
                    f"Tag {tag_name} not found in underlying MLflow experiment, please verify tag existence in the UI"
                )
            tag_condition = runs[tag_column_name] == tag_value
            filter_conditions.append(tag_condition)
        full_filter = pd.DataFrame(filter_conditions).T.all(axis=1)  # noqa
        _runs = runs[full_filter]
    else:
        dbx_echo("No additional tags provided")
        _runs = runs

    if as_run_submit:
        if "tags.dbx_deploy_type" not in _runs.columns:
            raise Exception(""""
                Run Submit API is available only when deployment was done with --files-only flag.
                Currently there is no deployments with such flag under given filters. 
                Please re-deploy with --files-only flag and then re-run this launch command.
            """)

        _runs = _runs[_runs["tags.dbx_deploy_type"] == "files_only"]

    if _runs.empty:
        exception_string = """
        No runs provided per given set of filters:
            {filter_string}
        """
        if tags:
            exception_string = (exception_string + f"""
            With additional tags: {tags}
            """)
        if as_run_submit:
            exception_string = (exception_string + """
            With file-based deployments (dbx_deploy_type='files_only').
            """)
        experiment_location = InfoFile.get(environment).get("workspace_dir")
        exception_string = (exception_string + f"""
        To verify current status of deployments please check experiment UI in workspace dir: {experiment_location}
        """)

        raise Exception(exception_string)

    run_info = _runs.iloc[0].to_dict()

    dbx_echo("Successfully found deployment per given job name")
    return run_info
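A hedged usage sketch for the lookup above: the environment name and tag value are placeholders, and generate_filter_string is called in its single-argument form as in the later launch example.
filter_string = generate_filter_string("default")  # placeholder environment name
run_info = _find_deployment_run(filter_string,
                                tags={"dbx_branch_name": "main"},  # placeholder tag
                                as_run_submit=False,
                                environment="default")
dbx_echo(f"Latest deployment run id: {run_info['run_id']}")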
Example #5
    def _create_linked_service(self, job_spec: Dict[str, Any]) -> str:
        cluster_spec = job_spec.get("new_cluster")
        service_name = job_spec.get("name") + "-ls"
        dbx_echo(f"Preparing linked service {service_name}")
        if cluster_spec:
            service_spec = AzureDatabricksLinkedService(
                domain=self._config.host,
                access_token=SecureString(value=self._config.token),
                instance_pool_id=cluster_spec.get("instance_pool_id"),
                new_cluster_custom_tags=cluster_spec.get("custom_tags"),
                new_cluster_driver_node_type=cluster_spec.get(
                    "driver_node_type_id"),
                new_cluster_enable_elastic_disk=cluster_spec.get(
                    "enable_elastic_disk"),
                new_cluster_init_scripts=cluster_spec.get(
                    "init_scripts"),
                new_cluster_log_destination=cluster_spec.get(
                    "cluster_log_conf", {}).get("dbfs", {}).get("destination"),
                new_cluster_node_type=cluster_spec.get("node_type_id"),
                new_cluster_num_of_worker=cluster_spec.get("num_workers"),
                new_cluster_spark_conf=cluster_spec.get("spark_conf"),
                new_cluster_spark_env_vars=cluster_spec.get("spark_env_vars"),
                new_cluster_version=cluster_spec.get("spark_version"),
            )
        else:
            service_spec = AzureDatabricksLinkedService(
                domain=self._config.host,
                access_token=SecureString(value=self._config.token),
                existing_cluster_id=cluster_spec.get("existing_cluster_id"),
            )

        service_resource = LinkedServiceResource(properties=service_spec)
        self.adf_client.linked_services.create_or_update(
            self.resource_group, self.factory_name, service_name,
            service_resource)

        dbx_echo(f"Preparing linked service {service_name} - done")
        return service_name
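For context, a hypothetical job spec fragment covering a subset of the new_cluster keys read above; all values are placeholders, not taken from the source.
job_spec = {
    "name": "sample-job",  # placeholder job name
    "new_cluster": {
        "spark_version": "7.3.x-scala2.12",  # placeholder runtime version
        "node_type_id": "Standard_F4s",      # placeholder Azure node type
        "num_workers": 2,
        "spark_conf": {"spark.speculation": "true"},  # placeholder Spark conf
        "cluster_log_conf": {"dbfs": {"destination": "dbfs:/cluster-logs/sample-job"}},
    },
}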
Example #6
def _adjust_job_definitions(
    jobs: List[Dict[str, Any]],
    artifact_base_uri: str,
    requirements_payload: List[Dict[str, str]],
    package_payload: List[Dict[str, str]],
    file_uploader: FileUploader,
    api_client: ApiClient,
):
    def adjustment_callback(p: Any):
        return _adjust_path(p, artifact_base_uri, file_uploader)

    for job in jobs:
        job["libraries"] = job.get("libraries", []) + package_payload
        _walk_content(adjustment_callback, job)
        job["libraries"] = job.get("libraries", []) + requirements_payload
        policy_name = job.get("new_cluster", {}).get("policy_name")
        if policy_name:
            dbx_echo(
                f"Processing policy name {policy_name} for job {job['name']}")
            policy_spec = _preprocess_policy_name(api_client, policy_name)
            policy = json.loads(policy_spec["definition"])
            policy_props = PolicyParser(policy).parse()
            _deep_update(job["new_cluster"], policy_props, policy_name)
            job["new_cluster"]["policy_id"] = policy_spec["policy_id"]
Example #7
    def _update_pipeline(self,
                         new_activities: List[DatabricksSparkPythonActivity]):
        dbx_echo(f"Updating pipeline {self.name}")
        current_pipeline = self.adf_client.pipelines.get(
            self.resource_group, self.factory_name, self.name)

        new_activities_dict = {a.name: a for a in new_activities}
        existing_activities_dict = {
            a.name: a
            for a in current_pipeline.activities
        }
        intersected = set(new_activities_dict.keys()).intersection(
            set(existing_activities_dict.keys()))

        final_activity_list = []

        for name in intersected:
            _ex: Activity = existing_activities_dict.get(name)
            _new: Activity = new_activities_dict.get(name)
            _new.depends_on = _ex.depends_on
            final_activity_list.append(_new)

        for name, activity in existing_activities_dict.items():
            if name not in intersected:
                final_activity_list.append(activity)

        for name, activity in new_activities_dict.items():
            if name not in intersected:
                final_activity_list.append(activity)

        resource = PipelineResource(activities=final_activity_list)

        self.adf_client.pipelines.create_or_update(self.resource_group,
                                                   self.factory_name,
                                                   self.name, resource)
        dbx_echo(f"Updating pipeline {self.name} - done")
Example #8
def _create_job(api_client: ApiClient, job: Dict[str, Any]) -> str:
    dbx_echo(f'Creating a new job with name {job["name"]}')
    try:
        jobs_api = JobsApi(api_client)
        job_id = jobs_api.create_job(job)["job_id"]
    except HTTPError as e:
        dbx_echo("Failed to create job with definition:")
        dbx_echo(json.dumps(job, indent=4))
        raise e
    return job_id
Example #9
def _update_job(jobs_service: JobsService, job_id: str, job: Dict[str,
                                                                  Any]) -> str:
    dbx_echo(
        f'Updating existing job with id: {job_id} and name: {job["name"]}')
    try:
        jobs_service.reset_job(job_id, job)
    except HTTPError as e:
        dbx_echo("Failed to update job with definition:")
        dbx_echo(json.dumps(job, indent=4))
        raise e
    return job_id
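The dispatch between the two helpers above is not shown in these examples. A hypothetical glue sketch could look like the following, where existing_job_id is assumed to come from a lookup of already deployed jobs by name.
from typing import Any, Dict, Optional

def _create_or_update_job(api_client: ApiClient, jobs_service: JobsService,
                          existing_job_id: Optional[str], job: Dict[str, Any]) -> str:
    # Hypothetical sketch, not the project's confirmed logic.
    if existing_job_id:
        return _update_job(jobs_service, existing_job_id, job)
    return _create_job(api_client, job)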
Example #10
def awake_cluster(cluster_service: ClusterService, cluster_id: str):
    cluster_info = cluster_service.get_cluster(cluster_id)
    if cluster_info["state"] in ["RUNNING", "RESIZING"]:
        dbx_echo("Cluster is ready")
    elif cluster_info["state"] in ["TERMINATED", "TERMINATING"]:
        dbx_echo("Dev cluster is terminated, starting it")
        cluster_service.start_cluster(cluster_id)
        time.sleep(5)
        awake_cluster(cluster_service, cluster_id)
    elif cluster_info["state"] == "ERROR":
        raise RuntimeError("Cluster is mis-configured and cannot be started, please check cluster settings at first")
    elif cluster_info["state"] in ["PENDING", "RESTARTING"]:
        dbx_echo(f'Cluster is getting prepared, current state: {cluster_info["state"]}')
        time.sleep(5)
        awake_cluster(cluster_service, cluster_id)
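As a variation on the recursive helper above (a sketch only, not the project's implementation), the same wait can be written iteratively with an explicit timeout:
import time

def awake_cluster_iterative(cluster_service: ClusterService, cluster_id: str,
                            timeout_seconds: int = 600):
    # Sketch of an iterative variant with a hard timeout; mirrors the states handled above.
    deadline = time.time() + timeout_seconds
    while time.time() < deadline:
        state = cluster_service.get_cluster(cluster_id)["state"]
        if state in ("RUNNING", "RESIZING"):
            dbx_echo("Cluster is ready")
            return
        if state in ("TERMINATED", "TERMINATING"):
            dbx_echo("Dev cluster is terminated, starting it")
            cluster_service.start_cluster(cluster_id)
        elif state == "ERROR":
            raise RuntimeError(
                "Cluster is mis-configured and cannot be started, please check cluster settings at first")
        time.sleep(5)
    raise TimeoutError(f"Cluster {cluster_id} did not become ready within {timeout_seconds} seconds")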
Example #11
def _wait_run(api_client: ApiClient, run_data: Dict[str,
                                                    Any]) -> Dict[str, Any]:
    dbx_echo(f"Tracing run with id {run_data['run_id']}")
    while True:
        time.sleep(
            5
        )  # runs API is eventually consistent, it's better to have a short pause for status update
        status = _get_run_status(api_client, run_data)
        run_state = status["state"]
        result_state = run_state.get("result_state", None)
        life_cycle_state = run_state.get("life_cycle_state", None)
        state_message = run_state.get("state_message")

        dbx_echo(f"[Run Id: {run_data['run_id']}] Current run status info - "
                 f"result state: {result_state}, "
                 f"lifecycle state: {life_cycle_state}, "
                 f"state message: {state_message}")

        if life_cycle_state in TERMINAL_RUN_LIFECYCLE_STATES:
            dbx_echo(f"Finished tracing run with id {run_data['run_id']}")
            return status
Example #12
def execute(
    environment: str,
    cluster_id: str,
    cluster_name: str,
    job: str,
    deployment_file: str,
    requirements_file: str,
    no_package: bool,
    no_rebuild: bool,
):
    api_client = prepare_environment(environment)

    cluster_id = _preprocess_cluster_args(api_client, cluster_name, cluster_id)

    dbx_echo(
        f"Executing job: {job} in environment {environment} on cluster {cluster_name} (id: {cluster_id})"
    )

    handle_package(no_rebuild)

    deployment = get_deployment_config(deployment_file).get_environment(
        environment)

    _verify_deployment(deployment, environment, deployment_file)

    found_jobs = [j for j in deployment["jobs"] if j["name"] == job]

    if not found_jobs:
        raise RuntimeError(
            f"Job {job} was not found in environment jobs, please check the deployment file"
        )

    job_payload = found_jobs[0]

    entrypoint_file = job_payload.get("spark_python_task").get("python_file")

    if not entrypoint_file:
        raise FileNotFoundError(
            f"No entrypoint file provided in job {job}. "
            f"Please add one under spark_python_task.python_file section")

    cluster_service = ClusterService(api_client)

    dbx_echo("Preparing interactive cluster to accept jobs")
    awake_cluster(cluster_service, cluster_id)

    v1_client = ApiV1Client(api_client)
    context_id = get_context_id(v1_client, cluster_id, "python")
    file_uploader = FileUploader(api_client)

    with mlflow.start_run() as execution_run:

        artifact_base_uri = execution_run.info.artifact_uri
        localized_base_path = artifact_base_uri.replace("dbfs:/", "/dbfs/")

        requirements_fp = pathlib.Path(requirements_file)
        if requirements_fp.exists():
            file_uploader.upload_file(requirements_fp)
            localized_requirements_path = f"{localized_base_path}/{str(requirements_fp)}"

            installation_command = f"%pip install -U -r {localized_requirements_path}"

            dbx_echo("Installing provided requirements")
            execute_command(v1_client,
                            cluster_id,
                            context_id,
                            installation_command,
                            verbose=False)
            dbx_echo("Provided requirements installed")
        else:
            dbx_echo(
                f"Requirements file {requirements_fp} is not provided" +
                ", following the execution without any additional packages")

        if not no_package:
            package_file = get_package_file()

            if not package_file:
                raise FileNotFoundError(
                    "Project package was not found. Please check that /dist directory exists."
                )

            file_uploader.upload_file(package_file)
            localized_package_path = f"{localized_base_path}/{str(package_file.as_posix())}"

            dbx_echo("Installing package")
            installation_command = f"%pip install --force-reinstall {localized_package_path}"
            execute_command(v1_client,
                            cluster_id,
                            context_id,
                            installation_command,
                            verbose=False)
            dbx_echo("Package installation finished")
        else:
            dbx_echo(
                "Package was disabled via --no-package, only the code from entrypoint will be used"
            )

        tags = {"dbx_action_type": "execute", "dbx_environment": environment}

        mlflow.set_tags(tags)

        dbx_echo("Processing parameters")
        task_props: List[Any] = job_payload.get("spark_python_task").get(
            "parameters", [])

        if task_props:

            def adjustment_callback(p: Any):
                return _adjust_path(p, artifact_base_uri, file_uploader)

            _walk_content(adjustment_callback, task_props)

        task_props = ["python"] + task_props

        parameters_command = f"""
        import sys
        sys.argv = {task_props}
        """

        execute_command(v1_client,
                        cluster_id,
                        context_id,
                        parameters_command,
                        verbose=False)

        dbx_echo("Processing parameters - done")

        dbx_echo("Starting entrypoint file execution")
        execute_command(v1_client, cluster_id, context_id,
                        pathlib.Path(entrypoint_file).read_text())
        dbx_echo("Command execution finished")
Example #13
def deploy(
    deployment_file: str,
    jobs: str,
    requirements_file: str,
    tags: List[str],
    environment: str,
    no_rebuild: bool,
    no_package: bool,
):
    dbx_echo(f"Starting new deployment for environment {environment}")

    api_client = prepare_environment(environment)
    additional_tags = parse_multiple(tags)
    branch_name = get_current_branch_name()
    handle_package(no_rebuild)
    package_file = get_package_file()

    _verify_deployment_file(deployment_file)

    deployment_file_controller = DeploymentFile(deployment_file)
    deployment = deployment_file_controller.get_environment(environment)

    if not deployment:
        raise NameError(f"""
        Requested environment {environment} is non-existent in the deployment file {deployment_file}.
        Available environments are: {deployment_file_controller.get_all_environment_names()}
        """)

    if jobs:
        requested_jobs = jobs.split(",")
    else:
        requested_jobs = None

    requirements_payload = _preprocess_requirements(requirements_file)

    _preprocess_deployment(deployment, requested_jobs)

    _file_uploader = FileUploader(api_client)

    with mlflow.start_run() as deployment_run:

        artifact_base_uri = deployment_run.info.artifact_uri

        if no_package:
            dbx_echo(
                "No package definition will be added into job description")
            package_requirement = []
        else:
            if package_file:
                package_requirement = [{"whl": str(package_file)}]
            else:
                dbx_echo(
                    "Package file was not found! Please check your /dist/ folder"
                )
                package_requirement = []

        _adjust_job_definitions(
            deployment["jobs"],
            artifact_base_uri,
            requirements_payload,
            package_requirement,
            _file_uploader,
        )
        deployment_data = _create_jobs(deployment["jobs"], api_client)
        _log_deployments(deployment_data)

        deployment_tags = {
            "dbx_action_type": "deploy",
            "dbx_environment": environment,
            "dbx_status": "SUCCESS",
        }

        deployment_tags.update(additional_tags)
        if branch_name:
            deployment_tags["dbx_branch_name"] = branch_name

        mlflow.set_tags(deployment_tags)
        dbx_echo(
            f"Deployment for environment {environment} finished successfully")
Example #14
def launch(
        environment: str,
        job: str,
        trace: bool,
        kill_on_sigterm: bool,
        existing_runs: str,
        tags: List[str],
        parameters: List[str],
):
    dbx_echo(f"Launching job {job} on environment {environment}")

    api_client = prepare_environment(environment)
    additional_tags = parse_multiple(tags)
    override_parameters = parse_multiple(parameters)

    filter_string = generate_filter_string(environment, additional_tags)

    runs = mlflow.search_runs(filter_string=filter_string, max_results=1)

    if runs.empty:
        raise EnvironmentError(
            f"""
        No runs provided per given set of filters:
            {filter_string}
        Please check experiment UI to verify current status of deployments.
        """
        )

    run_info = runs.iloc[0].to_dict()

    dbx_echo("Successfully found deployment per given job name")

    deployment_run_id = run_info["run_id"]

    with mlflow.start_run(run_id=deployment_run_id) as deployment_run:
        with mlflow.start_run(nested=True):

            artifact_base_uri = deployment_run.info.artifact_uri
            deployments = _load_deployments(api_client, artifact_base_uri)
            job_id = deployments.get(job)

            if not job_id:
                raise Exception(
                    f"Job with name {job} not found in the latest deployment"
                )

            jobs_service = JobsService(api_client)
            active_runs = jobs_service.list_runs(job_id, active_only=True).get(
                "runs", []
            )

            for run in active_runs:
                if existing_runs == "pass":
                    dbx_echo("Passing the existing runs status check")

                if existing_runs == "wait":
                    dbx_echo(
                        f'Waiting for job run with id {run["run_id"]} to be finished'
                    )
                    _wait_run(api_client, run)

                if existing_runs == "cancel":
                    dbx_echo(f'Cancelling run with id {run["run_id"]}')
                    _cancel_run(api_client, run)

            if override_parameters:
                _prepared_parameters = sum(
                    [[k, v] for k, v in override_parameters.items()], []
                )
                dbx_echo(
                    f"Default launch parameters are overridden with the following: {_prepared_parameters}"
                )
                run_data = jobs_service.run_now(
                    job_id, python_params=_prepared_parameters
                )
            else:
                run_data = jobs_service.run_now(job_id)

            if trace:
                dbx_echo("Tracing job run")
                if kill_on_sigterm:
                    dbx_echo("Click Ctrl+C to stop the job run")
                    try:
                        dbx_status = _trace_run(api_client, run_data)
                    except KeyboardInterrupt:
                        dbx_status = "CANCELLED"
                        dbx_echo("Cancelling the run gracefully")
                        _cancel_run(api_client, run_data)
                        dbx_echo("Run cancelled successfully")
                else:
                    dbx_status = _trace_run(api_client, run_data)

                if dbx_status == "ERROR":
                    raise Exception(
                        "Tracked job failed during execution. "
                        "Please check Databricks UI for job logs"
                    )
                dbx_echo("Launch command finished")

            else:
                dbx_status = "NOT_TRACKED"
                dbx_echo(
                    "Job successfully launched in non-tracking mode. Please check Databricks UI for job status"
                )

            deployment_tags = {
                "job_id": job_id,
                "run_id": run_data["run_id"],
                "dbx_action_type": "launch",
                "dbx_status": dbx_status,
                "dbx_environment": environment,
            }

            mlflow.set_tags(deployment_tags)
Example #15
def execute(
        environment: str,
        cluster_id: str,
        cluster_name: str,
        job: str,
        deployment_file: str,
        requirements_file: str,
        no_rebuild: bool,
):
    api_client = prepare_environment(environment)

    cluster_id = _preprocess_cluster_args(api_client, cluster_name, cluster_id)

    dbx_echo(
        f"Executing job: {job} with environment: {environment} on cluster: {cluster_id}"
    )

    handle_package(no_rebuild)

    deployment = DeploymentFile(deployment_file).get_environment(environment)

    if not deployment:
        raise NameError(
            f"Environment {environment} is not provided in deployment file {deployment_file}"
            + " please add this environment first"
        )

    env_jobs = deployment.get("jobs")
    if not env_jobs:
        raise RuntimeError(
            f"No jobs section found in environment {environment}, please check the deployment file"
        )

    found_jobs = [j for j in deployment["jobs"] if j["name"] == job]

    if not found_jobs:
        raise RuntimeError(
            f"Job {job} was not found in environment jobs, please check the deployment file"
        )

    job_payload = found_jobs[0]

    entrypoint_file = job_payload.get("spark_python_task").get("python_file")

    if not entrypoint_file:
        raise FileNotFoundError(
            f"No entrypoint file provided in job {job}. "
            f"Please add one under spark_python_task.python_file section"
        )

    cluster_service = ClusterService(api_client)

    dbx_echo("Preparing interactive cluster to accept jobs")
    awake_cluster(cluster_service, cluster_id)

    v1_client = ApiV1Client(api_client)
    context_id = get_context_id(v1_client, cluster_id, "python")
    file_uploader = FileUploader(api_client)

    with mlflow.start_run() as execution_run:

        artifact_base_uri = execution_run.info.artifact_uri
        localized_base_path = artifact_base_uri.replace("dbfs:/", "/dbfs/")

        requirements_fp = pathlib.Path(requirements_file)
        if requirements_fp.exists():
            file_uploader.upload_file(requirements_fp)
            localized_requirements_path = (
                f"{localized_base_path}/{str(requirements_fp)}"
            )

            installation_command = f"%pip install -U -r {localized_requirements_path}"

            dbx_echo("Installing provided requirements")
            execute_command(
                v1_client, cluster_id, context_id, installation_command, verbose=False
            )
            dbx_echo("Provided requirements installed")
        else:
            dbx_echo(
                f"Requirements file {requirements_fp} is not provided"
                + ", following the execution without any additional packages"
            )

        project_package_path = list(pathlib.Path(".").rglob("dist/*.whl"))

        # Make sure we always take the newest package (i.e. sort by time)
        project_package_path.sort(key=os.path.getctime, reverse=True)
        project_package_path = project_package_path[0]

        file_uploader.upload_file(project_package_path)
        localized_package_path = (
            f"{localized_base_path}/{str(project_package_path.as_posix())}"
        )
        dbx_echo("Installing package")
        installation_command = f"%pip install {localized_package_path} --force-reinstall"
        execute_command(
            v1_client, cluster_id, context_id, installation_command, verbose=False
        )
        dbx_echo("Package installation finished")

        tags = {"dbx_action_type": "execute", "dbx_environment": environment}

        mlflow.set_tags(tags)

        dbx_echo("Starting entrypoint file execution")
        execute_command(
            v1_client, cluster_id, context_id, pathlib.Path(entrypoint_file).read_text()
        )
        dbx_echo("Command execution finished")
Example #16
def launch(
    environment: str,
    job: str,
    trace: bool,
    kill_on_sigterm: bool,
    existing_runs: str,
    as_run_submit: bool,
    tags: List[str],
    parameters: List[str],
    parameters_raw: Optional[str],
):
    dbx_echo(f"Launching job {job} on environment {environment}")

    api_client = prepare_environment(environment)
    additional_tags = parse_multiple(tags)

    if parameters_raw:
        prepared_parameters = parameters_raw
    else:
        override_parameters = parse_multiple(parameters)
        prepared_parameters = sum([[k, v]
                                   for k, v in override_parameters.items()],
                                  [])

    filter_string = generate_filter_string(environment)

    run_info = _find_deployment_run(filter_string, additional_tags,
                                    as_run_submit, environment)

    deployment_run_id = run_info["run_id"]

    with mlflow.start_run(run_id=deployment_run_id) as deployment_run:

        with mlflow.start_run(nested=True):
            artifact_base_uri = deployment_run.info.artifact_uri

            if not as_run_submit:
                run_launcher = RunNowLauncher(job, api_client,
                                              artifact_base_uri, existing_runs,
                                              prepared_parameters)
            else:
                run_launcher = RunSubmitLauncher(job, api_client,
                                                 artifact_base_uri,
                                                 existing_runs,
                                                 prepared_parameters,
                                                 environment)

            run_data, job_id = run_launcher.launch()

            jobs_service = JobsService(api_client)
            run_info = jobs_service.get_run(run_data["run_id"])
            run_url = run_info.get("run_page_url")
            dbx_echo(f"Run URL: {run_url}")
            if trace:
                if kill_on_sigterm:
                    dbx_echo("Click Ctrl+C to stop the run")
                    try:
                        dbx_status = _trace_run(api_client, run_data)
                    except KeyboardInterrupt:
                        dbx_status = "CANCELLED"
                        dbx_echo("Cancelling the run gracefully")
                        _cancel_run(api_client, run_data)
                        dbx_echo("Run cancelled successfully")
                else:
                    dbx_status = _trace_run(api_client, run_data)

                if dbx_status == "ERROR":
                    raise Exception(
                        "Tracked run failed during execution. Please check Databricks UI for run logs"
                    )
                dbx_echo("Launch command finished")

            else:
                dbx_status = "NOT_TRACKED"
                dbx_echo(
                    "Run successfully launched in non-tracking mode. Please check Databricks UI for job status"
                )

            deployment_tags = {
                "job_id": job_id,
                "run_id": run_data.get("run_id"),
                "dbx_action_type": "launch",
                "dbx_status": dbx_status,
                "dbx_environment": environment,
            }

            mlflow.set_tags(deployment_tags)
Example #17
def deploy(
    deployment_file: str,
    jobs: str,
    requirements_file: str,
    tags: List[str],
    environment: str,
    no_rebuild: bool,
    no_package: bool,
    files_only: bool,
    write_specs_to_file: Optional[str],
    branch_name: Optional[str],
):
    dbx_echo(f"Starting new deployment for environment {environment}")

    api_client = prepare_environment(environment)
    additional_tags = parse_multiple(tags)
    handle_package(no_rebuild)
    package_file = get_package_file()

    if not branch_name:
        branch_name = get_current_branch_name()

    _verify_deployment_file(deployment_file)

    deployment_file_config = get_deployment_config(deployment_file)
    deployment = deployment_file_config.get_environment(environment)

    if not deployment:
        raise NameError(f"""
        Requested environment {environment} is non-existent in the deployment file {deployment_file}.
        Available environments are: {deployment_file_config.get_all_environment_names()}
        """)

    if jobs:
        requested_jobs = jobs.split(",")
    else:
        requested_jobs = None

    requirements_payload = _preprocess_requirements(requirements_file)

    _preprocess_deployment(deployment, requested_jobs)

    _file_uploader = FileUploader(api_client)

    with mlflow.start_run() as deployment_run:

        artifact_base_uri = deployment_run.info.artifact_uri

        if no_package:
            dbx_echo(
                "No package definition will be added into job description")
            package_requirement = []
        else:
            if package_file:
                package_requirement = [{"whl": str(package_file)}]
            else:
                dbx_echo(
                    "Package file was not found! Please check your /dist/ folder"
                )
                package_requirement = []

        _adjust_job_definitions(deployment["jobs"], artifact_base_uri,
                                requirements_payload, package_requirement,
                                _file_uploader, api_client)

        if not files_only:
            dbx_echo("Updating job definitions")
            deployment_data = _create_jobs(deployment["jobs"], api_client)
            _log_dbx_file(deployment_data, "deployments.json")

            for job_spec in deployment.get("jobs"):
                permissions = job_spec.get("permissions")
                if permissions:
                    job_name = job_spec.get("name")
                    dbx_echo(
                        f"Permission settings are provided for job {job_name}, setting it up"
                    )
                    job_id = deployment_data.get(job_spec.get("name"))
                    api_client.perform_query("PUT",
                                             f"/permissions/jobs/{job_id}",
                                             data=permissions)
                    dbx_echo(
                        f"Permission settings were successfully set for job {job_name}"
                    )

            dbx_echo("Updating job definitions - done")

        deployment_tags = {
            "dbx_action_type": "deploy",
            "dbx_environment": environment,
            "dbx_status": "SUCCESS",
        }

        deployment_spec = {environment: deployment}

        deployment_tags.update(additional_tags)

        if branch_name:
            deployment_tags["dbx_branch_name"] = branch_name

        if files_only:
            deployment_tags["dbx_deploy_type"] = "files_only"

        _log_dbx_file(deployment_spec, "deployment-result.json")

        mlflow.set_tags(deployment_tags)
        dbx_echo(
            f"Deployment for environment {environment} finished successfully")

        if write_specs_to_file:
            dbx_echo("Writing final job specifications into file")
            specs_file = pathlib.Path(write_specs_to_file)

            if specs_file.exists():
                specs_file.unlink()

            specs_file.write_text(json.dumps(deployment_spec, indent=4))
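For context, a hedged sketch of a job spec carrying the permissions block that the deploy flow above forwards to the /permissions/jobs endpoint; the payload follows the general Databricks Permissions API shape, and the principals are placeholders.
job_spec = {
    "name": "sample-job",
    "permissions": {
        "access_control_list": [
            {"user_name": "owner@example.com", "permission_level": "IS_OWNER"},  # placeholder user
            {"group_name": "data-team", "permission_level": "CAN_MANAGE_RUN"},   # placeholder group
        ]
    },
}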