import json
from typing import Any, Dict, List, Optional, Tuple

import mlflow
from databricks_cli.sdk.api_client import ApiClient
from databricks_cli.sdk.service import JobsService
from requests.exceptions import HTTPError

# Project-internal helpers (dbx_echo, prepare_environment, generate_filter_string,
# _find_deployment_run, RunNowLauncher, RunSubmitLauncher, etc.) are assumed to be
# importable from the surrounding package.


def _create_jobs(jobs: List[Dict[str, Any]], api_client: ApiClient) -> Dict[str, int]:
    deployment_data = {}
    jobs_service = JobsService(api_client)
    for job in jobs:
        dbx_echo(f'Processing deployment for job: {job["name"]}')
        all_jobs = jobs_service.list_jobs().get("jobs", [])
        matching_jobs = [j for j in all_jobs if j["settings"]["name"] == job["name"]]

        if not matching_jobs:
            job_id = _create_job(api_client, job)
        else:
            if len(matching_jobs) > 1:
                raise Exception(
                    f'There is more than one job with name {job["name"]}. '
                    "Please delete the duplicated jobs first."
                )
            job_id = matching_jobs[0]["job_id"]
            _update_job(jobs_service, job_id, job)

        deployment_data[job["name"]] = job_id
    return deployment_data
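# _create_job is referenced above but not shown in this section. A minimal sketch of
# what it could look like, mirroring the error handling of _update_job below; this is
# an assumption, not the actual implementation. perform_query is the low-level request
# method of databricks-cli's ApiClient.
def _create_job(api_client: ApiClient, job: Dict[str, Any]) -> str:
    dbx_echo(f'Creating a new job with name {job["name"]}')
    try:
        # Assumes the raw Jobs API create endpoint accepts the job definition as-is.
        job_id = api_client.perform_query("POST", "/jobs/create", data=job, headers=None)["job_id"]
    except HTTPError as e:
        dbx_echo("Failed to create job with definition:")
        dbx_echo(json.dumps(job, indent=4))
        raise e
    return job_id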
def _update_job(jobs_service: JobsService, job_id: str, job: Dict[str, Any]) -> str:
    dbx_echo(f'Updating existing job with id: {job_id} and name: {job["name"]}')
    try:
        jobs_service.reset_job(job_id, job)
    except HTTPError as e:
        dbx_echo("Failed to update job with definition:")
        dbx_echo(json.dumps(job, indent=4))
        raise e
    return job_id
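# dbx_echo is used throughout this module. A minimal stand-in, assuming it is a
# timestamped wrapper around click.echo; hypothetical, the real helper may differ.
import datetime

import click


def dbx_echo(message: str) -> None:
    timestamp = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S.%f")
    click.echo(f"[dbx][{timestamp}] {message}")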
def launch(self) -> Tuple[Dict[Any, Any], Optional[str]]:
    dbx_echo("Launching job via run now API")
    jobs_service = JobsService(self.api_client)
    all_jobs = jobs_service.list_jobs().get("jobs", [])
    matching_jobs = [j for j in all_jobs if j["settings"]["name"] == self.job]

    if not matching_jobs:
        raise Exception(f"Job with name {self.job} not found")
    if len(matching_jobs) > 1:
        raise Exception(f"Job with name {self.job} is duplicated. Please make the job name unique.")

    job_data = matching_jobs[0]
    job_id = job_data["job_id"]

    active_runs = jobs_service.list_runs(job_id, active_only=True).get("runs", [])
    for run in active_runs:
        if self.existing_runs == "pass":
            dbx_echo("Passing the existing runs status check")
        elif self.existing_runs == "wait":
            dbx_echo(f'Waiting for job run with id {run["run_id"]} to be finished')
            _wait_run(self.api_client, run)
        elif self.existing_runs == "cancel":
            dbx_echo(f'Cancelling run with id {run["run_id"]}')
            _cancel_run(self.api_client, run)

    if self.prepared_parameters:
        dbx_echo(f"Default launch parameters are overridden with the following: {self.prepared_parameters}")
        # No null-check is needed here: job existence was already verified by the listing above.
        job_settings = job_data.get("settings")
        # Determine the task type so that parameters are passed under the correct run-now key.
        extra_payload_key = _define_payload_key(job_settings)
        extra_payload = {extra_payload_key: self.prepared_parameters}
        run_data = jobs_service.run_now(job_id, **extra_payload)
    else:
        run_data = jobs_service.run_now(job_id)

    return run_data, job_id
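# _define_payload_key is not shown in this section. A plausible sketch, assuming it
# maps the task type found in the job settings to the matching run-now parameter key
# of the Jobs API; the real mapping may cover more task types.
def _define_payload_key(job_settings: Dict[str, Any]) -> str:
    if "spark_python_task" in job_settings:
        return "python_params"
    if "notebook_task" in job_settings:
        return "notebook_params"
    if "spark_jar_task" in job_settings:
        return "jar_params"
    if "spark_submit_task" in job_settings:
        return "spark_submit_params"
    raise Exception("Unexpected task type in job settings, cannot define the parameter payload key")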
def launch(self) -> Tuple[Dict[Any, Any], Optional[str]]:
    dbx_echo("Launching job via run now API")
    jobs_service = JobsService(self.api_client)
    all_jobs = jobs_service.list_jobs().get("jobs", [])
    matching_jobs = [j for j in all_jobs if j["settings"]["name"] == self.job]

    if not matching_jobs:
        raise Exception(f"Job with name {self.job} not found")
    if len(matching_jobs) > 1:
        raise Exception(f"Job with name {self.job} is duplicated. Please make the job name unique.")

    job_id = matching_jobs[0]["job_id"]

    active_runs = jobs_service.list_runs(job_id, active_only=True).get("runs", [])
    for run in active_runs:
        if self.existing_runs == "pass":
            dbx_echo("Passing the existing runs status check")
        elif self.existing_runs == "wait":
            dbx_echo(f'Waiting for job run with id {run["run_id"]} to be finished')
            _wait_run(self.api_client, run)
        elif self.existing_runs == "cancel":
            dbx_echo(f'Cancelling run with id {run["run_id"]}')
            _cancel_run(self.api_client, run)

    if self.override_parameters:
        # Flatten {"key": "value"} pairs into the flat ["key", "value", ...] list
        # expected by python_params.
        _prepared_parameters = sum([[k, v] for k, v in self.override_parameters.items()], [])
        dbx_echo(f"Default launch parameters are overridden with the following: {_prepared_parameters}")
        run_data = jobs_service.run_now(job_id, python_params=_prepared_parameters)
    else:
        run_data = jobs_service.run_now(job_id)

    return run_data, job_id
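# Quick illustration of the sum(..., []) flattening idiom used above; the dict
# values here are made up. Insertion order is preserved in Python 3.7+, so the
# resulting list alternates keys and values:
_example_overrides = {"--input-path": "dbfs:/raw", "--env": "prod"}
assert sum([[k, v] for k, v in _example_overrides.items()], []) == [
    "--input-path", "dbfs:/raw", "--env", "prod",
]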
def launch(
    environment: str,
    job: str,
    trace: bool,
    kill_on_sigterm: bool,
    existing_runs: str,
    as_run_submit: bool,
    tags: List[str],
    parameters: List[str],
    parameters_raw: Optional[str],
):
    dbx_echo(f"Launching job {job} on environment {environment}")

    api_client = prepare_environment(environment)
    additional_tags = parse_multiple(tags)

    if parameters_raw:
        prepared_parameters = parameters_raw
    else:
        override_parameters = parse_multiple(parameters)
        prepared_parameters = sum([[k, v] for k, v in override_parameters.items()], [])

    filter_string = generate_filter_string(environment)
    run_info = _find_deployment_run(filter_string, additional_tags, as_run_submit, environment)
    deployment_run_id = run_info["run_id"]

    with mlflow.start_run(run_id=deployment_run_id) as deployment_run:
        with mlflow.start_run(nested=True):
            artifact_base_uri = deployment_run.info.artifact_uri

            if not as_run_submit:
                run_launcher = RunNowLauncher(job, api_client, artifact_base_uri, existing_runs, prepared_parameters)
            else:
                run_launcher = RunSubmitLauncher(
                    job, api_client, artifact_base_uri, existing_runs, prepared_parameters, environment
                )

            run_data, job_id = run_launcher.launch()

            jobs_service = JobsService(api_client)
            run_info = jobs_service.get_run(run_data["run_id"])
            run_url = run_info.get("run_page_url")
            dbx_echo(f"Run URL: {run_url}")

            if trace:
                if kill_on_sigterm:
                    dbx_echo("Click Ctrl+C to stop the run")
                    try:
                        dbx_status = _trace_run(api_client, run_data)
                    except KeyboardInterrupt:
                        dbx_status = "CANCELLED"
                        dbx_echo("Cancelling the run gracefully")
                        _cancel_run(api_client, run_data)
                        dbx_echo("Run cancelled successfully")
                else:
                    dbx_status = _trace_run(api_client, run_data)

                if dbx_status == "ERROR":
                    raise Exception("Tracked run failed during execution. Please check Databricks UI for run logs")
                dbx_echo("Launch command finished")
            else:
                dbx_status = "NOT_TRACKED"
                dbx_echo("Run successfully launched in non-tracking mode. Please check Databricks UI for job status")

            deployment_tags = {
                "job_id": job_id,
                "run_id": run_data.get("run_id"),
                "dbx_action_type": "launch",
                "dbx_status": dbx_status,
                "dbx_environment": environment,
            }
            mlflow.set_tags(deployment_tags)
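# parse_multiple is used above but not shown. A minimal sketch, assuming it turns
# repeated "KEY=VALUE" CLI options into a dict; hypothetical, the real helper may
# validate input more strictly.
def parse_multiple(multiple_argument: List[str]) -> Dict[str, str]:
    # Split on the first "=" only, so values may themselves contain "=".
    pairs = [argument.split("=", maxsplit=1) for argument in multiple_argument]
    return {key: value for key, value in pairs}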
def _get_run_status(api_client: ApiClient, run_data: Dict[str, Any]) -> Dict[str, Any]:
    jobs_service = JobsService(api_client)
    run_status = jobs_service.get_run(run_data["run_id"])
    return run_status
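# _wait_run builds on _get_run_status. A plausible sketch, assuming it polls the run
# until the Jobs API reports a terminal result_state; the polling interval is a
# made-up value.
import time


def _wait_run(api_client: ApiClient, run_data: Dict[str, Any]) -> Dict[str, Any]:
    while True:
        time.sleep(5)  # avoid hammering the Jobs API
        run_status = _get_run_status(api_client, run_data)
        # result_state appears in the "state" object only once the run has finished.
        if run_status["state"].get("result_state"):
            return run_status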
def _cancel_run(api_client: ApiClient, run_data: Dict[str, Any]):
    jobs_service = JobsService(api_client)
    jobs_service.cancel_run(run_data["run_id"])
    _wait_run(api_client, run_data)
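# _trace_run is referenced by the launch flows in this module. A minimal sketch,
# assuming it polls the run, echoes progress, and returns "SUCCESS" or "ERROR" based
# on the final result_state; hypothetical, the real tracer may report more detail.
def _trace_run(api_client: ApiClient, run_data: Dict[str, Any]) -> str:
    dbx_echo(f'Tracing run with id {run_data["run_id"]}')
    while True:
        run_state = _get_run_status(api_client, run_data)["state"]
        result_state = run_state.get("result_state")
        if result_state:
            return "SUCCESS" if result_state == "SUCCESS" else "ERROR"
        dbx_echo(f'[Run Id: {run_data["run_id"]}] Current run status: {run_state.get("life_cycle_state")}')
        time.sleep(5)  # polling interval is a made-up value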
def launch(
    environment: str,
    job: str,
    trace: bool,
    kill_on_sigterm: bool,
    existing_runs: str,
    tags: List[str],
    parameters: List[str],
):
    dbx_echo(f"Launching job {job} on environment {environment}")

    api_client = prepare_environment(environment)
    additional_tags = parse_multiple(tags)
    override_parameters = parse_multiple(parameters)

    filter_string = generate_filter_string(environment, additional_tags)
    runs = mlflow.search_runs(filter_string=filter_string, max_results=1)

    if runs.empty:
        raise EnvironmentError(
            f"""
        No runs provided per given set of filters:
            {filter_string}
        Please check experiment UI to verify current status of deployments.
        """
        )

    run_info = runs.iloc[0].to_dict()
    dbx_echo("Successfully found deployment per given job name")
    deployment_run_id = run_info["run_id"]

    with mlflow.start_run(run_id=deployment_run_id) as deployment_run:
        with mlflow.start_run(nested=True):
            artifact_base_uri = deployment_run.info.artifact_uri
            deployments = _load_deployments(api_client, artifact_base_uri)
            job_id = deployments.get(job)

            if not job_id:
                raise Exception(f"Job with name {job} not found in the latest deployment")

            jobs_service = JobsService(api_client)
            active_runs = jobs_service.list_runs(job_id, active_only=True).get("runs", [])

            for run in active_runs:
                if existing_runs == "pass":
                    dbx_echo("Passing the existing runs status check")
                elif existing_runs == "wait":
                    dbx_echo(f'Waiting for job run with id {run["run_id"]} to be finished')
                    _wait_run(api_client, run)
                elif existing_runs == "cancel":
                    dbx_echo(f'Cancelling run with id {run["run_id"]}')
                    _cancel_run(api_client, run)

            if override_parameters:
                # Flatten {"key": "value"} pairs into the flat ["key", "value", ...]
                # list expected by python_params.
                _prepared_parameters = sum([[k, v] for k, v in override_parameters.items()], [])
                dbx_echo(f"Default launch parameters are overridden with the following: {_prepared_parameters}")
                run_data = jobs_service.run_now(job_id, python_params=_prepared_parameters)
            else:
                run_data = jobs_service.run_now(job_id)

            if trace:
                dbx_echo("Tracing job run")
                if kill_on_sigterm:
                    dbx_echo("Click Ctrl+C to stop the job run")
                    try:
                        dbx_status = _trace_run(api_client, run_data)
                    except KeyboardInterrupt:
                        dbx_status = "CANCELLED"
                        dbx_echo("Cancelling the run gracefully")
                        _cancel_run(api_client, run_data)
                        dbx_echo("Run cancelled successfully")
                else:
                    dbx_status = _trace_run(api_client, run_data)

                if dbx_status == "ERROR":
                    raise Exception(
                        "Tracked job failed during execution. "
                        "Please check Databricks UI for job logs"
                    )
                dbx_echo("Launch command finished")
            else:
                dbx_status = "NOT_TRACKED"
                dbx_echo("Job successfully launched in non-tracking mode. Please check Databricks UI for job status")

            deployment_tags = {
                "job_id": job_id,
                "run_id": run_data["run_id"],
                "dbx_action_type": "launch",
                "dbx_status": dbx_status,
                "dbx_environment": environment,
            }
            mlflow.set_tags(deployment_tags)
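# _load_deployments is not shown in this section. A rough sketch, assuming the deploy
# step logged a deployments.json artifact (mapping job names to job ids) under the
# deployment run's artifact URI; the file name, layout, and retrieval call are all
# assumptions (mlflow.artifacts.download_artifacts requires a recent mlflow version).
def _load_deployments(api_client: ApiClient, artifact_base_uri: str) -> Dict[str, int]:
    # api_client is kept for signature parity with the call site above.
    local_path = mlflow.artifacts.download_artifacts(artifact_uri=f"{artifact_base_uri}/deployments.json")
    with open(local_path, "r", encoding="utf-8") as handle:
        return json.load(handle)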