def _handle_databricks_task_execution(task, hook, log, submitted_run_id):
    """
    Poll a submitted Databricks run until it reaches a terminal state.

    The run state is fetched from the hook repeatedly, sleeping
    `task.polling_period_seconds` between polls, and progress is reported
    through `log`. The function returns once the run is terminal and
    successful.

    Args:
        - task (prefect.Task) : Prefect task being handled; its `name`, `json`
            and `polling_period_seconds` attributes are read
        - hook (prefect.tasks.databricks.databricks_hook.DatabricksHook): Databricks Hook
            used to look up the run page URL and the run state
        - log (logger): Prefect logging instance
        - submitted_run_id (str): run ID returned after submitting or running Databricks job

    Raises:
        - PrefectException: if the run reaches a terminal state that is not
            successful
    """
    log.info("Run submitted with run_id: %s", submitted_run_id)
    run_page_url = hook.get_run_page_url(submitted_run_id)
    log.info("Run submitted with config : %s", task.json)

    def _log_run_page():
        # The same pointer to the run page is emitted after every status line.
        log.info("View run status, Spark UI, and logs at %s", run_page_url)

    _log_run_page()

    state = hook.get_run_state(submitted_run_id)
    while not state.is_terminal:
        log.info("%s in run state: %s", task.name, state)
        _log_run_page()
        log.info("Sleeping for %s seconds.", task.polling_period_seconds)
        time.sleep(task.polling_period_seconds)
        state = hook.get_run_state(submitted_run_id)

    # Terminal state reached: anything other than success is an error.
    if not state.is_successful:
        raise PrefectException(
            "{t} failed with terminal state: {s}".format(t=task.name, s=state)
        )

    log.info("%s completed successfully.", task.name)
    _log_run_page()
def is_terminal(self) -> bool:
    """
    Tell whether `self.life_cycle_state` is a terminal state.

    Returns:
        bool: True when the life cycle state is one of "TERMINATED",
            "SKIPPED" or "INTERNAL_ERROR"; False for any other known state.

    Raises:
        PrefectException: if the life cycle state is not listed in
            RUN_LIFE_CYCLE_STATES.
    """
    state = self.life_cycle_state

    if state in RUN_LIFE_CYCLE_STATES:
        return state in ("TERMINATED", "SKIPPED", "INTERNAL_ERROR")

    # Unknown states are rejected rather than being treated as non-terminal.
    message_template = (
        "Unexpected life cycle state: {}: If the state has "
        "been introduced recently, please check the Databricks user "
        "guide for troubleshooting information"
    )
    raise PrefectException(message_template.format(state))
def _do_api_call(self, endpoint_info, json):
    """
    Utility function to perform an API call with retries

    Authentication is chosen from `self.databricks_conn`: token auth when a
    "token" key is present, otherwise basic auth with "login"/"password".
    Retryable failures (as decided by `_retryable_error`) are retried up to
    `self.retry_limit` attempts, sleeping `self.retry_delay` between them.

    Args:
        - endpoint_info (tuple[string, string]): Tuple of method and endpoint.
            Supported methods are "GET", "POST" and "PATCH".
        - json (dict): Parameters for this API call.

    Returns:
        dict: If the api call returns a OK status code,
            this function returns the response in JSON. Otherwise,
            we throw an PrefectException.

    Raises:
        PrefectException: if the HTTP method is unsupported, if the request
            fails with a non-retryable error, or if every attempt fails.
    """
    method, endpoint = endpoint_info

    if "token" in self.databricks_conn:
        logging.info("Using token auth. ")
        auth = _TokenAuth(self.databricks_conn["token"])
        host = self._parse_host(self.databricks_conn["host"])
    else:
        logging.info("Using basic auth. ")
        auth = (self.databricks_conn["login"], self.databricks_conn["password"])
        host = self.databricks_conn["host"]

    # NOTE(review): the token branch already ran _parse_host, so the host is
    # parsed twice on that path; kept as-is to preserve existing behavior.
    url = "https://{host}/{endpoint}".format(
        host=self._parse_host(host), endpoint=endpoint
    )

    request_funcs = {
        "GET": requests.get,
        "POST": requests.post,
        "PATCH": requests.patch,
    }
    request_func = request_funcs.get(method)
    if request_func is None:
        # format() instead of "+" so a non-string method still produces a
        # PrefectException rather than a TypeError.
        raise PrefectException("Unexpected HTTP Method: {}".format(method))

    attempt_num = 1
    while True:
        try:
            response = request_func(
                url,
                json=json,
                auth=auth,
                headers=USER_AGENT_HEADER,
                timeout=self.timeout_seconds,
            )
            response.raise_for_status()
            return response.json()
        except requests_exceptions.RequestException as e:
            if not _retryable_error(e):
                # In this case, the user probably made a mistake.
                # Don't retry.
                failed_response = getattr(e, "response", None)
                # Compare against None explicitly: a requests Response is
                # falsy for 4xx/5xx statuses, so a truthiness test would
                # wrongly treat error responses as missing.
                if failed_response is None:
                    # No HTTP response was received (e.g. malformed URL),
                    # so there is no content or status code to report.
                    raise PrefectException(
                        "Request to Databricks failed without a response: "
                        "{0}".format(e)
                    ) from e
                raise PrefectException(
                    "Response: {0}, Status Code: {1}".format(
                        failed_response.content, failed_response.status_code
                    )
                ) from e

            self._log_request_error(attempt_num, e)

            # ">=" rather than "==" so a retry_limit below 1 cannot turn
            # this into an endless retry loop.
            if attempt_num >= self.retry_limit:
                raise PrefectException(
                    (
                        "API requests to Databricks failed {} times. "
                        + "Giving up."
                    ).format(self.retry_limit)
                ) from e

        attempt_num += 1
        sleep(self.retry_delay)