Ejemplo n.º 1
0
def _handle_databricks_task_execution(task, hook, log, submitted_run_id):
    """
    Handles the Databricks + Prefect lifecycle logic for a Databricks task

    Args:
        - task (prefect.Task) : Prefect task being handled
        - hook (prefect.tasks.databricks.databricks_hook.DatabricksHook): Databricks Hook
        - log (logger): Prefect logging instance
        - submitted_run_id (str): run ID returned after submitting or running Databricks job
    """

    log.info("Run submitted with run_id: %s", submitted_run_id)
    run_page_url = hook.get_run_page_url(submitted_run_id)

    log.info("Run submitted with config : %s", task.json)

    log.info("View run status, Spark UI, and logs at %s", run_page_url)
    while True:
        run_state = hook.get_run_state(submitted_run_id)
        if run_state.is_terminal:
            if run_state.is_successful:
                log.info("%s completed successfully.", task.name)
                log.info("View run status, Spark UI, and logs at %s",
                         run_page_url)
                return
            else:
                error_message = "{t} failed with terminal state: {s}".format(
                    t=task.name, s=run_state)
                raise PrefectException(error_message)
        else:
            log.info("%s in run state: %s", task.name, run_state)
            log.info("View run status, Spark UI, and logs at %s", run_page_url)
            log.info("Sleeping for %s seconds.", task.polling_period_seconds)
            time.sleep(task.polling_period_seconds)
Ejemplo n.º 2
0
 def is_terminal(self) -> bool:
     """
     Report whether the current life cycle state is a terminal one.

     Returns:
         bool: True when `life_cycle_state` is "TERMINATED", "SKIPPED" or
             "INTERNAL_ERROR"; False for any other known state

     Raises:
         - PrefectException: if `life_cycle_state` is not listed in
             `RUN_LIFE_CYCLE_STATES`
     """
     state = self.life_cycle_state

     if state in RUN_LIFE_CYCLE_STATES:
         return state in ("TERMINATED", "SKIPPED", "INTERNAL_ERROR")

     # Unknown states are rejected rather than silently treated as non-terminal.
     message = (
         "Unexpected life cycle state: {}: If the state has "
         "been introduced recently, please check the Databricks user "
         "guide for troubleshooting information"
     ).format(state)
     raise PrefectException(message)
Ejemplo n.º 3
0
    def _do_api_call(self, endpoint_info, json):
        """
        Utility function to perform an API call with retries

        Builds an ``https://`` URL from the connection host and the endpoint,
        sends the request, and retries errors accepted by `_retryable_error`,
        calling `sleep(self.retry_delay)` between attempts.

        Args:
            - endpoint_info (tuple[string, string]): Tuple of method and endpoint;
                the method must be "GET", "POST" or "PATCH"

            - json (dict): Parameters for this API call.

        Returns:

            dict: If the api call returns a OK status code,
                this function returns the response in JSON. Otherwise,
                we throw an PrefectException.

        Raises:
            - PrefectException: if the HTTP method is not supported, if the
                request fails with an error that is not retryable, or if
                retryable errors persist until `self.retry_limit` attempts
                have been made
        """
        method, endpoint = endpoint_info

        # A "token" entry in the connection selects token auth; otherwise the
        # login/password pair is passed to requests as basic auth.
        if "token" in self.databricks_conn:
            logging.info("Using token auth. ")
            auth = _TokenAuth(self.databricks_conn["token"])
            # NOTE(review): the host is parsed here and again when the URL is
            # built below; presumably `_parse_host` is idempotent - confirm
            # before simplifying.
            host = self._parse_host(self.databricks_conn["host"])
        else:
            logging.info("Using basic auth. ")
            auth = (self.databricks_conn["login"], self.databricks_conn["password"])
            host = self.databricks_conn["host"]

        url = "https://{host}/{endpoint}".format(
            host=self._parse_host(host), endpoint=endpoint
        )

        request_funcs = {
            "GET": requests.get,
            "POST": requests.post,
            "PATCH": requests.patch,
        }
        if method not in request_funcs:
            raise PrefectException("Unexpected HTTP Method: " + method)
        request_func = request_funcs[method]

        attempt_num = 1
        while True:
            try:
                response = request_func(
                    url,
                    json=json,
                    auth=auth,
                    headers=USER_AGENT_HEADER,
                    timeout=self.timeout_seconds,
                )
                response.raise_for_status()
                return response.json()
            except requests_exceptions.RequestException as e:
                if not _retryable_error(e):
                    # In this case, the user probably made a mistake.
                    # Don't retry.
                    failed_response = e.response
                    if failed_response is None:
                        # Some request errors carry no HTTP response at all;
                        # report the error itself instead of failing with an
                        # AttributeError on `None`.
                        raise PrefectException(
                            "Request failed without a response: {0}".format(e)
                        ) from e
                    raise PrefectException(
                        "Response: {0}, Status Code: {1}".format(
                            failed_response.content, failed_response.status_code
                        )
                    ) from e

                self._log_request_error(attempt_num, e)
                # `e` is unbound once the except block ends; keep a reference
                # so the final failure can be chained to its cause.
                last_error = e

            # `>=` rather than `==` so a retry limit below 1 cannot cause an
            # endless retry loop.
            if attempt_num >= self.retry_limit:
                raise PrefectException(
                    (
                        "API requests to Databricks failed {} times. " + "Giving up."
                    ).format(attempt_num)
                ) from last_error

            attempt_num += 1
            sleep(self.retry_delay)