Example #1
    def get_payload(
        self, cache_query_context: Optional[bool] = False, force_cached: bool = False,
    ) -> Dict[str, Any]:
        """Returns the query results with both metadata and data"""

        # Get all the payloads from the QueryObjects
        query_results = [
            get_query_results(
                query_obj.result_type or self.result_type, self, query_obj, force_cached
            )
            for query_obj in self.queries
        ]
        return_value = {"queries": query_results}

        if cache_query_context:
            cache_key = self.cache_key()
            set_and_log_cache(
                cache_manager.cache,
                cache_key,
                {"data": self.cache_values},
                self.cache_timeout,
            )
            return_value["cache_key"] = cache_key  # type: ignore

        return return_value
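A brief usage sketch of this method; the query_context variable and where it comes from are assumptions, not part of the example above:

# Hypothetical call site: query_context is a QueryContext built elsewhere.
payload = query_context.get_payload(cache_query_context=True)

# One result entry per QueryObject in query_context.queries.
query_results = payload["queries"]

# Present only because cache_query_context=True was passed above.
context_cache_key = payload.get("cache_key")
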
Example #2
def load_explore_json_into_cache(  # pylint: disable=too-many-locals
    job_metadata: Dict[str, Any],
    form_data: Dict[str, Any],
    response_type: Optional[str] = None,
    force: bool = False,
) -> None:
    cache_key_prefix = "ejr-"  # ejr: explore_json request
    try:
        ensure_user_is_set(job_metadata.get("user_id"))
        datasource_id, datasource_type = get_datasource_info(
            None, None, form_data)

        # Perform a deep copy here so that below we can cache the original
        # value of the form_data object. This is necessary since the viz
        # objects modify the form_data object. If the modified version were
        # to be cached here, it will lead to a cache miss when clients
        # attempt to retrieve the value of the completed async query.
        original_form_data = copy.deepcopy(form_data)

        viz_obj = get_viz(
            datasource_type=cast(str, datasource_type),
            datasource_id=datasource_id,
            form_data=form_data,
            force=force,
        )
        # run query & cache results
        payload = viz_obj.get_payload()
        if viz_obj.has_error(payload):
            raise SupersetVizException(errors=payload["errors"])

        # Cache the original form_data value for async retrieval
        cache_value = {
            "form_data": original_form_data,
            "response_type": response_type,
        }
        cache_key = generate_cache_key(cache_value, cache_key_prefix)
        set_and_log_cache(cache_manager.cache, cache_key, cache_value)
        result_url = f"/superset/explore_json/data/{cache_key}"
        async_query_manager.update_job(
            job_metadata,
            async_query_manager.STATUS_DONE,
            result_url=result_url,
        )
    except SoftTimeLimitExceeded as ex:
        logger.warning(
            "A timeout occurred while loading explore json, error: %s", ex)
        raise ex
    except Exception as exc:
        if isinstance(exc, SupersetVizException):
            errors = exc.errors  # pylint: disable=no-member
        else:
            error = (
                exc.message if hasattr(exc, "message") else str(exc)  # type: ignore # pylint: disable=no-member
            )
            errors = [error]

        async_query_manager.update_job(
            job_metadata,
            async_query_manager.STATUS_ERROR,
            errors=errors,
        )
        raise exc
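The value cached above is what the async client later reads back via result_url. A minimal sketch of that retrieval side, assuming cache_manager.cache exposes a Flask-Caching-style get(); the function name load_cached_explore_json and its logic are hypothetical, not the actual Superset endpoint:

def load_cached_explore_json(cache_key: str) -> Dict[str, Any]:
    # Hypothetical retrieval counterpart: look up the entry written above.
    cached = cache_manager.cache.get(cache_key)  # assumed Flask-Caching-style API
    if cached is None:
        raise CacheLoadError("Error loading data from cache")
    # cached["form_data"] is the deep-copied original form_data;
    # cached["response_type"] drives how the payload is serialized.
    return cached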
Example #3
    def set(
        key: Optional[str],
        value: Dict[str, Any],
        timeout: Optional[int] = None,
        datasource_uid: Optional[str] = None,
        region: CacheRegion = CacheRegion.DEFAULT,
    ) -> None:
        """
        Set a value in the specified cache region; proxy for `set_and_log_cache`.
        """
        if key:
            set_and_log_cache(_cache[region], key, value, timeout, datasource_uid)
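A hedged example of calling this proxy; the key, value, timeout, and datasource_uid shown are made-up values:

set(
    key="some-precomputed-cache-key",  # a None key makes the call a no-op
    value={"data": [{"col": 1}]},
    timeout=600,
    datasource_uid="1__table",         # uid format is an assumption
    region=CacheRegion.DEFAULT,
)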
Example #4
def load_explore_json_into_cache(
    job_metadata: Dict[str, Any],
    form_data: Dict[str, Any],
    response_type: Optional[str] = None,
    force: bool = False,
) -> None:
    with app.app_context():  # type: ignore
        cache_key_prefix = "ejr-"  # ejr: explore_json request
        try:
            ensure_user_is_set(job_metadata.get("user_id"))
            datasource_id, datasource_type = get_datasource_info(
                None, None, form_data)

            viz_obj = get_viz(
                datasource_type=cast(str, datasource_type),
                datasource_id=datasource_id,
                form_data=form_data,
                force=force,
            )
            # run query & cache results
            payload = viz_obj.get_payload()
            if viz_obj.has_error(payload):
                raise SupersetVizException(errors=payload["errors"])

            # cache form_data for async retrieval
            cache_value = {
                "form_data": form_data,
                "response_type": response_type
            }
            cache_key = generate_cache_key(cache_value, cache_key_prefix)
            set_and_log_cache(cache_manager.cache, cache_key, cache_value)
            result_url = f"/superset/explore_json/data/{cache_key}"
            async_query_manager.update_job(
                job_metadata,
                async_query_manager.STATUS_DONE,
                result_url=result_url,
            )
        except Exception as exc:
            if isinstance(exc, SupersetVizException):
                errors = exc.errors  # pylint: disable=no-member
            else:
                error = (
                    exc.message if hasattr(exc, "message") else str(exc)  # type: ignore # pylint: disable=no-member
                )
                errors = [error]

            async_query_manager.update_job(
                job_metadata,
                async_query_manager.STATUS_ERROR,
                errors=errors,
            )
            raise exc

        return None
Example #5
    def get_payload(self, **kwargs: Any) -> Dict[str, Any]:
        cache_query_context = kwargs.get("cache_query_context", False)
        force_cached = kwargs.get("force_cached", False)

        # Get all the payloads from the QueryObjects
        query_results = [
            self.get_single_payload(query_object, force_cached=force_cached)
            for query_object in self.queries
        ]
        return_value = {"queries": query_results}

        if cache_query_context:
            cache_key = self.cache_key()
            set_and_log_cache(
                cache_manager.cache,
                cache_key,
                {"data": self.cache_values},
                self.cache_timeout,
            )
            return_value["cache_key"] = cache_key  # type: ignore

        return return_value
Example #6
    def get_df_payload(  # pylint: disable=too-many-statements,too-many-locals
        self, query_obj: QueryObject, force_cached: Optional[bool] = False,
    ) -> Dict[str, Any]:
        """Handles caching around the df payload retrieval"""
        cache_key = self.query_cache_key(query_obj)
        logger.info("Cache key: %s", cache_key)
        is_loaded = False
        stacktrace = None
        df = pd.DataFrame()
        cache_value = None
        status = None
        query = ""
        annotation_data = {}
        error_message = None
        if cache_key and cache_manager.data_cache and not self.force:
            cache_value = cache_manager.data_cache.get(cache_key)
            if cache_value:
                stats_logger.incr("loading_from_cache")
                try:
                    df = cache_value["df"]
                    query = cache_value["query"]
                    annotation_data = cache_value.get("annotation_data", {})
                    status = utils.QueryStatus.SUCCESS
                    is_loaded = True
                    stats_logger.incr("loaded_from_cache")
                except KeyError as ex:
                    logger.exception(ex)
                    logger.error(
                        "Error reading cache: %s", utils.error_msg_from_exception(ex)
                    )
                logger.info("Serving from cache")

        if force_cached and not is_loaded:
            logger.warning(
                "force_cached (QueryContext): value not found for key %s", cache_key
            )
            raise CacheLoadError("Error loading data from cache")

        if query_obj and not is_loaded:
            try:
                invalid_columns = [
                    col
                    for col in query_obj.columns
                    + query_obj.groupby
                    + utils.get_column_names_from_metrics(query_obj.metrics)
                    if col not in self.datasource.column_names and col != DTTM_ALIAS
                ]
                if invalid_columns:
                    raise QueryObjectValidationError(
                        _(
                            "Columns missing in datasource: %(invalid_columns)s",
                            invalid_columns=invalid_columns,
                        )
                    )
                query_result = self.get_query_result(query_obj)
                status = query_result["status"]
                query = query_result["query"]
                error_message = query_result["error_message"]
                df = query_result["df"]
                annotation_data = self.get_annotation_data(query_obj)

                if status != utils.QueryStatus.FAILED:
                    stats_logger.incr("loaded_from_source")
                    if not self.force:
                        stats_logger.incr("loaded_from_source_without_force")
                    is_loaded = True
            except QueryObjectValidationError as ex:
                error_message = str(ex)
                status = utils.QueryStatus.FAILED
            except Exception as ex:  # pylint: disable=broad-except
                logger.exception(ex)
                if not error_message:
                    error_message = str(ex)
                status = utils.QueryStatus.FAILED
                stacktrace = utils.get_stacktrace()

            if is_loaded and cache_key and status != utils.QueryStatus.FAILED:
                set_and_log_cache(
                    cache_manager.data_cache,
                    cache_key,
                    {"df": df, "query": query, "annotation_data": annotation_data},
                    self.cache_timeout,
                    self.datasource.uid,
                )
        return {
            "cache_key": cache_key,
            "cached_dttm": cache_value["dttm"] if cache_value is not None else None,
            "cache_timeout": self.cache_timeout,
            "df": df,
            "annotation_data": annotation_data,
            "error": error_message,
            "is_cached": cache_value is not None,
            "query": query,
            "status": status,
            "stacktrace": stacktrace,
            "rowcount": len(df.index),
        }
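A short sketch of how the dict returned by get_df_payload might be consumed; query_context and query_obj are placeholder names, not part of the example:

# Hypothetical consumer of the dict returned by get_df_payload:
payload = query_context.get_df_payload(query_obj)

if payload["status"] == utils.QueryStatus.FAILED:
    logger.error("Query failed: %s\n%s", payload["error"], payload["stacktrace"])
else:
    df = payload["df"]  # pandas DataFrame with the query results
    logger.info(
        "Returned %d rows (served from cache: %s)",
        payload["rowcount"],
        payload["is_cached"],
    )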