Beispiel #1
0
def check_explore_cache_perms(_self: Any, cache_key: str) -> None:
    """
    Loads async explore_json request data from cache and performs access check

    :param _self: the Superset view instance
    :param cache_key: the cache key passed into /explore_json/data/
    :raises SupersetSecurityException: If the user cannot access the resource
    """
    cached = cache_manager.cache.get(cache_key)
    if not cached:
        raise CacheLoadError("Cached data not found")

    check_datasource_perms(_self, form_data=cached["form_data"])
Beispiel #2
0
    def get(
        cls,
        key: Optional[str],
        region: CacheRegion = CacheRegion.DEFAULT,
        force_query: Optional[bool] = False,
        force_cached: Optional[bool] = False,
    ) -> "QueryCacheManager":
        """
        Initialize QueryCacheManager by query-cache key
        """
        query_cache = cls()
        if not key or not _cache[region] or force_query:
            return query_cache

        cache_value = _cache[region].get(key)
        if cache_value:
            logger.info("Cache key: %s", key)
            stats_logger.incr("loading_from_cache")
            try:
                query_cache.df = cache_value["df"]
                query_cache.query = cache_value["query"]
                query_cache.annotation_data = cache_value.get("annotation_data", {})
                query_cache.applied_template_filters = cache_value.get(
                    "applied_template_filters", []
                )
                query_cache.status = QueryStatus.SUCCESS
                query_cache.is_loaded = True
                query_cache.is_cached = cache_value is not None
                query_cache.cache_dttm = (
                    cache_value["dttm"] if cache_value is not None else None
                )
                query_cache.cache_value = cache_value
                stats_logger.incr("loaded_from_cache")
            except KeyError as ex:
                logger.exception(ex)
                logger.error(
                    "Error reading cache: %s",
                    error_msg_from_exception(ex),
                    exc_info=True,
                )
            logger.info("Serving from cache")

        if force_cached and not query_cache.is_loaded:
            logger.warning(
                "force_cached (QueryContext): value not found for key %s", key
            )
            raise CacheLoadError("Error loading data from cache")
        return query_cache
Beispiel #3
0
    def get_df_payload(  # pylint: disable=too-many-statements,too-many-locals
        self, query_obj: QueryObject, force_cached: Optional[bool] = False,
    ) -> Dict[str, Any]:
        """Handles caching around the df payload retrieval"""
        cache_key = self.query_cache_key(query_obj)
        logger.info("Cache key: %s", cache_key)
        is_loaded = False
        stacktrace = None
        df = pd.DataFrame()
        cache_value = None
        status = None
        query = ""
        annotation_data = {}
        error_message = None
        if cache_key and cache_manager.data_cache and not self.force:
            cache_value = cache_manager.data_cache.get(cache_key)
            if cache_value:
                stats_logger.incr("loading_from_cache")
                try:
                    df = cache_value["df"]
                    query = cache_value["query"]
                    annotation_data = cache_value.get("annotation_data", {})
                    status = utils.QueryStatus.SUCCESS
                    is_loaded = True
                    stats_logger.incr("loaded_from_cache")
                except KeyError as ex:
                    logger.exception(ex)
                    logger.error(
                        "Error reading cache: %s", utils.error_msg_from_exception(ex)
                    )
                logger.info("Serving from cache")

        if force_cached and not is_loaded:
            logger.warning(
                "force_cached (QueryContext): value not found for key %s", cache_key
            )
            raise CacheLoadError("Error loading data from cache")

        if query_obj and not is_loaded:
            try:
                invalid_columns = [
                    col
                    for col in query_obj.columns
                    + query_obj.groupby
                    + utils.get_column_names_from_metrics(query_obj.metrics)
                    if col not in self.datasource.column_names and col != DTTM_ALIAS
                ]
                if invalid_columns:
                    raise QueryObjectValidationError(
                        _(
                            "Columns missing in datasource: %(invalid_columns)s",
                            invalid_columns=invalid_columns,
                        )
                    )
                query_result = self.get_query_result(query_obj)
                status = query_result["status"]
                query = query_result["query"]
                error_message = query_result["error_message"]
                df = query_result["df"]
                annotation_data = self.get_annotation_data(query_obj)

                if status != utils.QueryStatus.FAILED:
                    stats_logger.incr("loaded_from_source")
                    if not self.force:
                        stats_logger.incr("loaded_from_source_without_force")
                    is_loaded = True
            except QueryObjectValidationError as ex:
                error_message = str(ex)
                status = utils.QueryStatus.FAILED
            except Exception as ex:  # pylint: disable=broad-except
                logger.exception(ex)
                if not error_message:
                    error_message = str(ex)
                status = utils.QueryStatus.FAILED
                stacktrace = utils.get_stacktrace()

            if is_loaded and cache_key and status != utils.QueryStatus.FAILED:
                set_and_log_cache(
                    cache_manager.data_cache,
                    cache_key,
                    {"df": df, "query": query, "annotation_data": annotation_data},
                    self.cache_timeout,
                    self.datasource.uid,
                )
        return {
            "cache_key": cache_key,
            "cached_dttm": cache_value["dttm"] if cache_value is not None else None,
            "cache_timeout": self.cache_timeout,
            "df": df,
            "annotation_data": annotation_data,
            "error": error_message,
            "is_cached": cache_value is not None,
            "query": query,
            "status": status,
            "stacktrace": stacktrace,
            "rowcount": len(df.index),
        }