def get_payload(
    self,
    cache_query_context: Optional[bool] = False,
    force_cached: bool = False,
) -> Dict[str, Any]:
    """Returns the query results with both metadata and data"""

    # Get all the payloads from the QueryObjects
    query_results = [
        get_query_results(
            query_obj.result_type or self.result_type, self, query_obj, force_cached
        )
        for query_obj in self.queries
    ]
    return_value = {"queries": query_results}

    if cache_query_context:
        cache_key = self.cache_key()
        set_and_log_cache(
            cache_manager.cache,
            cache_key,
            {"data": self.cache_values},
            self.cache_timeout,
        )
        return_value["cache_key"] = cache_key  # type: ignore

    return return_value
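# A minimal usage sketch for get_payload, assuming a QueryContext instance named
# "query_context" has already been built from a chart's form_data (the instance
# name and surrounding setup are illustrative, not part of the code above).
# Passing cache_query_context=True caches the query context itself and surfaces
# the cache key alongside the per-query results; the keys read from each result
# ("status", "rowcount") mirror the get_df_payload return value further below
# and may vary by result_type.
payload = query_context.get_payload(cache_query_context=True)
for result in payload["queries"]:
    print(result.get("status"), result.get("rowcount"))
print("query context cached under:", payload.get("cache_key"))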
def load_explore_json_into_cache(  # pylint: disable=too-many-locals
    job_metadata: Dict[str, Any],
    form_data: Dict[str, Any],
    response_type: Optional[str] = None,
    force: bool = False,
) -> None:
    cache_key_prefix = "ejr-"  # ejr: explore_json request
    try:
        ensure_user_is_set(job_metadata.get("user_id"))
        datasource_id, datasource_type = get_datasource_info(None, None, form_data)

        # Perform a deep copy here so that below we can cache the original
        # value of the form_data object. This is necessary since the viz
        # objects modify the form_data object. If the modified version were
        # to be cached here, it would lead to a cache miss when clients
        # attempt to retrieve the value of the completed async query.
        original_form_data = copy.deepcopy(form_data)

        viz_obj = get_viz(
            datasource_type=cast(str, datasource_type),
            datasource_id=datasource_id,
            form_data=form_data,
            force=force,
        )
        # run query & cache results
        payload = viz_obj.get_payload()
        if viz_obj.has_error(payload):
            raise SupersetVizException(errors=payload["errors"])

        # Cache the original form_data value for async retrieval
        cache_value = {
            "form_data": original_form_data,
            "response_type": response_type,
        }
        cache_key = generate_cache_key(cache_value, cache_key_prefix)
        set_and_log_cache(cache_manager.cache, cache_key, cache_value)
        result_url = f"/superset/explore_json/data/{cache_key}"
        async_query_manager.update_job(
            job_metadata,
            async_query_manager.STATUS_DONE,
            result_url=result_url,
        )
    except SoftTimeLimitExceeded as ex:
        logger.warning("A timeout occurred while loading explore json, error: %s", ex)
        raise ex
    except Exception as exc:
        if isinstance(exc, SupersetVizException):
            errors = exc.errors  # pylint: disable=no-member
        else:
            error = (
                exc.message if hasattr(exc, "message") else str(exc)  # type: ignore # pylint: disable=no-member
            )
            errors = [error]

        async_query_manager.update_job(
            job_metadata, async_query_manager.STATUS_ERROR, errors=errors
        )
        raise exc
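# A hypothetical invocation sketch: in Superset this function runs as an async
# worker task rather than being called inline. Only the "user_id" key of
# job_metadata is read by the code above; the other fields ("job_id",
# "channel_id") and the form_data contents shown here are assumptions for
# illustration only.
job_metadata = {"user_id": 1, "job_id": "abc123", "channel_id": "xyz"}
form_data = {"datasource": "3__table", "viz_type": "table"}
load_explore_json_into_cache(job_metadata, form_data, response_type="full", force=False)
# On success the job is marked STATUS_DONE with a result_url of the form
# /superset/explore_json/data/<cache_key>; on failure it is marked STATUS_ERROR.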
def set(
    key: Optional[str],
    value: Dict[str, Any],
    timeout: Optional[int] = None,
    datasource_uid: Optional[str] = None,
    region: CacheRegion = CacheRegion.DEFAULT,
) -> None:
    """
    Set a value in the specified cache region; proxy for `set_and_log_cache`.
    """
    if key:
        set_and_log_cache(_cache[region], key, value, timeout, datasource_uid)
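# A minimal usage sketch for the region-aware `set` proxy. The key, value,
# timeout, and datasource_uid below are made-up illustrations; only
# CacheRegion.DEFAULT is taken from the code above.
set(
    key="ejr-some-cache-key",
    value={"form_data": {"viz_type": "table"}, "response_type": "full"},
    timeout=3600,
    datasource_uid="abc",
    region=CacheRegion.DEFAULT,
)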
def load_explore_json_into_cache(
    job_metadata: Dict[str, Any],
    form_data: Dict[str, Any],
    response_type: Optional[str] = None,
    force: bool = False,
) -> None:
    with app.app_context():  # type: ignore
        cache_key_prefix = "ejr-"  # ejr: explore_json request
        try:
            ensure_user_is_set(job_metadata.get("user_id"))
            datasource_id, datasource_type = get_datasource_info(None, None, form_data)

            viz_obj = get_viz(
                datasource_type=cast(str, datasource_type),
                datasource_id=datasource_id,
                form_data=form_data,
                force=force,
            )
            # run query & cache results
            payload = viz_obj.get_payload()
            if viz_obj.has_error(payload):
                raise SupersetVizException(errors=payload["errors"])

            # cache form_data for async retrieval
            cache_value = {"form_data": form_data, "response_type": response_type}
            cache_key = generate_cache_key(cache_value, cache_key_prefix)
            set_and_log_cache(cache_manager.cache, cache_key, cache_value)
            result_url = f"/superset/explore_json/data/{cache_key}"
            async_query_manager.update_job(
                job_metadata,
                async_query_manager.STATUS_DONE,
                result_url=result_url,
            )
        except Exception as exc:
            if isinstance(exc, SupersetVizException):
                errors = exc.errors  # pylint: disable=no-member
            else:
                error = (
                    exc.message if hasattr(exc, "message") else str(exc)  # type: ignore # pylint: disable=no-member
                )
                errors = [error]

            async_query_manager.update_job(
                job_metadata, async_query_manager.STATUS_ERROR, errors=errors
            )
            raise exc

    return None
def get_payload(self, **kwargs: Any) -> Dict[str, Any]:
    cache_query_context = kwargs.get("cache_query_context", False)
    force_cached = kwargs.get("force_cached", False)

    # Get all the payloads from the QueryObjects
    query_results = [
        self.get_single_payload(query_object, force_cached=force_cached)
        for query_object in self.queries
    ]
    return_value = {"queries": query_results}

    if cache_query_context:
        cache_key = self.cache_key()
        set_and_log_cache(
            cache_manager.cache,
            cache_key,
            {"data": self.cache_values},
            self.cache_timeout,
        )
        return_value["cache_key"] = cache_key  # type: ignore

    return return_value
def get_df_payload(  # pylint: disable=too-many-statements,too-many-locals
    self,
    query_obj: QueryObject,
    force_cached: Optional[bool] = False,
) -> Dict[str, Any]:
    """Handles caching around the df payload retrieval"""
    cache_key = self.query_cache_key(query_obj)
    logger.info("Cache key: %s", cache_key)
    is_loaded = False
    stacktrace = None
    df = pd.DataFrame()
    cache_value = None
    status = None
    query = ""
    annotation_data = {}
    error_message = None

    if cache_key and cache_manager.data_cache and not self.force:
        cache_value = cache_manager.data_cache.get(cache_key)
        if cache_value:
            stats_logger.incr("loading_from_cache")
            try:
                df = cache_value["df"]
                query = cache_value["query"]
                annotation_data = cache_value.get("annotation_data", {})
                status = utils.QueryStatus.SUCCESS
                is_loaded = True
                stats_logger.incr("loaded_from_cache")
            except KeyError as ex:
                logger.exception(ex)
                logger.error(
                    "Error reading cache: %s", utils.error_msg_from_exception(ex)
                )
            logger.info("Serving from cache")

    if force_cached and not is_loaded:
        logger.warning(
            "force_cached (QueryContext): value not found for key %s", cache_key
        )
        raise CacheLoadError("Error loading data from cache")

    if query_obj and not is_loaded:
        try:
            invalid_columns = [
                col
                for col in query_obj.columns
                + query_obj.groupby
                + utils.get_column_names_from_metrics(query_obj.metrics)
                if col not in self.datasource.column_names and col != DTTM_ALIAS
            ]
            if invalid_columns:
                raise QueryObjectValidationError(
                    _(
                        "Columns missing in datasource: %(invalid_columns)s",
                        invalid_columns=invalid_columns,
                    )
                )
            query_result = self.get_query_result(query_obj)
            status = query_result["status"]
            query = query_result["query"]
            error_message = query_result["error_message"]
            df = query_result["df"]
            annotation_data = self.get_annotation_data(query_obj)

            if status != utils.QueryStatus.FAILED:
                stats_logger.incr("loaded_from_source")
                if not self.force:
                    stats_logger.incr("loaded_from_source_without_force")
                is_loaded = True
        except QueryObjectValidationError as ex:
            error_message = str(ex)
            status = utils.QueryStatus.FAILED
        except Exception as ex:  # pylint: disable=broad-except
            logger.exception(ex)
            if not error_message:
                error_message = str(ex)
            status = utils.QueryStatus.FAILED
            stacktrace = utils.get_stacktrace()

    if is_loaded and cache_key and status != utils.QueryStatus.FAILED:
        set_and_log_cache(
            cache_manager.data_cache,
            cache_key,
            {"df": df, "query": query, "annotation_data": annotation_data},
            self.cache_timeout,
            self.datasource.uid,
        )
    return {
        "cache_key": cache_key,
        "cached_dttm": cache_value["dttm"] if cache_value is not None else None,
        "cache_timeout": self.cache_timeout,
        "df": df,
        "annotation_data": annotation_data,
        "error": error_message,
        "is_cached": cache_value is not None,
        "query": query,
        "status": status,
        "stacktrace": stacktrace,
        "rowcount": len(df.index),
    }
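# A sketch of how a caller might consume the get_df_payload result, assuming
# "query_context" and "query_obj" already exist (illustrative names). The keys
# read here are exactly those returned by the function above.
result = query_context.get_df_payload(query_obj)
if result["error"]:
    logger.warning("query failed: %s", result["error"])
else:
    df = result["df"]
    print("served from cache:", result["is_cached"], "rows:", result["rowcount"])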