def get_df_payload(
    self, query_obj: QueryObject, force_cached: Optional[bool] = False
) -> Dict[str, Any]:
    """Build the dataframe payload for ``query_obj``, consulting the data cache.

    The cache entry for the query's key is fetched through
    ``QueryCacheManager.get``. Only when that entry is not already loaded is
    the query validated and executed, after which the result and its
    annotation data are handed to ``cache.set_query_result``.

    A ``QueryObjectValidationError`` raised while validating or running the
    query is caught and recorded on the cache object (``error_message`` and a
    ``FAILED`` status). Other exception types are not handled here.

    :param query_obj: the query to resolve
    :param force_cached: forwarded unchanged to ``QueryCacheManager.get``
    :returns: a dict holding the dataframe together with cache, query and
        time-range metadata
    """
    cache_key = self.query_cache_key(query_obj)
    cache = QueryCacheManager.get(
        cache_key,
        CacheRegion.DATA,
        self._query_context.force,
        force_cached,
    )

    must_run_query = query_obj and cache_key and not cache.is_loaded
    if must_run_query:
        try:
            requested = get_column_names_from_columns(
                query_obj.columns
            ) + get_column_names_from_metrics(query_obj.metrics or [])

            # Collect every requested name the datasource does not know about;
            # the temporal alias is always accepted.
            unknown = []
            for name in requested:
                if name in self._qc_datasource.column_names or name == DTTM_ALIAS:
                    continue
                unknown.append(name)

            if unknown:
                message = _(
                    "Columns missing in datasource: %(invalid_columns)s",
                    invalid_columns=unknown,
                )
                raise QueryObjectValidationError(message)

            result = self.get_query_result(query_obj)
            annotations = self.get_annotation_data(query_obj)
            cache.set_query_result(
                key=cache_key,
                query_result=result,
                annotation_data=annotations,
                force_query=self._query_context.force,
                timeout=self.get_cache_timeout(),
                datasource_uid=self._qc_datasource.uid,
                region=CacheRegion.DATA,
            )
        except QueryObjectValidationError as err:
            # Surface the validation failure through the payload instead of
            # propagating it.
            cache.error_message = str(err)
            cache.status = QueryStatus.FAILED

    payload: Dict[str, Any] = {
        "cache_key": cache_key,
        "cached_dttm": cache.cache_dttm,
        "cache_timeout": self.get_cache_timeout(),
        "df": cache.df,
        "applied_template_filters": cache.applied_template_filters,
        "annotation_data": cache.annotation_data,
        "error": cache.error_message,
        "is_cached": cache.is_cached,
        "query": cache.query,
        "status": cache.status,
        "stacktrace": cache.stacktrace,
        "rowcount": len(cache.df.index),
        "from_dttm": query_obj.from_dttm,
        "to_dttm": query_obj.to_dttm,
    }
    return payload
def get_df_payload(  # pylint: disable=too-many-statements
    self, query_obj: QueryObject, **kwargs: Any
) -> Dict[str, Any]:
    """Handles caching around the df payload retrieval

    The cache is consulted first (unless ``self.force`` is set or no cache /
    cache key is available). If nothing usable is found, the query is
    validated and executed, and a successful result is written back to the
    cache.

    Failures while validating or running the query do not propagate: a
    ``QueryObjectValidationError`` is turned into an error message with a
    ``FAILED`` status, and any other exception is additionally logged and its
    stacktrace captured in the payload.

    :param query_obj: the query to resolve
    :param kwargs: forwarded to ``self.cache_key``
    :returns: a dict with the dataframe, the query text, status/error
        information and cache metadata. ``is_cached`` and ``cached_dttm``
        describe the cache only when the dataframe was actually served
        from it.
    """
    cache_key = self.cache_key(query_obj, **kwargs)
    logger.info("Cache key: %s", cache_key)
    is_loaded = False
    # True only when the returned dataframe really comes from the cache; a
    # present-but-unreadable cache entry must not be reported as a cache hit.
    served_from_cache = False
    stacktrace = None
    df = pd.DataFrame()
    # Timestamp (ISO format, fractional seconds dropped) handed to the cache
    # writer when a fresh result is stored.
    cached_dttm = datetime.utcnow().isoformat().split(".")[0]
    cache_value = None
    status = None
    query = ""
    error_message = None

    if cache_key and cache and not self.force:
        cache_value = cache.get(cache_key)
        if cache_value:
            stats_logger.incr("loading_from_cache")
            try:
                # Both lookups complete before either name is rebound, so a
                # malformed entry cannot leave a half-applied cached state.
                df, query = cache_value["df"], cache_value["query"]
                status = utils.QueryStatus.SUCCESS
                is_loaded = True
                served_from_cache = True
                stats_logger.incr("loaded_from_cache")
            except KeyError as ex:
                logger.exception(ex)
                logger.error(
                    "Error reading cache: %s", utils.error_msg_from_exception(ex)
                )
            else:
                logger.info("Serving from cache")

    if query_obj and not is_loaded:
        try:
            invalid_columns = [
                col
                for col in query_obj.columns
                + query_obj.groupby
                + utils.get_column_names_from_metrics(query_obj.metrics or [])
                if col not in self.datasource.column_names
            ]
            if invalid_columns:
                raise QueryObjectValidationError(
                    _(
                        "Columns missing in datasource: %(invalid_columns)s",
                        invalid_columns=invalid_columns,
                    )
                )
            query_result = self.get_query_result(query_obj)
            status = query_result["status"]
            query = query_result["query"]
            error_message = query_result["error_message"]
            df = query_result["df"]
            if status != utils.QueryStatus.FAILED:
                stats_logger.incr("loaded_from_source")
                if not self.force:
                    stats_logger.incr("loaded_from_source_without_force")
                is_loaded = True
        except QueryObjectValidationError as ex:
            error_message = str(ex)
            status = utils.QueryStatus.FAILED
        except Exception as ex:  # pylint: disable=broad-except
            logger.exception(ex)
            # Keep an error message already reported by the query result.
            if not error_message:
                error_message = str(ex)
            status = utils.QueryStatus.FAILED
            stacktrace = utils.get_stacktrace()

        # Write back only results that were freshly loaded from the source.
        if is_loaded and cache_key and cache and status != utils.QueryStatus.FAILED:
            set_and_log_cache(
                cache_key,
                df,
                query,
                cached_dttm,
                self.cache_timeout,
                self.datasource.uid,
            )

    return {
        "cache_key": cache_key,
        # ``.get`` tolerates cache entries stored without a timestamp.
        "cached_dttm": cache_value.get("dttm") if served_from_cache else None,
        "cache_timeout": self.cache_timeout,
        "df": df,
        "error": error_message,
        "is_cached": served_from_cache,
        "query": query,
        "status": status,
        "stacktrace": stacktrace,
        "rowcount": len(df.index),
    }
def get_df_payload(  # pylint: disable=too-many-statements,too-many-locals
    self, query_obj: QueryObject, force_cached: Optional[bool] = False,
) -> Dict[str, Any]:
    """Handles caching around the df payload retrieval

    The data cache is consulted first (unless ``self.force`` is set or no
    cache / cache key is available). If nothing usable is found, the query is
    validated and executed, annotation data is fetched, and a successful
    result is written back to the data cache.

    Failures while validating or running the query do not propagate: a
    ``QueryObjectValidationError`` is turned into an error message with a
    ``FAILED`` status, and any other exception is additionally logged and its
    stacktrace captured in the payload.

    :param query_obj: the query to resolve
    :param force_cached: when truthy, never run the query; the payload must
        come from the cache
    :returns: a dict with the dataframe, annotation data, the query text,
        status/error information and cache metadata. ``is_cached`` and
        ``cached_dttm`` describe the cache only when the dataframe was
        actually served from it.
    :raises CacheLoadError: if ``force_cached`` is set and no usable cache
        entry was loaded
    """
    cache_key = self.query_cache_key(query_obj)
    logger.info("Cache key: %s", cache_key)
    is_loaded = False
    # True only when the returned dataframe really comes from the cache; a
    # present-but-unreadable cache entry must not be reported as a cache hit.
    served_from_cache = False
    stacktrace = None
    df = pd.DataFrame()
    cache_value = None
    status = None
    query = ""
    annotation_data = {}
    error_message = None

    if cache_key and cache_manager.data_cache and not self.force:
        cache_value = cache_manager.data_cache.get(cache_key)
        if cache_value:
            stats_logger.incr("loading_from_cache")
            try:
                # Both lookups complete before either name is rebound, so a
                # malformed entry cannot leave a half-applied cached state.
                df, query = cache_value["df"], cache_value["query"]
                annotation_data = cache_value.get("annotation_data", {})
                status = utils.QueryStatus.SUCCESS
                is_loaded = True
                served_from_cache = True
                stats_logger.incr("loaded_from_cache")
            except KeyError as ex:
                logger.exception(ex)
                logger.error(
                    "Error reading cache: %s", utils.error_msg_from_exception(ex)
                )
            else:
                logger.info("Serving from cache")

    if force_cached and not is_loaded:
        logger.warning(
            "force_cached (QueryContext): value not found for key %s", cache_key
        )
        raise CacheLoadError("Error loading data from cache")

    if query_obj and not is_loaded:
        try:
            invalid_columns = [
                col
                for col in query_obj.columns
                + query_obj.groupby
                + utils.get_column_names_from_metrics(query_obj.metrics or [])
                if col not in self.datasource.column_names and col != DTTM_ALIAS
            ]
            if invalid_columns:
                raise QueryObjectValidationError(
                    _(
                        "Columns missing in datasource: %(invalid_columns)s",
                        invalid_columns=invalid_columns,
                    )
                )
            query_result = self.get_query_result(query_obj)
            status = query_result["status"]
            query = query_result["query"]
            error_message = query_result["error_message"]
            df = query_result["df"]
            annotation_data = self.get_annotation_data(query_obj)

            if status != utils.QueryStatus.FAILED:
                stats_logger.incr("loaded_from_source")
                if not self.force:
                    stats_logger.incr("loaded_from_source_without_force")
                is_loaded = True
        except QueryObjectValidationError as ex:
            error_message = str(ex)
            status = utils.QueryStatus.FAILED
        except Exception as ex:  # pylint: disable=broad-except
            logger.exception(ex)
            # Keep an error message already reported by the query result.
            if not error_message:
                error_message = str(ex)
            status = utils.QueryStatus.FAILED
            stacktrace = utils.get_stacktrace()

        # Write back only results that were freshly loaded from the source.
        if is_loaded and cache_key and status != utils.QueryStatus.FAILED:
            set_and_log_cache(
                cache_manager.data_cache,
                cache_key,
                {"df": df, "query": query, "annotation_data": annotation_data},
                self.cache_timeout,
                self.datasource.uid,
            )

    return {
        "cache_key": cache_key,
        # ``.get`` tolerates cache entries stored without a timestamp.
        "cached_dttm": cache_value.get("dttm") if served_from_cache else None,
        "cache_timeout": self.cache_timeout,
        "df": df,
        "annotation_data": annotation_data,
        "error": error_message,
        "is_cached": served_from_cache,
        "query": query,
        "status": status,
        "stacktrace": stacktrace,
        "rowcount": len(df.index),
    }
def get_df_payload(
    self, query_obj: QueryObject, force_cached: Optional[bool] = False
) -> Dict[str, Any]:
    """Build the dataframe payload for ``query_obj``, consulting the data cache.

    The cache entry for the query's key is fetched through
    ``QueryCacheManager.get``. Only when that entry is not already loaded is
    the query validated and executed, after which the result and its
    annotation data are handed to ``cache.set_query_result``. Before
    returning, the dataframe's column labels are unescaped in place and a
    ``label_map`` from each unescaped label to its component labels is built.

    A ``QueryObjectValidationError`` raised while validating or running the
    query is caught and recorded on the cache object (``error_message`` and a
    ``FAILED`` status). Other exception types are not handled here.

    :param query_obj: the query to resolve
    :param force_cached: forwarded unchanged to ``QueryCacheManager.get``
    :returns: a dict holding the dataframe together with cache, query,
        time-range and label metadata
    """
    cache_key = self.query_cache_key(query_obj)
    cache = QueryCacheManager.get(
        cache_key,
        CacheRegion.DATA,
        self._query_context.force,
        force_cached,
    )

    must_run_query = query_obj and cache_key and not cache.is_loaded
    if must_run_query:
        try:
            requested = get_column_names_from_columns(
                query_obj.columns
            ) + get_column_names_from_metrics(query_obj.metrics or [])

            # Collect every requested name the datasource does not know about;
            # the temporal alias is always accepted.
            unknown = []
            for name in requested:
                if name in self._qc_datasource.column_names or name == DTTM_ALIAS:
                    continue
                unknown.append(name)

            if unknown:
                message = _(
                    "Columns missing in datasource: %(invalid_columns)s",
                    invalid_columns=unknown,
                )
                raise QueryObjectValidationError(message)

            result = self.get_query_result(query_obj)
            annotations = self.get_annotation_data(query_obj)
            cache.set_query_result(
                key=cache_key,
                query_result=result,
                annotation_data=annotations,
                force_query=self._query_context.force,
                timeout=self.get_cache_timeout(),
                datasource_uid=self._qc_datasource.uid,
                region=CacheRegion.DATA,
            )
        except QueryObjectValidationError as err:
            # Surface the validation failure through the payload instead of
            # propagating it.
            cache.error_message = str(err)
            cache.status = QueryStatus.FAILED

    # The N-dimensional DataFrame has been converted into a flat DataFrame by
    # the `flatten` operator, with commas inside column labels escaped by
    # `escape_separator`. The resulting column labels are unescaped here.
    # Splitting happens only on ", " that is not preceded by a backslash.
    label_map = {}
    for raw_label in cache.df.columns.values:
        flat_label = unescape_separator(raw_label)
        label_map[flat_label] = [
            unescape_separator(piece)
            for piece in re.split(r"(?<!\\),\s", raw_label)
        ]
    cache.df.columns = list(map(unescape_separator, cache.df.columns.values))

    payload: Dict[str, Any] = {
        "cache_key": cache_key,
        "cached_dttm": cache.cache_dttm,
        "cache_timeout": self.get_cache_timeout(),
        "df": cache.df,
        "applied_template_filters": cache.applied_template_filters,
        "annotation_data": cache.annotation_data,
        "error": cache.error_message,
        "is_cached": cache.is_cached,
        "query": cache.query,
        "status": cache.status,
        "stacktrace": cache.stacktrace,
        "rowcount": len(cache.df.index),
        "from_dttm": query_obj.from_dttm,
        "to_dttm": query_obj.to_dttm,
        "label_map": label_map,
    }
    return payload