Example #1
    def get_df_payload(  # pylint: disable=too-many-locals,too-many-statements
            self, query_obj: QueryObject, **kwargs: Any) -> Dict[str, Any]:
        """Handles caching around the df payload retrieval"""
        cache_key = self.cache_key(query_obj, **kwargs)
        logger.info("Cache key: %s", cache_key)
        is_loaded = False
        stacktrace = None
        df = pd.DataFrame()
        cached_dttm = datetime.utcnow().isoformat().split(".")[0]
        cache_value = None
        status = None
        query = ""
        error_message = None
        if cache_key and cache and not self.force:
            cache_value = cache.get(cache_key)
            if cache_value:
                stats_logger.incr("loading_from_cache")
                try:
                    cache_value = pkl.loads(cache_value)
                    df = cache_value["df"]
                    query = cache_value["query"]
                    status = utils.QueryStatus.SUCCESS
                    is_loaded = True
                    stats_logger.incr("loaded_from_cache")
                except Exception as ex:  # pylint: disable=broad-except
                    logger.exception(ex)
                    logger.error("Error reading cache: %s",
                                 utils.error_msg_from_exception(ex))
                logger.info("Serving from cache")

        if query_obj and not is_loaded:
            try:
                query_result = self.get_query_result(query_obj)
                status = query_result["status"]
                query = query_result["query"]
                error_message = query_result["error_message"]
                df = query_result["df"]
                if status != utils.QueryStatus.FAILED:
                    stats_logger.incr("loaded_from_source")
                    if not self.force:
                        stats_logger.incr("loaded_from_source_without_force")
                    is_loaded = True
            except Exception as ex:  # pylint: disable=broad-except
                logger.exception(ex)
                if not error_message:
                    error_message = "{}".format(ex)
                status = utils.QueryStatus.FAILED
                stacktrace = utils.get_stacktrace()

            if is_loaded and cache_key and cache and status != utils.QueryStatus.FAILED:
                try:
                    cache_value = dict(dttm=cached_dttm, df=df, query=query)
                    cache_binary = pkl.dumps(cache_value,
                                             protocol=pkl.HIGHEST_PROTOCOL)

                    logger.info("Caching %d chars at key %s",
                                len(cache_binary), cache_key)

                    stats_logger.incr("set_cache_key")
                    cache.set(cache_key,
                              cache_binary,
                              timeout=self.cache_timeout)
                except Exception as ex:  # pylint: disable=broad-except
                    # cache.set call can fail if the backend is down or if
                    # the key is too large or whatever other reasons
                    logger.warning("Could not cache key %s", cache_key)
                    logger.exception(ex)
                    cache.delete(cache_key)
        return {
            "cache_key": cache_key,
            "cached_dttm":
            cache_value["dttm"] if cache_value is not None else None,
            "cache_timeout": self.cache_timeout,
            "df": df,
            "error": error_message,
            "is_cached": cache_key is not None,
            "query": query,
            "status": status,
            "stacktrace": stacktrace,
            "rowcount": len(df.index),
        }
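Note: every get_df_payload variant in this listing implements the same read-through pattern: try to deserialize a cached entry, fall back to the source query on a miss, then write the fresh result back with a timeout. A minimal standalone sketch of that flow, assuming only a Flask-Caching-style `cache` object with `get`/`set`/`delete` (the class context, `stats_logger`, and `QueryStatus` bookkeeping from the examples are deliberately omitted):

    import pickle as pkl
    from datetime import datetime
    from typing import Any, Callable, Dict

    def read_through_cache(
            cache: Any,  # Flask-Caching-style backend with get/set/delete (assumed)
            cache_key: str,
            compute: Callable[[], Dict[str, Any]],
            timeout: int,
            force: bool = False) -> Dict[str, Any]:
        """Hypothetical distillation of the get_df_payload caching flow."""
        if not force:
            raw = cache.get(cache_key)
            if raw is not None:
                try:
                    # cache hit: deserialize and serve
                    return pkl.loads(raw)
                except Exception:
                    # unreadable entry: fall through to the source
                    pass
        # cache miss (or force=True): compute fresh and write back
        value = compute()
        value["dttm"] = datetime.utcnow().isoformat().split(".")[0]
        try:
            cache.set(cache_key,
                      pkl.dumps(value, protocol=pkl.HIGHEST_PROTOCOL),
                      timeout=timeout)
        except Exception:
            # backend down, value too large, etc.: drop any stale entry
            cache.delete(cache_key)
        return value

Deleting the key after a failed set mirrors the originals: a partially written or oversized entry is worse than a plain miss.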
Example #2
    def get_df_payload(self, query_obj, **kwargs):
        """Handles caching around the df paylod retrieval"""
        cache_key = query_obj.cache_key(
            datasource=self.datasource.uid, **kwargs) if query_obj else None
        logging.info('Cache key: {}'.format(cache_key))
        is_loaded = False
        stacktrace = None
        df = None
        cached_dttm = datetime.utcnow().isoformat().split('.')[0]
        cache_value = None
        status = None
        query = ''
        error_message = None
        if cache_key and cache and not self.force:
            cache_value = cache.get(cache_key)
            if cache_value:
                stats_logger.incr('loaded_from_cache')
                try:
                    cache_value = pkl.loads(cache_value)
                    df = cache_value['df']
                    query = cache_value['query']
                    status = utils.QueryStatus.SUCCESS
                    is_loaded = True
                except Exception as e:
                    logging.exception(e)
                    logging.error('Error reading cache: ' +
                                  utils.error_msg_from_exception(e))
                logging.info('Serving from cache')

        if query_obj and not is_loaded:
            try:
                query_result = self.get_query_result(query_obj)
                status = query_result['status']
                query = query_result['query']
                error_message = query_result['error_message']
                df = query_result['df']
                if status != utils.QueryStatus.FAILED:
                    stats_logger.incr('loaded_from_source')
                    is_loaded = True
            except Exception as e:
                logging.exception(e)
                if not error_message:
                    error_message = '{}'.format(e)
                status = utils.QueryStatus.FAILED
                stacktrace = traceback.format_exc()

            if (
                    is_loaded and
                    cache_key and
                    cache and
                    status != utils.QueryStatus.FAILED):
                try:
                    cache_value = dict(
                        dttm=cached_dttm,
                        df=df,
                        query=query,
                    )
                    cache_value = pkl.dumps(
                        cache_value, protocol=pkl.HIGHEST_PROTOCOL)

                    logging.info('Caching {} chars at key {}'.format(
                        len(cache_value), cache_key))

                    stats_logger.incr('set_cache_key')
                    cache.set(
                        cache_key,
                        cache_value,
                        timeout=self.cache_timeout)
                except Exception as e:
                    # cache.set call can fail if the backend is down or if
                    # the key is too large or whatever other reasons
                    logging.warning('Could not cache key {}'.format(cache_key))
                    logging.exception(e)
                    cache.delete(cache_key)
        return {
            'cache_key': cache_key,
            'cached_dttm': cache_value['dttm'] if cache_value is not None else None,
            'cache_timeout': self.cache_timeout,
            'df': df,
            'error': error_message,
            'is_cached': cache_key is not None,
            'query': query,
            'status': status,
            'stacktrace': stacktrace,
            'rowcount': len(df.index) if df is not None else 0,
        }
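Examples #2 and #3 delegate key construction to `query_obj.cache_key(...)`, whose body is not shown in this listing. A plausible sketch of such a derivation, hashing a canonical JSON rendering of the query together with the datasource uid (the helper below is a hypothetical stand-in, not the project's actual implementation):

    import hashlib
    import json
    from typing import Any

    def cache_key(query: dict, datasource_uid: str, **extra: Any) -> str:
        """Hypothetical key derivation: hash a canonical JSON rendering of
        the query together with the datasource uid, so a change in either
        lands in a different cache slot."""
        material = dict(query, datasource=datasource_uid, **extra)
        serialized = json.dumps(material, sort_keys=True, default=str)
        return hashlib.md5(serialized.encode("utf-8")).hexdigest()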
Example #3
    def get_df_payload(self, query_obj: QueryObject, **kwargs):
        """Handles caching around the df paylod retrieval"""
        extra_cache_keys = (
            self.datasource.get_extra_cache_keys(query_obj.to_dict())
            if query_obj
            else None
        )
        cache_key = (
            query_obj.cache_key(
                datasource=self.datasource.uid,
                extra_cache_keys=extra_cache_keys,
                **kwargs
            )
            if query_obj
            else None
        )
        logging.info("Cache key: {}".format(cache_key))
        is_loaded = False
        stacktrace = None
        df = None
        cached_dttm = datetime.utcnow().isoformat().split(".")[0]
        cache_value = None
        status = None
        query = ""
        error_message = None
        if cache_key and cache and not self.force:
            cache_value = cache.get(cache_key)
            if cache_value:
                stats_logger.incr("loaded_from_cache")
                try:
                    cache_value = pkl.loads(cache_value)
                    df = cache_value["df"]
                    query = cache_value["query"]
                    status = utils.QueryStatus.SUCCESS
                    is_loaded = True
                except Exception as e:
                    logging.exception(e)
                    logging.error(
                        "Error reading cache: " + utils.error_msg_from_exception(e)
                    )
                logging.info("Serving from cache")

        if query_obj and not is_loaded:
            try:
                query_result = self.get_query_result(query_obj)
                status = query_result["status"]
                query = query_result["query"]
                error_message = query_result["error_message"]
                df = query_result["df"]
                if status != utils.QueryStatus.FAILED:
                    stats_logger.incr("loaded_from_source")
                    is_loaded = True
            except Exception as e:
                logging.exception(e)
                if not error_message:
                    error_message = "{}".format(e)
                status = utils.QueryStatus.FAILED
                stacktrace = utils.get_stacktrace()

            if is_loaded and cache_key and cache and status != utils.QueryStatus.FAILED:
                try:
                    cache_value = dict(dttm=cached_dttm, df=df, query=query)
                    cache_binary = pkl.dumps(cache_value, protocol=pkl.HIGHEST_PROTOCOL)

                    logging.info(
                        "Caching {} chars at key {}".format(
                            len(cache_binary), cache_key
                        )
                    )

                    stats_logger.incr("set_cache_key")
                    cache.set(cache_key, cache_binary, timeout=self.cache_timeout)
                except Exception as e:
                    # cache.set call can fail if the backend is down or if
                    # the key is too large or whatever other reasons
                    logging.warning("Could not cache key {}".format(cache_key))
                    logging.exception(e)
                    cache.delete(cache_key)
        return {
            "cache_key": cache_key,
            "cached_dttm": cache_value["dttm"] if cache_value is not None else None,
            "cache_timeout": self.cache_timeout,
            "df": df,
            "error": error_message,
            "is_cached": cache_key is not None,
            "query": query,
            "status": status,
            "stacktrace": stacktrace,
            "rowcount": len(df.index) if df is not None else 0,
        }
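Example #3 differs from #2 only in threading `get_extra_cache_keys(...)` into the key, so datasource-supplied values (for instance, parameters resolved per request) also separate cache entries. Reusing the hypothetical `cache_key` helper sketched above:

    # illustrative values only; real extras come from the datasource
    extras = {"extra_cache_keys": ["current_user_id=42"]}
    key_with = cache_key({"metrics": ["count"]}, "table__1", **extras)
    key_without = cache_key({"metrics": ["count"]}, "table__1")
    assert key_with != key_without  # extras separate the cache entries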
Example #4
    def get_payload(self, force=False):
        """Handles caching around the json payload retrieval"""
        cache_key = self.cache_key
        payload = None
        force = force if force else self.form_data.get('force') == 'true'
        if not force and cache:
            payload = cache.get(cache_key)

        if payload:
            is_cached = True
            try:
                cached_data = zlib.decompress(payload)
                if PY3:
                    cached_data = cached_data.decode('utf-8')
                payload = json.loads(cached_data)
            except Exception as e:
                logging.error("Error reading cache: " +
                              utils.error_msg_from_exception(e))
                payload = None
            logging.info("Serving from cache")

        if not payload:
            data = None
            is_cached = False
            cache_timeout = self.cache_timeout
            stacktrace = None
            try:
                df = self.get_df()
                if not self.error_message:
                    data = self.get_data(df)
            except Exception as e:
                logging.exception(e)
                if not self.error_message:
                    self.error_message = str(e)
                self.status = utils.QueryStatus.FAILED
                data = None
                stacktrace = traceback.format_exc()
            payload = {
                'cache_key': cache_key,
                'cache_timeout': cache_timeout,
                'data': data,
                'error': self.error_message,
                'form_data': self.form_data,
                'query': self.query,
                'status': self.status,
                'stacktrace': stacktrace,
            }
            payload['cached_dttm'] = datetime.now().isoformat().split('.')[0]
            logging.info(
                "Caching for the next {} seconds".format(cache_timeout))
            data = self.json_dumps(payload)
            if PY3:
                data = bytes(data, 'utf-8')
            if cache and self.status != utils.QueryStatus.FAILED:
                try:
                    cache.set(cache_key,
                              zlib.compress(data),
                              timeout=cache_timeout)
                except Exception as e:
                    # cache.set call can fail if the backend is down or if
                    # the key is too large or whatever other reasons
                    logging.warning("Could not cache key {}".format(cache_key))
                    logging.exception(e)
                    cache.delete(cache_key)
        payload['is_cached'] = is_cached
        return payload
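Unlike get_df_payload, get_payload caches the fully rendered JSON payload, compressed with zlib; the `if PY3:` branches only bridge the bytes/str split between Python 2 and 3. The round trip in isolation (a sketch; the real code serializes through `self.json_dumps`):

    import json
    import zlib

    payload = {"data": [1, 2, 3], "status": "success"}

    # what the miss path stores via cache.set(...)
    blob = zlib.compress(json.dumps(payload).encode("utf-8"))
    # what the hit path reconstructs before serving
    restored = json.loads(zlib.decompress(blob).decode("utf-8"))
    assert restored == payload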
Example #5
    def get_df_payload(  # pylint: disable=too-many-locals,too-many-statements
            self, query_obj: QueryObject, **kwargs: Any) -> Dict[str, Any]:
        """Handles caching around the df payload retrieval"""
        cache_key = self.cache_key(query_obj, **kwargs)
        logger.info("Cache key: %s", cache_key)
        is_loaded = False
        stacktrace = None
        df = pd.DataFrame()
        cached_dttm = datetime.utcnow().isoformat().split(".")[0]
        cache_value = None
        status = None
        query = ""
        error_message = None
        if cache_key and cache and not self.force:
            cache_value = cache.get(cache_key)
            if cache_value:
                stats_logger.incr("loading_from_cache")
                try:
                    df = cache_value["df"]
                    query = cache_value["query"]
                    status = utils.QueryStatus.SUCCESS
                    is_loaded = True
                    stats_logger.incr("loaded_from_cache")
                except Exception as ex:  # pylint: disable=broad-except
                    logger.exception(ex)
                    logger.error("Error reading cache: %s",
                                 utils.error_msg_from_exception(ex))
                logger.info("Serving from cache")

        if query_obj and not is_loaded:
            try:
                invalid_columns = [
                    col for col in query_obj.columns + query_obj.groupby +
                    [flt["col"] for flt in query_obj.filter] +
                    utils.get_column_names_from_metrics(query_obj.metrics)
                    if col not in self.datasource.column_names
                ]
                if invalid_columns:
                    raise QueryObjectValidationError(
                        _(
                            "Columns missing in datasource: %(invalid_columns)s",
                            invalid_columns=invalid_columns,
                        ))
                query_result = self.get_query_result(query_obj)
                status = query_result["status"]
                query = query_result["query"]
                error_message = query_result["error_message"]
                df = query_result["df"]
                if status != utils.QueryStatus.FAILED:
                    stats_logger.incr("loaded_from_source")
                    if not self.force:
                        stats_logger.incr("loaded_from_source_without_force")
                    is_loaded = True
            except QueryObjectValidationError as ex:
                error_message = str(ex)
                status = utils.QueryStatus.FAILED
            except Exception as ex:  # pylint: disable=broad-except
                logger.exception(ex)
                if not error_message:
                    error_message = str(ex)
                status = utils.QueryStatus.FAILED
                stacktrace = utils.get_stacktrace()

            if is_loaded and cache_key and cache and status != utils.QueryStatus.FAILED:
                try:
                    cache_value = dict(dttm=cached_dttm, df=df, query=query)
                    stats_logger.incr("set_cache_key")
                    cache.set(cache_key,
                              cache_value,
                              timeout=self.cache_timeout)
                except Exception as ex:  # pylint: disable=broad-except
                    # cache.set call can fail if the backend is down or if
                    # the key is too large or whatever other reasons
                    logger.warning("Could not cache key %s", cache_key)
                    logger.exception(ex)
                    cache.delete(cache_key)
        return {
            "cache_key": cache_key,
            "cached_dttm":
            cache_value["dttm"] if cache_value is not None else None,
            "cache_timeout": self.cache_timeout,
            "df": df,
            "error": error_message,
            "is_cached": cache_key is not None,
            "query": query,
            "status": status,
            "stacktrace": stacktrace,
            "rowcount": len(df.index),
        }
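This later variant adds an up-front validation step: every column referenced by `columns`, `groupby`, the filters, or the metrics must exist on the datasource, otherwise a QueryObjectValidationError aborts the query. A self-contained sketch of the same check, with plain lists standing in for the QueryObject and the datasource:

    from typing import List

    def find_invalid_columns(
            columns: List[str],
            groupby: List[str],
            filter_cols: List[str],
            metric_cols: List[str],
            known_columns: List[str]) -> List[str]:
        """Collect every referenced column the datasource does not know about."""
        referenced = columns + groupby + filter_cols + metric_cols
        return [col for col in referenced if col not in known_columns]

    # a filter on a missing column is caught before any SQL is issued
    assert find_invalid_columns(["name"], [], ["old_col"], [], ["name"]) == ["old_col"]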
Example #6
File: viz.py  Project: dawsongzhao/superset
    def get_payload(self, force=False):
        """Handles caching around the json payload retrieval"""
        cache_key = self.cache_key
        payload = None
        force = force if force else self.form_data.get('force') == 'true'
        if not force and cache:
            payload = cache.get(cache_key)

        if payload:
            is_cached = True
            try:
                cached_data = zlib.decompress(payload)
                if PY3:
                    cached_data = cached_data.decode('utf-8')
                payload = json.loads(cached_data)
            except Exception as e:
                logging.error("Error reading cache: " +
                              utils.error_msg_from_exception(e))
                payload = None
            logging.info("Serving from cache")

        if not payload:
            data = None
            is_cached = False
            cache_timeout = self.cache_timeout
            stacktrace = None
            try:
                df = self.get_df()
                if not self.error_message:
                    data = self.get_data(df)
            except Exception as e:
                logging.exception(e)
                if not self.error_message:
                    self.error_message = str(e)
                self.status = utils.QueryStatus.FAILED
                data = None
                stacktrace = traceback.format_exc()
            payload = {
                'cache_key': cache_key,
                'cache_timeout': cache_timeout,
                'data': data,
                'error': self.error_message,
                'form_data': self.form_data,
                'query': self.query,
                'status': self.status,
                'stacktrace': stacktrace,
            }
            payload['cached_dttm'] = datetime.now().isoformat().split('.')[0]
            logging.info("Caching for the next {} seconds".format(
                cache_timeout))
            data = self.json_dumps(payload)
            if PY3:
                data = bytes(data, 'utf-8')
            if cache and self.status != utils.QueryStatus.FAILED:
                try:
                    cache.set(
                        cache_key,
                        zlib.compress(data),
                        timeout=cache_timeout)
                except Exception as e:
                    # cache.set call can fail if the backend is down or if
                    # the key is too large or whatever other reasons
                    logging.warning("Could not cache key {}".format(cache_key))
                    logging.exception(e)
                    cache.delete(cache_key)
        payload['is_cached'] = is_cached
        return payload
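A final subtlety shared by both get_payload variants: `force` can arrive either as a boolean argument or as the string 'true' in the submitted form data. The coercion below mirrors that line, with a plain dict standing in for `self.form_data`:

    def resolve_force(force: bool, form_data: dict) -> bool:
        # an explicit True wins; otherwise honor the form field, which is
        # submitted as the string 'true' rather than a boolean
        return force if force else form_data.get("force") == "true"

    assert resolve_force(True, {}) is True
    assert resolve_force(False, {"force": "true"}) is True
    assert resolve_force(False, {}) is False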