def load_query_context_from_cache(  # pylint: disable=no-self-use
    self, cache_key: str
) -> Dict[str, Any]:
    cache_value = cache.get(cache_key)
    if not cache_value:
        raise ChartDataCacheLoadError("Cached data not found")
    return cache_value["data"]
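# Hedged sketch (not from the original source): the loader above expects the
# cached entry to be a dict with a "data" key, so a matching write side would
# presumably look roughly like this. The function name, the wrapper-dict shape
# and the timeout are assumptions; `cache` and the typing imports are the same
# module-level objects the loader uses.
def save_query_context_to_cache(cache_key: str, data: Dict[str, Any],
                                timeout: int = 300) -> None:
    cache.set(cache_key, {"data": data}, timeout=timeout)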
def download(self, path=''):
    logging.info('Start download from google drive')
    logging.info(path)
    report_filename = self.get_report_filename(
        self.report, self.from_date, self.to_date)
    if cache:
        cache_key = path
        cache_timeout = CONNECTOR_INFO.get('report_cache_timeout', 60 * 60)
        z_report = cache.get(cache_key)
        if z_report is not None:
            return petl.io.fromjson(
                petl.MemorySource(zlib.decompress(z_report)))

        logging.info('Download Report from {}'.format(path))
        storage = self.storage[0]
        storage.init()

        fname = '{}.json'.format(self.report)

        with storage.open(path) as archive_file:
            with zipfile.ZipFile(archive_file) as zip_file:
                # logging.info(fname)
                report = zip_file.read(fname)
        z_report = zlib.compress(report)
        cache.set(cache_key, z_report, timeout=cache_timeout)

        return petl.io.fromjson(petl.MemorySource(report))
    else:
        # move to init
        if not os.path.exists(self.report_folder):
            os.makedirs(self.report_folder)

        if not os.path.exists(report_filename):
            logging.info('Download Report from {}'.format(path))
            storage = self.storage[0]
            storage.init()

            fname = '{}.json'.format(self.report)

            with storage.open(path) as archive_file:
                with zipfile.ZipFile(archive_file) as zip_file:
                    # logging.info(fname)
                    report = zip_file.read(fname)

            with open(report_filename, 'wb') as f:
                f.write(report)

        logging.info('Read from {}'.format(report_filename))
        report = petl.io.fromjson(report_filename)
        return report
    return []
def wrapper(*args, **kwargs):
    # check if the user can access the resource
    check_perms(*args, **kwargs)

    # for POST requests we can't set cache headers, use the response
    # cache nor use conditional requests; this will still use the
    # dataframe cache in `superset/viz.py`, though.
    if request.method == "POST":
        return f(*args, **kwargs)

    response = None
    if cache:
        try:
            # build the cache key from the function arguments and any
            # other additional GET arguments (like `form_data`, eg).
            key_args = list(args)
            key_kwargs = kwargs.copy()
            key_kwargs.update(request.args)
            cache_key = wrapper.make_cache_key(f, *key_args, **key_kwargs)
            response = cache.get(cache_key)
        except Exception:  # pylint: disable=broad-except
            if app.debug:
                raise
            logging.exception("Exception possibly due to cache backend.")

    # if no response was cached, compute it using the wrapped function
    if response is None:
        response = f(*args, **kwargs)

        # add headers for caching: Last Modified, Expires and ETag
        response.cache_control.public = True
        response.last_modified = datetime.utcnow()
        expiration = max_age if max_age != 0 else FAR_FUTURE
        response.expires = response.last_modified + timedelta(
            seconds=expiration)
        response.add_etag()

        # if we have a cache, store the response from the request
        if cache:
            try:
                cache.set(cache_key, response, timeout=max_age)
            except Exception:  # pylint: disable=broad-except
                if app.debug:
                    raise
                logging.exception("Exception possibly due to cache backend.")

    return response.make_conditional(request)
def wrapper(*args, **kwargs):
    # check if the user can access the resource
    check_perms(*args, **kwargs)

    # for POST requests we can't set cache headers, use the response
    # cache nor use conditional requests; this will still use the
    # dataframe cache in `superset/viz.py`, though.
    if request.method == 'POST':
        return f(*args, **kwargs)

    response = None
    if cache:
        try:
            # build the cache key from the function arguments and any
            # other additional GET arguments (like `form_data`, eg).
            key_args = list(args)
            key_kwargs = kwargs.copy()
            key_kwargs.update(request.args)
            cache_key = wrapper.make_cache_key(f, *key_args, **key_kwargs)
            response = cache.get(cache_key)
        except Exception:  # pylint: disable=broad-except
            if app.debug:
                raise
            logging.exception('Exception possibly due to cache backend.')

    # if no response was cached, compute it using the wrapped function
    if response is None:
        response = f(*args, **kwargs)

        # add headers for caching: Last Modified, Expires and ETag
        response.cache_control.public = True
        response.last_modified = datetime.utcnow()
        expiration = max_age if max_age != 0 else FAR_FUTURE
        response.expires = \
            response.last_modified + timedelta(seconds=expiration)
        response.add_etag()

        # if we have a cache, store the response from the request
        if cache:
            try:
                cache.set(cache_key, response, timeout=max_age)
            except Exception:  # pylint: disable=broad-except
                if app.debug:
                    raise
                logging.exception('Exception possibly due to cache backend.')

    return response.make_conditional(request)
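# Hedged usage sketch, not taken from the original source: in Superset the two
# wrappers above are the inner function returned by an `etag_cache(...)`
# decorator factory, so a Flask view would typically be wrapped roughly as
# below. The route, the permission callback and the view body are illustrative
# placeholders.
@app.route("/slice/<int:slice_id>/data")
@etag_cache(max_age=600, check_perms=check_slice_perms)
def slice_data(slice_id: int):
    # GET responses receive Last-Modified / Expires / ETag headers and can be
    # answered with 304 Not Modified; POST requests bypass the cache entirely.
    return build_slice_payload(slice_id)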
def get_df_payload(  # pylint: disable=too-many-statements
    self, query_obj: QueryObject, **kwargs: Any
) -> Dict[str, Any]:
    """Handles caching around the df payload retrieval"""
    cache_key = self.cache_key(query_obj, **kwargs)
    logger.info("Cache key: %s", cache_key)
    is_loaded = False
    stacktrace = None
    df = pd.DataFrame()
    cached_dttm = datetime.utcnow().isoformat().split(".")[0]
    cache_value = None
    status = None
    query = ""
    error_message = None
    if cache_key and cache and not self.force:
        cache_value = cache.get(cache_key)
        if cache_value:
            stats_logger.incr("loading_from_cache")
            try:
                df = cache_value["df"]
                query = cache_value["query"]
                status = utils.QueryStatus.SUCCESS
                is_loaded = True
                stats_logger.incr("loaded_from_cache")
            except KeyError as ex:
                logger.exception(ex)
                logger.error(
                    "Error reading cache: %s", utils.error_msg_from_exception(ex)
                )
            logger.info("Serving from cache")

    if query_obj and not is_loaded:
        try:
            invalid_columns = [
                col
                for col in query_obj.columns
                + query_obj.groupby
                + utils.get_column_names_from_metrics(query_obj.metrics)
                if col not in self.datasource.column_names
            ]
            if invalid_columns:
                raise QueryObjectValidationError(
                    _(
                        "Columns missing in datasource: %(invalid_columns)s",
                        invalid_columns=invalid_columns,
                    )
                )
            query_result = self.get_query_result(query_obj)
            status = query_result["status"]
            query = query_result["query"]
            error_message = query_result["error_message"]
            df = query_result["df"]
            if status != utils.QueryStatus.FAILED:
                stats_logger.incr("loaded_from_source")
                if not self.force:
                    stats_logger.incr("loaded_from_source_without_force")
                is_loaded = True
        except QueryObjectValidationError as ex:
            error_message = str(ex)
            status = utils.QueryStatus.FAILED
        except Exception as ex:  # pylint: disable=broad-except
            logger.exception(ex)
            if not error_message:
                error_message = str(ex)
            status = utils.QueryStatus.FAILED
            stacktrace = utils.get_stacktrace()

    if is_loaded and cache_key and cache and status != utils.QueryStatus.FAILED:
        set_and_log_cache(
            cache_key,
            df,
            query,
            cached_dttm,
            self.cache_timeout,
            self.datasource.uid,
        )
    return {
        "cache_key": cache_key,
        "cached_dttm": cache_value["dttm"] if cache_value is not None else None,
        "cache_timeout": self.cache_timeout,
        "df": df,
        "error": error_message,
        "is_cached": cache_value is not None,
        "query": query,
        "status": status,
        "stacktrace": stacktrace,
        "rowcount": len(df.index),
    }
def get_df_payload(  # pylint: disable=too-many-locals,too-many-statements
    self, query_obj: QueryObject, **kwargs: Any
) -> Dict[str, Any]:
    """Handles caching around the df payload retrieval"""
    cache_key = self.cache_key(query_obj, **kwargs)
    logger.info("Cache key: %s", cache_key)
    is_loaded = False
    stacktrace = None
    df = pd.DataFrame()
    cached_dttm = datetime.utcnow().isoformat().split(".")[0]
    cache_value = None
    status = None
    query = ""
    error_message = None
    if cache_key and cache and not self.force:
        cache_value = cache.get(cache_key)
        if cache_value:
            stats_logger.incr("loading_from_cache")
            try:
                cache_value = pkl.loads(cache_value)
                df = cache_value["df"]
                query = cache_value["query"]
                status = utils.QueryStatus.SUCCESS
                is_loaded = True
                stats_logger.incr("loaded_from_cache")
            except Exception as ex:  # pylint: disable=broad-except
                logger.exception(ex)
                logger.error(
                    "Error reading cache: %s", utils.error_msg_from_exception(ex)
                )
            logger.info("Serving from cache")

    if query_obj and not is_loaded:
        try:
            query_result = self.get_query_result(query_obj)
            status = query_result["status"]
            query = query_result["query"]
            error_message = query_result["error_message"]
            df = query_result["df"]
            if status != utils.QueryStatus.FAILED:
                stats_logger.incr("loaded_from_source")
                if not self.force:
                    stats_logger.incr("loaded_from_source_without_force")
                is_loaded = True
        except Exception as ex:  # pylint: disable=broad-except
            logger.exception(ex)
            if not error_message:
                error_message = "{}".format(ex)
            status = utils.QueryStatus.FAILED
            stacktrace = utils.get_stacktrace()

    if is_loaded and cache_key and cache and status != utils.QueryStatus.FAILED:
        try:
            cache_value = dict(dttm=cached_dttm, df=df, query=query)
            cache_binary = pkl.dumps(cache_value, protocol=pkl.HIGHEST_PROTOCOL)

            logger.info(
                "Caching %d chars at key %s", len(cache_binary), cache_key
            )

            stats_logger.incr("set_cache_key")
            cache.set(cache_key, cache_binary, timeout=self.cache_timeout)
        except Exception as ex:  # pylint: disable=broad-except
            # cache.set call can fail if the backend is down or if
            # the key is too large or whatever other reasons
            logger.warning("Could not cache key %s", cache_key)
            logger.exception(ex)
            cache.delete(cache_key)
    return {
        "cache_key": cache_key,
        "cached_dttm": cache_value["dttm"] if cache_value is not None else None,
        "cache_timeout": self.cache_timeout,
        "df": df,
        "error": error_message,
        "is_cached": cache_key is not None,
        "query": query,
        "status": status,
        "stacktrace": stacktrace,
        "rowcount": len(df.index),
    }
def get_df_payload(self, query_obj, **kwargs):
    """Handles caching around the df payload retrieval"""
    cache_key = query_obj.cache_key(
        datasource=self.datasource.uid, **kwargs) if query_obj else None
    logging.info('Cache key: {}'.format(cache_key))
    is_loaded = False
    stacktrace = None
    df = None
    cached_dttm = datetime.utcnow().isoformat().split('.')[0]
    cache_value = None
    status = None
    query = ''
    error_message = None
    if cache_key and cache and not self.force:
        cache_value = cache.get(cache_key)
        if cache_value:
            stats_logger.incr('loaded_from_cache')
            try:
                cache_value = pkl.loads(cache_value)
                df = cache_value['df']
                query = cache_value['query']
                status = utils.QueryStatus.SUCCESS
                is_loaded = True
            except Exception as e:
                logging.exception(e)
                logging.error('Error reading cache: ' +
                              utils.error_msg_from_exception(e))
            logging.info('Serving from cache')

    if query_obj and not is_loaded:
        try:
            query_result = self.get_query_result(query_obj)
            status = query_result['status']
            query = query_result['query']
            error_message = query_result['error_message']
            df = query_result['df']
            if status != utils.QueryStatus.FAILED:
                stats_logger.incr('loaded_from_source')
                is_loaded = True
        except Exception as e:
            logging.exception(e)
            if not error_message:
                error_message = '{}'.format(e)
            status = utils.QueryStatus.FAILED
            stacktrace = traceback.format_exc()

    if (
            is_loaded and
            cache_key and
            cache and
            status != utils.QueryStatus.FAILED):
        try:
            cache_value = dict(
                dttm=cached_dttm,
                df=df if df is not None else None,
                query=query,
            )
            cache_value = pkl.dumps(
                cache_value, protocol=pkl.HIGHEST_PROTOCOL)

            logging.info('Caching {} chars at key {}'.format(
                len(cache_value), cache_key))

            stats_logger.incr('set_cache_key')
            cache.set(
                cache_key,
                cache_value,
                timeout=self.cache_timeout)
        except Exception as e:
            # cache.set call can fail if the backend is down or if
            # the key is too large or whatever other reasons
            logging.warning('Could not cache key {}'.format(cache_key))
            logging.exception(e)
            cache.delete(cache_key)

    return {
        'cache_key': cache_key,
        'cached_dttm': cache_value['dttm'] if cache_value is not None else None,
        'cache_timeout': self.cache_timeout,
        'df': df,
        'error': error_message,
        'is_cached': cache_key is not None,
        'query': query,
        'status': status,
        'stacktrace': stacktrace,
        'rowcount': len(df.index) if df is not None else 0,
    }
def get_df_payload(self, query_obj: QueryObject, **kwargs):
    """Handles caching around the df payload retrieval"""
    extra_cache_keys = self.datasource.get_extra_cache_keys(query_obj.to_dict())
    cache_key = (
        query_obj.cache_key(
            datasource=self.datasource.uid,
            extra_cache_keys=extra_cache_keys,
            **kwargs
        )
        if query_obj
        else None
    )
    logging.info("Cache key: {}".format(cache_key))
    is_loaded = False
    stacktrace = None
    df = None
    cached_dttm = datetime.utcnow().isoformat().split(".")[0]
    cache_value = None
    status = None
    query = ""
    error_message = None
    if cache_key and cache and not self.force:
        cache_value = cache.get(cache_key)
        if cache_value:
            stats_logger.incr("loaded_from_cache")
            try:
                cache_value = pkl.loads(cache_value)
                df = cache_value["df"]
                query = cache_value["query"]
                status = utils.QueryStatus.SUCCESS
                is_loaded = True
            except Exception as e:
                logging.exception(e)
                logging.error(
                    "Error reading cache: " + utils.error_msg_from_exception(e)
                )
            logging.info("Serving from cache")

    if query_obj and not is_loaded:
        try:
            query_result = self.get_query_result(query_obj)
            status = query_result["status"]
            query = query_result["query"]
            error_message = query_result["error_message"]
            df = query_result["df"]
            if status != utils.QueryStatus.FAILED:
                stats_logger.incr("loaded_from_source")
                is_loaded = True
        except Exception as e:
            logging.exception(e)
            if not error_message:
                error_message = "{}".format(e)
            status = utils.QueryStatus.FAILED
            stacktrace = utils.get_stacktrace()

    if is_loaded and cache_key and cache and status != utils.QueryStatus.FAILED:
        try:
            cache_value = dict(
                dttm=cached_dttm, df=df if df is not None else None, query=query
            )
            cache_binary = pkl.dumps(cache_value, protocol=pkl.HIGHEST_PROTOCOL)

            logging.info(
                "Caching {} chars at key {}".format(len(cache_binary), cache_key)
            )

            stats_logger.incr("set_cache_key")
            cache.set(cache_key, cache_binary, timeout=self.cache_timeout)
        except Exception as e:
            # cache.set call can fail if the backend is down or if
            # the key is too large or whatever other reasons
            logging.warning("Could not cache key {}".format(cache_key))
            logging.exception(e)
            cache.delete(cache_key)

    return {
        "cache_key": cache_key,
        "cached_dttm": cache_value["dttm"] if cache_value is not None else None,
        "cache_timeout": self.cache_timeout,
        "df": df,
        "error": error_message,
        "is_cached": cache_key is not None,
        "query": query,
        "status": status,
        "stacktrace": stacktrace,
        "rowcount": len(df.index) if df is not None else 0,
    }
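# Hedged standalone sketch of the serialization pattern shared by the
# get_df_payload variants above: the DataFrame and query string are pickled
# with the highest protocol on write and unpickled on read. The function name
# and the demo key/timeout are illustrative assumptions; `cache` is any
# Flask-Caching style object with get/set.
import pickle as pkl
from datetime import datetime

import pandas as pd


def demo_df_cache_roundtrip(cache, cache_key, df: pd.DataFrame, query: str,
                            timeout: int = 3600) -> pd.DataFrame:
    # write path: same payload dict shape as get_df_payload builds
    cache_value = dict(dttm=datetime.utcnow().isoformat(), df=df, query=query)
    cache.set(cache_key,
              pkl.dumps(cache_value, protocol=pkl.HIGHEST_PROTOCOL),
              timeout=timeout)
    # read path: mirror of the pkl.loads branch at the top of get_df_payload
    return pkl.loads(cache.get(cache_key))["df"]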
def get_payload(self, force=False):
    """Handles caching around the json payload retrieval"""
    cache_key = self.cache_key
    payload = None
    force = force if force else self.form_data.get('force') == 'true'
    if not force and cache:
        payload = cache.get(cache_key)

    if payload:
        is_cached = True
        try:
            cached_data = zlib.decompress(payload)
            if PY3:
                cached_data = cached_data.decode('utf-8')
            payload = json.loads(cached_data)
        except Exception as e:
            logging.error("Error reading cache: " +
                          utils.error_msg_from_exception(e))
            payload = None
        logging.info("Serving from cache")

    if not payload:
        data = None
        is_cached = False
        cache_timeout = self.cache_timeout
        stacktrace = None
        try:
            df = self.get_df()
            if not self.error_message:
                data = self.get_data(df)
        except Exception as e:
            logging.exception(e)
            if not self.error_message:
                self.error_message = str(e)
            self.status = utils.QueryStatus.FAILED
            data = None
            stacktrace = traceback.format_exc()
        payload = {
            'cache_key': cache_key,
            'cache_timeout': cache_timeout,
            'data': data,
            'error': self.error_message,
            'form_data': self.form_data,
            'query': self.query,
            'status': self.status,
            'stacktrace': stacktrace,
        }
        payload['cached_dttm'] = datetime.now().isoformat().split('.')[0]
        logging.info(
            "Caching for the next {} seconds".format(cache_timeout))
        data = self.json_dumps(payload)
        if PY3:
            data = bytes(data, 'utf-8')
        if cache and self.status != utils.QueryStatus.FAILED:
            try:
                cache.set(cache_key, zlib.compress(data),
                          timeout=cache_timeout)
            except Exception as e:
                # cache.set call can fail if the backend is down or if
                # the key is too large or whatever other reasons
                logging.warning("Could not cache key {}".format(cache_key))
                logging.exception(e)
                cache.delete(cache_key)
    payload['is_cached'] = is_cached
    return payload
def wrapper(*args: Any, **kwargs: Any) -> ETagResponseMixin:
    # check if the user can access the resource
    check_perms(*args, **kwargs)

    # for POST requests we can't set cache headers, use the response
    # cache nor use conditional requests; this will still use the
    # dataframe cache in `superset/viz.py`, though.
    if request.method == "POST" or (skip and skip(*args, **kwargs)):
        return f(*args, **kwargs)

    response = None
    last_modified = get_last_modified and get_last_modified(*args, **kwargs)

    if cache:
        try:
            # build the cache key from the function arguments and any
            # other additional GET arguments (like `form_data`, eg).
            key_args = list(args)
            key_kwargs = kwargs.copy()
            key_kwargs.update(request.args)
            cache_key = wrapper.make_cache_key(  # type: ignore
                f, *key_args, **key_kwargs
            )
            response = cache.get(cache_key)
        except Exception:  # pylint: disable=broad-except
            if app.debug:
                raise
            logger.exception("Exception possibly due to cache backend.")

    # if cache is stale?
    if (
        response
        and last_modified
        and response.last_modified
        and response.last_modified < last_modified
    ):
        response = None

    if response is None:
        # if no response was cached, compute it using the wrapped function
        response = f(*args, **kwargs)

        # set expiration headers:
        # Last-Modified, Expires, Cache-Control, ETag
        response.last_modified = last_modified or datetime.utcnow()
        expiration = max_age if max_age != 0 else FAR_FUTURE
        response.expires = response.last_modified + timedelta(
            seconds=expiration
        )

        # when needed, instruct the browser to always revalidate cache
        if must_revalidate:
            # `Cache-Control: no-cache` asks the browser to always store
            # the cache, but also must validate it with the server.
            response.cache_control.no_cache = True
        else:
            # `Cache-Control: Public` asks the browser to always store
            # the cache.
            response.cache_control.public = True

        response.add_etag()

        # if we have a cache, store the response from the request
        if cache:
            try:
                cache.set(cache_key, response, timeout=max_age)
            except Exception:  # pylint: disable=broad-except
                if app.debug:
                    raise
                logger.exception("Exception possibly due to cache backend.")

    return response.make_conditional(request)
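# Hedged sketch of what `response.make_conditional(request)` provides above:
# once the response carries an ETag, a client that echoes it back in
# If-None-Match receives an empty 304 instead of the full body. The route is
# an illustrative placeholder; `app` is the Flask application whose view is
# wrapped by the decorator.
with app.test_client() as client:
    first = client.get("/slice/1/data")
    etag = first.headers["ETag"]
    revalidated = client.get("/slice/1/data",
                             headers={"If-None-Match": etag})
    assert revalidated.status_code == 304  # body omitted, cache revalidated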
def load(cache_key: str) -> Dict[str, Any]:
    cache_value = cache.get(cache_key)
    if not cache_value:
        raise ChartDataCacheLoadError("Cached data not found")
    return cache_value["data"]
def wrapper(*args: Any, **kwargs: Any) -> ETagResponseMixin:
    # check if the user can access the resource
    check_perms(*args, **kwargs)

    # for POST requests we can't set cache headers, use the response
    # cache nor use conditional requests; this will still use the
    # dataframe cache in `superset/viz.py`, though.
    if request.method == "POST" or (skip and skip(*args, **kwargs)):
        return f(*args, **kwargs)

    response = None
    if cache:
        try:
            # build the cache key from the function arguments and any
            # other additional GET arguments (like `form_data`, eg).
            key_args = list(args)
            key_kwargs = kwargs.copy()
            key_kwargs.update(request.args)
            cache_key = wrapper.make_cache_key(  # type: ignore
                f, *key_args, **key_kwargs
            )
            response = cache.get(cache_key)
        except Exception:  # pylint: disable=broad-except
            if app.debug:
                raise
            logger.exception("Exception possibly due to cache backend.")

    # if cache is stale?
    if get_last_modified:
        content_changed_time = get_last_modified(*args, **kwargs)
        if (
            response
            and response.last_modified
            and response.last_modified.timestamp()
            < content_changed_time.timestamp()
        ):
            response = None
    else:
        # if caller didn't provide content's last_modified time, assume
        # its cache won't be stale.
        content_changed_time = datetime.utcnow()

    # if no response was cached, compute it using the wrapped function
    if response is None:
        response = f(*args, **kwargs)

        # add headers for caching: Last Modified, Expires and ETag
        response.cache_control.public = True
        response.last_modified = content_changed_time
        expiration = max_age if max_age != 0 else FAR_FUTURE
        response.expires = response.last_modified + timedelta(
            seconds=expiration
        )
        response.add_etag()

        # if we have a cache, store the response from the request
        if cache:
            try:
                cache.set(cache_key, response, timeout=max_age)
            except Exception:  # pylint: disable=broad-except
                if app.debug:
                    raise
                logger.exception("Exception possibly due to cache backend.")

    return response.make_conditional(request)
def download(self, path=''):
    logging.info('Start download from google drive')
    logging.info(path)
    report_filename = self.get_report_filename(
        self.report, self.from_date, self.to_date)
    time_range = {'since': self.from_date, 'until': self.to_date}
    if cache:
        cache_key = path
        cache_timeout = CONNECTOR_INFO.get('report_cache_timeout', 60 * 60)
        z_report = cache.get(cache_key)
        if z_report is not None:
            return petl.io.fromjson(
                petl.MemorySource(zlib.decompress(z_report)))
        ###############################
        logging.info('Download Report from {}'.format(path))
        # NOTE: `async` is a reserved word from Python 3.7 onwards; newer
        # Facebook SDK releases accept `is_async=True` instead.
        request = fbAdAccount.get_insights(
            # pending=True,
            async=True,
            fields=fb_ads_insight_fields,
            params={
                'time_increment': 1,
                # 'limit': 1,
                'level': fbAdsInsights.Level.ad,
                # 'breakdowns': report.,
                'time_range': time_range
            })

        # storage = self.storage[0]
        # storage.init()
        #
        # fname = '{}.json'.format(self.report)
        #
        # with storage.open(path) as archive_file:
        #     with zipfile.ZipFile(archive_file) as zip_file:
        #         # logging.info(fname)
        #         report = zip_file.read(fname)
        # z_report = zlib.compress(report)
        # cache.set(cache_key, z_report, timeout=cache_timeout)
        #
        # return petl.io.fromjson(petl.MemorySource(report))
    # else:
    #     # move to init
    #     if not os.path.exists(self.report_folder):
    #         os.makedirs(self.report_folder)
    #
    #     if not os.path.exists(report_filename):
    #         logging.info('Download Report from {}'.format(path))
    #         storage = self.storage[0]
    #         storage.init()
    #
    #         fname = '{}.json'.format(self.report)
    #
    #         with storage.open(path) as archive_file:
    #             with zipfile.ZipFile(archive_file) as zip_file:
    #                 # logging.info(fname)
    #                 report = zip_file.read(fname)
    #
    #         with open(report_filename, 'wb') as f:
    #             f.write(report)
    #
    #     logging.info(
    #         'Read from {}'.format(report_filename))
    #     report = petl.io.fromjson(report_filename)
    #     return report
    return []
def download(self, urls=[]):
    # timeout setting for requests
    # timeout = urllib3.Timeout(connect=2.0, read=7.0)
    # http = urllib3.PoolManager(timeout=timeout)
    http = urllib3.PoolManager()

    report_data = []
    for url in urls:
        # print(url)
        report_filename = self.get_report_filename(
            hashlib.md5(url).hexdigest())
        if cache:
            # print('use cache')
            cache_key = url
            cache_timeout = CONNECTOR_INFO.get('report_cache_timeout', 60 * 60)
            z_report = cache.get(cache_key)
            if z_report is not None:
                new_report_data = petl.io.fromcsv(
                    petl.MemorySource(zlib.decompress(z_report)))
                # print(len(new_report_data))
                if not report_data:
                    # print('NEw cat')
                    report_data = new_report_data
                else:
                    report_data = petl.cat(report_data, new_report_data)
                continue

            logging.info('Download Report from {}'.format(url))
            r = http.request('GET', url,
                             retries=urllib3.Retry(
                                 redirect=2,
                                 backoff_factor=2,
                             ))
            if r.status == 200:
                report = r.data
                r.release_conn()
                z_report = zlib.compress(report)
                cache.set(cache_key, z_report, timeout=cache_timeout)
                # return petl.io.fromcsv(petl.MemorySource(report))
                new_report_data = petl.io.fromcsv(
                    petl.MemorySource(report))
                # print(len(new_report_data))
                if not report_data:
                    report_data = new_report_data
                else:
                    report_data = petl.cat(report_data, new_report_data)
            elif r.status == 403:
                raise Exception(r.data)
            else:
                logging.info(r.data)
                logging.info(r.status)
                logging.info(r.headers)
        else:
            # move to init
            # print('Not cache')
            if not os.path.exists(self.report_folder):
                os.makedirs(self.report_folder)

            if not os.path.exists(report_filename):
                logging.info('Download Report from {}'.format(url))
                r = http.request('GET', url,
                                 retries=urllib3.Retry(
                                     redirect=2,
                                     backoff_factor=2,
                                 ))
                if r.status == 200:
                    with open(report_filename, 'wb') as f:
                        f.write(r.data)
                    r.release_conn()

            logging.info('Read from {}'.format(report_filename))
            new_report_data = petl.io.fromcsv(report_filename)
            if not report_data:
                report_data = new_report_data
            else:
                report_data = petl.cat(report_data, new_report_data)

    return report_data
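# Hedged standalone sketch of the petl pattern used by download() above: each
# report is cached as zlib-compressed CSV bytes, rehydrated through
# petl.MemorySource, and successive tables are stitched together with
# petl.cat. The sample CSV content below is illustrative.
import zlib

import petl

csv_bytes = b'campaign,clicks\nA,10\nB,3\n'
z_report = zlib.compress(csv_bytes)        # what cache.set would store
table = petl.io.fromcsv(petl.MemorySource(zlib.decompress(z_report)))
combined = petl.cat(table, table)          # how report_data grows per URL
print(petl.nrows(combined))                # 4 data rows after concatenation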
def get_payload(self, force=False):
    """Handles caching around the json payload retrieval"""
    cache_key = self.cache_key
    payload = None
    force = force if force else self.form_data.get('force') == 'true'
    if not force and cache:
        payload = cache.get(cache_key)

    if payload:
        is_cached = True
        try:
            cached_data = zlib.decompress(payload)
            if PY3:
                cached_data = cached_data.decode('utf-8')
            payload = json.loads(cached_data)
        except Exception as e:
            logging.error("Error reading cache: " +
                          utils.error_msg_from_exception(e))
            payload = None
        logging.info("Serving from cache")

    if not payload:
        data = None
        is_cached = False
        cache_timeout = self.cache_timeout
        stacktrace = None
        try:
            df = self.get_df()
            if not self.error_message:
                data = self.get_data(df)
        except Exception as e:
            logging.exception(e)
            if not self.error_message:
                self.error_message = str(e)
            self.status = utils.QueryStatus.FAILED
            data = None
            stacktrace = traceback.format_exc()
        payload = {
            'cache_key': cache_key,
            'cache_timeout': cache_timeout,
            'data': data,
            'error': self.error_message,
            'form_data': self.form_data,
            'query': self.query,
            'status': self.status,
            'stacktrace': stacktrace,
        }
        payload['cached_dttm'] = datetime.now().isoformat().split('.')[0]
        logging.info("Caching for the next {} seconds".format(
            cache_timeout))
        data = self.json_dumps(payload)
        if PY3:
            data = bytes(data, 'utf-8')
        if cache and self.status != utils.QueryStatus.FAILED:
            try:
                cache.set(
                    cache_key,
                    zlib.compress(data),
                    timeout=cache_timeout)
            except Exception as e:
                # cache.set call can fail if the backend is down or if
                # the key is too large or whatever other reasons
                logging.warning("Could not cache key {}".format(cache_key))
                logging.exception(e)
                cache.delete(cache_key)
    payload['is_cached'] = is_cached
    return payload
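# Hedged standalone sketch of the compression scheme the get_payload variants
# rely on: the JSON payload is utf-8 encoded and zlib-compressed before
# cache.set, then decompressed and decoded again on the read path. The
# function name and timeout are illustrative; `cache` is any Flask-Caching
# style object with get/set.
import json
import zlib


def demo_payload_roundtrip(cache, cache_key, payload, timeout=600):
    data = bytes(json.dumps(payload), 'utf-8')
    cache.set(cache_key, zlib.compress(data), timeout=timeout)
    # mirror of the decompress/decode branch at the top of get_payload
    return json.loads(zlib.decompress(cache.get(cache_key)).decode('utf-8'))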