Code example #1
    def load_query_context_from_cache(  # pylint: disable=no-self-use
            self, cache_key: str) -> Dict[str, Any]:
        cache_value = cache.get(cache_key)
        if not cache_value:
            raise ChartDataCacheLoadError("Cached data not found")

        return cache_value["data"]
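Nothing in this snippet shows how the cached entry is written; the loader only assumes a dict with a "data" member. A minimal round-trip sketch of that contract, assuming a Flask-Caching-style object with get/set; the dict-backed stand-in below is hypothetical, not Superset's actual cache:

from typing import Any, Dict, Optional


class FakeCache:
    """Dict-backed stand-in for a Flask-Caching cache (hypothetical)."""

    def __init__(self) -> None:
        self._store: Dict[str, Any] = {}

    def get(self, key: str) -> Optional[Any]:
        return self._store.get(key)

    def set(self, key: str, value: Any, timeout: Optional[int] = None) -> None:
        self._store[key] = value


cache = FakeCache()
cache.set("chart-data:abc123", {"data": [{"country": "JP", "sum": 42}]})

cache_value = cache.get("chart-data:abc123")
if not cache_value:
    raise RuntimeError("Cached data not found")
print(cache_value["data"])  # the loader returns only the "data" member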
Code example #2
File: connector.py Project: woakes070048/bit
    def download(self, path=''):

        logging.info('Start download from google drive')
        logging.info(path)

        report_filename = self.get_report_filename(self.report, self.from_date,
                                                   self.to_date)
        if cache:
            cache_key = path
            cache_timeout = CONNECTOR_INFO.get('report_cache_timeout', 60 * 60)

            z_report = cache.get(cache_key)
            if z_report is not None:
                return petl.io.fromjson(
                    petl.MemorySource(zlib.decompress(z_report)))

            logging.info('Download Report from {}'.format(path))

            storage = self.storage[0]
            storage.init()

            fname = '{}.json'.format(self.report)

            with storage.open(path) as archive_file:
                with zipfile.ZipFile(archive_file) as zip_file:
                    report = zip_file.read(fname)
                    z_report = zlib.compress(report)
                    cache.set(cache_key, z_report, timeout=cache_timeout)
                    return petl.io.fromjson(petl.MemorySource(report))
        else:
            # move to init
            if not os.path.exists(self.report_folder):
                os.makedirs(self.report_folder)

            if not os.path.exists(report_filename):
                logging.info('Download Report from {}'.format(path))
                storage = self.storage[0]
                storage.init()

                fname = '{}.json'.format(self.report)

                with storage.open(path) as archive_file:
                    with zipfile.ZipFile(archive_file) as zip_file:
                        report = zip_file.read(fname)

                        with open(report_filename, 'wb') as f:
                            f.write(report)

                        logging.info('Read from {}'.format(report_filename))
                        report = petl.io.fromjson(report_filename)
                        return report
        return []
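The cache branch above stores the extracted zip member zlib-compressed and decompresses it again on a hit, trading a little CPU for much smaller cache entries. A minimal sketch of just that pattern, with a plain dict standing in for the cache backend:

import zlib
from typing import Optional

cache = {}  # stand-in for the real cache backend

def cache_report(key: str, report: bytes) -> None:
    # store the compressed bytes, not the raw report
    cache[key] = zlib.compress(report)

def load_report(key: str) -> Optional[bytes]:
    z_report = cache.get(key)
    if z_report is None:
        return None
    return zlib.decompress(z_report)

raw = b'{"clicks": 1200, "impressions": 34000}'
cache_report('report:2020-01-01', raw)
assert load_report('report:2020-01-01') == raw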
Code example #3
        def wrapper(*args, **kwargs):
            # check if the user can access the resource
            check_perms(*args, **kwargs)

            # for POST requests we can't set cache headers, use the response
            # cache, or use conditional requests; this will still use the
            # dataframe cache in `superset/viz.py`, though.
            if request.method == "POST":
                return f(*args, **kwargs)

            response = None
            if cache:
                try:
                    # build the cache key from the function arguments and any
                    # additional GET arguments (e.g. `form_data`).
                    key_args = list(args)
                    key_kwargs = kwargs.copy()
                    key_kwargs.update(request.args)
                    cache_key = wrapper.make_cache_key(f, *key_args,
                                                       **key_kwargs)
                    response = cache.get(cache_key)
                except Exception:  # pylint: disable=broad-except
                    if app.debug:
                        raise
                    logging.exception(
                        "Exception possibly due to cache backend.")

            # if no response was cached, compute it using the wrapped function
            if response is None:
                response = f(*args, **kwargs)

                # add headers for caching: Last Modified, Expires and ETag
                response.cache_control.public = True
                response.last_modified = datetime.utcnow()
                expiration = max_age if max_age != 0 else FAR_FUTURE
                response.expires = response.last_modified + timedelta(
                    seconds=expiration)
                response.add_etag()

                # if we have a cache, store the response from the request
                if cache:
                    try:
                        cache.set(cache_key, response, timeout=max_age)
                    except Exception:  # pylint: disable=broad-except
                        if app.debug:
                            raise
                        logging.exception(
                            "Exception possibly due to cache backend.")

            return response.make_conditional(request)
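The decorator's payoff is the final make_conditional call: once Last-Modified and an ETag are set, a repeat request carrying If-None-Match gets a 304 Not Modified with an empty body instead of the full payload. A self-contained Flask sketch of just that mechanism (the /data route and payload are made up):

from flask import Flask, Response, request

app = Flask(__name__)

@app.route("/data")
def data() -> Response:
    response = Response('{"rows": 3}', mimetype="application/json")
    response.cache_control.public = True
    response.add_etag()  # derive an ETag header from the body
    # replies 304 Not Modified when the client's If-None-Match matches
    return response.make_conditional(request)

if __name__ == "__main__":
    app.run()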
Code example #4
File: decorators.py Project: tan31989/caravel
        def wrapper(*args, **kwargs):
            # check if the user can access the resource
            check_perms(*args, **kwargs)

            # for POST requests we can't set cache headers, use the response
            # cache, or use conditional requests; this will still use the
            # dataframe cache in `superset/viz.py`, though.
            if request.method == 'POST':
                return f(*args, **kwargs)

            response = None
            if cache:
                try:
                    # build the cache key from the function arguments and any
                    # additional GET arguments (e.g. `form_data`).
                    key_args = list(args)
                    key_kwargs = kwargs.copy()
                    key_kwargs.update(request.args)
                    cache_key = wrapper.make_cache_key(f, *key_args, **key_kwargs)
                    response = cache.get(cache_key)
                except Exception:  # pylint: disable=broad-except
                    if app.debug:
                        raise
                    logging.exception('Exception possibly due to cache backend.')

            # if no response was cached, compute it using the wrapped function
            if response is None:
                response = f(*args, **kwargs)

                # add headers for caching: Last Modified, Expires and ETag
                response.cache_control.public = True
                response.last_modified = datetime.utcnow()
                expiration = max_age if max_age != 0 else FAR_FUTURE
                response.expires = \
                    response.last_modified + timedelta(seconds=expiration)
                response.add_etag()

                # if we have a cache, store the response from the request
                if cache:
                    try:
                        cache.set(cache_key, response, timeout=max_age)
                    except Exception:  # pylint: disable=broad-except
                        if app.debug:
                            raise
                        logging.exception('Exception possibly due to cache backend.')

            return response.make_conditional(request)
Code example #5
    def get_df_payload(  # pylint: disable=too-many-statements
            self, query_obj: QueryObject, **kwargs: Any) -> Dict[str, Any]:
        """Handles caching around the df payload retrieval"""
        cache_key = self.cache_key(query_obj, **kwargs)
        logger.info("Cache key: %s", cache_key)
        is_loaded = False
        stacktrace = None
        df = pd.DataFrame()
        cached_dttm = datetime.utcnow().isoformat().split(".")[0]
        cache_value = None
        status = None
        query = ""
        error_message = None
        if cache_key and cache and not self.force:
            cache_value = cache.get(cache_key)
            if cache_value:
                stats_logger.incr("loading_from_cache")
                try:
                    df = cache_value["df"]
                    query = cache_value["query"]
                    status = utils.QueryStatus.SUCCESS
                    is_loaded = True
                    stats_logger.incr("loaded_from_cache")
                except KeyError as ex:
                    logger.exception(ex)
                    logger.error("Error reading cache: %s",
                                 utils.error_msg_from_exception(ex))
                logger.info("Serving from cache")

        if query_obj and not is_loaded:
            try:
                invalid_columns = [
                    col for col in query_obj.columns + query_obj.groupby +
                    utils.get_column_names_from_metrics(query_obj.metrics)
                    if col not in self.datasource.column_names
                ]
                if invalid_columns:
                    raise QueryObjectValidationError(
                        _(
                            "Columns missing in datasource: %(invalid_columns)s",
                            invalid_columns=invalid_columns,
                        ))
                query_result = self.get_query_result(query_obj)
                status = query_result["status"]
                query = query_result["query"]
                error_message = query_result["error_message"]
                df = query_result["df"]
                if status != utils.QueryStatus.FAILED:
                    stats_logger.incr("loaded_from_source")
                    if not self.force:
                        stats_logger.incr("loaded_from_source_without_force")
                    is_loaded = True
            except QueryObjectValidationError as ex:
                error_message = str(ex)
                status = utils.QueryStatus.FAILED
            except Exception as ex:  # pylint: disable=broad-except
                logger.exception(ex)
                if not error_message:
                    error_message = str(ex)
                status = utils.QueryStatus.FAILED
                stacktrace = utils.get_stacktrace()

            if is_loaded and cache_key and cache and status != utils.QueryStatus.FAILED:
                set_and_log_cache(
                    cache_key,
                    df,
                    query,
                    cached_dttm,
                    self.cache_timeout,
                    self.datasource.uid,
                )
        return {
            "cache_key": cache_key,
            "cached_dttm":
            cache_value["dttm"] if cache_value is not None else None,
            "cache_timeout": self.cache_timeout,
            "df": df,
            "error": error_message,
            "is_cached": cache_value is not None,
            "query": query,
            "status": status,
            "stacktrace": stacktrace,
            "rowcount": len(df.index),
        }
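The set_and_log_cache helper this snippet calls is not shown. A plausible sketch of it, modeled on the inline pickle-and-set logic in code example #6 below; the signature and the stub cache are assumptions, not the actual Superset implementation:

import logging
import pickle as pkl

logger = logging.getLogger(__name__)

class StubCache:
    """Stand-in for the module-level cache object used in these snippets."""
    def __init__(self):
        self._store = {}
    def set(self, key, value, timeout=None):
        self._store[key] = value
    def delete(self, key):
        self._store.pop(key, None)

cache = StubCache()

def set_and_log_cache(cache_key, df, query, cached_dttm,
                      cache_timeout, datasource_uid):
    # datasource_uid matches the call site above but is unused here
    try:
        cache_value = dict(dttm=cached_dttm, df=df, query=query)
        cache_binary = pkl.dumps(cache_value, protocol=pkl.HIGHEST_PROTOCOL)
        logger.info("Caching %d bytes at key %s", len(cache_binary), cache_key)
        cache.set(cache_key, cache_binary, timeout=cache_timeout)
    except Exception:  # backend down, value too large, etc.
        logger.warning("Could not cache key %s", cache_key)
        # drop the key so a half-written entry is never served
        cache.delete(cache_key)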
Code example #6
File: query_context.py Project: estevaocs/acttivebi
    def get_df_payload(  # pylint: disable=too-many-locals,too-many-statements
            self, query_obj: QueryObject, **kwargs: Any) -> Dict[str, Any]:
        """Handles caching around the df payload retrieval"""
        cache_key = self.cache_key(query_obj, **kwargs)
        logger.info("Cache key: %s", cache_key)
        is_loaded = False
        stacktrace = None
        df = pd.DataFrame()
        cached_dttm = datetime.utcnow().isoformat().split(".")[0]
        cache_value = None
        status = None
        query = ""
        error_message = None
        if cache_key and cache and not self.force:
            cache_value = cache.get(cache_key)
            if cache_value:
                stats_logger.incr("loading_from_cache")
                try:
                    cache_value = pkl.loads(cache_value)
                    df = cache_value["df"]
                    query = cache_value["query"]
                    status = utils.QueryStatus.SUCCESS
                    is_loaded = True
                    stats_logger.incr("loaded_from_cache")
                except Exception as ex:  # pylint: disable=broad-except
                    logger.exception(ex)
                    logger.error("Error reading cache: %s",
                                 utils.error_msg_from_exception(ex))
                logger.info("Serving from cache")

        if query_obj and not is_loaded:
            try:
                query_result = self.get_query_result(query_obj)
                status = query_result["status"]
                query = query_result["query"]
                error_message = query_result["error_message"]
                df = query_result["df"]
                if status != utils.QueryStatus.FAILED:
                    stats_logger.incr("loaded_from_source")
                    if not self.force:
                        stats_logger.incr("loaded_from_source_without_force")
                    is_loaded = True
            except Exception as ex:  # pylint: disable=broad-except
                logger.exception(ex)
                if not error_message:
                    error_message = "{}".format(ex)
                status = utils.QueryStatus.FAILED
                stacktrace = utils.get_stacktrace()

            if is_loaded and cache_key and cache and status != utils.QueryStatus.FAILED:
                try:
                    cache_value = dict(dttm=cached_dttm, df=df, query=query)
                    cache_binary = pkl.dumps(cache_value,
                                             protocol=pkl.HIGHEST_PROTOCOL)

                    logger.info("Caching %d chars at key %s",
                                len(cache_binary), cache_key)

                    stats_logger.incr("set_cache_key")
                    cache.set(cache_key,
                              cache_binary,
                              timeout=self.cache_timeout)
                except Exception as ex:  # pylint: disable=broad-except
                    # cache.set call can fail if the backend is down or if
                    # the key is too large or whatever other reasons
                    logger.warning("Could not cache key %s", cache_key)
                    logger.exception(ex)
                    cache.delete(cache_key)
        return {
            "cache_key": cache_key,
            "cached_dttm":
            cache_value["dttm"] if cache_value is not None else None,
            "cache_timeout": self.cache_timeout,
            "df": df,
            "error": error_message,
            "is_cached": cache_key is not None,
            "query": query,
            "status": status,
            "stacktrace": stacktrace,
            "rowcount": len(df.index),
        }
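This variant pickles the whole payload (dttm, df, query) into one binary blob, so on a hit the blob must unpickle back into a dict with exactly those keys. A minimal round-trip sketch with a real DataFrame; the cache dict and key are stand-ins:

import pickle as pkl
import pandas as pd

cache = {}  # stand-in for the real cache backend

df = pd.DataFrame({"country": ["JP", "DE"], "sum": [42, 7]})
payload = dict(dttm="2020-01-01T00:00:00", df=df, query="SELECT ...")
cache["viz:xyz"] = pkl.dumps(payload, protocol=pkl.HIGHEST_PROTOCOL)

restored = pkl.loads(cache["viz:xyz"])
assert restored["df"].equals(df)  # DataFrame survives the round trip
print(restored["query"])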
Code example #7
    def get_df_payload(self, query_obj, **kwargs):
        """Handles caching around the df paylod retrieval"""
        cache_key = query_obj.cache_key(
            datasource=self.datasource.uid, **kwargs) if query_obj else None
        logging.info('Cache key: {}'.format(cache_key))
        is_loaded = False
        stacktrace = None
        df = None
        cached_dttm = datetime.utcnow().isoformat().split('.')[0]
        cache_value = None
        status = None
        query = ''
        error_message = None
        if cache_key and cache and not self.force:
            cache_value = cache.get(cache_key)
            if cache_value:
                stats_logger.incr('loaded_from_cache')
                try:
                    cache_value = pkl.loads(cache_value)
                    df = cache_value['df']
                    query = cache_value['query']
                    status = utils.QueryStatus.SUCCESS
                    is_loaded = True
                except Exception as e:
                    logging.exception(e)
                    logging.error('Error reading cache: ' +
                                  utils.error_msg_from_exception(e))
                logging.info('Serving from cache')

        if query_obj and not is_loaded:
            try:
                query_result = self.get_query_result(query_obj)
                status = query_result['status']
                query = query_result['query']
                error_message = query_result['error_message']
                df = query_result['df']
                if status != utils.QueryStatus.FAILED:
                    stats_logger.incr('loaded_from_source')
                    is_loaded = True
            except Exception as e:
                logging.exception(e)
                if not error_message:
                    error_message = '{}'.format(e)
                status = utils.QueryStatus.FAILED
                stacktrace = traceback.format_exc()

            if (
                    is_loaded and
                    cache_key and
                    cache and
                    status != utils.QueryStatus.FAILED):
                try:
                    cache_value = dict(
                        dttm=cached_dttm,
                        df=df if df is not None else None,
                        query=query,
                    )
                    cache_value = pkl.dumps(
                        cache_value, protocol=pkl.HIGHEST_PROTOCOL)

                    logging.info('Caching {} chars at key {}'.format(
                        len(cache_value), cache_key))

                    stats_logger.incr('set_cache_key')
                    cache.set(
                        cache_key,
                        cache_value,
                        timeout=self.cache_timeout)
                except Exception as e:
                    # cache.set call can fail if the backend is down or if
                    # the key is too large or whatever other reasons
                    logging.warning('Could not cache key {}'.format(cache_key))
                    logging.exception(e)
                    cache.delete(cache_key)
        return {
            'cache_key': cache_key,
            'cached_dttm': cache_value['dttm'] if cache_value is not None else None,
            'cache_timeout': self.cache_timeout,
            'df': df,
            'error': error_message,
            'is_cached': cache_key is not None,
            'query': query,
            'status': status,
            'stacktrace': stacktrace,
            'rowcount': len(df.index) if df is not None else 0,
        }
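Here the key comes from query_obj.cache_key(datasource=...), whose implementation is not shown; what matters is that it is a deterministic digest of the query parameters, so logically equal queries hit the same cache entry. A hedged sketch of one way such a key can be derived (not the actual Superset implementation):

import hashlib
import json

def make_cache_key(datasource_uid, **query_params):
    # sort keys so logically equal queries serialize identically
    canonical = json.dumps(
        {'datasource': datasource_uid, **query_params},
        sort_keys=True, default=str)
    return hashlib.md5(canonical.encode('utf-8')).hexdigest()

key = make_cache_key('table__42', metrics=['count'], granularity='P1D')
print(key)  # stable across runs for the same inputs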
Code example #8
    def get_df_payload(self, query_obj: QueryObject, **kwargs):
        """Handles caching around the df paylod retrieval"""
        extra_cache_keys = self.datasource.get_extra_cache_keys(query_obj.to_dict())
        cache_key = (
            query_obj.cache_key(
                datasource=self.datasource.uid,
                extra_cache_keys=extra_cache_keys,
                **kwargs
            )
            if query_obj
            else None
        )
        logging.info("Cache key: {}".format(cache_key))
        is_loaded = False
        stacktrace = None
        df = None
        cached_dttm = datetime.utcnow().isoformat().split(".")[0]
        cache_value = None
        status = None
        query = ""
        error_message = None
        if cache_key and cache and not self.force:
            cache_value = cache.get(cache_key)
            if cache_value:
                stats_logger.incr("loaded_from_cache")
                try:
                    cache_value = pkl.loads(cache_value)
                    df = cache_value["df"]
                    query = cache_value["query"]
                    status = utils.QueryStatus.SUCCESS
                    is_loaded = True
                except Exception as e:
                    logging.exception(e)
                    logging.error(
                        "Error reading cache: " + utils.error_msg_from_exception(e)
                    )
                logging.info("Serving from cache")

        if query_obj and not is_loaded:
            try:
                query_result = self.get_query_result(query_obj)
                status = query_result["status"]
                query = query_result["query"]
                error_message = query_result["error_message"]
                df = query_result["df"]
                if status != utils.QueryStatus.FAILED:
                    stats_logger.incr("loaded_from_source")
                    is_loaded = True
            except Exception as e:
                logging.exception(e)
                if not error_message:
                    error_message = "{}".format(e)
                status = utils.QueryStatus.FAILED
                stacktrace = utils.get_stacktrace()

            if is_loaded and cache_key and cache and status != utils.QueryStatus.FAILED:
                try:
                    cache_value = dict(
                        dttm=cached_dttm, df=df if df is not None else None, query=query
                    )
                    cache_binary = pkl.dumps(cache_value, protocol=pkl.HIGHEST_PROTOCOL)

                    logging.info(
                        "Caching {} chars at key {}".format(
                            len(cache_binary), cache_key
                        )
                    )

                    stats_logger.incr("set_cache_key")
                    cache.set(cache_key, cache_binary, timeout=self.cache_timeout)
                except Exception as e:
                    # cache.set call can fail if the backend is down or if
                    # the key is too large or whatever other reasons
                    logging.warning("Could not cache key {}".format(cache_key))
                    logging.exception(e)
                    cache.delete(cache_key)
        return {
            "cache_key": cache_key,
            "cached_dttm": cache_value["dttm"] if cache_value is not None else None,
            "cache_timeout": self.cache_timeout,
            "df": df,
            "error": error_message,
            "is_cached": cache_key is not None,
            "query": query,
            "status": status,
            "stacktrace": stacktrace,
            "rowcount": len(df.index) if df is not None else 0,
        }
Code example #9
    def get_payload(self, force=False):
        """Handles caching around the json payload retrieval"""
        cache_key = self.cache_key
        payload = None
        force = force if force else self.form_data.get('force') == 'true'
        if not force and cache:
            payload = cache.get(cache_key)

        if payload:
            is_cached = True
            try:
                cached_data = zlib.decompress(payload)
                if PY3:
                    cached_data = cached_data.decode('utf-8')
                payload = json.loads(cached_data)
            except Exception as e:
                logging.error("Error reading cache: " +
                              utils.error_msg_from_exception(e))
                payload = None
            logging.info("Serving from cache")

        if not payload:
            data = None
            is_cached = False
            cache_timeout = self.cache_timeout
            stacktrace = None
            try:
                df = self.get_df()
                if not self.error_message:
                    data = self.get_data(df)
            except Exception as e:
                logging.exception(e)
                if not self.error_message:
                    self.error_message = str(e)
                self.status = utils.QueryStatus.FAILED
                data = None
                stacktrace = traceback.format_exc()
            payload = {
                'cache_key': cache_key,
                'cache_timeout': cache_timeout,
                'data': data,
                'error': self.error_message,
                'form_data': self.form_data,
                'query': self.query,
                'status': self.status,
                'stacktrace': stacktrace,
            }
            payload['cached_dttm'] = datetime.now().isoformat().split('.')[0]
            logging.info(
                "Caching for the next {} seconds".format(cache_timeout))
            data = self.json_dumps(payload)
            if PY3:
                data = bytes(data, 'utf-8')
            if cache and self.status != utils.QueryStatus.FAILED:
                try:
                    cache.set(cache_key,
                              zlib.compress(data),
                              timeout=cache_timeout)
                except Exception as e:
                    # cache.set call can fail if the backend is down or if
                    # the key is too large or whatever other reasons
                    logging.warning("Could not cache key {}".format(cache_key))
                    logging.exception(e)
                    cache.delete(cache_key)
        payload['is_cached'] = is_cached
        return payload
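The write path above chains json.dumps, a bytes conversion on Python 3, and zlib.compress; the read path at the top of the method reverses all three steps. A compact sketch of the full round trip, with a dict standing in for the cache:

import json
import zlib

cache = {}  # stand-in for the real cache backend

payload = {'data': [1, 2, 3], 'status': 'success'}
data = json.dumps(payload)
data = bytes(data, 'utf-8')        # json.dumps returns str on Python 3
cache['viz-key'] = zlib.compress(data)

cached_data = zlib.decompress(cache['viz-key']).decode('utf-8')
assert json.loads(cached_data) == payload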
Code example #10
        def wrapper(*args: Any, **kwargs: Any) -> ETagResponseMixin:
            # check if the user can access the resource
            check_perms(*args, **kwargs)

            # for POST requests we can't set cache headers, use the response
            # cache, or use conditional requests; this will still use the
            # dataframe cache in `superset/viz.py`, though.
            if request.method == "POST" or (skip and skip(*args, **kwargs)):
                return f(*args, **kwargs)

            response = None
            last_modified = get_last_modified and get_last_modified(*args, **kwargs)

            if cache:
                try:
                    # build the cache key from the function arguments and any
                    # additional GET arguments (e.g. `form_data`).
                    key_args = list(args)
                    key_kwargs = kwargs.copy()
                    key_kwargs.update(request.args)
                    cache_key = wrapper.make_cache_key(  # type: ignore
                        f, *key_args, **key_kwargs
                    )
                    response = cache.get(cache_key)
                except Exception:  # pylint: disable=broad-except
                    if app.debug:
                        raise
                    logger.exception("Exception possibly due to cache backend.")

                # discard the cached response if it is stale
                if (
                    response
                    and last_modified
                    and response.last_modified
                    and response.last_modified < last_modified
                ):
                    response = None

            if response is None:
                # if no response was cached, compute it using the wrapped function
                response = f(*args, **kwargs)

                # set expiration headers:
                #   Last-Modified, Expires, Cache-Control, ETag
                response.last_modified = last_modified or datetime.utcnow()
                expiration = max_age if max_age != 0 else FAR_FUTURE
                response.expires = response.last_modified + timedelta(
                    seconds=expiration
                )

                # when needed, instruct the browser to always revalidate cache
                if must_revalidate:
                    # `Cache-Control: no-cache` lets the browser store the
                    # response, but it must revalidate it with the server
                    # before reusing it.
                    response.cache_control.no_cache = True
                else:
                    # `Cache-Control: public` lets the browser store and
                    # reuse the response until it expires.
                    response.cache_control.public = True

                response.add_etag()

                # if we have a cache, store the response from the request
                if cache:
                    try:
                        cache.set(cache_key, response, timeout=max_age)
                    except Exception:  # pylint: disable=broad-except
                        if app.debug:
                            raise
                        logger.exception("Exception possibly due to cache backend.")

            return response.make_conditional(request)
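The must_revalidate switch above picks between two Cache-Control policies. A small sketch of what each branch puts on the wire, using a bare Werkzeug response to show the resulting header (values are illustrative):

from werkzeug.wrappers import Response

revalidating = Response('payload')
revalidating.cache_control.no_cache = True
print(revalidating.headers['Cache-Control'])   # no-cache

long_lived = Response('payload')
long_lived.cache_control.public = True
long_lived.cache_control.max_age = 600
print(long_lived.headers['Cache-Control'])     # public, max-age=600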
Code example #11
    def load(cache_key: str) -> Dict[str, Any]:
        cache_value = cache.get(cache_key)
        if not cache_value:
            raise ChartDataCacheLoadError("Cached data not found")

        return cache_value["data"]
Code example #12
        def wrapper(*args: Any, **kwargs: Any) -> ETagResponseMixin:
            # check if the user can access the resource
            check_perms(*args, **kwargs)

            # for POST requests we can't set cache headers, use the response
            # cache, or use conditional requests; this will still use the
            # dataframe cache in `superset/viz.py`, though.
            if request.method == "POST" or (skip and skip(*args, **kwargs)):
                return f(*args, **kwargs)

            response = None
            if cache:
                try:
                    # build the cache key from the function arguments and any
                    # additional GET arguments (e.g. `form_data`).
                    key_args = list(args)
                    key_kwargs = kwargs.copy()
                    key_kwargs.update(request.args)
                    cache_key = wrapper.make_cache_key(  # type: ignore
                        f, *key_args, **key_kwargs
                    )
                    response = cache.get(cache_key)
                except Exception:  # pylint: disable=broad-except
                    if app.debug:
                        raise
                    logger.exception("Exception possibly due to cache backend.")

            # discard the cached response if it is stale
            if get_last_modified:
                content_changed_time = get_last_modified(*args, **kwargs)
                if (
                    response
                    and response.last_modified
                    and response.last_modified.timestamp()
                    < content_changed_time.timestamp()
                ):
                    response = None
            else:
                # if the caller didn't provide the content's last_modified
                # time, assume the cached response is not stale.
                content_changed_time = datetime.utcnow()

            # if no response was cached, compute it using the wrapped function
            if response is None:
                response = f(*args, **kwargs)

                # add headers for caching: Last Modified, Expires and ETag
                response.cache_control.public = True
                response.last_modified = content_changed_time
                expiration = max_age if max_age != 0 else FAR_FUTURE
                response.expires = response.last_modified + timedelta(
                    seconds=expiration
                )
                response.add_etag()

                # if we have a cache, store the response from the request
                if cache:
                    try:
                        cache.set(cache_key, response, timeout=max_age)
                    except Exception:  # pylint: disable=broad-except
                        if app.debug:
                            raise
                        logger.exception("Exception possibly due to cache backend.")

            return response.make_conditional(request)
Code example #13
File: connector.py Project: woakes070048/bit
    def download(self, path=''):

        logging.info('Start download from google drive')
        logging.info(path)

        report_filename = self.get_report_filename(self.report, self.from_date,
                                                   self.to_date)

        time_range = {'since': self.from_date, 'until': self.to_date}

        if cache:
            cache_key = path
            cache_timeout = CONNECTOR_INFO.get('report_cache_timeout', 60 * 60)

            z_report = cache.get(cache_key)
            if z_report is not None:
                return petl.io.fromjson(
                    petl.MemorySource(zlib.decompress(z_report)))

            logging.info('Download Report from {}'.format(path))

            request = fbAdAccount.get_insights(
                # 'async' is a reserved word in Python 3.7+; newer
                # facebook-business SDK versions take 'is_async' instead
                is_async=True,
                fields=fb_ads_insight_fields,
                params={
                    'time_increment': 1,
                    'level': fbAdsInsights.Level.ad,
                    'time_range': time_range
                })

            # NOTE: the async insights job started above is never polled or
            # collected here, and the storage-based download path (see code
            # example #2) was disabled in this version of the connector.
        return []
Code example #14
File: connector.py Project: woakes070048/bit
    def download(self, urls=None):
        # avoid the shared mutable default argument pitfall
        urls = urls or []

        # timeout setting for requests
        # timeout = urllib3.Timeout(connect=2.0, read=7.0)
        # http = urllib3.PoolManager(timeout=timeout)
        http = urllib3.PoolManager()

        report_data = []

        for url in urls:

            # hashlib.md5 requires bytes, so encode the url first
            report_filename = self.get_report_filename(
                hashlib.md5(url.encode('utf-8')).hexdigest())

            if cache:
                cache_key = url
                cache_timeout = CONNECTOR_INFO.get('report_cache_timeout',
                                                   60 * 60)

                z_report = cache.get(cache_key)
                if z_report is not None:

                    new_report_data = petl.io.fromcsv(
                        petl.MemorySource(zlib.decompress(z_report)))

                    if not report_data:
                        report_data = new_report_data
                    else:
                        report_data = petl.cat(report_data, new_report_data)

                    continue

                logging.info('Download Report from {}'.format(url))

                r = http.request('GET',
                                 url,
                                 retries=urllib3.Retry(
                                     redirect=2,
                                     backoff_factor=2,
                                 ))
                if r.status == 200:
                    report = r.data
                    r.release_conn()

                    z_report = zlib.compress(report)
                    cache.set(cache_key, z_report, timeout=cache_timeout)

                    new_report_data = petl.io.fromcsv(
                        petl.MemorySource(report))
                    if not report_data:
                        report_data = new_report_data
                    else:
                        report_data = petl.cat(report_data, new_report_data)
                elif r.status == 403:
                    raise Exception(r.data)
                else:
                    logging.info(r.data)
                    logging.info(r.status)
                    logging.info(r.headers)

            else:
                # move to init

                if not os.path.exists(self.report_folder):
                    os.makedirs(self.report_folder)

                if not os.path.exists(report_filename):
                    logging.info('Download Report from {}'.format(url))

                    r = http.request('GET',
                                     url,
                                     retries=urllib3.Retry(
                                         redirect=2,
                                         backoff_factor=2,
                                     ))
                    if r.status == 200:
                        with open(report_filename, 'wb') as f:
                            f.write(r.data)
                        r.release_conn()

                        logging.info('Read from {}'.format(report_filename))

                        new_report_data = petl.io.fromcsv(report_filename)

                        if not report_data:
                            report_data = new_report_data
                        else:
                            report_data = petl.cat(report_data,
                                                   new_report_data)
        return report_data
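Each loop iteration either seeds report_data or appends to it with petl.cat, which concatenates tables sharing a header. A minimal sketch with in-memory CSV sources (the data is made up):

import petl

t1 = petl.fromcsv(petl.MemorySource(b'date,clicks\n2020-01-01,10\n'))
t2 = petl.fromcsv(petl.MemorySource(b'date,clicks\n2020-01-02,12\n'))

report_data = t1
report_data = petl.cat(report_data, t2)  # one header, rows of both
print(list(report_data))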
Code example #15
File: viz.py Project: dawsongzhao/superset
    def get_payload(self, force=False):
        """Handles caching around the json payload retrieval"""
        cache_key = self.cache_key
        payload = None
        force = force if force else self.form_data.get('force') == 'true'
        if not force and cache:
            payload = cache.get(cache_key)

        if payload:
            is_cached = True
            try:
                cached_data = zlib.decompress(payload)
                if PY3:
                    cached_data = cached_data.decode('utf-8')
                payload = json.loads(cached_data)
            except Exception as e:
                logging.error("Error reading cache: " +
                              utils.error_msg_from_exception(e))
                payload = None
            logging.info("Serving from cache")

        if not payload:
            data = None
            is_cached = False
            cache_timeout = self.cache_timeout
            stacktrace = None
            try:
                df = self.get_df()
                if not self.error_message:
                    data = self.get_data(df)
            except Exception as e:
                logging.exception(e)
                if not self.error_message:
                    self.error_message = str(e)
                self.status = utils.QueryStatus.FAILED
                data = None
                stacktrace = traceback.format_exc()
            payload = {
                'cache_key': cache_key,
                'cache_timeout': cache_timeout,
                'data': data,
                'error': self.error_message,
                'form_data': self.form_data,
                'query': self.query,
                'status': self.status,
                'stacktrace': stacktrace,
            }
            payload['cached_dttm'] = datetime.now().isoformat().split('.')[0]
            logging.info("Caching for the next {} seconds".format(
                cache_timeout))
            data = self.json_dumps(payload)
            if PY3:
                data = bytes(data, 'utf-8')
            if cache and self.status != utils.QueryStatus.FAILED:
                try:
                    cache.set(
                        cache_key,
                        zlib.compress(data),
                        timeout=cache_timeout)
                except Exception as e:
                    # cache.set call can fail if the backend is down or if
                    # the key is too large or whatever other reasons
                    logging.warning("Could not cache key {}".format(cache_key))
                    logging.exception(e)
                    cache.delete(cache_key)
        payload['is_cached'] = is_cached
        return payload