def import_tsa(self, datestring, tsa):
    """
    Store the given tsa in the cache directory of datestring.

    Typical input is an existing tsa that was modified, extended by some
    keys or filtered. Besides the tsa itself, the derived statistics and
    quantiles are generated and written to the same cache directory.
    An already existing archive is never overwritten.

    parameters:
    datestring <str> selects the cache directory (via __get_cachedir)
    tsa <TimeseriesArrayLazy> object, index_keynames and value_keynames
        have to match the ones of this object

    raises:
    AssertionError if index_keynames or value_keynames do not match
    StandardError if the tsa archive file already exists in the cache
    """
    assert self.__index_keynames == tsa.index_keynames
    assert self.__value_keynames == tuple(tsa.value_keynames)
    cachedir = self.__get_cachedir(datestring)
    cachefilename = os.path.join(cachedir, TimeseriesArrayLazy.get_dumpfilename(tsa.index_keynames))
    if os.path.isfile(cachefilename):
        # NOTE: StandardError only exists on Python 2; the exception type is
        # kept unchanged because callers may catch exactly this type
        raise StandardError("TSA Archive %s exists already in cache" % cachefilename)
    tsa.dump_split(cachedir)
    tsastats = TimeseriesArrayStats(tsa)
    tsastats.dump(cachedir)
    quantile = QuantileArray(tsa, tsastats)
    q_cachefilename = os.path.join(cachedir, "quantile.json")
    # context manager makes sure the file is flushed and closed,
    # even if dump() raises
    with open(q_cachefilename, "wb") as outfile:
        quantile.dump(outfile)
def fallback():
    """
    Regenerate the statistics cache and return the requested statistics.

    Used when the cached statistics can not be read. Relies on self,
    datestring, timedelta, cachedir and filterkeys of the enclosing scope.
    """
    # statistics are always built from the unfiltered tsa
    full_tsa = self.load_tsa(datestring=datestring, filterkeys=None, timedelta=timedelta)
    # generate the full statistics and store them in the cache directory
    TimeseriesArrayStats(full_tsa).dump(cachedir)
    # read back from the cache, this time applying the requested filterkeys
    return TimeseriesArrayStats.load(cachedir, self.__index_keynames, filterkeys=filterkeys)
def load_tsastats(self, datestring, filterkeys=None, timedelta=0, cleancache=False):
    """
    Caching loader for the statistics (TimeseriesArrayStats) of one day.

    If no statistics cachefile exists, the full tsa is loaded with
    load_tsa, statistics are generated from it and dumped to the cache
    directory. On every consecutive call the statistics are read from the
    cache. An unreadable cachefile (IOError, EOFError) is deleted and
    regenerated.

    parameters:
    datestring <str>
    filterkeys passed to TimeseriesArrayStats.load, None means no filter
    timedelta <int> passed on to load_tsa
    cleancache <bool> delete an existing cachefile and regenerate it

    returns:
    <TimeseriesArrayStats> object read from the cache directory

    raises:
    DataLoggerLiveDataError if not_today(datestring) is not true
    """
    # explicit check instead of assert: asserts are removed when python runs
    # with -O, which would silently disable this guard
    if not not_today(datestring):
        raise DataLoggerLiveDataError("Reading from live data is not allowed")
    cachedir = self.__get_cachedir(datestring)
    cachefilename = os.path.join(cachedir, TimeseriesArrayStats.get_dumpfilename(self.__index_keynames))

    def fallback():
        """
        fallback method to use, if reading from cache data is not possible
        """
        # load full tsa, and generate statistics
        tsa = self.load_tsa(datestring=datestring, filterkeys=None, timedelta=timedelta)
        tsastats = TimeseriesArrayStats(tsa) # generate full Stats
        tsastats.dump(cachedir) # save
        # read only the requested part back from the cache
        return TimeseriesArrayStats.load(cachedir, self.__index_keynames, filterkeys=filterkeys)

    if not os.path.isfile(cachefilename):
        logging.info("cachefile %s does not exist, fallback read from tsa archive", cachefilename)
        return fallback()
    # the cachefile exists from here on
    # comparison with True is kept on purpose, so truthy non-bool values
    # (e.g. non-empty strings) are treated exactly as before
    if cleancache == True:
        logging.info("deleting cachefile %s and read from raw", cachefilename)
        os.unlink(cachefilename)
        return fallback()
    logging.debug("loading stored TimeseriesArrayLazy object file %s", cachefilename)
    try:
        return TimeseriesArrayStats.load(cachedir, self.__index_keynames, filterkeys=filterkeys)
    except (IOError, EOFError) as exc:
        # unreadable cachefile: remove it and regenerate from the tsa archive
        logging.error("%s while reading from %s, using fallback", type(exc).__name__, cachefilename)
        os.unlink(cachefilename)
        return fallback()
def fallback():
    """
    Rebuild all cache files from raw data and return the tsa.

    Used when reading from cache data is not possible. Loads the raw tsa,
    dumps it to the cache directory, then generates and stores statistics
    and quantiles. Relies on self, datestring, timedelta, cachedir,
    validate, filterkeys and index_pattern of the enclosing scope.

    returns:
    the raw loaded tsa, or - if validate is True - the tsa read back from
    the cache directory with filterkeys and index_pattern applied
    """
    tsa = self.load_tsa_raw(datestring, timedelta)
    tsa.dump_split(cachedir) # save full data
    # read the data afterwards to make sure there is no problem,
    if validate is True:
        tsa = TimeseriesArrayLazy.load_split(cachedir, self.__index_keynames, filterkeys=filterkeys, index_pattern=index_pattern, datatypes=self.__datatypes)
        # NOTE(review): with validate and filterkeys/index_pattern set, the
        # statistics below are presumably built from the filtered tsa only
        # - confirm this is intended
    # also generate TSASTATS and dump to cache directory
    tsastats = TimeseriesArrayStats(tsa) # generate full Stats
    tsastats.dump(cachedir) # save
    # and at last but not least quantile
    quantile = QuantileArray(tsa, tsastats)
    q_cachefilename = os.path.join(cachedir, "quantile.json")
    # context manager makes sure the file is flushed and closed,
    # even if dump() raises
    with open(q_cachefilename, "wb") as outfile:
        quantile.dump(outfile)
    # finally return tsa
    return tsa
def tsastat_group_by(tsastat, subkey):
    """
    Group the given tsastat array by some subkey.

    All entries of tsastat whose index key has the same values for the
    names given in subkey are merged into one entry. An empty subkey
    merges everything into the single key ("__total__", ).

    A group with only one member keeps the statistics of that member
    unchanged. For bigger groups every statistic is converted to float
    and aggregated over all members of the group:
    count is added up, min / max are the extremes, first / last have no
    meaning for a group and are set to -1.0, every other statistic is the
    arithmetic mean over the members (independent of iteration order).

    parameters:
    tsastat <TimeseriesArrayStats>
    subkey <tuple> subkey to group by

    returns:
    <TimeseriesArrayStats> new object, created without calling __init__,
        with index_keys, value_keys and stats set

    raises:
    KeyError if a name in subkey is not part of tsastat.index_keynames,
        or if an unknown statistic has to be aggregated
    """
    def mean(values):
        """arithmetic mean of a non empty list of floats"""
        return sum(values) / len(values)

    def no_meaning(values):
        """placeholder for statistics which can not be aggregated"""
        return -1.0

    # how to aggregate the list of values of one statistical function
    group_funcs = {
        u'count' : sum,
        u'std' : mean,
        u'avg': mean,
        u'last' : no_meaning,
        u'min' : min,
        u'max' : max,
        # NOTE(review): sum is averaged, not added up, as it always was
        # - confirm this is intended
        u'sum' : mean,
        u'median' : mean,
        u'mean' : mean,
        u'diff' : mean,
        u'dec' : mean,
        u'inc' : mean,
        u'first' : no_meaning,
    }
    # first pass: collect the values of every group member,
    # newkey -> value_key -> stat_funcname -> [value, ...]
    collected = {}
    for index_key, tsstat in tsastat.items():
        key_dict = dict(zip(tsastat.index_keynames, index_key))
        if not subkey: # no subkey means total aggregation
            newkey = ("__total__", )
        else:
            newkey = tuple(key_dict[key] for key in subkey)
        group = collected.setdefault(newkey, {})
        for value_key in tsastat.value_keynames:
            per_stat = group.setdefault(value_key, {})
            for stat_funcname in tsstat[value_key].keys():
                per_stat.setdefault(stat_funcname, []).append(tsstat[value_key][stat_funcname])
    # second pass: reduce every list of values to a single value
    newdata = {}
    for newkey, group in collected.items():
        newdata[newkey] = {}
        for value_key, per_stat in group.items():
            merged = {}
            for stat_funcname, values in per_stat.items():
                if len(values) == 1:
                    # nothing to aggregate, keep the original value untouched
                    merged[stat_funcname] = values[0]
                else:
                    merged[stat_funcname] = group_funcs[stat_funcname]([float(value) for value in values])
            newdata[newkey][value_key] = merged
    # create new empty TimeseriesArrayStats Object, __init__ is bypassed
    # and the attributes are set directly
    tsastats_new = TimeseriesArrayStats.__new__(TimeseriesArrayStats)
    tsastats_new.index_keys = subkey # only subkey
    tsastats_new.value_keys = tsastat.value_keys # same as original
    tsastats_new.stats = newdata
    return tsastats_new