def gen_caches(project, tablename, datestring):
    """check which caches exist for this project/tablename/datestring and generate missing ones"""
    datalogger = DataLogger(basedir, project, tablename)
    caches = datalogger.get_caches(datestring)
    suffix = "%s/%s/%s\t" % (datestring, project, tablename)
    data = None
    if caches["tsa"]["raw"] is None:
        if len(caches["tsa"]["keys"]) == 0:
            logging.debug("%s RAW data is missing and no tsa archive exists", suffix)
        else:
            logging.info("%s RAW data not available, maybe archived, tsa archive exists already", suffix)
    else:
        if len(caches["tsa"]["keys"]) == 0:
            logging.info("%s TSA archive missing, calling load_tsa and load_tsastats", suffix)
            data = datalogger.load_tsa(datestring)
        elif len(caches["tsastat"]["keys"]) == 0:
            logging.info("%s TSASTAT archive missing, calling load_tsastats", suffix)
            data = datalogger.load_tsastats(datestring)
        elif len(caches["ts"]["keys"]) == 0:
            logging.info("%s there are no ts archives, something went wrong or tsa is completely empty, calling load_tsastats", suffix)
            data = datalogger.load_tsastats(datestring)
        else:
            logging.debug("%s All fine", suffix)
        if caches["quantile"]["exists"] is not True:
            logging.info("%s Quantile archive is missing, calling load_quantile", suffix)
            data = datalogger.load_quantile(datestring)
    del data
    del caches
    del datalogger
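# A minimal driver sketch for gen_caches, not part of the original module: it
# assumes DataLogger.get_projects and DataLogger.get_tablenames behave as used
# in the unit tests below, and reuses the module-level basedir that gen_caches
# itself already relies on.
def gen_caches_for_day(datestring):
    """regenerate all missing caches for every project/tablename of one day"""
    for project in DataLogger.get_projects(basedir):
        for tablename in DataLogger.get_tablenames(basedir, project):
            gen_caches(project, tablename, datestring)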
def get_tsa_adv(self, args):
    """
    return exported TimeseriesArray json formatted
    """
    group_funcs = {
        "avg": lambda a, b: (a + b) / 2,
        "min": min,
        "max": max,
        "sum": lambda a, b: a + b,
    }
    logging.info(args)
    project, tablename, datestring, groupkeys_enc, group_func_name, index_pattern_enc = args
    groupkeys_dec = eval(base64.b64decode(groupkeys_enc))  # should be a tuple
    logging.info("groupkeys_dec: %s", groupkeys_dec)
    index_pattern = base64.b64decode(index_pattern_enc)
    if index_pattern == "None":
        index_pattern = None
    logging.info("index_pattern: %s", index_pattern)
    assert group_func_name in group_funcs
    datalogger = DataLogger(basedir, project, tablename)
    tsa = None
    # get data, grouped if groupkeys are given
    if groupkeys_dec is not None:
        logging.info("groupkeys is %s", groupkeys_dec)
        groupkeys = tuple([unicode(key_value) for key_value in groupkeys_dec])
        tsa1 = datalogger.load_tsa(datestring, index_pattern=index_pattern)
        tsa = datalogger.group_by(datestring, tsa1, groupkeys, group_funcs[group_func_name])
    else:
        logging.info("groupkeys is None, fallback to get ungrouped tsa")
        tsa = datalogger.load_tsa(datestring, index_pattern=index_pattern)
    logging.info(tsa.keys()[0])
    web.header('Content-type', 'text/html')
    # you must not set this option, according to
    # http://stackoverflow.com/questions/11866333/ioerror-when-trying-to-serve-file
    # web.header('Transfer-Encoding','chunked')
    # reuse a single export generator, otherwise the first chunk is emitted twice
    exporter = tsa.export()
    yield "[" + json.dumps(exporter.next())
    for chunk in exporter:
        #logging.info("yielding %s", chunk)
        yield "," + json.dumps(chunk)
    yield "]"
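# How a caller might encode the six args consumed by get_tsa_adv, shown as a
# hedged sketch; the project, tablename and key names are purely illustrative:
#
#   groupkeys_enc = base64.b64encode(str((u"hostname", )))  # eval'd back to a tuple
#   index_pattern_enc = base64.b64encode("None")            # "None" means no pattern
#   args = ("someproject", "sometable", "2015-11-30",
#           groupkeys_enc, "avg", index_pattern_enc)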
def get_ts(self, args):
    """
    get TimeseriesArray object with one particular Timeseries selected by key

    parameters:
    /<str>project/<str>tablename/<str>datestring/base64encoded(tuple(key))

    returns:
    tsa exported in JSON format
    """
    assert len(args) == 4
    project, tablename, datestring, key_str = args
    key = tuple([unicode(key_value) for key_value in eval(base64.b64decode(key_str))])
    logging.info("project : %s", project)
    logging.info("tablename : %s", tablename)
    logging.info("datestring : %s", datestring)
    logging.info("key : %s", key)
    datalogger = DataLogger(basedir, project, tablename)
    key_dict = dict(zip(datalogger.index_keynames, key))
    tsa = datalogger.load_tsa(datestring, filterkeys=key_dict)
    # reuse a single export generator, otherwise the first chunk is emitted twice
    exporter = tsa.export()
    yield "[" + json.dumps(exporter.next())
    for chunk in exporter:
        yield "," + json.dumps(chunk)
    yield "]"
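# Sketch of the matching client-side encoding for get_ts; the key tuple must
# follow the order of datalogger.index_keynames (values are illustrative):
#
#   key_str = base64.b64encode(str((u"srvszp2orb.tilak.cc", )))
#   args = ("someproject", "sometable", "2015-11-30", key_str)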
def get_hc_daily_data(self, args):
    """
    get values (at least one) from TimeseriesArray to use for highcharts graphing

    parameters:
    /project/tablename/datestring/index_key/value_keynames/index_keyname
    <b>project</b> <str> defines which project to use
    <b>tablename</b> <str> defines which tablename to use
    <b>datestring</b> <str> in form of YYYY-MM-DD to define which day to use
    <b>index_key</b> base64 encoded tuple, defines which Timeseries to use,
        ex. (u'srvcl14db2.tilak.cc', u'DB2', u'ablagsys', u'data only')
    <b>value_keynames</b> json encoded list of value_keynames to show in graph,
        each value_keyname will be a separate highchart line
    <b>index_keyname</b> json encoded <str> or null
        if given, the data will be grouped on this index_keyname;
        if hostname is given, the above example will be grouped by
        hostname=u'srvcl14db2.tilak.cc' and all matching Timeseries will be summed up

    returns data json encoded like this:
    [
        { name : "timeseries value_name 1", data : [[ts, value], ...] },
        { name : "timeseries value_name 2", data : [[ts, value], ...] },
        ...
    ]
    this structure can be used directly as highcharts.data
    """
    assert len(args) == 6
    project, tablename, datestring, index_key_b64, value_keynames_str, index_keyname_str = args
    # index_key_b64 should decode to a tuple string, convert to unicode tuple
    index_key = tuple([unicode(key_value) for key_value in eval(base64.b64decode(index_key_b64))])
    value_keynames = ()
    if json.loads(value_keynames_str) is not None:
        value_keynames = tuple(json.loads(value_keynames_str))
    index_keyname = ()
    if json.loads(index_keyname_str) is not None:
        index_keyname = (json.loads(index_keyname_str), )
    logging.info("project : %s", project)
    logging.info("tablename : %s", tablename)
    logging.info("datestring : %s", datestring)
    logging.info("index_key : %s", index_key)
    logging.info("value_keynames : %s", value_keynames)
    logging.info("index_keyname : %s", index_keyname)
    datalogger = DataLogger(basedir, project, tablename)
    index_key_dict = dict(zip(datalogger.index_keynames, index_key))
    # build filter if any group_by is given
    filterkeys = index_key_dict  # default
    if len(index_keyname) > 0:
        filterkeys = {}
        for key in index_keyname:
            filterkeys[key] = index_key_dict[key]
    logging.info("using filterkeys: %s", filterkeys)
    tsa = datalogger.load_tsa(datestring, filterkeys=filterkeys)
    logging.info("got tsa with %d keys", len(tsa))
    # grouping stuff if necessary
    data = None  # holds finally calculated data
    stats = None  # holds tsstats information
    if len(index_keyname) > 0:
        # grouping by the named key
        logging.info("generating new key for remaining keys in grouped tsa")
        new_key = tuple((index_key_dict[key] for key in index_keyname))
        logging.info("key after grouping would be %s", new_key)
        logging.info("grouping tsa by %s", index_keyname)
        new_tsa = datalogger.group_by(datestring, tsa, index_keyname, group_func=lambda a, b: a + b)
        tsa = new_tsa
        data = tsa[new_key].dump_dict()
        stats = tsa[new_key].stats.get_stats()
    else:
        # no grouping, simple case
        data = tsa[index_key].dump_dict()
        stats = tsa[index_key].stats.get_stats()
    logging.info("data keys : %s", data[data.keys()[0]].keys())
    # get into highcharts shape
    result = {
        "stats": stats,
        "data": [],  # holds highchart data
    }
    for value_keyname in value_keynames:
        # it is important to sort by timestamp, to not confuse highcharts
        result["data"].append(
            {
                "name": value_keyname,
                "data": tuple(((ts * 1000, row_dict[value_keyname]) for ts, row_dict in sorted(data.items())))
            }
        )
    return json.dumps(result)
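# Sketch of the six args for get_hc_daily_data; the index_key is the example
# from the docstring above, the value keynames are hypothetical:
#
#   index_key_b64 = base64.b64encode(str((u'srvcl14db2.tilak.cc', u'DB2', u'ablagsys', u'data only')))
#   value_keynames_str = json.dumps(["used", "free"])  # hypothetical value keys
#   index_keyname_str = json.dumps("hostname")         # or json.dumps(None) for no grouping
#   args = ("someproject", "sometable", "2015-11-30",
#           index_key_b64, value_keynames_str, index_keyname_str)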
def get_chart_data_ungrouped(self, args):
    """
    get values from RAW archive

    parameters:
    /<str>project/<str>tablename/<str>datestring/<str>keys/<str>value_keys/<str>datatype/<str>group_by

    keyids=hostname:srvszp2orb.tilak.cc means
    this is only useful if keyids are unique

    returns data like this:
    [
        {
            name: "name of this series", usually this is the counter name
            data: [[ts, value], ...]
        },
        ...
    ]
    """
    assert len(args) == 7
    project, tablename, datestring, keys_str, value_keys_str, datatype_str, group_str = args
    # keys_str should decode to a tuple string, convert to unicode tuple
    keys = tuple([unicode(key_value) for key_value in eval(base64.b64decode(keys_str))])
    value_keys = ()
    if json.loads(value_keys_str) is not None:
        value_keys = tuple(json.loads(value_keys_str))
    datatype = json.loads(datatype_str)
    group_by = ()
    if json.loads(group_str) is not None:
        group_by = (json.loads(group_str), )
    logging.info("project : %s", project)
    logging.info("tablename : %s", tablename)
    logging.info("datestring : %s", datestring)
    logging.info("keys : %s", keys)
    logging.info("value_keys : %s", value_keys)
    logging.info("datatype : %s", datatype)
    logging.info("group_by : %s", group_by)
    datalogger = DataLogger(basedir, project, tablename)
    keys_dict = dict(zip(datalogger.index_keynames, keys))
    # build filter if any group_by is given
    filterkeys = keys_dict  # default
    if len(group_by) > 0:
        filterkeys = {}
        for key in group_by:
            filterkeys[key] = keys_dict[key]
    logging.info("using filterkeys: %s", filterkeys)
    tsa = datalogger.load_tsa(datestring, filterkeys=filterkeys)
    logging.info("got tsa with %d keys", len(tsa))
    # is there something to calculate, let's do it
    if datatype != u"absolute":
        new_value_keys = []
        for value_key in value_keys:
            new_value_key = None
            if datatype == "derive":
                new_value_key = "%s_d" % value_key
                logging.info("deriving %s to %s", value_key, new_value_key)
                tsa.add_derive_col(value_key, new_value_key)
            elif datatype == "per_s":
                new_value_key = "%s_s" % value_key
                logging.info("deriving %s to %s", value_key, new_value_key)
                tsa.add_per_s_col(value_key, new_value_key)
            tsa.remove_col(value_key)
            new_value_keys.append(new_value_key)
        value_keys = new_value_keys
        #logging.info(tsa.get_value_keys())
    # grouping stuff if necessary
    data = None  # holds finally calculated data
    stats = None
    if len(group_by) > 0:
        logging.info("generating new key for remaining keys in grouped tsa")
        key_dict = dict(zip(datalogger.index_keynames, keys))
        new_key = tuple((key_dict[key] for key in group_by))
        logging.info("key after grouping would be %s", new_key)
        logging.info("grouping tsa by %s", group_by)
        new_tsa = datalogger.group_by(datestring, tsa, group_by, group_func=lambda a, b: a + b)
        #new_tsa = tsa.get_group_by_tsa(group_by, group_func=lambda a: sum(a))
        tsa = new_tsa
        data = tsa[new_key].dump_dict()
        stats = tsa[new_key].stats.htmltable()
    else:
        data = tsa[keys].dump_dict()
        stats = tsa[keys].stats.htmltable()
    # holds return data
    result = {
        "stats": stats,
        "data": [],
    }
    logging.info("data keys : %s", data[data.keys()[0]].keys())
    for value_key in value_keys:
        # it is important to sort by timestamp, to not confuse highcharts
        result["data"].append(
            {
                "name": value_key,
                "data": tuple(((ts * 1000, row_dict[value_key]) for ts, row_dict in sorted(data.items())))
            }
        )
    return json.dumps(result)
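# Shape of the JSON returned by the two highcharts endpoints above, assuming a
# single derived value_key "bytes_d" and one timestamp (values illustrative);
# timestamps are multiplied by 1000 because highcharts expects milliseconds:
#
#   {
#       "stats": "... per-timeseries statistics ...",
#       "data": [
#           {"name": "bytes_d", "data": [[1448841600000, 12.3], ...]}
#       ]
#   }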
class Test(unittest.TestCase):

    def setUp(self):
        self.basedir = "/var/rrd"
        self.datestring = "2015-11-30"
        self.project = DataLogger.get_projects(self.basedir)[0]
        self.tablename = DataLogger.get_tablenames(self.basedir, self.project)[0]
        self.datalogger = DataLogger(self.basedir, self.project, self.tablename)

    def test_simple(self):
        self.assertTrue(self.datalogger.project == self.project)
        self.assertTrue(self.datalogger.tablename == self.tablename)
        self.assertTrue(isinstance(self.datalogger.delimiter, basestring))
        self.assertTrue(isinstance(self.datalogger.ts_keyname, basestring))
        self.assertTrue(isinstance(self.datalogger.headers, tuple))
        self.assertTrue(isinstance(self.datalogger.value_keynames, tuple))
        self.assertTrue(all((keyname in self.datalogger.headers for keyname in self.datalogger.value_keynames)))
        self.assertTrue(isinstance(self.datalogger.index_keynames, tuple))
        self.assertTrue(all((keyname in self.datalogger.headers for keyname in self.datalogger.index_keynames)))
        self.assertTrue(isinstance(self.datalogger.blacklist, tuple))
        self.assertTrue(all((keyname in self.datalogger.headers for keyname in self.datalogger.blacklist)))
        self.assertTrue(isinstance(self.datalogger.raw_basedir, basestring))
        self.assertTrue(os.path.exists(self.datalogger.raw_basedir))
        self.assertTrue(os.path.isdir(self.datalogger.raw_basedir))
        self.assertTrue(isinstance(self.datalogger.global_cachedir, basestring))
        self.assertTrue(os.path.exists(self.datalogger.global_cachedir))
        self.assertTrue(os.path.isdir(self.datalogger.global_cachedir))
        # meta is something like this
        # {u'ts_keyname': u'ts',
        #  'stat_func_names': [u'count', ...],
        #  u'interval': 300,
        #  u'blacklist': [],
        #  u'headers': [u'ts', u'http_host', ...],
        #  u'delimiter': u'\t',
        #  u'value_keynames': {
        #      u'actconn': u'asis',
        #      u'hits': u'asis',
        #      ...
        #  },
        #  u'index_keynames': [u'http_host']}
        self.assertTrue(self.datalogger.meta["headers"] == list(self.datalogger.headers))
        self.assertTrue(self.datalogger.meta["value_keynames"].keys() == list(self.datalogger.value_keynames))
        self.assertTrue(self.datalogger.meta["index_keynames"] == list(self.datalogger.index_keynames))
        self.assertTrue(self.datalogger.meta["blacklist"] == list(self.datalogger.blacklist))
        self.assertTrue(self.datalogger.meta["delimiter"] == self.datalogger.delimiter)
        self.assertTrue(self.datalogger.meta["ts_keyname"] == self.datalogger.ts_keyname)
        self.assertTrue(isinstance(self.datalogger.meta["stat_func_names"], list))

    def test_statics(self):
        self.assertTrue(isinstance(DataLogger.get_user(self.basedir), basestring))
        self.assertTrue(isinstance(DataLogger.get_group(self.basedir), basestring))
        self.assertTrue(isinstance(DataLogger.get_yesterday_datestring(), basestring))
        lbd = DataLogger.get_last_business_day_datestring()
        self.assertTrue(isinstance(lbd, basestring))
        self.assertTrue(isinstance(DataLogger.datestring_to_date(lbd), datetime.date))
        for datestring in DataLogger.datewalker("2016-01-01", "2016-02-29"):
            self.assertTrue(isinstance(datestring, basestring))
        for datestring in DataLogger.monthwalker("2016-02"):
            self.assertTrue(isinstance(datestring, basestring))
        self.assertEqual(list(DataLogger.monthwalker("2016-02"))[-1], "2016-02-29")
        self.assertTrue(isinstance(DataLogger.get_ts_for_datestring("2016-01-01"), tuple))
        self.assertTrue(isinstance(DataLogger.get_ts_for_datestring("2016-01-01")[0], float))
        self.assertTrue(isinstance(DataLogger.get_ts_for_datestring("2016-01-01")[1], float))

    def test_data(self):
        self.datalogger.load_tsa(self.datestring)
        self.datalogger.load_tsastats(self.datestring)
        self.datalogger.load_correlationmatrix(self.datestring)
        self.datalogger.load_quantile(self.datestring)
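if __name__ == "__main__":
    # standard unittest entry point; the logging setup here is an assumption,
    # the original module may configure logging elsewhere
    logging.basicConfig(level=logging.INFO)
    unittest.main()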