Python DataLogger.group_by Exemples, datalogger.DataLogger.group_by Python Exemples

Exemple #1

0

Afficher le fichier

Fichier : test_anomality.py Projet : gunny26/datalogger

def report_group(project, tablename, datestring1, datestring2, value_key):
    """
    Print the 20 highest-ranked entries of a CorrelationMatrixTime built
    from two days of data grouped by hostname.

    parameters:
    project <str> project name handed to DataLogger / DataLoggerWeb
    tablename <str> tablename handed to DataLogger / DataLoggerWeb
    datestring1 <str> first day to load
    datestring2 <str> second day to load
    value_key <str> passed through to CorrelationMatrixTime

    returns nothing, the report is written to stdout
    """
    def pairwise_mean(a, b):
        # group function handed to group_by, combines two values at a time
        return (a + b) / 2

    # raw data comes from the web API, grouping is done by the local DataLogger
    datalogger = DataLogger(BASEDIR, project, tablename)
    dataloggerweb = DataLoggerWeb(DATALOGGER_URL)
    # single-argument print(...) gives the same output as the Python 2 print
    # statement and is also valid Python 3 syntax
    print("loading data")
    starttime = time.time()
    grouped = []
    for datestring in (datestring1, datestring2):
        tsa = dataloggerweb.get_tsa(project, tablename, datestring)
        grouped.append(datalogger.group_by(datestring, tsa, ("hostname",), pairwise_mean))
    tsa1, tsa2 = grouped
    print("Duration load %f" % (time.time() - starttime))
    cm = CorrelationMatrixTime(tsa1, tsa2, value_key)
    print("TOP most differing keys between %s and %s" % (datestring1, datestring2))
    # highest coefficient first, only the first 20 entries are reported
    ranked = sorted(cm.items(), key=lambda item: item[1], reverse=True)
    for key, coefficient in ranked[:20]:
        print("%s %s" % (key, coefficient))

Exemple #2

0

Afficher le fichier

Fichier : code.py Projet : gunny26/datalogger

 def get_tsa_adv(self, args):
     """
     Stream an exported TimeseriesArray as the pieces of one JSON list.

     parameters (items of args, in this order):
     project <str> project to use
     tablename <str> tablename to use
     datestring <str> day to load
     groupkeys_enc base64 encoded Python literal, either a tuple of
         index key names to group by, or None to skip grouping
     group_func_name <str> one of "avg", "min", "max", "sum"
     index_pattern_enc base64 encoded pattern passed to load_tsa as
         index_pattern, the encoded string "None" means no pattern

     yields <str> fragments which concatenated form a JSON list with one
         element per chunk produced by tsa.export()

     raises AssertionError if group_func_name is not a known function name
     """
     group_funcs = {
         "avg" : lambda a, b: (a+b)/2,
         "min" : min,
         "max" : max,
         "sum" : lambda a, b: a+b,
     }
     logging.info(args)
     project, tablename, datestring, groupkeys_enc, group_func_name, index_pattern_enc = args
     # SECURITY: eval() executes whatever expression the client sends.
     # Only a tuple literal or None is expected here, so ast.literal_eval
     # looks like a safer parser - left as is to keep accepted inputs
     # unchanged, TODO(review)
     groupkeys_dec = eval(base64.b64decode(groupkeys_enc)) # should be tuple
     logging.info("groupkeys_dec: %s", groupkeys_dec)
     index_pattern = base64.b64decode(index_pattern_enc)
     if index_pattern == "None":
         index_pattern = None
     logging.info("index_pattern: %s", index_pattern)
     assert group_func_name in group_funcs
     datalogger = DataLogger(basedir, project, tablename)
     groupkeys = None
     if groupkeys_dec is not None:
         logging.info("groupkeys is %s", groupkeys_dec)
         groupkeys = tuple([unicode(key_value) for key_value in groupkeys_dec])
     else:
         logging.info("groupkeys is None, fallback to get ungrouped tsa")
     # get data, the same load is needed with and without grouping
     tsa = datalogger.load_tsa(datestring, index_pattern=index_pattern)
     if groupkeys is not None:
         tsa = datalogger.group_by(datestring, tsa, groupkeys, group_funcs[group_func_name])
     # log the first key only if there is one, an empty tsa must not
     # fail just because of a log line
     tsa_keys = list(tsa.keys())
     if tsa_keys:
         logging.info(tsa_keys[0])
     web.header('Content-type', 'text/html')
     # you must not set this option, according to
     # http://stackoverflow.com/questions/11866333/ioerror-when-trying-to-serve-file
     # web.header('Transfer-Encoding','chunked')
     # walk tsa.export() exactly once: calling export() a second time
     # would (assuming it returns a fresh iterator on every call) send the
     # first chunk twice, and .next() on an empty export raised
     # StopIteration
     prefix = "["
     for chunk in tsa.export():
         #logging.info("yielding %s", chunk)
         yield prefix + json.dumps(chunk)
         prefix = ","
     if prefix == "[":
         # nothing exported, still open the list to get valid JSON "[]"
         yield prefix
     yield "]"

Exemple #3

0

Afficher le fichier

Fichier : code.py Projet : gunny26/datalogger

    def get_hc_daily_data(self, args):
        """
        get values(min 1) from TimeseriesArray to use for highcharts graphing

        parameters:
        /project/tablename/datestring/index_key/value_keynames/index_keyname

        <b>project</b> <str> defines which project to use
        <b>tablename</b> <str> defines which tablename to use
        <b>datestring</b> <str> in form of YYYY-MM-DD to define which day to use
        <b>index_key</b> base64 encoded tuple, defines which Timeseries to use, ex. (u'srvcl14db2.tilak.cc', u'DB2', u'ablagsys', u'data only')
        <b>value_keynames</b> json encoded list of value_keynames to show in graph
            each value_keyname will be a separate highchart line
        <b>index_keyname</b> json encoded <str> or null
            if given, the data will be grouped on this given index_keyname
            if hostname is given the above example will be grouped by hostname=u'srvcl14db2.tilak.cc'
            and all possible Timeseries will be summed up

        return data json encoded like this
        {
            stats : result of get_stats() of the selected Timeseries,
            data : [
                {   name : "timeseries value_name 1",
                    data : [[ts * 1000, value], ...]
                },
                {   name : "timeseries value name 2",
                    data : [[ts * 1000, value], ...]
                }
                ...
            ]
        }
        the list stored in data could already be used in highcharts.data

        raises AssertionError if args does not hold exactly 6 items
        """
        assert len(args) == 6
        project, tablename, datestring, index_key_b64, value_keynames_str, index_keyname_str = args
        # key_str should be a tuple string, convert to unicode tuple
        # SECURITY: eval() executes whatever expression the client sends,
        # ast.literal_eval looks like a safer parser for a tuple literal -
        # left as is to keep accepted inputs unchanged, TODO(review)
        index_key = tuple([unicode(key_value) for key_value in eval(base64.b64decode(index_key_b64))])
        # every json argument is parsed once, null means "not given"
        value_keynames_raw = json.loads(value_keynames_str)
        value_keynames = () if value_keynames_raw is None else tuple(value_keynames_raw)
        index_keyname_raw = json.loads(index_keyname_str)
        index_keyname = () if index_keyname_raw is None else (index_keyname_raw,)
        logging.info("project : %s", project)
        logging.info("tablename : %s", tablename)
        logging.info("datestring : %s", datestring)
        logging.info("index_key : %s", index_key)
        logging.info("value_keynames : %s", value_keynames)
        logging.info("index_keyname : %s", index_keyname)
        datalogger = DataLogger(basedir, project, tablename)
        index_key_dict = dict(zip(datalogger.index_keynames, index_key))
        # build filter if any group_by is given, otherwise filter on the
        # complete index_key
        filterkeys = index_key_dict
        if index_keyname:
            filterkeys = {key: index_key_dict[key] for key in index_keyname}
        logging.info("using filterkeys: %s", filterkeys)
        tsa = datalogger.load_tsa(datestring, filterkeys=filterkeys)
        logging.info("got tsa with %d keys", len(tsa))
        # grouping stuff if necessary
        if index_keyname:
            # grouping by key named
            logging.info("generating new key for left possible keys in grouped tsa")
            new_key = tuple((index_key_dict[key] for key in index_keyname))
            logging.info("key after grouping would be %s", new_key)
            logging.info("grouping tsa by %s", index_keyname)
            tsa = datalogger.group_by(datestring, tsa, index_keyname, group_func=lambda a, b: a + b)
            timeseries = tsa[new_key]
        else:
            # not grouping, simple
            timeseries = tsa[index_key]
        data = timeseries.dump_dict() # holds finally calculated data
        stats = timeseries.stats.get_stats() # holds tsstats informations
        # log the value keys of the first row, skipped for an empty
        # timeseries so that a log line can not fail the request
        timestamps = list(data.keys())
        if timestamps:
            logging.info("data keys : %s", data[timestamps[0]].keys())
        # its important to sort by timestamp, to not confuse highcharts,
        # sorted once and reused for every value_keyname
        sorted_rows = sorted(data.items())
        # get in highcharts shape
        result = {
            "stats" : stats,
            "data" : [
                {
                    "name" : value_keyname,
                    "data" : [(ts * 1000, row_dict[value_keyname]) for ts, row_dict in sorted_rows],
                }
                for value_keyname in value_keynames
            ], # holds highchart data
        }
        return json.dumps(result)

Exemple #4

0

Afficher le fichier

Fichier : code.py Projet : gunny26/datalogger

    def get_chart_data_ungrouped(self, args):
        """
        get values from RAW Archive, optionally derived and grouped

        parameters:
        /<str>project/<str>tablename/<str>datestring/<str>key/<str>value_keys/<str>datatype/<str>group_str

        project defines which project to use
        tablename defines which tablename to use
        datestring defines which day to load
        key base64 encoded tuple, index key of the Timeseries to use
        value_keys json encoded list of value keys to return
        datatype json encoded <str>, one of
            "absolute" values are returned as stored
            "derive" every value key is replaced by <value_key>_d built with add_derive_col
            "per_s" every value key is replaced by <value_key>_s built with add_per_s_col
        group_str json encoded <str> or null
            if given, only this index key name is used to filter and the
            loaded Timeseries are grouped on it by summing up

        return data json encoded like this:
        {
            stats : result of stats.htmltable() of the selected Timeseries,
            data : [
                {
                    name: "name of this series" usually this is the counter name
                    data : [[ts * 1000, value], ...]
                },
                ...
            ]
        }

        raises AssertionError if args does not hold exactly 7 items
        raises ValueError if datatype is unknown and value_keys are given
        """
        assert len(args) == 7
        project, tablename, datestring, keys_str, value_keys_str, datatype_str, group_str = args
        # key_str should be a tuple string, convert to unicode tuple
        # SECURITY: eval() executes whatever expression the client sends,
        # ast.literal_eval looks like a safer parser for a tuple literal -
        # left as is to keep accepted inputs unchanged, TODO(review)
        keys = tuple([unicode(key_value) for key_value in eval(base64.b64decode(keys_str))])
        # every json argument is parsed once, null means "not given"
        value_keys_raw = json.loads(value_keys_str)
        value_keys = () if value_keys_raw is None else tuple(value_keys_raw)
        datatype = json.loads(datatype_str)
        group_raw = json.loads(group_str)
        group_by = () if group_raw is None else (group_raw,)
        logging.info("project : %s", project)
        logging.info("tablename : %s", tablename)
        logging.info("datestring : %s", datestring)
        logging.info("keys : %s", keys)
        logging.info("value_keys : %s", value_keys)
        logging.info("datatype : %s", datatype)
        logging.info("group_by : %s", group_by)
        datalogger = DataLogger(basedir, project, tablename)
        keys_dict = dict(zip(datalogger.index_keynames, keys))
        # build filter if any group_by is given, otherwise filter on the
        # complete key
        filterkeys = keys_dict
        if group_by:
            filterkeys = {key: keys_dict[key] for key in group_by}
        logging.info("useing filterkeys: %s", filterkeys)
        tsa = datalogger.load_tsa(datestring, filterkeys=filterkeys)
        logging.info("got tsa with %d keys", len(tsa))
        # is there something to calculate, lets do it
        if datatype != u"absolute":
            new_value_keys = []
            for value_key in value_keys:
                if datatype == "derive":
                    new_value_key = "%s_d" % value_key
                    logging.info("deriving %s to %s", value_key, new_value_key)
                    tsa.add_derive_col(value_key, new_value_key)
                elif datatype == "per_s":
                    new_value_key = "%s_s" % value_key
                    logging.info("deriving %s to %s", value_key, new_value_key)
                    tsa.add_per_s_col(value_key, new_value_key)
                else:
                    # fail before any column is touched, formerly the
                    # source column was removed and None ended up as
                    # value key, which failed later with a KeyError
                    raise ValueError("unknown datatype %r, use absolute, derive or per_s" % (datatype,))
                tsa.remove_col(value_key)
                new_value_keys.append(new_value_key)
            value_keys = new_value_keys
        #logging.info(tsa.get_value_keys())
        # grouping stuff if necessary
        if group_by:
            logging.info("generating new key for left possible keys in grouped tsa")
            new_key = tuple((keys_dict[key] for key in group_by))
            logging.info("key after grouping would be %s", new_key)
            logging.info("grouping tsa by %s", group_by)
            tsa = datalogger.group_by(datestring, tsa, group_by, group_func=lambda a, b: a + b)
            timeseries = tsa[new_key]
        else:
            timeseries = tsa[keys]
        data = timeseries.dump_dict() # holds finally calculated data
        stats = timeseries.stats.htmltable()
        # log the value keys of the first row, skipped for an empty
        # timeseries so that a log line can not fail the request
        timestamps = list(data.keys())
        if timestamps:
            logging.info("data keys : %s", data[timestamps[0]].keys())
        # its important to sort by timestamp, to not confuse highcharts,
        # sorted once and reused for every value_key
        sorted_rows = sorted(data.items())
        # holds return data
        result = {
            "stats" : stats,
            "data" : [
                {
                    "name" : value_key,
                    "data" : [(ts * 1000, row_dict[value_key]) for ts, row_dict in sorted_rows],
                }
                for value_key in value_keys
            ],
        }
        return json.dumps(result)