Esempio n. 1
0
    def get_tsa_adv(self, project, tablename, datestring, groupkeys, group_func_name, index_pattern):
        """
        fetch a server-side grouped TimeseriesArray for this
        project/tablename/datestring combination

        parameters:
        project <str>
        tablename <str>
        datestring <str>
        groupkeys <tuple> index keys to group by, or None to keep the table's own index
        group_func_name <str>
        index_pattern <str>

        returns:
        <TimeseriesArray>
        """
        value_keynames = self.get_value_keynames(project, tablename)
        ts_keyname = self.get_ts_keyname(project, tablename)
        # without explicit grouping, fall back to the table's own index keys
        if groupkeys is not None:
            tsa = TimeseriesArray(groupkeys, value_keynames, ts_keyname)
        else:
            index_keynames = self.get_index_keynames(project, tablename)
            tsa = TimeseriesArray(index_keynames, value_keynames, ts_keyname)
        # parameter order matters in the URI, so build an OrderedDict from pairs
        uri_params = OrderedDict([
            ("project", project),
            ("tablename", tablename),
            ("datestring", datestring),
            ("groupkey_enc", base64.b64encode(unicode(groupkeys))),
            ("group_func_name", group_func_name),
            ("index_pattern", base64.b64encode(unicode(index_pattern))),
        ])
        query_params = {}
        for row in self.__get_json_chunked("get_tsa_adv", uri_params, query_params):
            tsa.add(row)
        return tsa
Esempio n. 2
0
    def get_ts(self, project, tablename, datestring, key):
        """
        fetch the single Timeseries identified by key, delivered wrapped
        in a TimeseriesArray for this project/tablename/datestring

        parameters:
        project <str>
        tablename <str>
        datestring <str>
        key <tuple> index key identifying one particular Timeseries

        returns:
        <TimeseriesArray> holding only the requested Timeseries
        """
        # rebuild the array schema from the table metadata
        tsa = TimeseriesArray(
            self.get_index_keynames(project, tablename),
            self.get_value_keynames(project, tablename),
            self.get_ts_keyname(project, tablename))
        uri_params = {}
        uri_params["project"] = project
        uri_params["tablename"] = tablename
        uri_params["datestring"] = datestring
        uri_params["key"] = base64.b64encode(unicode(key))
        query_params = {}
        for row in self.__get_json_chunked("get_ts", uri_params, query_params):
            tsa.add(row)
        return tsa
Esempio n. 3
0
    def get_tsa(self, project, tablename, datestring):
        """
        fetch the complete TimeseriesArray for this
        project/tablename/datestring combination

        parameters:
        project <str>
        tablename <str>
        datestring <str>

        returns:
        <TimeseriesArray>
        """
        # rebuild the array schema from the table metadata
        tsa = TimeseriesArray(
            self.get_index_keynames(project, tablename),
            self.get_value_keynames(project, tablename),
            self.get_ts_keyname(project, tablename))
        uri_params = {
            "project": project,
            "tablename": tablename,
            "datestring": datestring,
        }
        query_params = {}
        for row in self.__get_json_chunked("get_tsa", uri_params, query_params):
            tsa.add(row)
        return tsa
Esempio n. 4
0
def group_by_local(tsa, datestring, subkeys, group_func, slotlength):
    """
    group given tsa by subkeys, and use group_func to aggregate data
    first all Timeseries will be aligned in time, to get proper points in time

    parameters:
    tsa <TimeseriesArray> source data
    datestring <str> day to work on, used to determine the day's start timestamp
    subkeys <tuple> index keys to group by, could also be empty, to aggregate everything
    group_func <func> aggregation function applied to colliding rows
    slotlength <int> slot width in seconds

    returns:
    <TimeseriesArray>
    """
    starttime = time.time()
    # intermediate tsa keyed on the grouping subkeys only
    tsa2 = TimeseriesArray(index_keys=subkeys, value_keys=tsa.value_keys, ts_key=tsa.ts_key)
    # NOTE(review): only start_ts is needed here; stop_ts is unused
    start_ts, stop_ts = datalogger.get_ts_for_datestring(datestring)
    ts_keyname = tsa.ts_key
    for data in tsa.export():
        # align timestamp to the nearest slot boundary.
        # float(slotlength) guards against Python 2 integer division,
        # which would silently floor instead of rounding to the nearest slot.
        nearest_slot = round((data[ts_keyname] - start_ts) / float(slotlength))
        data[ts_keyname] = int(start_ts + nearest_slot * slotlength)
        tsa2.group_add(data, group_func)
    print("Duration : %f" % (time.time() - starttime))
    return tsa2
Esempio n. 5
0
def read_tsa_full_aligned(datestring, slotlength):
    tsa = datalogger.read_tsa_full(datestring, force=False)
    print tsa.index_keys, tsa.value_keys, tsa.ts_key
    tsa2 = TimeseriesArray(tuple(tsa.index_keys), list(tsa.value_keys), str(tsa.ts_key))
    for data in dump_and_align(tsa, slotlength):
        tsa2.add(data)
    print "new times"
    print sorted(tsa2[tsa2.keys()[0]].get_times())
    assert tsa != tsa2
    assert all(key in tsa2.keys() for key in tsa.keys())
    assert len(tsa) == len(tsa2)
    return tsa2
Esempio n. 6
0
def benchmark_load():
    # Benchmark loading a TimeseriesArray from a gzipped CSV dump.
    # NOTE(review): the body looks truncated here — tsa2 and the second
    # starttime are never used in the visible lines; confirm against the
    # full original before relying on this function.
    # get data, from datalogger, or dataloggerhelper
    starttime = time.time()
    #tsa = datalogger.read_tsa_full(datestring, force=False)
    #print "cPickle import from raw and export every timeseries in %s" % (time.time() - starttime)
    print "starting benchmark"
    starttime = time.time()
    tsa2 = TimeseriesArray.load_from_csv(gzip.open("/tmp/test_tsa.csv.gz", "rb"))
Esempio n. 7
0
def shootout(datalogger, datestring):
    # get data, from datalogger, or dataloggerhelper
    tsa = datalogger.read_tsa_full(datestring, force=True)
    starttime = time.time()
    tsa.dump_to_csv(gzip.open("/tmp/test_tsa.csv.gz", "wb"))
    tsa2 = TimeseriesArray.load_from_csv(gzip.open("/tmp/test_tsa.csv.gz", "rb"))
    assert tsa == tsa2
    print "CSV Export/Import of whole tsa Duration %s" % (time.time() - starttime)
    starttime = time.time()
    cPickle.dump(tsa, gzip.open("/tmp/test_tsa_cPickle.gz", "wb"))
    tsa2 = cPickle.load(gzip.open("/tmp/test_tsa_cPickle.gz", "rb"))
    assert tsa == tsa2
    print "cPickle Export/Import of whole tsa Duration %s" % (time.time() - starttime)
    starttime = time.time()
    for key, ts in tsa.items():
        filehandle = gzip.open("/tmp/test_ts.csv.gz", "wb")
        ts.dump_to_csv(filehandle)
        filehandle.close()
        filehandle = gzip.open("/tmp/test_ts.csv.gz", "rb")
        ts2 = Timeseries.load_from_csv(filehandle)
        filehandle.close()
        #print ts2
        #print ts.ts_keyname, ts2.ts_keyname
        #print ts.headers, ts2.headers
        #print key, ts == ts2
        assert ts == ts2
    print "CSV Export/Import Duration %s" % (time.time() - starttime)
    starttime = time.time()
    for key, ts in tsa.items():
        filehandle = gzip.open("/tmp/test1.cPickle.gz", "wb")
        cPickle.dump(ts, filehandle)
        filehandle.close()
        filehandle = gzip.open("/tmp/test1.cPickle.gz", "rb")
        ts2 = cPickle.load(filehandle)
        filehandle.close()
        #print ts2
        #print ts.ts_keyname, ts2.ts_keyname
        #print ts.headers, ts2.headers
        #print key, ts == ts2
        assert ts == ts2
    print "cPickle Export/Import Duration %s" % (time.time() - starttime)
    #keys = tsa.keys()
    #for slottime in get_slot_timeline(datestring, 600):
    #    print slottime, slottime in tsa[keys[0]].get_times()
    #    #print tuple((tsa[key].get_single_value(slottime, 'hrStorageAllocationFailures') for key in keys))

    return