def get_tsa_adv(self, project, tablename, datestring, groupkeys, group_func_name, index_pattern):
    """
    get TimeseriesArray object for this particular project/tablename/datestring
    combination, grouped and filtered on the server side

    parameters:
    project <str>
    tablename <str>
    datestring <str>
    groupkeys <tuple> or None - index keys to group by; None means use the
        table's own index keynames (no regrouping)
    group_func_name <str>
    index_pattern <str>

    returns:
    <TimeseriesArray>
    """
    value_keynames = self.get_value_keynames(project, tablename)
    ts_keyname = self.get_ts_keyname(project, tablename)
    if groupkeys is None:
        # no grouping requested - fall back to the table's own index keys
        index_keynames = self.get_index_keynames(project, tablename)
        tsa = TimeseriesArray(index_keynames, value_keynames, ts_keyname)
    else:
        tsa = TimeseriesArray(groupkeys, value_keynames, ts_keyname)
    # NOTE(review): OrderedDict preserved - parameter order is presumably
    # significant for how the request URI is assembled; confirm in
    # __get_json_chunked before changing.
    # groupkeys and index_pattern are base64 encoded to survive URI transport.
    uri_params = OrderedDict([
        ("project", project),
        ("tablename", tablename),
        ("datestring", datestring),
        ("groupkey_enc", base64.b64encode(unicode(groupkeys))),
        ("group_func_name", group_func_name),
        ("index_pattern", base64.b64encode(unicode(index_pattern))),
    ])
    query_params = {}
    data = self.__get_json_chunked("get_tsa_adv", uri_params, query_params)
    for row in data:
        tsa.add(row)
    return tsa
def get_ts(self, project, tablename, datestring, key):
    """
    get Timeseries object for this particular
    project/tablename/datestring/key combination

    parameters:
    project <str>
    tablename <str>
    datestring <str>
    key <tuple> key to identify particular Timeseries

    returns:
    <TimeseriesArray> holding the single requested Timeseries
    """
    # build an empty result container from the table's metadata
    result = TimeseriesArray(
        self.get_index_keynames(project, tablename),
        self.get_value_keynames(project, tablename),
        self.get_ts_keyname(project, tablename))
    # key is base64 encoded to survive URI transport
    uri_params = {
        "project": project,
        "tablename": tablename,
        "datestring": datestring,
        "key": base64.b64encode(unicode(key)),
    }
    for row in self.__get_json_chunked("get_ts", uri_params, {}):
        result.add(row)
    return result
def get_tsa(self, project, tablename, datestring):
    """
    get TimeseriesArray object for this particular
    project/tablename/datestring combination

    parameters:
    project <str>
    tablename <str>
    datestring <str>

    returns:
    <TimeseriesArray>
    """
    # build an empty result container from the table's metadata
    result = TimeseriesArray(
        self.get_index_keynames(project, tablename),
        self.get_value_keynames(project, tablename),
        self.get_ts_keyname(project, tablename))
    uri_params = {
        "project": project,
        "tablename": tablename,
        "datestring": datestring,
    }
    for row in self.__get_json_chunked("get_tsa", uri_params, {}):
        result.add(row)
    return result
def group_by_local(tsa, datestring, subkeys, group_func, slotlength):
    """
    group given tsa by subkeys, and use group_func to aggregate data
    first all Timeseries will be aligned in time, to get proper points in time

    parameters:
    tsa <TimeseriesArray>
    datestring <str> day to operate on, used to derive the start timestamp
    subkeys <tuple> could also be empty, to aggregate everything
    group_func <func>
    slotlength <int> width of a time slot in seconds

    returns:
    <TimeseriesArray>
    """
    starttime = time.time()
    # intermediate tsa keyed only by subkeys
    tsa2 = TimeseriesArray(index_keys=subkeys, value_keys=tsa.value_keys, ts_key=tsa.ts_key)
    start_ts, stop_ts = datalogger.get_ts_for_datestring(datestring)
    ts_keyname = tsa.ts_key
    for data in tsa.export():
        # snap the timestamp to the nearest slot boundary so rows from
        # different Timeseries fall onto identical points in time
        nearest_slot = round((data[ts_keyname] - start_ts) / slotlength)
        data[ts_keyname] = int(start_ts + nearest_slot * slotlength)
        tsa2.group_add(data, group_func)
    print("Duration : %f" % (time.time() - starttime))
    return tsa2
def read_tsa_full_aligned(datestring, slotlength): tsa = datalogger.read_tsa_full(datestring, force=False) print tsa.index_keys, tsa.value_keys, tsa.ts_key tsa2 = TimeseriesArray(tuple(tsa.index_keys), list(tsa.value_keys), str(tsa.ts_key)) for data in dump_and_align(tsa, slotlength): tsa2.add(data) print "new times" print sorted(tsa2[tsa2.keys()[0]].get_times()) assert tsa != tsa2 assert all(key in tsa2.keys() for key in tsa.keys()) assert len(tsa) == len(tsa2) return tsa2
def benchmark_load(): # get data, from datalogger, or dataloggerhelper starttime = time.time() #tsa = datalogger.read_tsa_full(datestring, force=False) #print "cPickle import from raw and export every timeseries in %s" % (time.time() - starttime) print "starting benchmark" starttime = time.time() tsa2 = TimeseriesArray.load_from_csv(gzip.open("/tmp/test_tsa.csv.gz", "rb"))
def shootout(datalogger, datestring): # get data, from datalogger, or dataloggerhelper tsa = datalogger.read_tsa_full(datestring, force=True) starttime = time.time() tsa.dump_to_csv(gzip.open("/tmp/test_tsa.csv.gz", "wb")) tsa2 = TimeseriesArray.load_from_csv(gzip.open("/tmp/test_tsa.csv.gz", "rb")) assert tsa == tsa2 print "CSV Export/Import of whole tsa Duration %s" % (time.time() - starttime) starttime = time.time() cPickle.dump(tsa, gzip.open("/tmp/test_tsa_cPickle.gz", "wb")) tsa2 = cPickle.load(gzip.open("/tmp/test_tsa_cPickle.gz", "rb")) assert tsa == tsa2 print "cPickle Export/Import of whole tsa Duration %s" % (time.time() - starttime) starttime = time.time() for key, ts in tsa.items(): filehandle = gzip.open("/tmp/test_ts.csv.gz", "wb") ts.dump_to_csv(filehandle) filehandle.close() filehandle = gzip.open("/tmp/test_ts.csv.gz", "rb") ts2 = Timeseries.load_from_csv(filehandle) filehandle.close() #print ts2 #print ts.ts_keyname, ts2.ts_keyname #print ts.headers, ts2.headers #print key, ts == ts2 assert ts == ts2 print "CSV Export/Import Duration %s" % (time.time() - starttime) starttime = time.time() for key, ts in tsa.items(): filehandle = gzip.open("/tmp/test1.cPickle.gz", "wb") cPickle.dump(ts, filehandle) filehandle.close() filehandle = gzip.open("/tmp/test1.cPickle.gz", "rb") ts2 = cPickle.load(filehandle) filehandle.close() #print ts2 #print ts.ts_keyname, ts2.ts_keyname #print ts.headers, ts2.headers #print key, ts == ts2 assert ts == ts2 print "cPickle Export/Import Duration %s" % (time.time() - starttime) #keys = tsa.keys() #for slottime in get_slot_timeline(datestring, 600): # print slottime, slottime in tsa[keys[0]].get_times() # #print tuple((tsa[key].get_single_value(slottime, 'hrStorageAllocationFailures') for key in keys)) return