def get_table_timestamp(self, table, method='max', epoch=None,
                        ignore_errors=False):  # , tref = -180*86400):
    """
    method should be min() for the first value and max() for the last;
    this query goes directly to the table indexes and doesn't access
    values (so it is much faster)

    if table is an attribute name, only that attribute is checked

    with ignore_errors=True, timestamps outside the valid epoch range
    are kept instead of discarded

    epoch=timestamp should get the last timestamp before epoch
    (NOTE: not applied in the query yet, see the commented tref filter)

    Returns a tuple containing:
    (the first/last value stored, in epoch and date format,
     size of table, time needed)
    """
    t0, last, size = fn.now(), 0, 0

    if table in self.get_data_tables():
        ids = self.get_attributes_by_table(table, as_id=True)
    else:
        aid, atype, table = self.get_attr_id_type_table(table)
        ids = [aid]

    int_time = any('int_time' in v
                   for v in self.getTableIndex(table).values())
    # If using UNIX_TIMESTAMP in the query, the indexing fails!!
    field = 'int_time' if int_time else 'data_time'
    q = 'select %s(%s) from %s ' % (method, field, table)
    size = self.getTableSize(table)
    r = []
    part = None  # self.get_last_partition(table)
    if part is not None and method == 'max':
        q += 'partition (%s)' % part

    for i in ids:
        qi = q + ' where att_conf_id=%d' % i
        # if tref and int_time: qi += ' and int_time <= %d' % tref
        r.extend(self.Query(qi))

    method = {'max': max, 'min': min}[method]
    r = [self.mysqlsecs2time(l[0]) if int_time else fn.date2time(l[0])
         for l in r if l[0] not in (None, 0)]
    r = [l for l in r if l and (ignore_errors or 1e9 < l < fn.now())]

    if len(r):
        last = method(r)
        date = fn.time2str(last)
    else:
        self.debug('No values in %s' % table)
        last, date = None, ''

    return (last, date, size, fn.now() - t0)
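# A minimal usage sketch for get_table_timestamp(); `reader` stands for an
# instance of this class connected to an HDB++ schema, and the table name
# below is just an illustrative assumption:
#
#     last, date, size, took = reader.get_table_timestamp(
#         'att_scalar_devdouble_ro', method='max')
#     first = reader.get_table_timestamp(
#         'att_scalar_devdouble_ro', method='min')[0]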
def get_table_updates(self, name=''):
    if name and not str(name).startswith('att_'):
        n = self.get_table_name(name if isinstance(name, int)
                                else self.get_attribute_ID(name))
        print('%s => %s' % (name, n))
        name = n
    q = ('select table_name, update_time from information_schema.tables'
         ' where table_schema like "%s"' % self.db_name)
    if name:
        q += " and table_name like '%s'" % name
    updates = dict((a, fandango.date2time(t) if t else 0)
                   for a, t in self.Query(q))
    return updates
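# Sketch, assuming `reader` is an instance with db_name pointing to the
# archiving schema; update_time may be NULL in information_schema, in
# which case 0 is returned for that table:
#
#     updates = reader.get_table_updates()
#     stale = [t for t, u in updates.items() if u and u < fn.now() - 86400]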
def get_last_value(self, attribute, value=-1):
    if value == -1:
        self.debug_stream('load_last_values(%s)' % attribute)
        value = self.api.load_last_values(attribute)
    if hasattr(value, 'values'):
        # load_last_values may return an {attribute: values} dict
        value = list(value.values())[0]
    if (fn.isSequence(value) and len(value)
            and fn.isSequence(value[0]) and len(value[0]) > 1):
        value = value[0]
    if value and isinstance(value[0], fn.datetime.datetime):
        value = (fn.date2time(value[0]), value[1])
    return value
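# Sketch, assuming `device` is an instance of this class; the attribute
# name is a placeholder, and the result is typically an (epoch, value)
# pair once the datetime has been converted:
#
#     v = device.get_last_value('domain/family/member/attribute')
#     if v:
#         print('last value at %s: %s' % (fn.time2str(v[0]), v[1]))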
def archiving_check(schema, csvpath=''):
    api = PyTangoArchiving.ArchivingAPI(schema)
    states = api.servers.states()
    values = api.load_last_values()  # time consuming on HDB

    shouldbe = sorted(a for a in api if values[a] and
        fandango.date2time(values[a][0][0]) > time.time() - 2*30*3600*24)
    active = api.get_archived_attributes()
    updated = sorted(a for a in active if values[a] and
        fandango.date2time(values[a][0][0]) > time.time() - 3*3600)
    missing = sorted(a for a in shouldbe if a not in active)
    lost = sorted(a for a in active if a not in updated)

    loadarchivers = defaultdict(list)
    loadservers = defaultdict(list)
    lostarchivers = defaultdict(list)
    lostservers = defaultdict(list)
    for a in active:
        arch = api[a].archiver.lower()
        server = api.servers.get_device_server(arch).lower()
        loadarchivers[arch].append(a)
        loadservers[server].append(a)
        if a in lost:
            lostarchivers[arch].append(a)
            lostservers[server].append(a)

    emptyarchivers = [a for a, v in loadarchivers.items() if not len(v)]
    lostrate = dict((a, len(v) and
        len([x for x in v if x in lost]) / float(len(v)))
        for a, v in loadarchivers.items())
    lostserversrate = dict((a, len(v) and
        len([x for x in v if x in lost]) / float(len(v)))
        for a, v in loadservers.items())

    dedi = api.load_dedicated_archivers()
    dediattrs = defaultdict(list)
    [dediattrs[a.lower()].append(d) for d, v in dedi.items() for a in v]
    dmult = [a for a, v in dediattrs.items() if len(v) > 1]
    wrongnames = [a for a in dediattrs if not attribute_name_check(a)]
    wrongarchivers = set(k.lower() for k, v in dedi.items()
        if any(a.lower() in map(str.lower, v) for a in wrongnames))
    wrongattrs = [a for a, v in dediattrs.items()
        if a in api and api[a].archiver.lower() != v[0].lower()]
    deleteattrs = [a for a in dediattrs if a not in shouldbe]

    fnames = GetConfigFiles(csvpath) if csvpath else GetConfigFiles()
    csvs = dict((f, pta.ParseCSV(f, schema)) for f in fnames)
    csvattrs = defaultdict(list)
    [csvattrs[a.lower().strip()].append(f) for f, v in csvs.items() for a in v]

    stats = sorted([(len(v), len(v) and
        len([x for x in v if x in lost]) / float(len(v)))
        for v in loadservers.values()])
    stats = [(x, fandango.avg(t[1] for t in stats if t[0] == x))
             for x in sorted(set(v[0] for v in stats))]
    # pylab.plot([t[0] for t in stats], [t[1] for t in stats]); pylab.show()

    exported = dict((d, fandango.str2time(
        fandango.get_device_info(d).started, '%dst %B %Y at %H:%M:%S'))
        for d in api.get_archivers())
    first = min(exported.values())

    # SLOWER SPEEDS ALWAYS HAVE MORE LOST ATTRIBUTES
    # Let's try a different approach to restart, much less aggressive
    # than fandango.start_servers()!
    # It seems that there's a lock when so many devices are restarted at once!
    torestart = list(reversed(sorted(
        (len(v), k) for k, v in lostservers.items())))
    for _, k in torestart:
        print('Restarting %s' % k)
        fandango.Astor(k).stop_servers()
        time.sleep(20.)
        fandango.Astor(k).start_servers(wait=240.)

    allattrs = sorted(set([a for a in csvattrs if a in api]
                          + shouldbe + active))
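# Hedged example of a typical call; the schema name and csv folder are
# assumptions. Note that this check restarts the servers with the most
# lost attributes (it stops and starts them via fandango.Astor), so it
# should not be run casually on a production system:
#
#     archiving_check('hdb', csvpath='/archiving/csv')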
def decimate_db_table(db, table, host='', user='', passwd='', start=0, end=0,
                      period=300, iteration=1000, condition='', cols=None,
                      us=True, test=False, repeated=False):
    """
    This method will remove all values from a MySQL table that seem
    duplicated in time or value.
    All values with a difference in time lower than period will be kept.

    To use it with hdb++:

    decimate_db_table('hdbpp', user='******', passwd='...',
        table='att_scalar_devdouble_ro',
        start=0,
        end=now()-600*86400,
        period=60,  # Keep a value every 60s
        condition='att_conf_id = XX',
        iteration=1000,
        cols=['data_time', 'value_r'],
        us=True,
        )
    """
    print('Decimating all repeated values in %s(%s) with less '
          'than %d seconds in between.' % (table, condition, period))

    db = (FriendlyDB(db, host, user, passwd)
          if not isinstance(db, FriendlyDB) else db)
    columns = cols or ['time', 'value']
    date, column = columns[0], columns[1:]

    start = time2date(start) if isNumber(start) else time2date(str2time(start))
    t0, vw0, now = start, None, time2date(time.time())
    end = time2date(end) if isNumber(end) else time2date(str2time(end))
    removed, pool, reps = 0, [], []
    count = 0

    # t0 and end are kept as dates to be easy to compare against read values
    while t0 < (end or now):
        query = 'select %s,%s from %s where' % (date, ','.join(column), table)
        query += " '%s' < %s" % (date2str(t0, us=True), date)
        if condition:
            query += ' and %s' % condition
        query += ' order by %s' % date
        query += ' limit %d' % iteration
        values = db.Query(query)
        if not values:
            break

        for i, v in enumerate(values):
            count += 1
            t1, vw1 = v[0], v[1:1 + len(column)]
            # compare timestamps at millisecond precision
            e0 = 1e-3 * int(1e3 * date2time(t0))
            e1 = 1e-3 * int(1e3 * date2time(t1))
            tdelta = e1 - e0
            is_last = i >= (len(values) - 1) or t1 >= end
            buff = len(pool)

            if is_last or tdelta >= period or vw0 != vw1:
                # End of repeated values, apply decimation ...
                if buff:
                    # Don't apply remove on windows < 1 second
                    e1 = date2time(values[i - 1][0])  # previous value
                    if not test:
                        # Don't use the between syntax!!
                        q = 'delete from %s where ' % table
                        if condition:
                            q += condition + ' and '
                        # t0 should not be removed!
                        q += "%s > '%s' and " % (date, time2str(e0, us=us))
                        q += "%s < '%s'" % (date, time2str(e1, us=us))
                        db.Query(q)
                if reps:
                    if not test:
                        # repeated timestamps are deleted in a single query
                        q = 'delete from %s where ' % table
                        if condition:
                            q += condition + ' and '
                        q += "%s = '%s' limit %d" % (
                            date, date2str(reps[-1], us=us), len(reps))
                        db.Query(q)

                pool, reps = [], []
                t0, vw0 = t1, vw1
            else:
                # repeated values with tdelta < period will be removed in a
                # single query at the end of the window; if the timestamp is
                # repeated the condition t < d < t is useless, so repeated
                # timestamps are collected apart and removed directly
                if repeated and not tdelta:
                    reps.append(t1)
                elif vw0 == vw1:
                    pool.append(t1)

            if is_last:
                break

    query = 'select count(*) from %s where' % table
    query += " '%s' < %s and %s < '%s'" % (
        date2str(start, us=us), date, date, date2str(end, us=us))
    if condition:
        query += ' and %s' % condition
    cur = db.Query(query)[0][0]
    removed = count - cur

    print('decimate_db_table(%s,%s) took %d seconds to remove'
          ' %d = %d - %d values' % (
          table, condition, time.time() - date2time(now), removed, count, cur))
    return removed
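# Dry-run sketch, assuming a local 'hdbpp' schema and a placeholder
# att_conf_id; with test=True the table is only scanned, no delete
# query is issued:
#
#     decimate_db_table('hdbpp', user='...', passwd='...',
#         table='att_scalar_devdouble_ro',
#         start=0, end=time.time() - 600*86400,
#         period=60, condition='att_conf_id = 1',
#         cols=['data_time', 'value_r'],
#         test=True)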