Beispiel #1
0
    def get_table_timestamp(self,
                            table,
                            method='max',
                            epoch=None,
                            ignore_errors=False):
        """
        Get the first or last timestamp stored for a table or an attribute.

        The SQL only aggregates the time column with min()/max(); stored
        values are never read.

        :param table: name of a data table (all its attribute ids are
            queried), or any other key that is resolved through
            self.get_attr_id_type_table(), in which case only that
            attribute id is queried.
        :param method: 'max' for the last timestamp, 'min' for the first.
            It is used both as SQL aggregate and to reduce the results
            obtained for each attribute id.
        :param epoch: accepted for interface compatibility; this
            implementation does not use it.
        :param ignore_errors: if True every non-zero timestamp is accepted;
            otherwise only timestamps with 1e9 < t < now are kept.

        :return: tuple (timestamp, date string, table size, elapsed time).
            timestamp is None and the date string is '' when no valid
            timestamp was found.
        """
        started = fn.now()

        # Resolve which attribute ids (and which table) must be queried
        if table in self.get_data_tables():
            attr_ids = self.get_attributes_by_table(table, as_id=True)
        else:
            attr_id, _attr_type, table = self.get_attr_id_type_table(table)
            attr_ids = [attr_id]

        has_int_time = any('int_time' in index
                           for index in self.getTableIndex(table).values())
        # The column is aggregated as-is; the original author notes that
        # using UNIX_TIMESTAMP here makes the indexing fail.
        if has_int_time:
            field = 'int_time'
        else:
            field = 'data_time'
        base_query = 'select %s(%s) from %s ' % (method, field, table)
        size = self.getTableSize(table)

        rows = []
        for attr_id in attr_ids:
            rows.extend(
                self.Query(base_query + ' where att_conf_id=%d' % attr_id))

        reducer = {'max': max, 'min': min}[method]

        # First pass: convert every non-empty result into epoch seconds
        stamps = []
        for row in rows:
            if row[0] in (None, 0):
                continue
            if has_int_time:
                stamps.append(self.mysqlsecs2time(row[0]))
            else:
                stamps.append(fn.date2time(row[0]))

        # Second pass: drop null timestamps and, unless told otherwise,
        # those out of the accepted interval
        valid = []
        for stamp in stamps:
            if not stamp:
                continue
            if ignore_errors or 1e9 < stamp < fn.now():
                valid.append(stamp)

        if valid:
            last = reducer(valid)
            date = fn.time2str(last)
        else:
            self.debug('No values in %s' % table)
            last, date = None, ''

        return (last, date, size, fn.now() - started)
Beispiel #2
0
 def get_table_updates(self,name=''):
     """
     Return a dict {table_name: update_time} read from
     information_schema.tables for the schema self.db_name.

     :param name: optional filter. A value starting with 'att_' is used
         directly as table name pattern; an integer is passed to
         self.get_table_name(); any other value is first translated with
         self.get_attribute_ID(). If empty, all tables of the schema are
         returned.
     :return: dict with update_time converted by fandango.date2time(),
         or 0 for tables whose update_time is empty.
     """
     if name and not str(name).startswith('att_'):
         attr_id = name if isinstance(name,int) else self.get_attribute_ID(name)
         table = self.get_table_name(attr_id)
         # single-argument call form, valid in both Python 2 and Python 3
         print('%s => %s' % (name,table))
         name = table
     q = ('select table_name,update_time from information_schema.tables '
          'where table_schema like "%s"' % self.db_name)
     if name:
         q += " and table_name like '%s'" % name
     return dict((t,fandango.date2time(u) if u else 0)
                 for t,u in self.Query(q))
 def get_last_value(self,attribute,value=-1):
     """
     Return the last value of an attribute, normalized when possible to
     an (epoch, value) tuple.

     :param attribute: attribute name, passed to
         self.api.load_last_values() when no value is given.
     :param value: already loaded value; -1 (default) means "load it".
         A dict-like object is reduced to the first of its values, and
         a sequence of rows is reduced to its first row.
     :return: (fn.date2time(date), value) if the row starts with a
         datetime object; otherwise the reduced value unchanged.
     """
     if value == -1:
         self.debug_stream('load_last_values(%s)' % attribute)
         value = self.api.load_last_values(attribute)
     if hasattr(value,'values'):
         # dict views are not indexable in Python 3, list() works in both
         value = list(value.values())[0]
     if (fn.isSequence(value) and len(value) and
             fn.isSequence(value[0]) and len(value[0])>1):
         # a list of rows was received, keep only the first one
         value = value[0]
     if value and isinstance(value[0],fn.datetime.datetime):
         value = (fn.date2time(value[0]),value[1])
     return value
Beispiel #4
0
 def get_last_value(self, attribute, value=-1):
     """
     Get the last value for an attribute as an (epoch, value) tuple
     whenever the loaded row starts with a datetime.

     :param attribute: attribute name; used to call
         self.api.load_last_values() if value is not provided.
     :param value: value previously loaded; the default -1 triggers
         the load. Objects having a values() method are replaced by
         the first element returned by it; nested sequences are replaced
         by their first row.
     :return: (fn.date2time(row[0]), row[1]) when row[0] is a datetime,
         else the row/value as obtained.
     """
     if value == -1:
         self.debug_stream('load_last_values(%s)' % attribute)
         value = self.api.load_last_values(attribute)
     if hasattr(value, 'values'):
         # Python 3 returns a non-subscriptable view from dict.values();
         # building a list keeps the same result on Python 2 and 3
         value = list(value.values())[0]
     if (fn.isSequence(value) and len(value) and fn.isSequence(value[0])
             and len(value[0]) > 1):
         # several rows received, only the first one is used
         value = value[0]
     if value and isinstance(value[0], fn.datetime.datetime):
         value = (fn.date2time(value[0]), value[1])
     return value
Beispiel #5
0
def archiving_check(schema,csvpath=''):
    """
    Collect archiving statistics for a schema and restart the device
    servers that have attributes not being updated.

    The function computes, among others:
     - shouldbe: attributes with values stored in the last 2 months
     - active: attributes currently archived
     - updated: active attributes with values in the last 3 hours
     - lost: active attributes that are not updated
     - load/lost attributes grouped by archiver and by device server
     - inconsistencies in the dedicated archivers configuration
     - attributes declared in the CSV configuration files

    WARNING: for every server having lost attributes it will stop the
    server, wait 20 seconds and start it again (sequentially, servers
    with more lost attributes first).

    :param schema: schema name passed to PyTangoArchiving.ArchivingAPI
        and pta.ParseCSV
    :param csvpath: optional path passed to GetConfigFiles()
    :return: None
    """
    api = PyTangoArchiving.ArchivingAPI(schema)

    states = api.servers.states()

    values = api.load_last_values()#time consuming on HDB
    now = time.time()

    def updated_since(attr, seconds):
        # True if the last value of attr is newer than now-seconds
        last = values[attr]
        return bool(last) and fandango.date2time(last[0][0]) > now-seconds

    shouldbe = sorted(a for a in api if updated_since(a, 2*30*3600*24))
    active = api.get_archived_attributes()
    updated = sorted(a for a in active if updated_since(a, 3*3600))

    # sets built from the plain sorted lists, for fast membership checks
    updated_set = set(updated)
    shouldbe_set = set(shouldbe)

    missing = sorted(a for a in shouldbe if a not in active)
    lost = sorted(a for a in active if a not in updated_set)
    lost_set = set(lost)

    loadarchivers = defaultdict(list)
    loadservers = defaultdict(list)
    lostarchivers = defaultdict(list)
    lostservers = defaultdict(list)
    # Each attribute is registered only once per archiver/server; a
    # second pass over active would double the load of every server.
    for a in active:
        arch = api[a].archiver.lower()
        server = api.servers.get_device_server(arch).lower()
        loadarchivers[arch].append(a)
        loadservers[server].append(a)
        if a in lost_set:
            lostarchivers[arch].append(a)
            lostservers[server].append(a)

    def lost_ratio(attrs):
        # ratio of lost attributes in the list, 0 if the list is empty
        return len(attrs) and len([x for x in attrs if x in lost_set])/float(len(attrs))

    emptyarchivers = [k for k,v in loadarchivers.items() if not len(v)]
    lostrate = dict((k,lost_ratio(v)) for k,v in loadarchivers.items())
    lostserversrate = dict((k,lost_ratio(v)) for k,v in loadservers.items())

    dedi = api.load_dedicated_archivers()
    # dediattrs = {attribute (lowercase) : [archivers where it is dedicated]}
    dediattrs = defaultdict(list)
    for d,attrs in dedi.items():
        for a in attrs:
            dediattrs[a.lower()].append(d)
    dmult = [a for a,v in dediattrs.items() if len(v)>1]
    wrongnames = [a for a in dediattrs if not attribute_name_check(a)]
    # wrongnames are already lowercase, as they are keys of dediattrs
    wrongnames_set = set(wrongnames)
    wrongarchivers = set(k.lower() for k,v in dedi.items()
                         if any(a.lower() in wrongnames_set for a in v))
    wrongattrs = [a for a,v in dediattrs.items()
                  if a in api and api[a].archiver.lower()!=v[0].lower()]
    deleteattrs = [a for a in dediattrs if a not in shouldbe_set]

    fnames = GetConfigFiles(csvpath) if csvpath else GetConfigFiles()
    csvs = dict((f,pta.ParseCSV(f,schema)) for f in fnames)
    csvattrs = defaultdict(list)
    for f,v in csvs.items():
        for a in v:
            csvattrs[a.lower().strip()].append(f)

    # stats = [(attributes per server, average ratio of lost attributes)]
    stats = sorted((len(v),lost_ratio(v)) for v in loadservers.values())
    stats = [(x,fandango.avg(t[1] for t in stats if t[0]==x)) for x in sorted(set(v[0] for v in stats))]
    # pylab.plot([t[0] for t in stats], [t[1] for t in stats]); pylab.show()
    # NOTE(review): the '%dst' format looks like it only matches days
    # ending in "st" (1st, 21st, 31st) -- confirm against str2time()
    exported = dict((d,fandango.str2time(fandango.get_device_info(d).started,'%dst %B %Y at %H:%M:%S')) for d in api.get_archivers())
    first = min(exported.values())
    #SLOWER SPEEDS ALWAYS HAVE MORE LOST ATTRIBUTES

    #Let's try a different approach to restart, much less agressive than fandango.start_servers()!
    #It seems that there's a lock when so many devices are restarted at once!
    # torestart = [(number of lost attributes, server)], worst servers first
    torestart = sorted(((len(v),k) for k,v in lostservers.items()),reverse=True)

    for nlost,server in torestart:
        print('Restarting %s' % server)
        fandango.Astor(server).stop_servers()
        time.sleep(20.)
        fandango.Astor(server).start_servers(wait=240.)

    allattrs = sorted(set([a for a in csvattrs if a in api]+shouldbe+active))
Beispiel #6
0
def decimate_db_table(db,table,host='',user='',passwd='',start=0,end=0,period=300,iteration=1000,condition='',cols=None,us=True,test=False, repeated = False):
    """ 
    This method will remove from a MySQL table the values that seem 
    duplicated in time or value.
    
    Rows are read in batches, ordered by the date column. A row is kept, 
    and becomes the new reference (t0), if it is the last row of its batch,
    if it is at least `period` seconds newer than the reference or if its 
    values differ from the reference ones. 
    Any other row (same values and closer than `period` to the reference)
    is added to a pool that is deleted with a single query when the next 
    kept row is found.
    
    To use it with hdb++:
    
    decimate_db_table('hdbpp',user='******',passwd='...',
      table = 'att_scalar_devdouble_ro',
      start = 0,
      end = now()-600*86400,
      period = 60, #Keep a value every 60s
      condition = 'att_conf_id = XX',
      iteration = 1000,
      cols = ['data_time','value_r'],
      us=True,
      )
      
    Arguments:
      db: FriendlyDB object, or first argument for FriendlyDB(db,host,user,passwd)
      table: name of the table to decimate
      host,user,passwd: used only if a new FriendlyDB has to be created
      start,end: limits of the interval; numbers are converted with 
        time2date(), anything else with time2date(str2time())
        NOTE(review): the loop uses (end or now) but end has already been 
        converted at that point; if time2date(0) returns a date object 
        the fallback to now would never apply -- confirm
      period: minimum time (seconds) between kept rows with equal values
      iteration: maximum number of rows read on each select (sql limit)
      condition: extra sql condition added to every query
      cols: [date column, value columns ...]; default is ['time','value']
      us: passed to time2str/date2str when building the delete and count 
        queries; the select query always uses us=True
      test: if True no delete query is executed
      repeated: if True, rows with no time difference against the 
        reference (at milliseconds resolution) are deleted too
        
    Returns (rows inspected) - (rows remaining between start and end)
    """
    print('Decimating all repeated values in %s(%s) with less '
      'than %d seconds in between.'%(table,condition,period))
    
    db = FriendlyDB(db,host,user,passwd) if not isinstance(db,FriendlyDB) else db
    #rw = 'write_value' in ','.join([l[0] for l in db.Query("describe %s"%table)]).lower()
    #date,column = 'read_value,write_value' if rw else 'value'
    columns = cols or ['time','value']
    # date is the name of the time column, column is the list of value columns
    date,column = columns[0],columns[1:]
    start = time2date(start) if isNumber(start) else time2date(str2time(start))
    # t0,vw0 : timestamp and values of the reference (last kept) row
    t0,vw0,now = start,None,time2date(time.time())
    end = time2date(end) if isNumber(end) else time2date(str2time(end))
    # pool: timestamps of rows to decimate; reps: repeated timestamps
    removed,pool,reps = 0,[],[]
    # count: total number of rows inspected
    count = 0
    
    ## WHY T0 AND END ARE DATES!?!? : to be easy to compare against read values

    while t0<(end or now):

        # Read the next batch of rows, strictly newer than the reference
        query = "select %s,%s from %s where" %(date,','.join(column),table)
        query += " '%s' < %s"%(date2str(t0,us=True),date)#,date2str(end))
        if condition: query+=' and %s'%condition
        query += ' order by %s'%date
        query += ' limit %d'%iteration
        values = db.Query(query)
        #print(query+': %d'%len(values))
        #print('inspecting %d values between %s and %s'%(len(values),date2str(t0),date2str(end)))
        
        if not values: 
            break
          
        for i,v in enumerate(values):
            count += 1
            # t1,vw1 : timestamp and values of the current row
            t1,vw1 = v[0],v[1:1+len(column)] #v[1],(rw and v[2] or None)
            #print((i,count,t1,vw0,vw1))
            # epochs of reference and current row, truncated to milliseconds
            e0,e1 = 1e-3*int(1e3*date2time(t0)),1e-3*int(1e3*date2time(t1)) #millisecs
            tdelta = e1-e0
            # the last row of the batch (or the first beyond end) is always
            # kept, so the pool never survives from one batch to the next
            is_last = i >= (len(values)-1) or t1 >= end
            buff = len(pool)

            if is_last or tdelta>=period or vw0!=vw1:
                #if tdelta>=period: print('%s >= %s'%(tdelta,period))
                #elif vw0!=vw1: print('%s != %s'%(vw0,vw1))
                #else: print('i = %s/%s'%(i,len(values)))
                # End of repeated values, apply decimation ...
                if buff:
                    # The check to not remove on windows < 1 second is 
                    # disabled (if True), the delete is always applied.
                    # e1 becomes the time of the row previous to the current
                    e1 = date2time(values[i-1][0]) #previous value
                    if True: #(int(e1)-int(e0))>1:
                        #print('remove %d values in pool'%len(pool))
                        if not test:
                            #Don't use the between syntax!!
                            # Both limits are strict, so the row at e1 
                            # (last one pooled) is not deleted.
                            # NOTE(review): e0 is truncated to milliseconds;
                            # a reference row with finer resolution may 
                            # match "date > e0" itself -- confirm
                            q = "delete from %s where "%table
                            if condition:
                                q+= condition+' and '
                            #e0,e1 = e0+1,e1-1 #t0 should not be removed!
                            q+= "%s > '%s' and "%(date,time2str(e0,us=us)) 
                            q+= "%s < '%s'"%(date,time2str(e1,us=us))
                            #print(q)
                            #removed += buff
                            db.Query(q)

                        #print('t0: %s; removed %d values' % (date2str(t0),buff-1))
                        #print('pool:%s'%str(pool))
                        
                if reps:
                    if not test:
                        # Delete as many rows as repetitions were found,
                        # matching the last repeated timestamp
                        #print('repeated timestamp: %s,%s == %s,%s'%(t0,vw0,t1,vw1))
                        q = "delete from %s where "%(table)
                        if condition:
                            q+= condition+' and '
                        q+= "%s = '%s' limit %d" % (
                          date,date2str(reps[-1],us=us),len(reps))
                        #print(q)
                        db.Query(q)                
 
                # The current row becomes the new reference
                pool,reps = [],[]
                #print('%s => %s'%(t0,t1))
                t0,vw0 = t1,vw1

            else:
                # This branch is reached only if vw0 == vw1 and 
                # tdelta < period, and the row is not the last of the batch.
                # repeated values with tdiff<period will be removed in a single query
                    
                # This should apply only if values are different and timestamp equal?
                # if timestamp is repeated the condition t < d < t is useless
                # repeated timestamps are removed directly
                #print(tdelta)
                if repeated and not tdelta:
                    reps.append(t1)
                    #print(('reps',t1))
                        
                elif vw0 == vw1:
                    #if buff and not buff%100:
                    #    print('%s repeated values in %s seconds'%(buff,tdelta))
                    pool.append(t1)

                    #removed +=1  
                
                # unreachable, as vw0 != vw1 is managed by the outer if
                else: pass
                #print((vw0,vw1))                  
                    
            # Stop this batch; the while will query again from the new t0
            if is_last: break
    
    # Count the rows remaining in the interval to get the number of removed
    query = "select count(*) from %s where" %(table)
    query += " '%s' < %s and %s < '%s'"%(date2str(start,us=us),date,date,date2str(end,us=us))
    if condition: query+=' and %s'%condition   
    cur =  db.Query(query)[0][0]
    removed = count-cur

    print('decimate_db_table(%s,%s) took %d seconds to remove %d = %d - %d values'%(
      table,condition,time.time()-date2time(now),removed,count,cur))

    return removed