Esempio n. 1
0
def addRecord(sid,userID,fileClose,timestamp):
    """Build the 'detailed' Elasticsearch document for one file-close event.

    sid       -- server identifier, used as key into AllServers, AllUsers
                 and AllTransfers.
    userID    -- user identifier within that server.
    fileClose -- file-close message; its fileID, read, readv and write
                 attributes are read.
    timestamp -- event time; stored multiplied by 1000.

    Returns the record dict, including the '_type' and '_index' routing
    fields. An unknown server or user is only logged and the matching fields
    are left out; a missing transfer entry raises KeyError.
    """
    record = {'_type': 'detailed', 'timestamp': timestamp * 1000}

    # Server details are optional: without them the record is still produced.
    if sid not in AllServers:
        logger.warning('server still not identified: %i',sid)
    else:
        server = AllServers[sid]
        record['serverID'] = sid
        record['server'] = server.addr
        record['site'] = server.site

    # User details are optional as well; a KeyError anywhere in this lookup
    # is reported as a missing user.
    try:
        user = AllUsers[sid][userID]
        record['user'] = user.username
        record['host'] = user.host
        record['location'] = decoding.getLongLat(user.host)
    except KeyError:
        colors = decoding.bcolors
        logger.error( '%suser %i missing.%s', colors.WARNING, userID, colors.ENDC)

    # The transfer entry is mandatory: this lookup is not guarded.
    transfer = AllTransfers[sid][userID][fileClose.fileID]
    record['filename'] = transfer.fileName
    record['filesize'] = transfer.fileSize
    for counter in ('read', 'readv', 'write'):
        record[counter] = getattr(fileClose, counter)

    # Monthly index name, e.g. "xrd_detailed-2016.03", from the local clock.
    now = datetime.now()
    record['_index'] = "xrd_detailed-%d.%02d" % (now.year, now.month)
    return record
Esempio n. 2
0
def eventCreator():
    """Turn queued XRootD summary messages into Elasticsearch documents.

    Loops indefinitely. Every item taken from the queue ``q`` is a
    ``[payload, addr]`` pair: the XML summary text and the address of the
    server that sent it. For each message the function

    * parses the XML and drops messages that do not parse or that were not
      produced by the ``xrootd`` program,
    * copies the non-cumulative values straight into the document,
    * turns the cumulative counters into deltas against the state previously
      stored in ``AllState[addr][pid]`` (the first message of a server process
      only seeds that state and produces no document),
    * sends the accumulated documents to Elasticsearch with ``helpers.bulk``
      whenever the batch length is 20 modulo 21.

    A batch that fails to upload is kept and sent again with a later batch.
    """
    # (document field, state attribute, reset-aware)
    # For reset-aware counters a negative delta is replaced by the current
    # raw value; the others are stored as a plain difference.
    counters = (
        ('link_total', 'link_total', True),
        ('link_in', 'link_in', True),
        ('link_out', 'link_out', True),
        ('link_ctime', 'link_ctime', True),
        ('link_tmo', 'link_tmo', True),
        ('proc_usr', 'proc_usr', True),
        ('proc_sys', 'proc_sys', True),
        ('xrootd_errors', 'xrootd_err', False),  # these should not overflow
        ('xrootd_delays', 'xrootd_dly', False),
        ('xrootd_redirections', 'xrootd_rdr', False),
        ('ops_open', 'ops_open', True),
        ('ops_preread', 'ops_pr', True),
        ('ops_read', 'ops_rd', True),
        ('ops_readv', 'ops_rv', True),
        ('ops_sync', 'ops_sync', True),
        ('ops_write', 'ops_wr', True),
        ('login_attempts', 'lgn_num', False),
        ('authentication_failures', 'lgn_af', False),
        ('authentication_successes', 'lgn_au', False),
        ('unauthenticated_successes', 'lgn_ua', False),
    )

    aLotOfData = []
    while True:
        [d, addr] = q.get()
        try:
            m = xmltodict.parse(d)
        except ExpatError:
            logger.error("could not parse: %s", d)
            q.task_done()
            continue
        except Exception:
            # Anything else the parser throws: log it with the traceback and
            # move on to the next message.
            logger.exception("unexpected error. messsage was: %s", d)
            q.task_done()
            continue

        # Monthly index, e.g. "<ESindex>2016.03", based on the local clock.
        now = datetime.now()
        ind = ESindex + "%d.%02d" % (now.year, now.month)
        data = {'_index': ind, '_type': 'summary', 'IP': addr}

        currState = state()

        s = m['statistics']  # top level
        pgm = s['@pgm']  # program name
        logger.debug("Program: %s", pgm)
        if pgm != 'xrootd':
            logger.warning(
                "Program: %s should not be sending summary information. Source: %s",
                pgm, s['@src'])
            q.task_done()
            continue

        tos = int(s['@tos'])  # Unix time when the program was started.
        tod = int(s['@tod'])  # Unix time when statistics gathering started.
        pid = int(s['@pid'])

        currState.pid = pid
        currState.tod = tod
        data['pid'] = pid
        data['timestamp'] = datetime.utcfromtimestamp(float(tod)).isoformat()
        data['tos'] = datetime.utcfromtimestamp(float(tos)).isoformat()
        data['cstart'] = datetime.utcfromtimestamp(float(tod)).isoformat()
        data['version'] = s['@ver']  # version name of the servers
        if '@site' in s:
            data['site'] = s['@site']  # site name from the configuration
        else:
            # Without a site name only the header fields are queued: the
            # stats are not read, no state is stored for this message and the
            # flush check at the bottom of the loop is skipped.
            logger.warning('Server %s has no site name defined!', addr)
            data['site'] = 'UnknownSite'
            q.task_done()
            aLotOfData.append(data)
            continue

        # Look up the state left behind by the previous message of this
        # (addr, pid) pair, if any.
        hasPrev = False
        previousState = None
        if addr in AllState:
            if pid in AllState[addr]:
                hasPrev = True
                previousState = AllState[addr][pid]
            else:
                logger.warning("seen this IP (%s) before, but not PID (%i).",
                               addr, pid)
        else:
            logger.info("new IP: %s", addr)

        stats = s['stats']
        if isinstance(stats, dict):
            # xmltodict yields a mapping instead of a list when the document
            # holds a single <stats> element.
            stats = [stats]
        for st in stats:
            sw = st['@id']
            if sw == 'info':
                data['host'] = st['host']
                data['location'] = decoding.getLongLat(addr)
            elif sw == 'link':
                data['link_num'] = int(st['num'])  # not cumulative
                currState.link_total = int(st['tot'])
                currState.link_in = int(st['in'])
                currState.link_out = int(st['out'])
                currState.link_ctime = int(st['ctime'])
                currState.link_tmo = int(st['tmo'])
            elif sw == 'proc':
                currState.proc_sys = int(st['sys']['s'])
                currState.proc_usr = int(st['usr']['s'])
                logger.debug("proc %s", st)
            elif sw == 'xrootd':
                currState.xrootd_err = int(st['err'])
                currState.xrootd_dly = int(st['dly'])
                currState.xrootd_rdr = int(st['rdr'])
                ops = st['ops']
                currState.ops_open = int(ops['open'])
                currState.ops_pr = int(ops['pr'])
                currState.ops_rd = int(ops['rd'])
                currState.ops_rv = int(ops['rv'])
                currState.ops_sync = int(ops['sync'])
                currState.ops_wr = int(ops['wr'])
                lgn = st['lgn']
                currState.lgn_num = int(lgn['num'])
                currState.lgn_af = int(lgn['af'])
                currState.lgn_au = int(lgn['au'])
                currState.lgn_ua = int(lgn['ua'])
                logger.debug("xrootd %s", st)
            elif sw == 'sched':
                data['sched_in_queue'] = int(st['inq'])
                data['sched_threads'] = int(st['threads'])
                data['sched_idle_threads'] = int(st['idle'])
                logger.debug("sched %s", st)
            elif sw == 'sgen':
                data['sgen_as'] = int(st['as'])
                data['cend'] = datetime.utcfromtimestamp(float(
                    st['toe'])).isoformat()

        q.task_done()

        if hasPrev:
            if currState.tod < previousState.tod:
                # Older than what is already stored: keep the stored state.
                logger.warning(
                    "package came out of order. Skipping the message.")
                continue

            for field, attr, reset_aware in counters:
                current = getattr(currState, attr)
                delta = current - getattr(previousState, attr)
                if reset_aware and delta < 0:
                    delta = current
                data[field] = delta
            aLotOfData.append(data)
        elif addr not in AllState:
            AllState[addr] = {}

        AllState[addr][pid] = currState

        # A queue backlog of 199 (mod 200) triggers a reconnect to ES.
        if q.qsize() % 200 == 199:
            logger.error(
                'Some problem in sending data to ES. Trying to reconnect.')
            RefreshConnection()

        if len(aLotOfData) % 21 == 20:
            try:
                res = helpers.bulk(es,
                                   aLotOfData,
                                   raise_on_exception=True,
                                   request_timeout=60)
                logger.info("%s \tinserted: %i \terrors: %s",
                            threading.current_thread().name, res[0],
                            str(res[1]))
                aLotOfData = []
            except es_exceptions.ConnectionError as e:
                logger.error('ConnectionError %s', e)
            except es_exceptions.TransportError as e:
                logger.error('TransportError %s ', e)
            except helpers.BulkIndexError as e:
                # Exceptions cannot be indexed on Python 3; the summary
                # message and the per-document errors are read from e.args.
                logger.error('%s', e.args[0])
                for failure in e.args[1][:5]:  # log at most five of them
                    logger.error('%s', failure)
            except Exception:
                logger.exception('Something seriously wrong happened.')
Esempio n. 3
0
def eventCreator():
    """Turn queued XRootD summary messages into Elasticsearch documents.

    Loops indefinitely. Every item taken from the queue ``q`` is a
    ``[payload, addr]`` pair: the XML summary text and the address of the
    server that sent it. For each message the function

    * parses the XML and drops messages that do not parse or that were not
      produced by the ``xrootd`` program,
    * copies the non-cumulative values straight into the document,
    * turns the cumulative counters into deltas against the state previously
      stored in ``AllState[addr][pid]`` (the first message of a server process
      only seeds that state and produces no document),
    * sends the accumulated documents to Elasticsearch with ``helpers.bulk``
      whenever the batch length is 20 modulo 21.

    Documents go to a daily index named ``xrd_summary-<year>.<month>.<day>``
    (numbers are not zero-padded). A batch that fails to upload is kept and
    sent again with a later batch.
    """
    # (document field, state attribute, reset-aware)
    # For reset-aware counters a negative delta is replaced by the current
    # raw value; the others are stored as a plain difference.
    counters = (
        ('link_total', 'link_total', True),
        ('link_in', 'link_in', True),
        ('link_out', 'link_out', True),
        ('link_ctime', 'link_ctime', True),
        ('link_tmo', 'link_tmo', True),
        ('proc_usr', 'proc_usr', True),
        ('proc_sys', 'proc_sys', True),
        ('xrootd_errors', 'xrootd_err', False),  # these should not overflow
        ('xrootd_delays', 'xrootd_dly', False),
        ('xrootd_redirections', 'xrootd_rdr', False),
        ('ops_open', 'ops_open', True),
        ('ops_preread', 'ops_pr', True),
        ('ops_read', 'ops_rd', True),
        ('ops_readv', 'ops_rv', True),
        ('ops_sync', 'ops_sync', True),
        ('ops_write', 'ops_wr', True),
        ('login_attempts', 'lgn_num', False),
        ('authentication_failures', 'lgn_af', False),
        ('authentication_successes', 'lgn_au', False),
        ('unauthenticated_successes', 'lgn_ua', False),
    )

    aLotOfData = []
    while True:
        [d, addr] = q.get()
        try:
            m = xmltodict.parse(d)
        except ExpatError:
            logger.error("could not parse: %s", d)
            q.task_done()
            continue
        except Exception:
            # Anything else the parser throws: log it with the traceback and
            # move on to the next message.
            logger.exception("unexpected error. messsage was: %s", d)
            q.task_done()
            continue

        # Daily index based on the local clock; the name is deliberately left
        # unpadded so it keeps matching the indices written so far.
        now = datetime.now()
        ind = "xrd_summary-%d.%d.%d" % (now.year, now.month, now.day)
        data = {
            '_index': ind,
            '_type': 'summary',
            'IP': addr,
        }

        currState = state()

        s = m['statistics']  # top level
        pgm = s['@pgm']  # program name
        logger.debug("Program: %s", pgm)
        if pgm != 'xrootd':
            logger.warning("Program: %s should not be sending summary information. Source: %s", pgm, s['@src'])
            q.task_done()
            continue

        tos = int(s['@tos'])  # Unix time when the program was started.
        tod = int(s['@tod'])  # Unix time when statistics gathering started.
        pid = int(s['@pid'])

        currState.pid = pid
        currState.tod = tod
        data['pid'] = pid
        data['timestamp'] = datetime.utcfromtimestamp(float(tod)).isoformat()
        data['tos'] = datetime.utcfromtimestamp(float(tos)).isoformat()
        data['cstart'] = datetime.utcfromtimestamp(float(tod)).isoformat()
        data['version'] = s['@ver']  # version name of the servers
        data['site'] = s['@site']  # site name specified in the configuration

        # Look up the state left behind by the previous message of this
        # (addr, pid) pair, if any.
        hasPrev = False
        previousState = None
        if addr in AllState:
            if pid in AllState[addr]:
                hasPrev = True
                previousState = AllState[addr][pid]
            else:
                logger.warning("seen this IP (%s) before, but not PID (%i).", addr, pid)
        else:
            logger.info("new IP: %s", addr)

        stats = s['stats']
        if isinstance(stats, dict):
            # xmltodict yields a mapping instead of a list when the document
            # holds a single <stats> element.
            stats = [stats]
        for st in stats:
            sw = st['@id']
            if sw == 'info':
                data['host'] = st['host']
                data['location'] = decoding.getLongLat(addr)
            elif sw == 'link':
                data['link_num'] = int(st['num'])  # not cumulative
                currState.link_total = int(st['tot'])
                currState.link_in = int(st['in'])
                currState.link_out = int(st['out'])
                currState.link_ctime = int(st['ctime'])
                currState.link_tmo = int(st['tmo'])
            elif sw == 'proc':
                currState.proc_sys = int(st['sys']['s'])
                currState.proc_usr = int(st['usr']['s'])
                logger.debug("proc %s", st)
            elif sw == 'xrootd':
                currState.xrootd_err = int(st['err'])
                currState.xrootd_dly = int(st['dly'])
                currState.xrootd_rdr = int(st['rdr'])
                ops = st['ops']
                currState.ops_open = int(ops['open'])
                currState.ops_pr = int(ops['pr'])
                currState.ops_rd = int(ops['rd'])
                currState.ops_rv = int(ops['rv'])
                currState.ops_sync = int(ops['sync'])
                currState.ops_wr = int(ops['wr'])
                lgn = st['lgn']
                currState.lgn_num = int(lgn['num'])
                currState.lgn_af = int(lgn['af'])
                currState.lgn_au = int(lgn['au'])
                currState.lgn_ua = int(lgn['ua'])
                logger.debug("xrootd %s", st)
            elif sw == 'sched':
                data['sched_in_queue'] = int(st['inq'])
                data['sched_threads'] = int(st['threads'])
                data['sched_idle_threads'] = int(st['idle'])
                logger.debug("sched %s", st)
            elif sw == 'sgen':
                data['sgen_as'] = int(st['as'])
                data['cend'] = datetime.utcfromtimestamp(float(st['toe'])).isoformat()

        q.task_done()

        if hasPrev:
            if currState.tod < previousState.tod:
                # Older than what is already stored: keep the stored state.
                logger.warning("package came out of order. Skipping the message.")
                continue

            for field, attr, reset_aware in counters:
                current = getattr(currState, attr)
                delta = current - getattr(previousState, attr)
                if reset_aware and delta < 0:
                    delta = current
                data[field] = delta
            aLotOfData.append(data)
        elif addr not in AllState:
            AllState[addr] = {}

        AllState[addr][pid] = currState

        # A queue backlog of 199 (mod 200) triggers a reconnect to ES.
        if q.qsize() % 200 == 199:
            logger.error('Some problem in sending data to ES. Trying to reconnect.')
            RefreshConnection()

        if len(aLotOfData) % 21 == 20:
            try:
                res = helpers.bulk(es, aLotOfData, raise_on_exception=True, request_timeout=60)
                logger.info("%s \tinserted: %i \terrors: %s", threading.current_thread().name, res[0], str(res[1]))
                aLotOfData = []
            except es_exceptions.ConnectionError as e:
                logger.error('ConnectionError %s', e)
            except es_exceptions.TransportError as e:
                logger.error('TransportError %s ', e)
            except helpers.BulkIndexError as e:
                # Exceptions cannot be indexed on Python 3; the summary
                # message and the per-document errors are read from e.args.
                logger.error('%s', e.args[0])
                for failure in e.args[1][:5]:  # log at most five of them
                    logger.error('%s', failure)
            except Exception:
                logger.exception('Something seriously wrong happened.')
Esempio n. 4
0
def _publish_file_close(rec, wlcg_packet):
    """Publish both forms of a file-close record with routing key "file-close".

    ``rec`` goes to the exchange configured as AMQP/exchange and
    ``wlcg_packet`` to the one configured as AMQP/wlcg_exchange, each encoded
    as JSON.
    """
    for exchange_option, payload in (('exchange', rec),
                                     ('wlcg_exchange', wlcg_packet)):
        channel.basic_publish(
            connect_config.get('AMQP', exchange_option), "file-close",
            json.dumps(payload),
            pika.BasicProperties(content_type='application/json',
                                 delivery_mode=1))


def addRecord(sid, userID, fileClose, timestamp, addr):
    """
    Given information to create a record, send it up to the message queue.

    sid       -- server identifier, key into AllServers and AllUsers.
    userID    -- user identifier within that server.
    fileClose -- file-close message; its fileID, read, readv and write
                 attributes are read.
    timestamp -- event time; stored multiplied by 1000.
    addr      -- address of the reporting server.

    Returns the record dict that was published. If publishing fails, the
    RabbitMQ connection is re-created and both messages are sent once more;
    an error during that second attempt propagates to the caller.
    """
    rec = {}
    rec['timestamp'] = timestamp * 1000  # expected to be in MS since Unix epoch

    # The reverse lookup is best effort: the hostname is simply left out when
    # it fails. Only Exception is caught so that KeyboardInterrupt/SystemExit
    # raised during the blocking lookup are not swallowed.
    try:
        rec['server_hostname'] = socket.gethostbyaddr(addr)[0]
    except Exception:
        logger.debug("Reverse DNS lookup failed for %s", addr)

    rec['server_ip'] = addr
    if sid in AllServers:
        s = AllServers[sid]
        rec['serverID'] = sid
        rec['server'] = s.addr
        rec['site'] = s.site
    else:
        rec['server'] = addr

    try:
        (u, auth) = AllUsers[sid][userID]
        if u is not None:
            rec['user'] = u.username
            rec['host'] = u.host
            # Hosts that look like an IP address get no domain.
            # NOTE(review): the pattern also matches any host name starting
            # with "f", ":" or a digit -- confirm this is intended.
            if not re.match(r"^[\[\:f\d\.]+", u.host):
                rec['user_domain'] = ".".join(u.host.split('.')[-2:])
            rec['location'] = decoding.getLongLat(u.host)
        if auth is not None:
            if auth.inetv != '':
                rec['ipv6'] = auth.inetv == 6
            if auth.dn != '':
                rec['user_dn'] = auth.dn
    except KeyError:
        logger.error("File close record from unknown UserID=%i, SID=%s",
                     userID, sid)
        # Leave a placeholder entry for this user.
        AllUsers.setdefault(sid, {})[userID] = None
    except TypeError as e:
        # Raised, among others, when the stored entry is the None placeholder
        # and cannot be unpacked.
        logger.error("File close record from unknown UserID=%i, SID=%s: %s",
                     userID, sid, str(e))
        AllUsers.setdefault(sid, {})[userID] = None

    transfer_key = str(sid) + "." + str(fileClose.fileID)
    if transfer_key in AllTransfers:
        f = AllTransfers[transfer_key][1]
        rec['filename'] = f.fileName
        rec['filesize'] = f.fileSize
        # First one / two path components of the file name.
        rec['dirname1'] = "/".join(f.fileName.split('/', 2)[:2])
        rec['dirname2'] = "/".join(f.fileName.split('/', 3)[:3])
        if f.fileName.startswith('/user'):
            rec['logical_dirname'] = rec['dirname2']
        elif f.fileName.startswith('/pnfs/fnal.gov/usr'):
            rec['logical_dirname'] = "/".join(f.fileName.split('/')[:5])
        elif f.fileName.startswith('/gwdata'):
            rec['logical_dirname'] = rec['dirname2']
        elif f.fileName.startswith('/chtc/'):
            rec['logical_dirname'] = '/chtc'
        else:
            rec['logical_dirname'] = 'unknown directory'
    else:
        rec['filename'] = "missing directory"
        # NOTE(review): the placeholder size is the string "-1"; confirm the
        # consumers expect a string here.
        rec['filesize'] = "-1"
        rec['logical_dirname'] = "missing directory"
    rec['read'] = fileClose.read
    rec['readv'] = fileClose.readv
    rec['write'] = fileClose.write

    wlcg_packet = wlcg_converter.Convert(rec)
    logger.debug("WLCG record to send: %s", str(wlcg_packet))

    try:
        _publish_file_close(rec, wlcg_packet)
    except Exception:
        logger.exception('Error while sending rabbitmq message')
        # Reconnect and resend both messages; a message that already went out
        # before the failure is published a second time.
        CreateRabbitConnection()
        _publish_file_close(rec, wlcg_packet)

    return rec