def addRecord(sid, userID, fileClose, timestamp):
    """Build a 'detailed' Elasticsearch document for one file-close event.

    Looks up the server (AllServers), user (AllUsers) and transfer
    (AllTransfers) state collected from earlier monitoring packets and
    flattens them into a single dict ready for bulk indexing.

    Args:
        sid: server ID the packet came from.
        userID: user ID within that server's session table.
        fileClose: parsed file-close packet (fileID/read/readv/write).
        timestamp: event time in seconds since the Unix epoch.

    Returns:
        dict with '_index'/'_type' metadata and the event fields.

    Raises:
        KeyError: if the transfer is not present in AllTransfers
        (user lookup failures are logged and tolerated instead).
    """
    rec = {'_type': 'detailed'}
    rec['timestamp'] = timestamp * 1000  # ES expects milliseconds since epoch
    if sid in AllServers:
        s = AllServers[sid]
        rec['serverID'] = sid
        rec['server'] = s.addr
        rec['site'] = s.site
    else:
        logger.warning('server still not identified: %i', sid)
    try:
        u = AllUsers[sid][userID]
        rec['user'] = u.username
        rec['host'] = u.host
        rec['location'] = decoding.getLongLat(u.host)
    except KeyError:
        # User mapping packet not (yet) seen; emit the record without it.
        logger.error('%suser %i missing.%s', decoding.bcolors.WARNING,
                     userID, decoding.bcolors.ENDC)
    f = AllTransfers[sid][userID][fileClose.fileID]
    rec['filename'] = f.fileName
    rec['filesize'] = f.fileSize
    rec['read'] = fileClose.read
    rec['readv'] = fileClose.readv
    rec['write'] = fileClose.write
    # Monthly index, e.g. "xrd_detailed-2020.07".  strftime zero-pads the
    # month, replacing the previous manual '0' + str(month) padding.
    rec['_index'] = "xrd_detailed-" + datetime.now().strftime("%Y.%m")
    return rec
def eventCreator():
    """Worker loop: consume raw XML summary packets from the global queue
    `q`, parse them, diff cumulative counters against the last state seen
    for the same (IP, pid), and bulk-index the documents into Elasticsearch.

    Runs forever; meant to be the target of a worker thread.
    """
    aLotOfData = []  # buffer of documents awaiting the next bulk insert
    while (True):
        [d, addr] = q.get()  # d: raw XML payload, addr: sender IP
        m = {}
        try:
            m = xmltodict.parse(d)
        except ExpatError:
            logger.error("could not parse: %s", d)
            q.task_done()
            continue
        except:
            # NOTE(review): bare except also traps KeyboardInterrupt/SystemExit;
            # consider `except Exception` — confirm intent.
            logger.error("unexpected error. messsage was: %s", d)
            print(sys.exc_info()[0])
            q.task_done()
            continue
        # Monthly index name, e.g. "<ESindex>2020.07" (month zero-padded).
        d = datetime.now()
        mont = str(d.month)
        if len(mont) < 2:
            mont = '0' + mont
        ind = ESindex + str(d.year) + "." + mont
        data = {'_index': ind, '_type': 'summary', 'IP': addr}
        previousState = state()
        currState = state()
        # print m
        s = m['statistics']  # top level
        pgm = s['@pgm']  # program name
        logger.debug("Program: %s", pgm)
        if (pgm != 'xrootd'):
            # Only xrootd daemons should send summary packets; drop others.
            logger.warning(
                "Program: %s should not be sending summary information. Source: %s",
                pgm, s['@src'])
            q.task_done()
            continue
        tos = int(s['@tos'])  # Unix time when the program was started.
        tod = int(s['@tod'])  # Unix time when statistics gathering started.
        pid = int(s['@pid'])
        currState.pid = pid
        currState.tod = tod
        data['pid'] = pid
        data['timestamp'] = datetime.utcfromtimestamp(float(tod)).isoformat()
        data['tos'] = datetime.utcfromtimestamp(float(tos)).isoformat()
        data['cstart'] = datetime.utcfromtimestamp(float(tod)).isoformat()
        data['version'] = s['@ver']  # version name of the servers
        if '@site' in s:
            data['site'] = s[
                '@site']  # site name specified in the configuration
        else:
            # No site configured: index the partial document as-is and skip
            # the counter-diffing below.
            print('Server', addr, 'has no site name defined!')
            data['site'] = 'UnknownSite'
            q.task_done()
            aLotOfData.append(data)
            continue
        # Find the previous counter snapshot for this (IP, pid), if any.
        hasPrev = False
        if (addr in AllState):
            pids = AllState[addr]
            if (pid in pids):
                hasPrev = True
                previousState = AllState[addr][pid]
                #print "Previous ----"
                #AllState[addr][pid].prnt()
                #print "IP has previous values."
            else:
                logger.warning("seen this IP (%s) before, but not PID (%i).",
                               addr, pid)
        else:
            logger.info("new IP: %s", addr)
        stats = s['stats']
        for st in stats:
            sw = st['@id']
            if sw == 'info':
                # print 'host >>>', st
                data['host'] = st['host']
                data['location'] = decoding.getLongLat(addr)
            elif sw == 'link':
                data['link_num'] = int(st['num'])  # not cumulative
                # Cumulative counters go on currState and are diffed below.
                currState.link_total = int(st['tot'])
                currState.link_in = int(st['in'])
                currState.link_out = int(st['out'])
                currState.link_ctime = int(st['ctime'])
                currState.link_tmo = int(st['tmo'])
                # currState.link_stall = int(st['stall'])
                # currState.link_sfps = int(st['sfps'])
            elif sw == 'proc':
                currState.proc_sys = int(st['sys']['s'])
                currState.proc_usr = int(st['usr']['s'])
                logger.debug("proc %s", st)
            elif sw == 'xrootd':
                currState.xrootd_err = int(st['err'])
                currState.xrootd_dly = int(st['dly'])
                currState.xrootd_rdr = int(st['rdr'])
                ops = st['ops']
                currState.ops_open = int(ops['open'])
                currState.ops_pr = int(ops['pr'])
                currState.ops_rd = int(ops['rd'])
                currState.ops_rv = int(ops['rv'])
                currState.ops_sync = int(ops['sync'])
                currState.ops_wr = int(ops['wr'])
                lgn = st['lgn']
                currState.lgn_num = int(lgn['num'])
                currState.lgn_af = int(lgn['af'])
                currState.lgn_au = int(lgn['au'])
                currState.lgn_ua = int(lgn['ua'])
                logger.debug("xrootd %s", st)
            elif sw == 'sched':
                data['sched_in_queue'] = int(st['inq'])
                data['sched_threads'] = int(st['threads'])
                data['sched_idle_threads'] = int(st['idle'])
                logger.debug("sched %s", st)
            elif sw == 'sgen':
                data['sgen_as'] = int(st['as'])
                # data['sgen_et'] = int(st['et'])  # always 0
                data['cend'] = datetime.utcfromtimestamp(float(
                    st['toe'])).isoformat()
            # elif sw=='ofs':
                #print 'ofs >>>',st
        q.task_done()
        if (hasPrev):
            if (currState.tod < previousState.tod):
                logger.warning("package came out of order. Skipping the message.")
                continue
            # Diff each cumulative counter against the previous sample; a
            # negative difference means the server restarted, so fall back to
            # the raw current value.
            data[
                'link_total'] = currState.link_total - previousState.link_total
            if data['link_total'] < 0:
                data['link_total'] = currState.link_total
            data['link_in'] = currState.link_in - previousState.link_in
            if data['link_in'] < 0:
                data['link_in'] = currState.link_in
            data['link_out'] = currState.link_out - previousState.link_out
            if data['link_out'] < 0:
                data['link_out'] = currState.link_out
            data[
                'link_ctime'] = currState.link_ctime - previousState.link_ctime
            if data['link_ctime'] < 0:
                data['link_ctime'] = currState.link_ctime
            data['link_tmo'] = currState.link_tmo - previousState.link_tmo
            if data['link_tmo'] < 0:
                data['link_tmo'] = currState.link_tmo
            # data['link_stall'] = currState.link_stall - previousState.link_stall
            # data['link_sfps'] = currState.link_sfps - previousState.link_sfps
            data['proc_usr'] = currState.proc_usr - previousState.proc_usr
            if data['proc_usr'] < 0:
                data['proc_usr'] = currState.proc_usr
            data['proc_sys'] = currState.proc_sys - previousState.proc_sys
            if data['proc_sys'] < 0:
                data['proc_sys'] = currState.proc_sys
            data[
                'xrootd_errors'] = currState.xrootd_err - previousState.xrootd_err  # these should not overflow
            data[
                'xrootd_delays'] = currState.xrootd_dly - previousState.xrootd_dly
            data[
                'xrootd_redirections'] = currState.xrootd_rdr - previousState.xrootd_rdr
            data['ops_open'] = currState.ops_open - previousState.ops_open
            if data['ops_open'] < 0:
                data['ops_open'] = currState.ops_open
            data['ops_preread'] = currState.ops_pr - previousState.ops_pr
            if data['ops_preread'] < 0:
                data['ops_preread'] = currState.ops_pr
            data['ops_read'] = currState.ops_rd - previousState.ops_rd
            if data['ops_read'] < 0:
                data['ops_read'] = currState.ops_rd
            data['ops_readv'] = currState.ops_rv - previousState.ops_rv
            if data['ops_readv'] < 0:
                data['ops_readv'] = currState.ops_rv
            data['ops_sync'] = currState.ops_sync - previousState.ops_sync
            if data['ops_sync'] < 0:
                data['ops_sync'] = currState.ops_sync
            data['ops_write'] = currState.ops_wr - previousState.ops_wr
            if data['ops_write'] < 0:
                data['ops_write'] = currState.ops_wr
            data['login_attempts'] = currState.lgn_num - previousState.lgn_num
            data[
                'authentication_failures'] = currState.lgn_af - previousState.lgn_af
            data[
                'authentication_successes'] = currState.lgn_au - previousState.lgn_au
            data[
                'unauthenticated_successes'] = currState.lgn_ua - previousState.lgn_ua
            aLotOfData.append(data)
        else:
            if addr not in AllState:
                AllState[addr] = {}
        # Remember this snapshot as the baseline for the next packet.
        AllState[addr][pid] = currState
        # print "current state ----"
        # currState.prnt()
        # A growing queue backlog suggests ES insertion is stuck; reconnect.
        if q.qsize() % 200 == 199:
            logger.error(
                'Some problem in sending data to ES. Trying to reconnect.')
            RefreshConnection()
        # Flush the buffer roughly every 21 documents.
        if len(aLotOfData) % 21 == 20:
            try:
                res = helpers.bulk(es,
                                   aLotOfData,
                                   raise_on_exception=True,
                                   request_timeout=60)
                logger.info("%s \tinserted: %i \terrors: %s",
                            threading.current_thread().name, res[0],
                            str(res[1]))
                aLotOfData = []
            except es_exceptions.ConnectionError as e:
                logger.error('ConnectionError %s', e)
            except es_exceptions.TransportError as e:
                logger.error('TransportError %s ', e)
            except helpers.BulkIndexError as e:
                # NOTE(review): indexing an exception (e[0]/e[1]) raises
                # TypeError on Python 3 — likely wants e.args[0] / e.errors;
                # confirm against the elasticsearch helpers API.
                logger.error('%s', e[0])
                errcount = 0
                for i in e[1]:
                    errcount += 1
                    if errcount > 5:
                        break
                    logger.error('%s', i)
            except:
                logger.error('Something seriously wrong happened.')
                e = sys.exc_info()[0]
                logger.error(e)
def eventCreator():
    """Worker loop (older variant): consume raw XML summary packets from the
    global queue `q`, parse them, diff cumulative counters against the last
    state seen for the same (IP, pid), and bulk-index into Elasticsearch.

    Differs from the other eventCreator in this file by using a daily,
    non-zero-padded "xrd_summary-" index name and by reading '@site'
    unconditionally.  Runs forever; meant as a worker-thread target.
    """
    aLotOfData = []  # buffer of documents awaiting the next bulk insert
    while(True):
        [d, addr] = q.get()  # d: raw XML payload, addr: sender IP
        m = {}
        try:
            m = xmltodict.parse(d)
        except ExpatError:
            logger.error("could not parse: %s", d)
            q.task_done()
            continue
        except:
            # NOTE(review): bare except also traps KeyboardInterrupt/SystemExit;
            # consider `except Exception` — confirm intent.
            logger.error("unexpected error. messsage was: %s", d)
            print(sys.exc_info()[0])
            q.task_done()
            continue
        # Daily index name, e.g. "xrd_summary-2020.7.3" (not zero-padded).
        d = datetime.now()
        ind = "xrd_summary-" + str(d.year) + "." + str(d.month) + "." + str(d.day)
        data = {'_index': ind, '_type': 'summary', 'IP': addr}
        previousState = state()
        currState = state()
        # print m
        s = m['statistics']  # top level
        pgm = s['@pgm']  # program name
        logger.debug("Program: %s", pgm)
        if (pgm != 'xrootd'):
            # Only xrootd daemons should send summary packets; drop others.
            logger.warning(
                "Program: %s should not be sending summary information. Source: %s",
                pgm, s['@src'])
            q.task_done()
            continue
        tos = int(s['@tos'])  # Unix time when the program was started.
        tod = int(s['@tod'])  # Unix time when statistics gathering started.
        pid = int(s['@pid'])
        currState.pid = pid
        currState.tod = tod
        data['pid'] = pid
        data['timestamp'] = datetime.utcfromtimestamp(float(tod)).isoformat()
        data['tos'] = datetime.utcfromtimestamp(float(tos)).isoformat()
        data['cstart'] = datetime.utcfromtimestamp(float(tod)).isoformat()
        data['version'] = s['@ver']  # version name of the servers
        # NOTE(review): no '@site' presence check here (unlike the other
        # eventCreator) — a packet without @site raises KeyError; confirm.
        data['site'] = s['@site']  # site name specified in the configuration
        # Find the previous counter snapshot for this (IP, pid), if any.
        hasPrev = False
        if (addr in AllState):
            pids = AllState[addr]
            if (pid in pids):
                hasPrev = True
                previousState = AllState[addr][pid]
                #print "Previous ----"
                #AllState[addr][pid].prnt()
                #print "IP has previous values."
            else:
                logger.warning("seen this IP (%s) before, but not PID (%i).",
                               addr, pid)
        else:
            logger.info("new IP: %s", addr)
        stats = s['stats']
        for st in stats:
            sw = st['@id']
            if sw == 'info':
                # print 'host >>>', st
                data['host'] = st['host']
                data['location'] = decoding.getLongLat(addr)
            elif sw == 'link':
                data['link_num'] = int(st['num'])  # not cumulative
                # Cumulative counters go on currState and are diffed below.
                currState.link_total = int(st['tot'])
                currState.link_in = int(st['in'])
                currState.link_out = int(st['out'])
                currState.link_ctime = int(st['ctime'])
                currState.link_tmo = int(st['tmo'])
                # currState.link_stall = int(st['stall'])
                # currState.link_sfps = int(st['sfps'])
            elif sw == 'proc':
                currState.proc_sys = int(st['sys']['s'])
                currState.proc_usr = int(st['usr']['s'])
                logger.debug("proc %s", st)
            elif sw == 'xrootd':
                currState.xrootd_err = int(st['err'])
                currState.xrootd_dly = int(st['dly'])
                currState.xrootd_rdr = int(st['rdr'])
                ops = st['ops']
                currState.ops_open = int(ops['open'])
                currState.ops_pr = int(ops['pr'])
                currState.ops_rd = int(ops['rd'])
                currState.ops_rv = int(ops['rv'])
                currState.ops_sync = int(ops['sync'])
                currState.ops_wr = int(ops['wr'])
                lgn = st['lgn']
                currState.lgn_num = int(lgn['num'])
                currState.lgn_af = int(lgn['af'])
                currState.lgn_au = int(lgn['au'])
                currState.lgn_ua = int(lgn['ua'])
                logger.debug("xrootd %s", st)
            elif sw == 'sched':
                data['sched_in_queue'] = int(st['inq'])
                data['sched_threads'] = int(st['threads'])
                data['sched_idle_threads'] = int(st['idle'])
                logger.debug("sched %s", st)
            elif sw == 'sgen':
                data['sgen_as'] = int(st['as'])
                # data['sgen_et'] = int(st['et'])  # always 0
                data['cend'] = datetime.utcfromtimestamp(float(st['toe'])).isoformat()
            # elif sw=='ofs':
                #print 'ofs >>>',st
        q.task_done()
        if (hasPrev):
            if (currState.tod < previousState.tod):
                logger.warning("package came out of order. Skipping the message.")
                continue
            # Diff each cumulative counter against the previous sample; a
            # negative difference means the server restarted, so fall back to
            # the raw current value.
            data['link_total'] = currState.link_total - previousState.link_total
            if data['link_total'] < 0:
                data['link_total'] = currState.link_total
            data['link_in'] = currState.link_in - previousState.link_in
            if data['link_in'] < 0:
                data['link_in'] = currState.link_in
            data['link_out'] = currState.link_out - previousState.link_out
            if data['link_out'] < 0:
                data['link_out'] = currState.link_out
            data['link_ctime'] = currState.link_ctime - previousState.link_ctime
            if data['link_ctime'] < 0:
                data['link_ctime'] = currState.link_ctime
            data['link_tmo'] = currState.link_tmo - previousState.link_tmo
            if data['link_tmo'] < 0:
                data['link_tmo'] = currState.link_tmo
            # data['link_stall'] = currState.link_stall - previousState.link_stall
            # data['link_sfps'] = currState.link_sfps - previousState.link_sfps
            data['proc_usr'] = currState.proc_usr - previousState.proc_usr
            if data['proc_usr'] < 0:
                data['proc_usr'] = currState.proc_usr
            data['proc_sys'] = currState.proc_sys - previousState.proc_sys
            if data['proc_sys'] < 0:
                data['proc_sys'] = currState.proc_sys
            data['xrootd_errors'] = currState.xrootd_err - previousState.xrootd_err  # these should not overflow
            data['xrootd_delays'] = currState.xrootd_dly - previousState.xrootd_dly
            data['xrootd_redirections'] = currState.xrootd_rdr - previousState.xrootd_rdr
            data['ops_open'] = currState.ops_open - previousState.ops_open
            if data['ops_open'] < 0:
                data['ops_open'] = currState.ops_open
            data['ops_preread'] = currState.ops_pr - previousState.ops_pr
            if data['ops_preread'] < 0:
                data['ops_preread'] = currState.ops_pr
            data['ops_read'] = currState.ops_rd - previousState.ops_rd
            if data['ops_read'] < 0:
                data['ops_read'] = currState.ops_rd
            data['ops_readv'] = currState.ops_rv - previousState.ops_rv
            if data['ops_readv'] < 0:
                data['ops_readv'] = currState.ops_rv
            data['ops_sync'] = currState.ops_sync - previousState.ops_sync
            if data['ops_sync'] < 0:
                data['ops_sync'] = currState.ops_sync
            data['ops_write'] = currState.ops_wr - previousState.ops_wr
            if data['ops_write'] < 0:
                data['ops_write'] = currState.ops_wr
            data['login_attempts'] = currState.lgn_num - previousState.lgn_num
            data['authentication_failures'] = currState.lgn_af - previousState.lgn_af
            data['authentication_successes'] = currState.lgn_au - previousState.lgn_au
            data['unauthenticated_successes'] = currState.lgn_ua - previousState.lgn_ua
            aLotOfData.append(data)
        else:
            if addr not in AllState:
                AllState[addr] = {}
        # Remember this snapshot as the baseline for the next packet.
        AllState[addr][pid] = currState
        # print "current state ----"
        # currState.prnt()
        # A growing queue backlog suggests ES insertion is stuck; reconnect.
        if q.qsize() % 200 == 199:
            logger.error('Some problem in sending data to ES. Trying to reconnect.')
            RefreshConnection()
        # Flush the buffer roughly every 21 documents.
        if len(aLotOfData) % 21 == 20:
            try:
                res = helpers.bulk(es, aLotOfData, raise_on_exception=True, request_timeout=60)
                logger.info("%s \tinserted: %i \terrors: %s",
                            threading.current_thread().name, res[0], str(res[1]))
                aLotOfData = []
            except es_exceptions.ConnectionError as e:
                logger.error('ConnectionError %s', e)
            except es_exceptions.TransportError as e:
                logger.error('TransportError %s ', e)
            except helpers.BulkIndexError as e:
                # NOTE(review): indexing an exception (e[0]/e[1]) raises
                # TypeError on Python 3 — likely wants e.args[0] / e.errors;
                # confirm against the elasticsearch helpers API.
                logger.error('%s', e[0])
                errcount = 0
                for i in e[1]:
                    errcount += 1
                    if errcount > 5:
                        break
                    logger.error('%s', i)
            except:
                logger.error('Something seriously wrong happened.')
                e = sys.exc_info()[0]
                logger.error(e)
def addRecord(sid, userID, fileClose, timestamp, addr):
    """
    Given information to create a record, send it up to the message queue.

    Builds a file-close record from cached server/user/transfer state and
    publishes it (plus a WLCG-converted copy) to the configured RabbitMQ
    exchanges.  One reconnect-and-retry is attempted on publish failure.

    Args:
        sid: server ID the packet came from.
        userID: user ID within that server's session table.
        fileClose: parsed file-close packet (fileID/read/readv/write).
        timestamp: event time in seconds since the Unix epoch.
        addr: IP address of the reporting server.

    Returns:
        The record dict that was published.
    """
    rec = {}
    rec['timestamp'] = timestamp * 1000  # expected to be in MS since Unix epoch
    try:
        rec['server_hostname'] = socket.gethostbyaddr(addr)[0]
    except OSError:
        # Reverse DNS is best-effort; socket.herror/gaierror are OSError
        # subclasses.  (Was a bare except, which also swallowed
        # KeyboardInterrupt/SystemExit.)
        pass
    rec['server_ip'] = addr
    if sid in AllServers:
        s = AllServers[sid]
        rec['serverID'] = sid
        rec['server'] = s.addr
        rec['site'] = s.site
    else:
        rec['server'] = addr
        # logger.warning('server still not identified: %s',sid)
    try:
        (u, auth) = AllUsers[sid][userID]
        if u is not None:
            rec['user'] = u.username
            rec['host'] = u.host
            # Only derive a domain when the host does not look like a raw
            # IPv4/IPv6 literal.
            if not re.match(r"^[\[\:f\d\.]+", u.host):
                rec['user_domain'] = ".".join(u.host.split('.')[-2:])
            rec['location'] = decoding.getLongLat(u.host)
        if auth is not None:
            if auth.inetv != '':
                rec['ipv6'] = True if auth.inetv == 6 else False
            if auth.dn != '':
                rec['user_dn'] = auth.dn
    except KeyError:
        logger.error("File close record from unknown UserID=%i, SID=%s",
                     userID, sid)
        # Cache the miss so repeated closes from this user hit the
        # TypeError path (unpacking None) instead of KeyError.
        AllUsers.setdefault(sid, {})[userID] = None
    except TypeError as e:
        logger.error("File close record from unknown UserID=%i, SID=%s: %s",
                     userID, sid, str(e))
        AllUsers.setdefault(sid, {})[userID] = None
    transfer_key = str(sid) + "." + str(fileClose.fileID)
    if transfer_key in AllTransfers:
        f = AllTransfers[transfer_key][1]
        rec['filename'] = f.fileName
        rec['filesize'] = f.fileSize
        rec['dirname1'] = "/".join(f.fileName.split('/', 2)[:2])
        rec['dirname2'] = "/".join(f.fileName.split('/', 3)[:3])
        # Map the path onto a coarse "logical" directory for aggregation.
        if f.fileName.startswith('/user'):
            rec['logical_dirname'] = rec['dirname2']
        elif f.fileName.startswith('/pnfs/fnal.gov/usr'):
            rec['logical_dirname'] = "/".join(f.fileName.split('/')[:5])
        elif f.fileName.startswith('/gwdata'):
            rec['logical_dirname'] = rec['dirname2']
        elif f.fileName.startswith('/chtc/'):
            rec['logical_dirname'] = '/chtc'
        else:
            rec['logical_dirname'] = 'unknown directory'
    else:
        rec['filename'] = "missing directory"
        rec['filesize'] = "-1"
        rec['logical_dirname'] = "missing directory"
    rec['read'] = fileClose.read
    rec['readv'] = fileClose.readv
    rec['write'] = fileClose.write
    wlcg_packet = wlcg_converter.Convert(rec)
    logger.debug("WLCG record to send: %s", str(wlcg_packet))
    try:
        channel.basic_publish(
            connect_config.get('AMQP', 'exchange'), "file-close",
            json.dumps(rec),
            pika.BasicProperties(content_type='application/json',
                                 delivery_mode=1))
        channel.basic_publish(
            connect_config.get('AMQP', 'wlcg_exchange'), "file-close",
            json.dumps(wlcg_packet),
            pika.BasicProperties(content_type='application/json',
                                 delivery_mode=1))
    except Exception:
        # Connection likely dropped: rebuild it and retry once.  A second
        # failure propagates to the caller.
        logger.exception('Error while sending rabbitmq message')
        CreateRabbitConnection()
        channel.basic_publish(
            connect_config.get('AMQP', 'exchange'), "file-close",
            json.dumps(rec),
            pika.BasicProperties(content_type='application/json',
                                 delivery_mode=1))
        channel.basic_publish(
            connect_config.get('AMQP', 'wlcg_exchange'), "file-close",
            json.dumps(wlcg_packet),
            pika.BasicProperties(content_type='application/json',
                                 delivery_mode=1))
    return rec