def test_queue_queue(self):
    """ Test queue 2 queue. """
    print("checking queue 2 queue use case")
    mq1_path = self.path + "/mq1"
    mq2_path = self.path + "/mq2"
    mq1 = DQS(path=mq1_path)
    count = 10
    bodies = list()
    for i in range(count):
        body = "hello world %s" % (i, )
        bodies.append(body)
        mq1.add_message(Message(body=body))
    self.assertEqual(count, mq1.count())
    cmd = "python bin/amqpclt --incoming-queue path=%s" \
          " --outgoing-queue path=%s --remove --loglevel debug" \
          % (mq1_path, mq2_path)
    (ret, out, err) = proc.timed_process(cmd.split())
    self.assertEqual(0, ret, "out: %s\nerr: %s" % (out, err))
    mq2 = DQS(path=mq2_path)
    for i in mq2:
        if mq2.lock(i):
            bodies.remove(mq2.get_message(i).body)
    self.assertEqual(count, mq2.count())
    self.assertEqual(0, len(bodies))
    print("checking queue 2 queue use case OK")
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--sleep', required=False, default=0, type=float)
    parser.add_argument('--queue', required=False, default=default_queue, type=str)
    parser.add_argument('--runas', required=False, default=default_user, type=str)
    parser.add_argument('--purge', required=False, action='store_true', default=False)
    parser.add_argument('--noout', required=False, action='store_true', default=False)
    parser.add_argument('--num', required=False, default=0, type=int)
    global args
    args = parser.parse_args()

    seteuser(pwd.getpwnam(args.runas))

    msgs = []
    mq = DQS(path=args.queue)

    try:
        if args.purge:
            mq.purge()
        if args.sleep > 0:
            while True:
                consume_queue(mq, args.num)
                time.sleep(args.sleep)
        else:
            consume_queue(mq, args.num)
    except KeyboardInterrupt:
        raise SystemExit(0)
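# The consumer and generator scripts above call seteuser() before touching the
# directory queue, but the helper itself is not shown. Below is a minimal sketch of
# what it could look like, assuming it only drops the effective group and user IDs to
# the pwd entry passed in; this is a hypothetical implementation, not the project's code.
import os
import pwd

def seteuser(pw_entry):
    """Switch the effective GID/UID to the given pwd entry, e.g. pwd.getpwnam('nobody')."""
    # change the group first, while the process still has the privilege to do so
    os.setegid(pw_entry.pw_gid)
    os.seteuid(pw_entry.pw_uid)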
def main():
    parser = argparse.ArgumentParser()
    lobj = log.Logger(sys.argv[0])
    logger = lobj.get()
    confopts = config.parse_config(logger)

    nagioshost = confopts['general']['host']
    tz = pytz.timezone(confopts['general']['timezone'])
    timestamp = datetime.datetime.now(tz).strftime('%Y-%m-%dT%H:%M:%SZ')

    parser.add_argument('--queue', required=True, nargs='+')
    # msg headers
    parser.add_argument('--service', required=True, type=str)
    parser.add_argument('--hostname', required=True, type=str)
    parser.add_argument('--testname', required=True, type=str)
    parser.add_argument('--status', required=True, type=str)
    # msg body
    parser.add_argument('--details', required=False, type=str)
    parser.add_argument('--vo', required=False, type=str)
    parser.add_argument('--site', required=False, type=str)
    parser.add_argument('--roc', required=False, type=str)
    parser.add_argument('--urlhistory', required=False, type=str)
    parser.add_argument('--urlhelp', required=False, type=str)
    args = parser.parse_args()

    seteuser(pwd.getpwnam(confopts['general']['runasuser']))

    try:
        for q in args.queue:
            granularity = config.get_queue_granul(q)
            mq = DQS(path=q, granularity=granularity)
            msg = build_msg(args, timestamp, args.service, args.hostname,
                            args.testname, args.status, nagioshost)
            mq.add_message(msg)
    except MessageError as e:
        logger.error('Error constructing alarm - %s', repr(e))
    except KeyError:
        logger.error('No configured Queue for directory %s' % q)
        queue_paths = list()
        for (k, v) in confopts['queues'].items():
            queue_paths.append('{0} - {1}'.format(k, v['directory']))
        logger.error('Queues and directories found in config: %s' % ', '.join(queue_paths))
        raise SystemExit(1)
    except (OSError, IOError) as e:
        logger.error(e)
        raise SystemExit(1)
def enqueue(dirq, destination, event):
    mq_header = {
        'measurement_agent': socket.gethostname(),
        'destination': destination
    }
    if 'timestamp' not in event.keys():
        event['timestamp'] = time.time()
    mq_body = json.dumps(event)
    msg = Message(body=mq_body, header=mq_header)
    msg.is_text = True
    mq = DQS(path=dirq)
    mq.add_message(msg)
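# Usage sketch for enqueue() above. The queue path, topic and event fields are made-up
# examples; only the 'timestamp' handling and the DQS/Message calls come from the
# function itself.
if __name__ == "__main__":
    sample_event = {"event_type": "throughput", "value": 941.2}  # hypothetical payload
    enqueue("/var/spool/perfsonar-queue", "/topic/perfsonar.raw.throughput", sample_event)
    # enqueue() fills in event['timestamp'] when missing and writes a single message
    # into the directory queue at the given path.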
def __init__(self, name, configFile):
    """ Initializer for the object. """
    Service.__init__(self, name, configFile)
    self.id = self.param('id')
    self.udp_port = int(self.param('udp_port'))
    self.udp_host = self.param('udp_host')
    if self.udp_host == 'hostname':
        self.udp_host = socket.gethostname()
    try:
        global gmq
        gmq = DQS(path=self.param('localQueue'))
        self._logger.info("Created connection to local queue %s" % self.param('localQueue'))
    except Exception as err:
        self._logger.error("Failing connection to local queue %s" % (err))
        raise Exception
    self._logger.info('UDP listener on %s:%s' % (self.udp_host, self.udp_port))
    self.server = ThreadedUDPServer((self.udp_host, self.udp_port), CMSSWUDPHandler)
    self._logger.info('created server. going to serve_forever in thread')
    self.server_thread = threading.Thread(target=self.server.serve_forever)
    self.server_thread.start()
    self._logger.info('Server loop running in thread: %s' % self.server_thread.name)
def test_full_chain(self):
    """ Test kombu full chain. """
    print("checking kombu full chain")
    try:
        import kombu
    except ImportError:
        print("kombu is not available, skipping it")
        return
    mq1_path = self.path + "/mq1"
    mq2_path = self.path + "/mq2"
    mq1 = DQS(path=mq1_path)
    count = 10
    dest = "/queue/test%s" % (rndstr(10), )
    bodies = list()
    for i in range(count):
        body = "hello world %s" % (i, )
        bodies.append(body)
        msg = Message(body=body)
        msg.header = {"destination": dest}
        mq1.add_message(msg)
    self.assertEqual(count, mq1.count())
    cmd1 = "python bin/amqpclt --incoming-queue path=%s" \
           " --outgoing-broker-uri %s " \
           " --outgoing-broker-module kombu " \
           " --outgoing-broker-auth plain,name=guest,pass=guest" \
           " --remove --loglevel debug" \
           % (mq1_path, self.broker)
    (ret, out, err) = proc.timed_process(cmd1.split())
    self.assertEqual(0, ret, "out: %s\nerr: %s" % (out, err))
    cmd2 = "python bin/amqpclt --incoming-broker-uri %s" \
           " --incoming-broker-module kombu" \
           " --incoming-broker-auth plain,name=guest,pass=guest" \
           " --subscribe destination=%s" \
           " --outgoing-queue path=%s --count %d --reliable " \
           "--loglevel debug" \
           % (self.broker, dest, mq2_path, count)
    (ret, out, err) = proc.timed_process(cmd2.split())
    self.assertEqual(0, ret, "out: %s\nerr: %s" % (out, err))
    mq2 = DQS(path=mq2_path)
    for i in mq2:
        if mq2.lock(i):
            bodies.remove(mq2.get_message(i).body)
    self.assertEqual(count, mq2.count())
    self.assertEqual(0, len(bodies))
    self.assertEqual(0, mq1.count())
    print("checking kombu fullchain OK")
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--session', required=False, default=str(), type=str)
    parser.add_argument('--num', required=False, default=0, type=int)
    parser.add_argument('--queue', required=False, default=default_queue, type=str)
    parser.add_argument('--granularity', required=False, default=60, type=int)
    parser.add_argument('--runas', required=False, default=default_user, type=str)
    parser.add_argument('--noout', required=False, action='store_true', default=False)
    parser.add_argument('--sleep', required=False, default=0, type=float)
    parser.add_argument('--bodysize', required=False, default=40, type=int)
    parser.add_argument('--timezone', required=False, default='UTC', type=str)
    args = parser.parse_args()

    seteuser(pwd.getpwnam(args.runas))

    try:
        tz = timezone(args.timezone)
    except UnknownTimeZoneError:
        print("Timezone not correct")
        raise SystemExit(1)

    mq = DQS(path=args.queue, granularity=args.granularity)

    try:
        if args.num:
            for i in range(args.num):
                msg = construct_msg(args.session, args.bodysize, tz)
                queue_msg(msg, mq)
                if not args.noout:
                    print(msg)
        else:
            while True:
                msg = construct_msg(args.session, args.bodysize, tz)
                queue_msg(msg, mq)
                if not args.noout:
                    print(msg)
                if args.sleep:
                    time.sleep(args.sleep)
    except KeyboardInterrupt:
        raise SystemExit(0)
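# The generator above relies on construct_msg() and queue_msg(), which are not part of
# the snippet. A minimal sketch of what such helpers could look like, assuming
# messaging.message.Message and a random printable body of the requested size; these
# are hypothetical stand-ins, not the project's actual implementations.
import datetime
import random
import string

from messaging.message import Message

def construct_msg(session, bodysize, tz):
    # build a throwaway message with a timestamped header and a body of bodysize characters
    body = ''.join(random.choice(string.ascii_letters) for _ in range(bodysize))
    header = {'timestamp': datetime.datetime.now(tz).isoformat(), 'session': session}
    return Message(body=body, header=header)

def queue_msg(msg, mq):
    # mq is a DQS instance; add_message() stores the message in the directory queue
    return mq.add_message(msg)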
class ActiveMQUploader(Uploader):

    def __init__(self, start=1600, connect='iut2-net3.iu.edu',
                 metricName='org.osg.general-perfsonar-simple.conf'):
        Uploader.__init__(self, start, connect, metricName)
        self.maxMQmessageSize = self.readConfigFile('mq-max-message-size')
        # Code to allow publishing data to the mq
        self.mq = None
        self.dq = self.readConfigFile('directoryqueue')
        self.granularity = int(self.readConfigFile('granularity'))
        if self.dq is not None and self.dq != 'None':
            try:
                self.mq = DQS(path=self.dq, granularity=self.granularity)
            except Exception as e:
                self.add2log("Unable to create dirq %s, exception was %s, " % (self.dq, e))

    # Publish summaries to the MQ
    def publishSToMq(self, arguments, event_types, summaries, summaries_data):
        # the max size limit is configured in KB but Python expects it in bytes
        size_limit = self.maxMQmessageSize * 1000
        for event in summaries_data.keys():
            if not summaries_data[event]:
                continue
            msg_head = {
                'input-source': arguments['input_source'],
                'input-destination': arguments['input_destination'],
                'org_metadata_key': arguments['org_metadata_key'],
                'event-type': event,
                'rsv-timestamp': "%s" % time.time(),
                'summaries': 1,
                'destination': '/topic/perfsonar.summary.' + event
            }
            msg_body = {'meta': arguments}
            msg_body['summaries'] = summaries_data[event]
            size_summ = self.total_size(summaries_data[event])
            msg = Message(body=json.dumps(msg_body), header=msg_head)
            size_msg = msg.size()
            # if the size of the message is larger than the limit, discard it
            if size_msg > size_limit or sys.getsizeof(json.dumps(msg_body)) > size_limit \
                    or size_summ > size_limit:
                self.add2log("Size of message body bigger than limit, discarding")
                continue
            # add to mq
            try:
                self.mq.add_message(msg)
            except Exception as e:
                self.add2log("Failed to add message to mq %s, exception was %s" % (self.dq, e))

    # Publish raw datapoints to the MQ
    def publishRToMq(self, arguments, event_types, datapoints):
        for event in datapoints.keys():
            # filter events for mq (must be a subset of the probe's filter)
            if event not in self.allowedEvents:
                continue
            # skip events that have no datapoints
            if not datapoints[event]:
                continue
            # compose msg
            msg_head = {
                'input-source': arguments['input_source'],
                'input-destination': arguments['input_destination'],
                'org_metadata_key': arguments['org_metadata_key'],
                # include the time start of the smallest measurement
                'ts_start': min(datapoints[event].keys()),
                'event-type': event,
                'rsv-timestamp': "%s" % time.time(),
                'summaries': 0,
                'destination': '/topic/perfsonar.raw.' + event
            }
            msg_body = {'meta': arguments}
            msg_body['datapoints'] = datapoints[event]
            msg = Message(body=json.dumps(msg_body), header=msg_head)
            # add to mq
            try:
                self.mq.add_message(msg)
            except Exception as e:
                self.add2log("Failed to add message to mq %s, exception was %s" % (self.dq, e))

    def postData(self, arguments, event_types, summaries, summaries_data,
                 metadata_key, datapoints):
        summary = self.summary
        disp = self.debug
        length_post = -1
        arguments['org_metadata_key'] = metadata_key
        for event_type in datapoints.keys():
            if len(datapoints[event_type]) > length_post:
                length_post = len(datapoints[event_type])
        if length_post == 0:
            self.add2log("No new datapoints, skipping posting for efficiency")
            return
        if self.mq and summaries_data:
            self.add2log("posting new summaries")
            self.publishSToMq(arguments, event_types, summaries, summaries_data)
        step_size = 100
        for step in range(0, length_post, step_size):
            chunk_datapoints = {}
            for event_type in datapoints.keys():
                chunk_datapoints[event_type] = {}
                if len(datapoints[event_type].keys()) > 0:
                    pointsconsider = sorted(datapoints[event_type].keys())[step:step + step_size]
                    for point in pointsconsider:
                        chunk_datapoints[event_type][point] = datapoints[event_type][point]
            if self.mq:
                self.publishRToMq(arguments, event_types, chunk_datapoints)
        # Update the checkpoint files for each host/metric and metadata
        for event_type in datapoints.keys():
            if len(datapoints[event_type].keys()) > 0:
                if event_type not in self.time_starts:
                    self.time_starts[event_type] = 0
                next_time_start = max(datapoints[event_type].keys()) + 1
                if next_time_start > self.time_starts[event_type]:
                    self.time_starts[event_type] = int(next_time_start)
                f = open(self.tmpDir + metadata_key, 'w')
                f.write(json.dumps(self.time_starts))
                f.close()
        self.add2log("posting NEW METADATA/DATA to Cern Active MQ %s" % metadata_key)
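# publishSToMq() above calls self.total_size() to estimate the summary payload, but that
# helper is not part of the snippet. Below is a minimal sketch of such a recursive sizeof,
# loosely based on the common sys.getsizeof recipe; it is an assumption, not the probe's
# actual code.
import sys
from itertools import chain

def total_size(obj):
    """Approximate the memory footprint of nested dicts/lists/tuples/sets, in bytes."""
    seen = set()

    def sizeof(o):
        if id(o) in seen:  # avoid double counting shared objects
            return 0
        seen.add(id(o))
        size = sys.getsizeof(o)
        if isinstance(o, dict):
            size += sum(map(sizeof, chain.from_iterable(o.items())))
        elif isinstance(o, (list, tuple, set, frozenset)):
            size += sum(map(sizeof, o))
        return size

    return sizeof(obj)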
class ConsumerQueue(StatSig, Process):
    """
    Class represents spawned worker process that will periodically check and
    consume local cache/directory queue. It will initialize associated
    Publisher that will be used to dispatch consumed messages and will also
    spawn a Purger thread that will clean the local cache and keep it with
    the sound data.
    """
    def __init__(self, events, worker=None):
        Process.__init__(self)
        self.shared = Shared(worker=worker)
        super(ConsumerQueue, self).__init__(worker=worker)
        self.name = worker
        self.events = events
        self.sess_consumed = 0
        self.seenmsgs = set()
        self.inmemq = deque()
        self.setup()
        self.purger = Purger(self.events, worker=worker)

    def cleanup(self):
        self.unlock_dirq_msgs(self.seenmsgs)

    def setup(self):
        self.dirq = DQS(path=self.shared.queue['directory'])
        numloop = None
        if (self.shared.topic['bulk'] == 1
                or self.shared.topic['bulk'] >= self.shared.queue['rate']):
            numloop = 1
        elif self.shared.queue['rate'] > self.shared.topic['bulk']:
            numloop = int(self.shared.queue['rate'] / self.shared.topic['bulk'])
        self.pubnumloop = numloop
        self.shared.runtime.update(inmemq=self.inmemq, pubnumloop=self.pubnumloop,
                                   dirq=self.dirq, filepublisher=False)
        self.publisher = self.shared.runtime['publisher'](self.events, worker=self.name)

    def run(self):
        termev = self.events['term-' + self.name]
        usr1ev = self.events['usr1-' + self.name]
        periodev = self.events['period-' + self.name]
        lck = self.events['lck-' + self.name]
        evgup = self.events['giveup-' + self.name]

        while True:
            try:
                if termev.is_set():
                    self.shared.log.info('Process {0} received SIGTERM'.format(self.name))
                    lck.acquire(True)
                    self.stats()
                    self.publisher.stats()
                    self.cleanup()
                    lck.release()
                    termev.clear()
                    raise SystemExit(0)

                if usr1ev.is_set():
                    self.shared.log.info('Process {0} received SIGUSR1'.format(self.name))
                    lck.acquire(True)
                    self.stats()
                    self.publisher.stats()
                    lck.release()
                    usr1ev.clear()

                if periodev.is_set():
                    self.stat_reset()
                    self.publisher.stat_reset()
                    periodev.clear()

                nmsgs_consume = 1 if self.shared.topic['bulk'] == 1 \
                    else max(self.shared.topic['bulk'], self.shared.queue['rate'])

                if self.consume_dirq_msgs(nmsgs_consume):
                    ret, published = self.publisher.write()
                    if ret:
                        self.remove_dirq_msgs()
                    elif published:
                        self.shared.log.error('{0} {1} giving up'.format(
                            self.__class__.__name__, self.name))
                        self.stats()
                        self.publisher.stats()
                        self.remove_dirq_msgs(published)
                        self.unlock_dirq_msgs(
                            set(e[0] for e in self.inmemq).difference(published))
                        evgup.set()
                        raise SystemExit(0)
                    else:
                        self.shared.log.error('{0} {1} giving up'.format(
                            self.__class__.__name__, self.name))
                        self.stats()
                        self.publisher.stats()
                        self.unlock_dirq_msgs()
                        evgup.set()
                        raise SystemExit(0)

                time.sleep(1 / self.shared.queue['rate'])

            except KeyboardInterrupt:
                self.cleanup()
                raise SystemExit(0)

    def _increm_intervalcounters(self, num):
        now = int(time.time())
        counter = self.shared.statint[self.name]['consumed']
        counter[now] = num + counter.get(now, 0)
        self.shared.statint[self.name]['consumed_periodic'] += num

    def consume_dirq_msgs(self, num=0):
        def _inmemq_append(elem):
            self.inmemq.append(elem)
            self._increm_intervalcounters(1)
            self.sess_consumed += 1
            if num and self.sess_consumed == num:
                self.sess_consumed = 0
                self.seenmsgs.clear()
                return True
        try:
            for name in self.dirq:
                if os.stat(self.shared.queue['directory'] + name).st_size < 8:
                    os.unlink(self.shared.queue['directory'] + name)
                if name in self.seenmsgs:
                    continue
                self.seenmsgs.update([name])
                already_lckd = os.path.exists(self.dirq.get_path(name))
                if not already_lckd and self.dirq.lock(name):
                    if _inmemq_append((name, self.dirq.get_message(name))):
                        return True
                elif already_lckd:
                    if _inmemq_append((name, self.dirq.get_message(name))):
                        return True
        except Exception as e:
            self.shared.log.error(e)
        return False

    def unlock_dirq_msgs(self, msgs=None):
        try:
            msgl = msgs if msgs else self.inmemq
            for m in msgl:
                msg = m[0] if not isinstance(m, str) else m
                if os.path.exists('{0}/{1}'.format(self.dirq.path, msg)):
                    self.dirq.unlock(msg)
            self.inmemq.clear()
        except (OSError, IOError) as e:
            self.shared.log.error(e)

    def remove_dirq_msgs(self, msgs=None):
        try:
            msgl = msgs if msgs else self.inmemq
            for m in msgl:
                msg = m[0] if not isinstance(m, str) else m
                if os.path.exists('{0}/{1}'.format(self.dirq.path, msg)):
                    self.dirq.remove(msg)
            self.inmemq.clear()
        except (OSError, IOError) as e:
            self.shared.log.error(e)
class CMSSWMonCollector(Service):
    """ Class definition of the dashboard CMSSWMonCollector agent. """

    _logger = logging.getLogger("dashboard.collector.CMSSWMonCollector")

    def __init__(self, name, configFile):
        """ Initializer for the object. """
        Service.__init__(self, name, configFile)
        # Hourly purge
        self.PURGE_INTERVAL = 3600
        # DB table where to store the messages
        self.transfers_db_table = self.param('transfersDBTable')
        self.transfers_db_table_rejected = self.param('rejectedDBTable')
        # Maximum number of messages in the buffer when making a bulk insert
        self.buffer_size = int(self.param('bufferSize'))
        self.id = self.param('id')
        self.dbsection = self.param('dbsection')
        self._next_purge = time.time() + self.PURGE_INTERVAL
        # Try to open the local queue
        try:
            self.localQueue = DQS(path=self.param('localQueue'))
        except Exception as e:
            self._logger.error("connection to the local queue failed")

    def run(self):
        """
        Main function of the service. While it is running it inserts messages
        from the messaging server into the database.
        """
        while self.status() is not None:
            (names, bodies) = ([], [])
            msgCount = 0
            for name in self.localQueue:
                if self.localQueue.lock(name):
                    msg = self.localQueue.get_message(name)
                    self.decode_message(msg, bodies)
                    names.append(name)
                    msgCount += 1
                    # Exit the loop when enough messages have been collected
                    if msgCount >= self.buffer_size:
                        break
            (successes, failures, elapsed_time, bulk) = self.insert_messages(names, bodies)
            self._logger.info(
                "%d messages to insert for %s, %d successfully and %d failed in %d ms (bulk = %s)"
                % (msgCount, self.id, successes, failures, elapsed_time, str(bulk)))
            self.purge()
            # Prevent the loop from running continuously when the buffer is not full
            if msgCount != self.buffer_size:
                time.sleep(5)

    def JSON_format(self, message):
        """ Decodes a JSON-formatted message into a Python dictionary. """
        if chr(4) in message:
            # If the message contains an ASCII End of Transmission character,
            # keep only the part before it
            return json.loads(message.split(chr(4))[0])
        else:
            return json.loads(message)

    def delete_messages(self, names):
        """ Removes the given messages from the local queue. """
        for name in names:
            self.localQueue.remove(name)

    def purge(self):
        if time.time() < self._next_purge:
            return
        self.localQueue.purge(60, 60)
        self._next_purge = time.time() + self.PURGE_INTERVAL

    def validate_length(self, bodies):
        a = [len(x.keys()) for x in bodies]
        m = max(a)
        if a[0] < m:
            idx = a.index(m)
            bodies[0], bodies[idx] = bodies[idx], bodies[0]
            self._logger.warning("swap message positions 0 and %s. Missing keys %s"
                                 % (idx, [x for x in bodies[0].keys() if x not in bodies[idx].keys()]))
        return bodies

    def insert_messages(self, names, bodies):
        """ Inserts the decoded message bodies into the database. """
        start = time.time()
        successes, failures, elapsed_time, is_bulk = 0, 0, 0, True
        (ctx, dao) = (None, None)
        try:
            # Get a site DAO to work with
            ctx = DAOContext.getDAOContext(section=self.dbsection)
            dao = DAOFactory.getDAOFactory().getDAOObject(ctx, 'xrootd', 'XRootDDAO')
            # Try to make a bulk insert
            if len(bodies) > 0:
                try:
                    bodies = self.validate_length(bodies)
                    dao.insertMessages(bodies, self.transfers_db_table)
                    successes = len(bodies)
                except Exception as msg:
                    is_bulk = False
                    self._logger.warning("couldn't feed all the data: %s" % msg)
                    self._logger.warning("failed to insert %s messages. Inserting messages one by one"
                                         % len(bodies))
                    # Try to insert the messages one by one if any exception
                    for body in bodies:
                        try:
                            dao.insertMessages(body, self.transfers_db_table)
                            successes += 1
                        except Exception as msg:
                            failures += 1
                            # Try to insert the malformed message in a table without any constraint
                            if self.transfers_db_table_rejected is not None:
                                try:
                                    body['exception'] = str(msg)
                                    dao.insertMessages(body, self.transfers_db_table_rejected)
                                except Exception:
                                    self._logger.warning("Couldn't feed data: %s" % msg)
            ctx.commit()
            self.delete_messages(names)
        except Exception as msg:
            # maybe it would be necessary to handle database problems here
            # (a downtime, for instance)
            self._logger.error("%s" % msg)
            ctx.destroy()
            raise Exception
        end = time.time()
        ms = 1000 * (end - start)
        return (successes, failures, int(ms), is_bulk)

    def decode_message(self, message, bodies):
        """ Decodes a raw queue message and appends the parsed body to bodies. """
        try:
            body = message.get_body()
            body = body.replace(', ,', ',')
            body = body.replace(':-nan,', ':null,').replace(':nan,', ':null,')
            msgDict = self.JSON_format(body)
            try:
                if msgDict['fallback'] == True:
                    msgDict['fallback'] = '1'
                else:
                    msgDict['fallback'] = '0'
            except Exception:
                msgDict['fallback'] = '-'
            # convert time since Epoch to datetime
            msgDict['start_date'] = datetime.utcfromtimestamp(int(msgDict['start_time']))
            msgDict['end_date'] = datetime.utcfromtimestamp(int(msgDict['end_time']))
            bodies.append(msgDict)
        except ValueError as msg:
            self._logger.warning("Impossible to decode the message: %s by JSON" % message)
            self._logger.error(msg)
        except Exception as msg:
            self._logger.warning("Exception: %s" % msg)
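# Quick illustration of the EOT handling in JSON_format() above: collectors sometimes
# receive bodies terminated by an ASCII End of Transmission character (chr(4)), which
# json.loads() would reject, so everything from that character onwards is dropped first.
# The sample body below is made up.
import json

raw = '{"start_time": 1700000000, "end_time": 1700000060, "fallback": false}' + chr(4)
payload = raw.split(chr(4))[0] if chr(4) in raw else raw
print(json.loads(payload)["end_time"])  # -> 1700000060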
class EsmondUploader(object):

    def add2log(self, log):
        print("%s %s" % (strftime("%a, %d %b %Y %H:%M:%S", localtime()), str(log)))

    def __init__(self, verbose, start, end, connect, username=None, key=None, goc=None,
                 allowedEvents='packet-loss-rate', cert=None, certkey=None, dq=None,
                 tmp='/tmp/rsv-perfsonar/'):
        # Filter variables
        filters.verbose = verbose
        # these are the filters that will later be used for the data
        self.time_end = int(time.time())
        self.time_start = int(self.time_end - start)
        self.time_max_start = int(time.time()) - 24*60*60
        # Filter for metadata
        filters.time_start = int(self.time_end - 3*start)
        # Added time_end for a bug that Andy found: sometimes it was 24 hours in the future
        filters.time_end = self.time_end + 24*60*60
        # For logging purposes
        filterDates = (strftime("%a, %d %b %Y %H:%M:%S ", time.gmtime(self.time_start)),
                       strftime("%a, %d %b %Y %H:%M:%S", time.gmtime(self.time_end)))
        self.add2log("Data interval is from %s to %s" % filterDates)
        self.add2log("Metadata interval is from %s to now" % (filters.time_start))
        # gfilters, and in general g*, means connecting to the cassandra db at the central place, i.e. the goc
        gfilters.verbose = False
        gfilters.time_start = int(self.time_end - 5*start)
        gfilters.time_end = self.time_end
        gfilters.input_source = connect
        # Username/Key/Location/Delay
        self.connect = connect
        self.username = username
        self.key = key
        self.goc = goc
        self.conn = SocksSSLApiConnect("http://" + self.connect, filters)
        self.gconn = ApiConnect(self.goc, gfilters)
        self.cert = cert
        self.certkey = certkey
        self.tmpDir = tmp + '/' + self.connect + '/'
        # Convert allowedEvents into a list
        self.allowedEvents = allowedEvents.split(',')
        # In general do not use SSL for contacting the perfsonar hosts
        self.useSSL = False
        # Code to allow publishing data to the mq
        self.mq = None
        self.dq = dq
        if self.dq is not None and self.dq != 'None':
            try:
                self.mq = DQS(path=self.dq, granularity=5)
            except Exception as e:
                self.add2log("Unable to create dirq %s, exception was %s, " % (self.dq, e))

    # Publish message to the MQ
    def publishToMq(self, arguments, event_types, datapoints, summaries_data):
        for event in datapoints.keys():
            # filter events for mq (must be a subset of the probe's filter)
            if event not in ('path-mtu', 'histogram-owdelay', 'packet-loss-rate',
                             'histogram-ttl', 'throughput', 'packet-retransmits',
                             'packet-trace'):
                continue
            # skip events that have no datapoints
            if not datapoints[event]:
                continue
            # compose msg
            msg_head = {
                'input-source': arguments['input_source'],
                'input-destination': arguments['input_destination'],
                'event-type': event,
                'rsv-timestamp': "%s" % time.time(),
                'summaries': 0,
                'destination': '/topic/perfsonar.' + event
            }
            msg_body = {'meta': arguments}
            if summaries_data[event]:
                msg_body['summaries'] = summaries_data[event]
                msg_head['summaries'] = 1
            if datapoints[event]:
                msg_body['datapoints'] = datapoints[event]
            msg = Message(body=json.dumps(msg_body), header=msg_head)
            # add to mq
            try:
                self.mq.add_message(msg)
            except Exception as e:
                self.add2log("Failed to add message to mq %s, exception was %s" % (self.dq, e))

    # Get Data
    def getData(self, disp=False, summary=True):
        self.add2log("Only reading data for event types: %s" % (str(self.allowedEvents)))
        if summary:
            self.add2log("Reading Summaries")
        else:
            self.add2log("Omitting Summaries")
        metadata = self.conn.get_metadata()
        try:
            # Test whether the plain http connection is successful
            md = next(metadata)
            self.readMetaData(md, disp, summary)
        except Exception as e:
            # Fall back to SSL if the plain connection failed
            self.add2log("Unable to connect to %s, exception was %s, trying SSL"
                         % ("http://" + self.connect, e))
            try:
                metadata = self.conn.get_metadata(cert=self.cert, key=self.certkey)
                md = next(metadata)
                self.useSSL = True
                self.readMetaData(md, disp, summary)
            except Exception as e:
                raise Exception("Unable to connect to %s, exception was %s, "
                                % ("https://" + self.connect, e))
        for md in metadata:
            self.readMetaData(md, disp, summary)

    # md is a metadata object of the query
    def readMetaData(self, md, disp=False, summary=True):
        # Building the arguments for the post
        arguments = {
            "subject_type": md.subject_type,
            "source": md.source,
            "destination": md.destination,
            "tool_name": md.tool_name,
            "measurement_agent": md.measurement_agent,
            "input_source": md.input_source,
            "input_destination": md.input_destination
        }
        if md.time_duration is not None:
            arguments["time_duration"] = md.time_duration
        if md.ip_transport_protocol is not None:
            arguments["ip_transport_protocol"] = md.ip_transport_protocol
        # Assigning each metadata object property to class variables
        event_types = md.event_types
        metadata_key = md.metadata_key
        # print extra debugging only if requested
        self.add2log("Reading New METADATA/DATA %s" % (md.metadata_key))
        if disp:
            self.add2log("Posting args: ")
            self.add2log(arguments)
        # Get Events and Data Payload
        summaries = {}
        summaries_data = {}
        # datapoints is a dict of dicts; each member holds the datapoints of a given event_type
        datapoints = {}
        datapointSample = {}
        # load next start times
        self.time_starts = {}
        try:
            f = open(self.tmpDir + md.metadata_key, 'r')
            self.time_starts = json.loads(f.read())
            f.close()
        except IOError:
            self.add2log("first time for %s" % (md.metadata_key))
        except ValueError:
            # decoding failed
            self.add2log("first time for %s" % (md.metadata_key))
        for et in md.get_all_event_types():
            if self.useSSL:
                etSSL = EventTypeSSL(et, self.cert, self.certkey)
                et = etSSL
            # Adding the time_end filter for the data since it is not used for the metadata;
            # use the previously recorded start time if available
            et.filters.time_start = self.time_start
            if et.event_type in self.time_starts.keys():
                et.filters.time_start = self.time_starts[et.event_type]
                self.add2log("loaded previous time_start %s" % et.filters.time_start)
            # Do not go indefinitely into the past, at most one day
            if et.filters.time_start < self.time_max_start:
                self.add2log("previous time_start %s too old. New time_start today - 24h: %s"
                             % (et.filters.time_start, self.time_max_start))
                et.filters.time_start = self.time_max_start
            et.filters.time_end = filters.time_end
            eventype = et.event_type
            datapoints[eventype] = {}
            if summary:
                summaries[eventype] = et.summaries
            else:
                summaries[eventype] = []
            # Skip reading data points for certain event types to improve efficiency
            if eventype not in self.allowedEvents:
                continue
            # Read summary data
            summaries_data[eventype] = []
            for summ in et.get_all_summaries():
                if self.useSSL:
                    summSSL = SummarySSL(summ, self.cert, self.certkey)
                    summ = summSSL
                summ_data = summ.get_data()
                summ_dp = [(dp.ts_epoch, dp.val) for dp in summ_data.data]
                if not summ_dp:
                    continue
                summaries_data[eventype].append({'event_type': eventype,
                                                 'summary_type': summ.summary_type,
                                                 'summary_window': summ.summary_window,
                                                 'summary_data': summ_dp})
            # Read datapoints
            dpay = et.get_data()
            tup = ()
            for dp in dpay.data:
                tup = (dp.ts_epoch, dp.val)
                datapoints[eventype][dp.ts_epoch] = dp.val
            # print debugging data
            self.add2log("For event type %s, %d new data points" % (eventype, len(datapoints[eventype])))
            if len(datapoints[eventype]) > 0 and not isinstance(tup[1], (dict, list)):
                # picking one point as the sample
                datapointSample[eventype] = tup[1]
        self.add2log("Sample of the data being posted %s" % datapointSample)
        try:
            self.postData(arguments, event_types, summaries, summaries_data,
                          metadata_key, datapoints, summary, disp)
        except Exception as e:
            raise Exception("Unable to post to %s, because exception %s. Check postgresql and "
                            "cassandra services are up. Then check user and key are ok "
                            % (self.goc, e))

    def postDataSlow(self, json_payload, new_metadata_key, original_datapoints, disp=False):
        data = json_payload["data"]
        for data_point in data:
            epoch = data_point['ts']
            datapoints = data_point["val"]
            for datavalue in datapoints:
                new_event_type = datavalue['event-type']
                value = datavalue['val']
                et = EventTypeBulkPost(self.goc, username=self.username,
                                       api_key=self.key, metadata_key=new_metadata_key)
                et.add_data_point(new_event_type, epoch, value)
                try:
                    et.post_data()
                    if epoch >= self.time_starts[new_event_type]:
                        self.time_starts[new_event_type] = epoch + 1
                        f = open(self.tmpDir + metadata_key, 'w')
                        f.write(json.dumps(self.time_starts))
                        f.close()
                except Exception as err:
                    self.add2log("Exception adding new point: %s" % err)
                    self.add2log(et.json_payload())
                    continue

    # Experimental function to try to recover from missing packet-count-sent or packet-count-lost data
    def getMissingData(self, timestamp, metadata_key, event_type, disp=False):
        filtersEsp = ApiFilters()
        filtersEsp.verbose = disp
        filtersEsp.metadata_key = metadata_key
        filtersEsp.time_start = timestamp - 30000
        filtersEsp.time_end = timestamp + 30000
        conn = SocksSSLApiConnect("http://" + self.connect, filtersEsp)
        if self.useSSL:
            metadata = conn.get_metadata(cert=self.cert, key=self.certkey)
        else:
            metadata = conn.get_metadata()
        datapoints = {}
        datapoints[event_type] = {}
        for md in metadata:
            if not md.metadata_key == metadata_key:
                continue
            et = md.get_event_type(event_type)
            if self.useSSL:
                etSSL = EventTypeSSL(et, self.cert, self.certkey)
                et = etSSL
            dpay = et.get_data()
            for dp in dpay.data:
                if dp.ts_epoch == timestamp:
                    self.add2log("point found")
                    datapoints[event_type][dp.ts_epoch] = dp.val
        return datapoints

    def postMetaData(self, arguments, event_types, summaries, summaries_data,
                     metadata_key, datapoints, summary=True, disp=False):
        mp = MetadataPost(self.goc, username=self.username, api_key=self.key, **arguments)
        for event_type in summaries.keys():
            mp.add_event_type(event_type)
            if summary:
                summary_window_map = {}
                # organize summary windows by type so that all windows of the same type are in an array
                for summy in summaries[event_type]:
                    if summy[0] not in summary_window_map:
                        summary_window_map[summy[0]] = []
                    summary_window_map[summy[0]].append(summy[1])
                # Add each summary type once and give the post object the array of windows
                for summary_type in summary_window_map:
                    mp.add_summary_type(event_type, summary_type, summary_window_map[summary_type])
        # Add the old metadata key
        mp.add_freeform_key_value("org_metadata_key", metadata_key)
        new_meta = mp.post_metadata()
        return new_meta

    # Post data points from a metadata
    def postBulkData(self, new_meta, old_metadata_key, datapoints, disp=False):
        et = EventTypeBulkPost(self.goc, username=self.username, api_key=self.key,
                               metadata_key=new_meta.metadata_key)
        for event_type in datapoints.keys():
            for epoch in datapoints[event_type]:
                # packet-loss-rate is read as a float but should be uploaded as a dict
                # with denominator and numerator
                if event_type in ['packet-loss-rate', 'packet-loss-rate-bidir']:
                    # Some extra protection in case the number of datapoints in
                    # packet-count-sent and packet-loss-rate does not match
                    packetcountsent = 210
                    packetcountlost = 0
                    specialTypes = ['packet-count-sent', 'packet-count-lost']
                    if event_type == 'packet-loss-rate-bidir':
                        specialTypes = ['packet-count-sent', 'packet-count-lost-bidir']
                    for specialType in specialTypes:
                        if epoch not in datapoints[specialType].keys():
                            self.add2log("Something went wrong: time epoch %s not found for %s, fixing it"
                                         % (epoch, specialType))
                            time.sleep(5)
                            datapoints_added = self.getMissingData(epoch, old_metadata_key, specialType)
                            # Try to get the data once more because we know it is there
                            try:
                                value = datapoints_added[specialType][epoch]
                            except Exception as err:
                                datapoints_added[specialType][epoch] = 0
                                value = datapoints_added[specialType][epoch]
                            datapoints[specialType][epoch] = value
                            et.add_data_point(specialType, epoch, value)
                    packetcountsent = datapoints['packet-count-sent'][epoch]
                    if event_type == 'packet-loss-rate-bidir':
                        packetcountlost = datapoints['packet-count-lost-bidir'][epoch]
                    else:
                        packetcountlost = datapoints['packet-count-lost'][epoch]
                    et.add_data_point(event_type, epoch,
                                      {'denominator': packetcountsent, 'numerator': packetcountlost})
                # For the rest the data points are uploaded as they are read
                else:
                    # datapoints map each epoch to its value
                    et.add_data_point(event_type, epoch, datapoints[event_type][epoch])
        if disp:
            self.add2log("Datapoints to upload:")
            self.add2log(et.json_payload())
        with warnings.catch_warnings(record=True) as w:
            warnings.simplefilter('error', EventTypePostWarning)
            try:
                et.post_data()
            # Some EventTypePostWarning went wrong:
            except Exception as err:
                self.add2log("Probably this data already existed")
        for event_type in datapoints.keys():
            if len(datapoints[event_type].keys()) > 0:
                if event_type not in self.time_starts:
                    self.time_starts[event_type] = 0
                next_time_start = max(datapoints[event_type].keys()) + 1
                if next_time_start > self.time_starts[event_type]:
                    self.time_starts[event_type] = int(next_time_start)
                f = open(self.tmpDir + old_metadata_key, 'w')
                f.write(json.dumps(self.time_starts))
                f.close()
        self.add2log("posting NEW METADATA/DATA %s" % new_meta.metadata_key)

    def postData(self, arguments, event_types, summaries, summaries_data, metadata_key,
                 datapoints, summary=True, disp=False):
        length_post = -1
        for event_type in datapoints.keys():
            if len(datapoints[event_type]) > length_post:
                length_post = len(datapoints[event_type])
        new_meta = self.postMetaData(arguments, event_types, summaries, summaries_data,
                                     metadata_key, datapoints, summary, disp)
        # Catching bad posts
        if new_meta is None:
            raise Exception("Post metadata empty, possible problem with user and key")
        if length_post == 0:
            self.add2log("No new datapoints, skipping posting for efficiency")
            return
        step_size = 100
        for step in range(0, length_post, step_size):
            chunk_datapoints = {}
            for event_type in datapoints.keys():
                chunk_datapoints[event_type] = {}
                if len(datapoints[event_type].keys()) > 0:
                    pointsconsider = sorted(datapoints[event_type].keys())[step:step + step_size]
                    for point in pointsconsider:
                        chunk_datapoints[event_type][point] = datapoints[event_type][point]
            self.postBulkData(new_meta, metadata_key, chunk_datapoints, disp=False)
            # Publish to MQ
            if self.mq and new_meta is not None:
                self.publishToMq(arguments, event_types, chunk_datapoints, summaries_data)