def terminate(self):
    """
    Method added when Pollers were reimplemented to run as multi-threaded
    rather than multiprocessing.
    This is a slightly blocking call - it waits for the thread to finish.
    """
    self._stopFlag = True
    self.join(self._threadSleepTime + 0.1)
    if self.is_alive():
        logging.error("Thread %s refuses to finish, continuing." %
                      self.__class__.__name__)
    else:
        logging.debug("Thread %s finished." % self.__class__.__name__)

    # deregister with the receiver
    # (was true for the multiprocessing implementation:
    # has to create a new sender instance and unregister the name;
    # the self.sender instance was created in a different thread in run())
    sender = Sender(self.generator.config.Alert.address,
                    self.__class__.__name__,
                    self.generator.config.Alert.controlAddr)
    sender.unregister()
    # if messages weren't consumed, this should get rid of them
    del sender
    del self.sender
def shutdown(self):
    """
    This method is called from the main AlertGenerator process to
    unregister senders with the receiver. It has to create a new sender
    instance and unregister the name; the self.sender instance created
    in poll() is not visible to this process.
    """
    sender = Sender(self.generator.config.Alert.address,
                    self.__class__.__name__,
                    self.generator.config.Alert.controlAddr)
    sender.unregister()
def run(self):
    """
    This method is called from the AlertGenerator component instance and
    is the entry point for a thread.
    """
    # when running with multiprocessing, this was necessary; stick to it
    # with threading as well - may create some thread-safety issues in ZMQ ...
    self.sender = Sender(self.generator.config.Alert.address,
                         self.__class__.__name__,
                         self.generator.config.Alert.controlAddr)
    self.sender.register()
    counter = self.config.pollInterval
    # want to periodically check whether the thread should finish;
    # it would be impossible to terminate a sleeping thread
    while not self._stopFlag:
        if counter == self.config.pollInterval:
            # check() may seem to take a long time, but that is because
            # a delay is specified for the psutil percentage calls
            self.check()
        counter -= self._threadSleepTime
        if counter <= 0:
            counter = self.config.pollInterval
        if self._stopFlag:
            break
        time.sleep(self._threadSleepTime)
def testSenderBasic(self):
    """
    Test register and unregister messages immediately.
    Alert messages are tested as they are saved in the queue.
    """
    nAlerts = 10
    # start Receiver, handler is a Queue;
    # wait for control messages to arrive and test immediately
    self.alertsQueue = Queue()
    handler = lambda x: self.alertsQueue.put(x)
    self.receiver = Receiver(self.addr, handler, self.control)
    self.receiver.startReceiver()  # non-blocking call

    # instantiate sender and send ...
    s = Sender(self.addr, "Sender_t", self.control)
    # nothing is registered with the Receiver up to now
    self.assertEqual(len(self.receiver._receiver._registSenders), 0)
    s.register()
    # test that RegisterMsg arrived, allow for a delay
    while len(self.receiver._receiver._registSenders) == 0:
        time.sleep(0.2)
    self.assertEqual(len(self.receiver._receiver._registSenders), 1)

    # send some alerts
    for i in range(0, nAlerts):
        a = Alert(Level = i, Type = "Alert")
        s(a)  # actual alert message sending
    s.unregister()
    while len(self.receiver._receiver._registSenders) == 1:
        time.sleep(0.2)
    self.assertEqual(len(self.receiver._receiver._registSenders), 0)

    # this makes sure that the Receiver waits a certain delay even after
    # shutdown is received, in case no more messages are coming
    self.receiver.shutdown()

    # check received alerts in the Queue
    qSize = 0
    while True:
        try:
            self.alertsQueue.get(block = False)
            qSize += 1
        except queues.Empty:
            break
    # .qsize() is not properly implemented in Python 2.7 on e.g. Mac OS
    #self.assertEqual(nAlerts, self.alertsQueue.qsize())
    self.assertEqual(nAlerts, qSize)
def __init__(self, config):
    self.config = config
    self.address = config.address
    logging.info("Instantiating ...")
    self.label = getattr(config, "label", None)
    self.controlAddr = getattr(config, "controlAddr", None)
    self.sender = Sender(self.address,
                         controller = self.controlAddr,
                         label = self.label)
    logging.info("Initialized.")
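# Minimal instantiation sketch (an assumption, not taken from the source above:
# this constructor appears to belong to the ForwardSink used elsewhere in this
# document). The sink forwards alerts to another Receiver through its own
# Sender; the addresses and label below are made-up example values, built with
# ConfigSection in the same way the ForwardSink test does.
from WMCore.Configuration import ConfigSection

forwardConfig = ConfigSection("forward")
forwardConfig.address = "tcp://127.0.0.1:6557"      # destination Receiver's alert channel (example)
forwardConfig.controlAddr = "tcp://127.0.0.1:6559"  # destination Receiver's control channel (example)
forwardConfig.label = "ForwardSinkExample"
# sink = ForwardSink(forwardConfig)  # class name assumed, hence left commented out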
def worker(addr, ctrl, nAlerts, workerId="ForwardSinkTestSource"): """ Send a few alerts. """ s = Sender(addr, ctrl, workerId) s.register() d = dict(very="interesting") [s(Alert(Type="Alert", Level=i, Details=d)) for i in range(0, nAlerts)] s.unregister() s.sendShutdown()
def simpleWorker(addr, ctrl):
    """
    Sender that registers, unregisters and sends a shutdown message.
    """
    s = Sender(addr, "Processor_t", ctrl)
    s.register()
    s.unregister()
    s.sendShutdown()
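# Hypothetical usage sketch (not part of the original test module): running
# simpleWorker() in a separate process against a Receiver, mirroring how the
# other worker() helpers in this document are used. The Receiver import path
# and the tcp addresses are assumptions.
from multiprocessing import Process
from WMCore.Alerts.ZMQ.Receiver import Receiver

addr, ctrl = "tcp://127.0.0.1:5557", "tcp://127.0.0.1:5559"
received = []
receiver = Receiver(addr, received.append, ctrl)
receiver.startReceiver()  # non-blocking

p = Process(target=simpleWorker, args=(addr, ctrl))
p.start()
p.join()  # simpleWorker registers, unregisters and sends the shutdown message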
def run(self):
    """
    This method is called from the AlertGenerator component instance and
    is the entry point for a thread.
    """
    logging.info("Thread %s started - run method." % self.__class__.__name__)
    # when running with multiprocessing, it was necessary to create the
    # sender instance in the same context. Stick to it with threading
    # as well - may create some thread-safety issues in ZMQ ...
    self.sender = Sender(self.generator.config.Alert.address,
                         self.generator.config.Alert.controlAddr,
                         self.__class__.__name__)
    self.sender.register()
    logging.info("Thread %s alert sender created: alert addr: %s "
                 "control addr: %s" %
                 (self.__class__.__name__,
                  self.generator.config.Alert.address,
                  self.generator.config.Alert.controlAddr))
    counter = self.config.pollInterval
    # want to periodically check whether the thread should finish;
    # it would be impossible to terminate a sleeping thread
    while not self._stopFlag:
        if counter == self.config.pollInterval:
            # check() may seem to take a long time, but that is because
            # a delay is specified for the psutil percentage calls
            try:
                logging.debug("Poller %s check ..." % self.__class__.__name__)
                self.check()
            except Exception as ex:
                self._handleFailedPolling(ex)
        counter -= self._threadSleepTime
        if counter <= 0:
            counter = self.config.pollInterval
        if self._stopFlag:
            break
        time.sleep(self._threadSleepTime)
    logging.info("Thread %s - work loop terminated, finished." % self.__class__.__name__)
def testSenderNonBlockingWhenReceiverNotAvailable(self):
    """
    Repeatedly instantiate a Sender, register, send alerts, etc. and test
    that the Sender does not block on undelivered messages when no
    Receiver is available.
    Between iterations this test shall wait only the delay specified in
    the Sender (it used to hang indefinitely due to a -1 default value).
    """
    for i in range(2):
        nAlerts = 3
        # instantiate sender and send ...
        s = Sender(self.addr, "Sender_t", self.control)
        s.register()
        # send some alerts
        for i in range(0, nAlerts):
            a = Alert(Level = 10, Type = "Alert")
            s(a)  # actual alert message sending
        s.unregister()
        # call the destructor explicitly; the hanging should not occur here
        del s
def setUpAlertsMessaging(compInstance, compName=None):
    """
    Set up an Alerts Sender instance, etc.
    Depends on the provided configuration (general section 'Alert').
    Should not break anything if such a config is not provided.

    compInstance is an instance of one of the agent's components which
    set up alerts messaging. Details about the calling component are
    referenced through this variable (e.g. the configuration instance).
    compName is a string containing the name of the component.

    The method is made static since not all components' classes
    participating in alerts messaging inherit from this class.
    """
    callerClassName = compInstance.__class__.__name__
    if hasattr(compInstance, "config") and hasattr(compInstance.config, "Alert"):
        # pre-defined values for Alert instances
        comp = compName or callerClassName
        dictAlert = dict(Type="WMAgent",
                         Workload="n/a",
                         Component=comp,
                         Source=callerClassName)
        preAlert = getPredefinedAlert(**dictAlert)
        # create sender instance (sending alert messages)
        # (2011-09-xx):
        # the import has been put here in order to avoid Alerts->ZMQ
        # dependencies in cases where Alerts are unwanted anyway;
        # the import shall be moved back up once the issue disappears
        from WMCore.Alerts.ZMQ.Sender import Sender
        logging.info("Creating Alerts Sender instance ...")
        sender = Sender(compInstance.config.Alert.address,
                        compInstance.config.Alert.controlAddr,
                        callerClassName)
        sender.register()
        logging.info("Alerts messaging set up for %s" % callerClassName)
        return preAlert, sender
    else:
        logging.info("Alerts messaging not enabled for %s" % callerClassName)
        return None, None
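# Hypothetical usage sketch (not part of WMCore): how a component that carries
# a config with an 'Alert' section might wire up alerts messaging with the
# helper above and send one alert. The class and method names (MyComponent,
# reportProblem) are made up; the Alert field usage mirrors the poller code
# elsewhere in this document.
import time
from WMCore.Alerts.Alert import Alert

class MyComponent(object):
    def __init__(self, config):
        self.config = config
        # returns (None, None) when config.Alert is absent
        self.preAlert, self.sender = setUpAlertsMessaging(self, compName="MyComponent")

    def reportProblem(self, msg):
        if self.sender is None:
            return  # alerts messaging not enabled
        a = Alert(**self.preAlert)
        a["Source"] = self.__class__.__name__
        a["Timestamp"] = time.time()
        a["Details"] = dict(msg=msg)
        a["Level"] = 6
        self.sender(a)  # deliver the alert to the Receiver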
def worker(addr, ctrl, nAlerts, workerId = "ForwardSinkTestSource"): """ Send a few alerts. """ s = Sender(addr, ctrl, workerId) s.register() d = dict(very = "interesting") [s(Alert(Type = "Alert", Level = i, Details = d)) for i in range(0, nAlerts)] s.unregister() s.sendShutdown()
def testSenderReceiverBasic(self):
    sender = Sender(self.config.Alert.address,
                    self.config.Alert.controlAddr,
                    self.__class__.__name__)
    handler, receiver = utils.setUpReceiver(self.config.Alert.address,
                                            self.config.Alert.controlAddr)
    a = Alert(Component=inspect.stack()[0][3])
    sender(a)
    while len(handler.queue) == 0:
        time.sleep(0.5)
        print "%s waiting for alert to arrive" % inspect.stack()[0][3]
    receiver.shutdown()
    self.assertEqual(len(handler.queue), 1)
    self.assertEqual(handler.queue[0]["Component"], inspect.stack()[0][3])
def worker(addr, ctrl, nAlerts, workerId = "Processor_t"): """ Instantiate an alert Sender instance and register with Received instance identified by addr (alerts channel), ctrl (control channel) addresses. Then send a desired amount of alerts, unregister and send Shutdown control message instructing the Receive to stop and release sockets. """ s = Sender(addr, ctrl, workerId) s.register() for i in range(0, nAlerts): a = Alert(Type = "Alert", Level = i) s(a) s.unregister() s.sendShutdown()
def worker(addr, ctrl, nAlerts, workerId = "Processor_t"): """ Send a few alerts. """ s = Sender(addr, ctrl, workerId) s.register() for i in range(0, nAlerts): a = Alert(Type = "Alert", Level = i) s(a) s.unregister() s.sendShutdown()
def testSenderBasic(self):
    """
    Test register and unregister messages immediately.
    Alert messages are tested as they are saved in the queue.
    """
    nAlerts = 10
    # start Receiver, handler is a list for alerts;
    # wait for control messages to arrive and test immediately
    alertsQueue = []
    handler = lambda x: alertsQueue.append(x)
    self.receiver = Receiver(self.addr, handler, self.control)
    self.receiver.startReceiver()  # non-blocking call

    # instantiate sender and send ...
    s = Sender(self.addr, "Sender_t", self.control)
    # nothing is registered with the Receiver up to now
    self.assertEqual(len(self.receiver._receiver._registSenders), 0)
    s.register()
    # test that RegisterMsg arrived, allow for a delay
    while len(self.receiver._receiver._registSenders) == 0:
        time.sleep(0.2)
    self.assertEqual(len(self.receiver._receiver._registSenders), 1)

    # send some alerts
    for i in range(0, nAlerts):
        a = Alert(Level = i, Type = "Alert")
        s(a)  # actual alert message sending
    s.unregister()
    while len(self.receiver._receiver._registSenders) == 1:
        time.sleep(0.2)
    self.assertEqual(len(self.receiver._receiver._registSenders), 0)

    # this makes sure that the Receiver waits a certain delay even after
    # shutdown is received, in case no more messages are coming
    self.receiver.shutdown()
    self.assertEqual(nAlerts, len(alertsQueue))
def testProcessorWithReceiver(self):
    """
    Test startup and shutdown of the processor in the receiver.
    """
    processor = Processor(self.config.AlertProcessor)
    # Receiver is waited for / shut down explicitly in tearDown()
    self.receiver = Receiver(self.addr, processor, self.ctrl)
    self.receiver.startReceiver()  # non-blocking call
    # now the sender tests control messages (register, unregister, shutdown)
    s = Sender(self.addr, self.ctrl, "Processor_t")
    s.register()
    s.unregister()
    s.sendShutdown()
    # give some time so that the previous call shuts down the receiver
    time.sleep(ReceiverLogic.TIMEOUT_AFTER_SHUTDOWN * 1.1)
def poll(self):
    """
    This method is called from the AlertGenerator component instance and
    is the entry point for a different process.
    The Sender instance needs to be created here - each poller instance
    has its own sender instance.
    """
    self.sender = Sender(self.generator.config.Alert.address,
                         self.__class__.__name__,
                         self.generator.config.Alert.controlAddr)
    self.sender.register()
    while True:
        # check() may seem to take a long time, but that is because
        # a delay is specified for the psutil percentage calls
        self.check()
        time.sleep(self.config.pollInterval)
def testBasePollerHandleFailedPolling(self):
    config = getConfig("/tmp")
    # create some nonsense config section, just need a bunch of values defined
    config.AlertGenerator.section_("bogusPoller")
    config.AlertGenerator.bogusPoller.soft = 5  # [percent]
    config.AlertGenerator.bogusPoller.critical = 50  # [percent]
    config.AlertGenerator.bogusPoller.pollInterval = 2  # [second]
    config.AlertGenerator.bogusPoller.period = 10

    generator = utils.AlertGeneratorMock(config)
    poller = BasePoller(config.AlertGenerator.bogusPoller, generator)
    ex = Exception("test exception")

    class Sender(object):
        def __call__(self, alert):
            self.alert = alert

    poller.sender = Sender()
    poller._handleFailedPolling(ex)
    self.assertEqual(poller.sender.alert["Source"], "BasePoller")
def testProcessorWithReceiver(self):
    """
    Test startup and shutdown of the processor in the receiver.
    """
    processor = Processor(self.config.AlertProcessor)
    rec = Receiver(self.addr, processor, self.ctrl)
    rec.startReceiver()  # non-blocking call
    # now the sender tests control messages (register, unregister, shutdown)
    s = Sender(self.addr, self.ctrl, "Processor_t")
    s.register()
    s.unregister()
    s.sendShutdown()
    # wait until the Receiver is shut down by the above control messages
    while rec.isReady():
        time.sleep(0.3)
        print "%s waiting for Receiver to shut ..." % inspect.stack()[0][3]
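# Illustrative sketch (an assumption, not taken from the tests above): a
# minimal AlertProcessor configuration of the shape the Processor in these
# tests expects - critical/soft levels plus a sink section per severity,
# following the structure used in the ForwardSink test further below. The
# FileSink output paths are made-up example values.
from WMCore.Configuration import Configuration

config = Configuration()
config.component_("AlertProcessor")
config.AlertProcessor.section_("critical")
config.AlertProcessor.section_("soft")
config.AlertProcessor.critical.level = 5   # alerts with Level >= 5 are treated as critical
config.AlertProcessor.soft.level = 0       # everything below critical is soft
config.AlertProcessor.soft.bufferSize = 0  # do not buffer soft alerts before writing them out
config.AlertProcessor.critical.section_("sinks")
config.AlertProcessor.soft.section_("sinks")
config.AlertProcessor.critical.sinks.section_("file")
config.AlertProcessor.soft.sinks.section_("file")
config.AlertProcessor.critical.sinks.file.outputfile = "/tmp/alerts-critical.json"  # example path
config.AlertProcessor.soft.sinks.file.outputfile = "/tmp/alerts-soft.json"          # example path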
def testSenderNonBlockingWhenReceiverNotAvailable(self):
    """
    Repeatedly instantiate a Sender, register, send alerts, etc. and test
    that the Sender does not block on undelivered messages when no
    Receiver is available.
    Between iterations this test shall wait only the delay specified in
    the Sender.
    """
    iterations = 2
    nAlerts = 3
    for i in range(iterations):
        # instantiate sender and send ...
        s = Sender(self.addr, self.control, "Sender_t")
        s.register()
        # send some alerts
        for i in range(0, nAlerts):
            a = Alert(Level=10, Type="Alert")
            s(a)  # actual alert message sending
        s.unregister()
        # call the destructor explicitly; the hanging should not occur here
        del s
def testSenderBasic(self):
    """
    Test register and unregister messages immediately.
    Alert messages are tested as they are saved in the queue.
    """
    nAlerts = 10
    # start Receiver, handler is a list for alerts;
    # wait for control messages to arrive and test immediately
    alertsQueue = []
    handler = lambda x: alertsQueue.append(x)
    self.receiver = Receiver(self.addr, handler, self.control)
    self.receiver.startReceiver()  # non-blocking call

    # instantiate sender and send ...
    s = Sender(self.addr, self.control, "Sender_t")
    # nothing is registered with the Receiver up to now
    self.assertEqual(len(self.receiver._receiver._registSenders), 0)
    s.register()
    # test that RegisterMsg arrived, allow for a delay
    while len(self.receiver._receiver._registSenders) == 0:
        time.sleep(0.2)
    self.assertEqual(len(self.receiver._receiver._registSenders), 1)

    # send some alerts
    for i in range(0, nAlerts):
        a = Alert(Level=i, Type="Alert")
        s(a)  # actual alert message sending
    s.unregister()
    while len(self.receiver._receiver._registSenders) == 1:
        time.sleep(0.2)
    self.assertEqual(len(self.receiver._receiver._registSenders), 0)

    # this makes sure that the Receiver waits a certain delay even after
    # shutdown is received, in case no more messages are coming
    self.receiver.shutdown()
    self.assertEqual(nAlerts, len(alertsQueue))
class BasePoller(threading.Thread):
    """
    Base class for various pollers running as threads. Each poller creates
    its own Sender instance.
    Starting from the Thread entry point method run(), methods run in a
    different thread context. The only shared variable shall be _stopFlag.
    """
    def __init__(self, config, generator):
        threading.Thread.__init__(self)
        # it's the particular Poller config only
        self.config = config
        # reference to the AlertGenerator instance
        self.generator = generator
        # store levels (critical, soft) for critical, soft thresholds correspondence
        # these values are defined in the AlertProcessor config
        # self.levels and self.thresholds have to have the same corresponding order
        # and critical has to be first - if this threshold is caught, there is no
        # point testing the soft one
        # this belongs to the AlertGenerator and is in fact dependent on AlertProcessor
        # by referencing these two values - not sure whether to tolerate such a dependency
        # or configure these two values independently in AlertGenerator itself (surely a
        # possible mismatch would make a bit of chaos)
        self.levels = [self.generator.config.AlertProcessor.critical.level,
                       self.generator.config.AlertProcessor.soft.level]
        # critical, soft threshold values
        self.thresholds = [self.config.critical, self.config.soft]
        # pre-generated alert values; before sending, a new instance is always created
        # and these values are used to update the newly created instance
        dictAlert = dict(Type = "WMAgent",
                         Workload = "n/a",
                         Component = self.generator.__class__.__name__,
                         Source = "<to_overwrite>")
        self.preAlert = alertAPI.getPredefinedAlert(**dictAlert)
        # flag controlling the run of the Thread
        self._stopFlag = False
        # thread's own sleep time
        self._threadSleepTime = 0.2  # seconds

    def _handleFailedPolling(self, ex):
        """
        Handle (log and send an alert) if polling failed.
        """
        trace = traceback.format_exception(*sys.exc_info())
        traceString = '\n '.join(trace)
        errMsg = ("Polling failed in %s, reason: %s" % (self.__class__.__name__, ex))
        logging.error("%s\n%s" % (errMsg, traceString))
        a = Alert(**self.preAlert)
        a["Source"] = self.__class__.__name__
        a["Timestamp"] = time.time()
        a["Details"] = dict(msg = errMsg)
        a["Level"] = 10
        logging.info("Sending an alert (%s): %s" % (self.__class__.__name__, a))
        self.sender(a)

    def run(self):
        """
        This method is called from the AlertGenerator component instance and
        is the entry point for a thread.
        """
        logging.info("Thread %s started - run method." % self.__class__.__name__)
        # when running with multiprocessing, it was necessary to create the
        # sender instance in the same context. Stick to it with threading
        # as well - may create some thread-safety issues in ZMQ ...
        self.sender = Sender(self.generator.config.Alert.address,
                             self.generator.config.Alert.controlAddr,
                             self.__class__.__name__)
        self.sender.register()
        logging.info("Thread %s alert sender created: alert addr: %s "
                     "control addr: %s" %
                     (self.__class__.__name__,
                      self.generator.config.Alert.address,
                      self.generator.config.Alert.controlAddr))
        counter = self.config.pollInterval
        # want to periodically check whether the thread should finish;
        # it would be impossible to terminate a sleeping thread
        while not self._stopFlag:
            if counter == self.config.pollInterval:
                # check() may seem to take a long time, but that is because
                # a delay is specified for the psutil percentage calls
                try:
                    logging.debug("Poller %s check ..." % self.__class__.__name__)
                    self.check()
                except Exception as ex:
                    self._handleFailedPolling(ex)
            counter -= self._threadSleepTime
            if counter <= 0:
                counter = self.config.pollInterval
            if self._stopFlag:
                break
            time.sleep(self._threadSleepTime)
        logging.info("Thread %s - work loop terminated, finished." % self.__class__.__name__)
import sys
import time

from WMCore.Alerts import API as alertAPI
from WMCore.Alerts.Alert import Alert
from WMCore.Alerts.ZMQ.Sender import Sender


machine = "maxatest.cern.ch"
target = "tcp://%s:6557" % machine
targetController = "tcp://%s:6559" % machine

if len(sys.argv) > 2:
    target = sys.argv[1]
    targetController = sys.argv[2]

dictAlert = dict(Type="AlertTestClient",
                 Workload="n/a",
                 Component=__name__,
                 Source=__name__)
preAlert = alertAPI.getPredefinedAlert(**dictAlert)

sender = Sender(target, targetController, "AlertTestClient")
print ("created Sender client for alerts target: %s controller: %s" %
       (target, targetController))
sender.register()

a = Alert(**preAlert)
a["Timestamp"] = time.time()
a["Level"] = 6
print "sending alert:\n'%s'" % a
sender(a)
sender.unregister()
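# Hypothetical counterpart sketch (not part of the original script): a tiny
# receiver the test client above could talk to. It reuses the Receiver class
# shown elsewhere in this document; the import path, the bind addresses and
# the handler are assumptions for illustration only.
from WMCore.Alerts.ZMQ.Receiver import Receiver

def printAlert(alert):
    # called by the Receiver for every incoming alert
    print("received alert: %s" % alert)

receiver = Receiver("tcp://*:6557", printAlert, "tcp://*:6559")
receiver.startReceiver()  # non-blocking; runs until a shutdown control message arrives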
class BasePoller(object):
    """
    Base class for various pollers. The class provides the entry point
    poll() method, from which the poller's life starts in a background
    process, and a Sender instance.
    Methods of this class as well as of the inherited ones run in a
    different process context. The attributes are not shared; if accessed
    from both contexts, the initial values are taken (as set up in the
    initial process) and then modified in the later (polling) process
    context.
    """
    def __init__(self, config, generator):
        # it's the particular Poller config only
        self.config = config
        # reference to the AlertGenerator instance
        self.generator = generator
        # store levels (critical, soft) for critical, soft thresholds correspondence
        # these values are defined in the AlertProcessor config
        # self.levels and self.thresholds have to have the same corresponding order
        # and critical has to be first - if this threshold is caught, there is no
        # point testing the soft one
        self.levels = [self.generator.config.AlertProcessor.critical.level,
                       self.generator.config.AlertProcessor.soft.level]
        # critical, soft threshold values
        self.thresholds = [self.config.critical, self.config.soft]
        # pre-generated alert values; before sending, a new instance is always created
        # and these values are used to update the newly created instance
        dictAlert = dict(Type = "WMAgent",
                         Workload = "n/a",
                         Component = self.generator.__class__.__name__,
                         Source = "<to_overwrite>")
        self.preAlert = alertAPI.getPredefinedAlert(**dictAlert)

    def poll(self):
        """
        This method is called from the AlertGenerator component instance and
        is the entry point for a different process.
        The Sender instance needs to be created here - each poller instance
        has its own sender instance.
        """
        self.sender = Sender(self.generator.config.Alert.address,
                             self.__class__.__name__,
                             self.generator.config.Alert.controlAddr)
        self.sender.register()
        while True:
            # check() may seem to take a long time, but that is because
            # a delay is specified for the psutil percentage calls
            self.check()
            time.sleep(self.config.pollInterval)

    def shutdown(self):
        """
        This method is called from the main AlertGenerator process to
        unregister senders with the receiver. It has to create a new sender
        instance and unregister the name; the self.sender instance created
        in poll() is not visible to this process.
        """
        sender = Sender(self.generator.config.Alert.address,
                        self.__class__.__name__,
                        self.generator.config.Alert.controlAddr)
        sender.unregister()
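# Illustrative sketch only (not taken from WMCore): what a minimal concrete
# poller built on a BasePoller like the one above might look like. Only
# levels, thresholds, preAlert and sender come from BasePoller; the psutil
# call, the class name CPUPollerSketch and the Details keys are assumptions.
import time

import psutil
from WMCore.Alerts.Alert import Alert

class CPUPollerSketch(BasePoller):
    def check(self):
        usage = psutil.cpu_percent(interval=1)  # percentage over a 1 s window
        # thresholds are ordered [critical, soft]; stop at the first one exceeded
        for level, threshold in zip(self.levels, self.thresholds):
            if usage >= threshold:
                a = Alert(**self.preAlert)
                a["Source"] = self.__class__.__name__
                a["Timestamp"] = time.time()
                a["Level"] = level
                a["Details"] = dict(cpuPercent=usage, threshold=threshold)
                self.sender(a)
                break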
def testForwardSinkEntireChain(self):
    """
    The test chain looks as follows:
        worker() -> Receiver1 (+ its Processor configured to do ForwardSink) ->
        Receiver2 (whose address the ForwardSink is configured with as the
        destination) -> Receiver2 does FileSink so that it's possible to
        verify the chain.
    """
    # configuration for the Receiver + Processor + ForwardSink 1 (group)
    config1 = Configuration()
    config1.component_("AlertProcessor")
    config1.AlertProcessor.section_("critical")
    config1.AlertProcessor.section_("soft")
    config1.AlertProcessor.critical.level = 5
    config1.AlertProcessor.soft.level = 0
    config1.AlertProcessor.soft.bufferSize = 0
    config1.AlertProcessor.critical.section_("sinks")
    config1.AlertProcessor.soft.section_("sinks")
    config1.AlertProcessor.critical.sinks.section_("forward")
    config1.AlertProcessor.soft.sinks.section_("forward")
    # address of the Receiver2
    config1.AlertProcessor.critical.sinks.forward.address = self.address2
    config1.AlertProcessor.critical.sinks.forward.controlAddr = self.controlAddr2
    config1.AlertProcessor.critical.sinks.forward.label = "ForwardSinkTest"
    config1.AlertProcessor.soft.sinks.forward.address = self.address2
    config1.AlertProcessor.soft.sinks.forward.controlAddr = self.controlAddr2
    config1.AlertProcessor.soft.sinks.forward.label = "ForwardSinkTest"

    # 1) the first item of the chain is the source of Alerts: worker()

    # 2) the second item is Receiver1 + its Processor + its ForwardSink
    processor1 = Processor(config1.AlertProcessor)
    # ForwardSink will be created automatically by the Processor
    receiver1 = Receiver(self.address1, processor1, self.controlAddr1)
    receiver1.startReceiver()  # non-blocking call

    # 3) the third group is Receiver2 with its Processor and the final FileSink
    config2 = Configuration()
    config2.component_("AlertProcessor")
    config2.AlertProcessor.section_("critical")
    config2.AlertProcessor.section_("soft")
    config2.AlertProcessor.critical.level = 5
    config2.AlertProcessor.soft.level = 0
    config2.AlertProcessor.soft.bufferSize = 0
    config2.AlertProcessor.critical.section_("sinks")
    config2.AlertProcessor.soft.section_("sinks")
    config2.AlertProcessor.critical.sinks.section_("file")
    config2.AlertProcessor.soft.sinks.section_("file")
    # configuration of the final sink
    config2.AlertProcessor.critical.sinks.file.outputfile = self.outputfileCritical
    config2.AlertProcessor.soft.sinks.file.outputfile = self.outputfileSoft
    processor2 = Processor(config2.AlertProcessor)
    # the final FileSink will be created automatically by the Processor
    receiver2 = Receiver(self.address2, processor2, self.controlAddr2)
    receiver2.startReceiver()  # non-blocking call

    # now send the Alert messages via worker() and eventually shut receiver1
    worker(self.address1, self.controlAddr1, 10)
    # wait until receiver1 shuts
    while receiver1.isReady():
        time.sleep(0.4)
        print "%s waiting for Receiver1 to shut ..." % inspect.stack()[0][3]

    # shut down receiver2 - need to sendShutdown() to it
    s = Sender(self.address2, self.controlAddr2, "some_id")
    s.sendShutdown()
    # wait until receiver2 shuts
    while receiver2.isReady():
        time.sleep(0.4)
        print "%s waiting for Receiver2 to shut ..." % inspect.stack()[0][3]

    # check the result in the files
    # the bufferSize for soft-level Alerts was set to 0, so all Alerts should
    # also be present in the soft-level file
    # the initial 10 Alerts (Level 0 .. 9) get distributed through a cascade
    # of two Receivers: alerts with level 0 .. 4 are considered soft, so
    # Receiver1 forwards Alerts 0 .. 4 through its ForwardSink as soft and
    # level 5 .. 9 Alerts as 'critical'; order is not guaranteed

    # critical Alerts
    fileConfig = ConfigSection("file")
    fileConfig.outputfile = self.outputfileCritical
    sink = FileSink(fileConfig)
    expectedLevels = range(5, 10)  # that is 5 .. 9
    loadAlerts = sink.load()
    self.assertEqual(len(loadAlerts), len(expectedLevels))
    d = dict(very = "interesting")
    for a in loadAlerts:
        self.assertEqual(a["Details"], d)

    # soft Alerts
    fileConfig = ConfigSection("file")
    fileConfig.outputfile = self.outputfileSoft
    sink = FileSink(fileConfig)
    expectedLevels = range(0, 5)  # that is 0 .. 4
    loadAlerts = sink.load()
    self.assertEqual(len(loadAlerts), len(expectedLevels))
    for a in loadAlerts:
        self.assertEqual(a["Details"], d)
class BasePoller(threading.Thread):
    """
    Base class for various pollers running as threads. Each poller creates
    its own Sender instance.
    Starting from the Thread entry point method run(), methods run in a
    different thread context. The only shared variable shall be _stopFlag.
    """
    def __init__(self, config, generator):
        threading.Thread.__init__(self)
        # it's the particular Poller config only
        self.config = config
        # reference to the AlertGenerator instance
        self.generator = generator
        # store levels (critical, soft) for critical, soft thresholds correspondence
        # these values are defined in the AlertProcessor config
        # self.levels and self.thresholds have to have the same corresponding order
        # and critical has to be first - if this threshold is caught, there is no
        # point testing the soft one
        # this belongs to the AlertGenerator and is in fact dependent on AlertProcessor
        # by referencing these two values - not sure whether to tolerate such a dependency
        # or configure these two values independently in AlertGenerator itself (surely a
        # possible mismatch would make a bit of chaos)
        self.levels = [self.generator.config.AlertProcessor.critical.level,
                       self.generator.config.AlertProcessor.soft.level]
        # critical, soft threshold values
        self.thresholds = [self.config.critical, self.config.soft]
        # pre-generated alert values; before sending, a new instance is always created
        # and these values are used to update the newly created instance
        dictAlert = dict(Type = "WMAgent",
                         Workload = "n/a",
                         Component = self.generator.__class__.__name__,
                         Source = "<to_overwrite>")
        self.preAlert = alertAPI.getPredefinedAlert(**dictAlert)
        # flag controlling the run of the Thread
        self._stopFlag = False
        # thread's own sleep time
        self._threadSleepTime = 0.5  # seconds

    def run(self):
        """
        This method is called from the AlertGenerator component instance and
        is the entry point for a thread.
        """
        # when running with multiprocessing, this was necessary; stick to it
        # with threading as well - may create some thread-safety issues in ZMQ ...
        self.sender = Sender(self.generator.config.Alert.address,
                             self.__class__.__name__,
                             self.generator.config.Alert.controlAddr)
        self.sender.register()
        counter = self.config.pollInterval
        # want to periodically check whether the thread should finish;
        # it would be impossible to terminate a sleeping thread
        while not self._stopFlag:
            if counter == self.config.pollInterval:
                # check() may seem to take a long time, but that is because
                # a delay is specified for the psutil percentage calls
                self.check()
            counter -= self._threadSleepTime
            if counter <= 0:
                counter = self.config.pollInterval
            if self._stopFlag:
                break
            time.sleep(self._threadSleepTime)

    def terminate(self):
        """
        Method added when Pollers were reimplemented to run as multi-threaded
        rather than multiprocessing.
        This is a slightly blocking call - it waits for the thread to finish.
        """
        self._stopFlag = True
        self.join(self._threadSleepTime + 0.1)
        if self.is_alive():
            logging.error("Thread %s refuses to finish, continuing." %
                          self.__class__.__name__)
        else:
            logging.debug("Thread %s finished." % self.__class__.__name__)

        # deregister with the receiver
        # (was true for the multiprocessing implementation:
        # has to create a new sender instance and unregister the name;
        # the self.sender instance was created in a different thread in run())
        sender = Sender(self.generator.config.Alert.address,
                        self.__class__.__name__,
                        self.generator.config.Alert.controlAddr)
        sender.unregister()
        # if messages weren't consumed, this should get rid of them
        del sender
        del self.sender
class BasePoller(threading.Thread):
    """
    Base class for various pollers running as threads. Each poller creates
    its own Sender instance.
    Starting from the Thread entry point method run(), methods run in a
    different thread context. The only shared variable shall be _stopFlag.
    """
    def __init__(self, config, generator):
        threading.Thread.__init__(self)
        # it's the particular Poller config only
        self.config = config
        # reference to the AlertGenerator instance
        self.generator = generator
        # store levels (critical, soft) for critical, soft thresholds correspondence
        # these values are defined in the AlertProcessor config
        # self.levels and self.thresholds have to have the same corresponding order
        # and critical has to be first - if this threshold is caught, there is no
        # point testing the soft one
        # this belongs to the AlertGenerator and is in fact dependent on AlertProcessor
        # by referencing these two values - not sure whether to tolerate such a dependency
        # or configure these two values independently in AlertGenerator itself (surely a
        # possible mismatch would make a bit of chaos)
        self.levels = [self.generator.config.AlertProcessor.critical.level,
                       self.generator.config.AlertProcessor.soft.level]
        # critical, soft threshold values
        self.thresholds = [self.config.critical, self.config.soft]
        # pre-generated alert values; before sending, a new instance is always created
        # and these values are used to update the newly created instance
        dictAlert = dict(Type="WMAgent",
                         Workload="n/a",
                         Component=self.generator.__class__.__name__,
                         Source="<to_overwrite>")
        self.preAlert = alertAPI.getPredefinedAlert(**dictAlert)
        # flag controlling the run of the Thread
        self._stopFlag = False
        # thread's own sleep time
        self._threadSleepTime = 0.2  # seconds

    def _handleFailedPolling(self, ex):
        """
        Handle (log and send an alert) if polling failed.
        """
        trace = traceback.format_exception(*sys.exc_info())
        traceString = '\n '.join(trace)
        errMsg = ("Polling failed in %s, reason: %s" % (self.__class__.__name__, ex))
        logging.error("%s\n%s" % (errMsg, traceString))
        a = Alert(**self.preAlert)
        a.setTimestamp()
        a["Source"] = self.__class__.__name__
        a["Details"] = dict(msg=errMsg)
        a["Level"] = 10
        logging.info("Sending an alert (%s): %s" % (self.__class__.__name__, a))
        self.sender(a)

    def run(self):
        """
        This method is called from the AlertGenerator component instance and
        is the entry point for a thread.
        """
        logging.info("Thread %s started - run method." % self.__class__.__name__)
        # when running with multiprocessing, it was necessary to create the
        # sender instance in the same context. Stick to it with threading
        # as well - may create some thread-safety issues in ZMQ ...
        self.sender = Sender(self.generator.config.Alert.address,
                             self.generator.config.Alert.controlAddr,
                             self.__class__.__name__)
        self.sender.register()
        logging.info("Thread %s alert sender created: alert addr: %s "
                     "control addr: %s" %
                     (self.__class__.__name__,
                      self.generator.config.Alert.address,
                      self.generator.config.Alert.controlAddr))
        counter = self.config.pollInterval
        # want to periodically check whether the thread should finish;
        # it would be impossible to terminate a sleeping thread
        while not self._stopFlag:
            if counter == self.config.pollInterval:
                # check() may seem to take a long time, but that is because
                # a delay is specified for the psutil percentage calls
                try:
                    logging.debug("Poller %s check ..." % self.__class__.__name__)
                    self.check()
                except Exception as ex:
                    self._handleFailedPolling(ex)
            counter -= self._threadSleepTime
            if counter <= 0:
                counter = self.config.pollInterval
            if self._stopFlag:
                break
            time.sleep(self._threadSleepTime)
        logging.info("Thread %s - work loop terminated, finished." % self.__class__.__name__)

    def stop(self):
        """
        Set the stop flag so that the run() while loop terminates at its
        next iteration.
        """
        self._stopFlag = True

    def terminate(self):
        """
        Method added when Pollers were re-implemented to run as multi-threaded
        rather than multiprocessing.
        This is a slightly blocking call - it waits for the thread to finish.
        """
        self._stopFlag = True  # keep it here as well in case only terminate() is called
        logging.info("Thread %s terminate ..." % self.__class__.__name__)
        self.join(self._threadSleepTime + 0.1)
        if self.is_alive():
            logging.error("Thread %s refuses to finish, continuing." %
                          self.__class__.__name__)
        else:
            logging.info("Thread %s finished." % self.__class__.__name__)

        # deregister with the receiver
        # (was true for the multiprocessing implementation:
        # has to create a new sender instance and unregister the name;
        # the self.sender instance was created in a different thread in run())
        # TODO revise the registering/deregistering business for production ...
        # unregistering was removed: it seems to take long, wmcoreD gives only
        # limited time for a component to shut down, and if the entire agent is
        # being shut down, there is no AlertProcessor to deregister with anyway
        # logging.info("Thread %s sending unregister message ..." % self.__class__.__name__)
        # sender = Sender(self.generator.config.Alert.address,
        #                 self.generator.config.Alert.controlAddr,
        #                 self.__class__.__name__)
        # sender.unregister()
        # # if messages weren't consumed, this should get rid of them
        # del sender
        del self.sender
        logging.info("Thread %s terminate finished." % self.__class__.__name__)
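# Illustrative lifecycle sketch (an assumption, not WMCore code): how an
# AlertGenerator-like owner might drive a concrete poller built on the
# threaded BasePoller above. CPUPollerSketch refers to the hypothetical
# subclass sketched earlier; 'config' and 'generator' stand for the component
# configuration and the AlertGenerator instance, as used in the
# testBasePollerHandleFailedPolling test above (the bogusPoller section is
# reused here purely as an example).
poller = CPUPollerSketch(config.AlertGenerator.bogusPoller, generator)
poller.start()  # threading.Thread entry point; executes run() defined above
try:
    pass  # the owning component does its normal work here
finally:
    poller.terminate()  # sets _stopFlag, joins the thread and cleans up the sender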