def test_nrpe_poller(self):
    """Run one NRPE check through the module ``work()`` loop.

    A single check is queued for the module, the control queue is primed
    with two 'Continue' messages followed by a 'Die' message, and the check
    read back from the returns queue is expected to be 'done' with an exit
    status of 2.

    :return: None
    """
    self.print_header()
    # The configuration must be loaded, if only to get a self.logger...
    self.setup_with_file('cfg/cfg_default.cfg')
    self.assertTrue(self.conf_is_correct)

    my_module = self._setup_nrpe()

    manager = Manager()
    to_queue = manager.Queue()
    from_queue = manager.Queue()
    control_queue = Queue()

    # The check that the module will find in its actions queue
    check = Check({
        'is_a': 'check',
        'status': 'queue',
        'command': "$USER1$/check_nrpe -H localhost33 -n -u -t 5 -c check_load3 -a 20",
        'timeout': 10,
        'poller_tag': None,
        't_to_go': time.time(),
        'ref': None,
    })
    to_queue.put(Message(_type='Do', data=check))

    # The worker reads one control message per loop. We want it to loop
    # twice on a fake message before it meets the real exit message.
    keep_going = Message(_type='Continue')
    stop = Message(_type='Die')
    for control in (keep_going, keep_going, stop):
        control_queue.put(control)

    # Let the module do its job ...
    my_module.work(to_queue, from_queue, control_queue)

    chk = from_queue.get()
    self.assertEqual('done', chk.status)
    self.assertEqual(2, chk.exit_status)
def get_new_checks(self, queue, return_queue):
    """Read new actions from the actions queue while the worker has room.

    Messages are fetched without blocking as long as fewer than
    ``self.processes_by_worker`` checks are held in ``self.checks``:

    * a 'Do' message has its data appended to the checks,
    * a 'ping' message is answered with a 'pong' on the return queue,
    * any other message type is logged and ignored,
    * a ``None`` message makes the worker pause 10 ms before polling again.

    When the queue is empty the polling stops and, if no check is held,
    the idle counter is incremented. An IOError/EOFError marks the worker
    as interrupted.

    REF: doc/alignak-action-queues.png (3)

    :param queue: queue the actions are read from
    :param return_queue: queue the 'pong' answers are written to
    :return: None
    """
    try:
        logger.debug("get_new_checks: %s / %s",
                     len(self.checks), self.processes_by_worker)
        while len(self.checks) < self.processes_by_worker:
            msg = queue.get_nowait()
            if msg is None:
                time.sleep(0.01)
                continue

            logger.debug("Got a message: %s", msg)
            kind = msg.get_type()
            if kind == 'Do':
                action = msg.get_data()
                logger.debug("Got an action: %s", action)
                self.checks.append(action)
                self.actions_got += 1
            elif kind == 'ping':
                pong = Message(_type='pong', data='pong!', source=self._id)
                logger.debug("Queuing message: %s", pong)
                return_queue.put_nowait(pong)
                logger.debug("Queued")
            else:
                logger.warning("Ignoring message of type: %s", kind)
    except Full:
        logger.warning("Actions queue is full")
    except Empty:
        logger.debug("Actions queue is empty")
        if not self.checks:
            self._idletime += 1
    except (IOError, EOFError) as exp:
        # Maybe the Queue() has been deleted by our master ?
        logger.warning("My actions queue is no more available: %s", str(exp))
        self.interrupted = True
    except Exception as exp:  # pylint: disable=broad-except
        logger.error("Failed getting messages in actions queue: %s", str(exp))

    logger.debug("get_new_checks exit")
def assign_to_a_queue(self, action):
    """Wrap an action in a 'Do' message and put it in a worker queue.

    The queue and its index are given by ``_got_queue_from_action``. The
    action is tagged with the index in every case; the message is only
    queued when a queue was actually returned.

    :param action: action to put
    :type action: alignak.action.Action
    :return: None
    """
    do_message = Message(_id=0, _type='Do', data=action)
    worker_index, worker_queue = self._got_queue_from_action(action)

    # Tag the action as "in the worker i"
    action.worker_id = worker_index

    if worker_queue is None:
        return
    worker_queue.put(do_message)
def get_new_checks(self, queue, return_queue):
    """Read new actions from the actions queue while the worker has room.

    Messages are fetched without blocking as long as fewer than
    ``self.processes_by_worker`` checks are held in ``self.checks``:

    * a 'Do' message has its data appended to the checks,
    * a 'ping' message is answered with a 'pong' on the return queue,
    * any other message type is logged and ignored,
    * a ``None`` message is skipped.

    When the queue is empty the polling stops and, if no check is held,
    the idle counter is incremented and the worker sleeps for 0.5 second.
    An IOError/EOFError marks the worker as interrupted.

    REF: doc/alignak-action-queues.png (3)

    :param queue: queue the actions are read from
    :param return_queue: queue the 'pong' answers are written to
    :return: None
    """
    try:
        logger.debug("get_new_checks: %s / %s",
                     len(self.checks), self.processes_by_worker)
        while len(self.checks) < self.processes_by_worker:
            msg = queue.get_nowait()
            if msg is None:
                # Nothing to handle, poll the queue again
                continue

            logger.debug("Got a message: %s", msg)
            kind = msg.get_type()
            if kind == 'Do':
                action = msg.get_data()
                logger.debug("Got an action: %s", action)
                self.checks.append(action)
                self.actions_got += 1
            elif kind == 'ping':
                pong = Message(_type='pong', data='pong!', source=self._id)
                logger.debug("Queuing message: %s", pong)
                return_queue.put_nowait(pong)
                logger.debug("Queued")
            else:
                logger.warning("Ignoring message of type: %s", kind)
    except Full:
        logger.warning("Actions queue is full")
    except Empty:
        logger.debug("Actions queue is empty")
        if not self.checks:
            # Nothing to do at all: count the idle turn and rest a little
            self._idletime += 1
            time.sleep(0.5)
    except (IOError, EOFError) as exp:
        # Maybe the Queue() has been deleted by our master ?
        logger.warning("My actions queue is no more available: %s", str(exp))
        self.interrupted = True
    except Exception as exp:  # pylint: disable=broad-except
        logger.error("Failed getting messages in actions queue: %s", str(exp))

    logger.debug("get_new_checks exit")
def assign_to_a_queue(self, action):
    """Tag an action as queued and put it in a worker actions queue.

    The worker identifier and its queue are given by
    ``_get_queue_for_the_action``. Nothing is done when no worker
    identifier is returned; otherwise the action is tagged with the worker,
    its status is set to queued and a 'Do' message carrying the action is
    put in the queue without blocking.

    :param action: action to put
    :type action: alignak.action.Action
    :return: None
    """
    worker_id, queue = self._get_queue_for_the_action(action)
    if worker_id:
        # Tag the action as "in the worker i"
        action.my_worker = worker_id
        action.status = ACT_STATUS_QUEUED

        do_message = Message(_type='Do', data=action, source=self.name)
        logger.debug("Queuing message: %s", do_message)
        queue.put_nowait(do_message)
        logger.debug("Queued")
def manage_finished_checks(self, queue):
    """Poll the launched checks and send back the finished ones.

    Each launched check whose last poll is older than its own wait time is
    asked to update itself with ``check_finished``. Every check found done
    or timed out is counted, sent in a 'Done' message on the given queue
    and removed from ``self.checks``. The method then sleeps for the
    smallest wait time seen among the polled checks, 1 second at most.

    REF: doc/alignak-action-queues.png (5)

    :param queue: queue the 'Done' messages are written to
    :return: None
    """
    finished = []
    pause = 1.0
    now = time.time()

    logger.debug("--- manage finished checks")
    for action in self.checks:
        logger.debug("--- checking: last poll: %s, now: %s, wait_time: %s, action: %s",
                     action.last_poll, now, action.wait_time, action)

        if action.status == ACT_STATUS_LAUNCHED:
            if action.last_poll < now - action.wait_time:
                action.check_finished(self.max_plugins_output_length)
                # Keep the shortest wait time for the final sleep
                if action.wait_time < pause:
                    pause = action.wait_time

        if action.status not in (ACT_STATUS_DONE, ACT_STATUS_TIMEOUT):
            logger.debug("--- not yet finished")
            continue

        # The action is over, so a new one may be launched
        logger.debug("--- check done/timeout: %s", action.uuid)
        self.actions_finished += 1
        finished.append(action)

        # We answer to the master
        try:
            done_message = Message(_type='Done', data=action, source=self._id)
            logger.debug("Queuing message: %s", done_message)
            queue.put_nowait(done_message)
            logger.debug("Queued")
        except (IOError, EOFError) as exp:
            logger.warning("My returns queue is no more available: %s", str(exp))
        except Exception as exp:  # pylint: disable=broad-except
            logger.error("Failed putting messages in returns queue: %s", str(exp))

    for done in finished:
        logger.debug("--- delete check: %s", done.uuid)
        self.checks.remove(done)

    # Little sleep
    logger.debug("--- manage finished checks terminated, I will wait: %s", pause)
    time.sleep(pause)
def test_notification_timeout(self):
    """Check that a too long notification command ends in timeout.

    A notification running 'libexec/sleep_command.sh 7' with a timeout of
    2 is given to a worker whose loop is driven by hand. The result read
    back from the worker is expected to be a timeout with exit status 3,
    and the scheduler is expected to log the timeout once it gets the
    result.

    :return: None
    """
    # Get a test service
    svc = self._sched.services.find_srv_by_name_and_hostname(
        "test_host_0", "test_ok_0_timeout")

    # These queues connect a poller/reactionner with a worker
    to_queue = Queue()
    from_queue = Queue()
    control_queue = Queue()

    # This test script plays the role of the reactionner
    # Now we "fork" a worker
    worker = Worker(1, to_queue, from_queue, 1)
    worker.uuid = 1
    worker.i_am_dying = False

    # We prepare a notification in the to_queue
    contact = Contact()
    contact.contact_name = "alignak"

    notification = Notification({
        'uuid': 1,
        'type': 'PROBLEM',
        'status': 'scheduled',
        'command': 'libexec/sleep_command.sh 7',
        'command_call': '',
        'ref': svc.uuid,
        'contact': '',
        't_to_go': 0.0
    })
    notification.status = "queue"
    notification.t_to_go = time.time()
    notification.contact = contact
    notification.timeout = 2
    notification.env = {}
    notification.exit_status = 0
    notification.module_type = "fork"

    # Send the job to the worker
    to_queue.put(Message(_type='Do', data=notification))

    # Now we simulate the Worker's work() routine. We can't call it
    # as worker.work() because it is an endless loop
    worker.checks = []
    worker.returns_queue = from_queue
    worker.slave_q = to_queue

    for _ in xrange(9):
        worker.get_new_checks(to_queue, from_queue)
        # During the first loop the sleeping command is launched
        worker.launch_new_checks()
        worker.manage_finished_checks(from_queue)
        time.sleep(1)

    # The worker should have finished its job now, either correctly or with a timeout
    result = from_queue.get().get_data()

    self.assertEqual('timeout', result.status)
    self.assertEqual(3, result.exit_status)
    self.assertLess(result.execution_time, notification.timeout + 1)

    # Let us be a good poller and clean up
    to_queue.close()
    control_queue.close()

    # Now look what the scheduler says about this
    self._sched.actions[notification.uuid] = notification
    self._sched.put_results(result)
    self.show_logs()
    self.assert_any_log_match(
        "Contact alignak service notification command "
        "'libexec/sleep_command.sh 7 ' timed out after 2 seconds")