def manage_returns(self):
    """Timed wrapper around do_manage_returns().

    Measures how long the real work takes and feeds the duration to the
    statistics manager.

    :return: None
    TODO: Use a decorator for stat
    """
    start = time.time()
    self.do_manage_returns()
    statsmgr.incr('core.manage-returns', time.time() - start)
def get_new_actions(self):
    """Timed wrapper around do_get_new_actions().

    Exists purely to record how long fetching new actions takes.

    :return: None
    TODO: Use a decorator
    """
    start = time.time()
    self.do_get_new_actions()
    statsmgr.incr('core.get-new-actions', time.time() - start)
def pynag_con_init(self, _id):
    """Timed wrapper around do_pynag_con_init().

    Records the time spent establishing the scheduler connection.

    :param _id: scheduler _id to connect to
    :type _id: int
    :return: scheduler connection object or None
    :rtype: alignak.http.client.HTTPClient
    """
    start = time.time()
    connection = self.do_pynag_con_init(_id)
    statsmgr.incr('con-init.scheduler', time.time() - start)
    return connection
def pynag_con_init(self, _id, i_type='scheduler'):
    """Timed wrapper around do_pynag_con_init().

    Only exists to time the connection establishment; the metric name is
    derived from the daemon type being contacted.

    :param _id: id
    :type _id: int
    :param i_type: type of item
    :type i_type: str
    :return: do_pynag_con_init return always True, so we return always True
    :rtype: bool
    """
    start = time.time()
    result = self.do_pynag_con_init(_id, i_type)
    statsmgr.incr('con-init.%s' % i_type, time.time() - start)
    return result
def do_loop_turn(self):
    """Satellite main loop::

    * Setup new conf if necessary
    * Watch for new conf
    * Check and delete zombies actions / modules
    * Get returns from queues
    * Adjust worker number
    * Get new actions

    :return: None
    """
    logger.debug("Loop turn")
    # Maybe the arbiter ask us to wait for a new conf
    # If true, we must restart all...
    if self.cur_conf is None:
        # Clean previous run from useless objects
        # and close modules
        self.clean_previous_run()
        self.wait_for_initial_conf()
        # we may have been interrupted or so; then
        # just return from this loop turn
        if not self.new_conf:
            return
        self.setup_new_conf()

    # Now we check if arbiter speak to us.
    # If so, we listen to it
    # When it push a conf, we reinit connections
    # Sleep in waiting a new conf :)
    # TODO: manage the diff again.
    while self.timeout > 0:
        begin = time.time()
        self.watch_for_new_conf(self.timeout)
        end = time.time()
        if self.new_conf:
            self.setup_new_conf()
        self.timeout = self.timeout - (end - begin)

    logger.debug(" ======================== ")

    self.timeout = self.polling_interval

    # Check if zombies workers are among us :)
    # If so: KILL THEM ALL!!!
    self.check_and_del_zombie_workers()

    # But also modules
    self.check_and_del_zombie_modules()

    # Print stats for debug
    for sched_id in self.schedulers:
        sched = self.schedulers[sched_id]
        for mod in self.q_by_mod:
            # In workers we've got actions send to queue - queue size
            for (index, queue) in self.q_by_mod[mod].items():
                logger.debug("[%d][%s][%s] Stats: Workers:%d "
                             "(Queued:%d TotalReturnWait:%d)",
                             sched_id, sched['name'], mod, index,
                             queue.qsize(), self.get_returns_queue_len())
                # also update the stats module
                statsmgr.incr('core.worker-%s.queue-size' % mod, queue.qsize())

    # Before return or get new actions, see how we manage
    # old ones: are they still in queue(s)? If True, we
    # must wait more or at least have more workers
    wait_ratio = self.wait_ratio.get_load()
    total_q = 0
    for mod in self.q_by_mod:
        for queue in self.q_by_mod[mod].values():
            total_q += queue.qsize()
    if total_q != 0 and wait_ratio < 2 * self.polling_interval:
        logger.debug("I decide to up wait ratio")
        self.wait_ratio.update_load(wait_ratio * 2)
        # self.wait_ratio.update_load(self.polling_interval)
    else:
        # Go to self.polling_interval on normal run, if wait_ratio
        # was >2*self.polling_interval,
        # it make it come near 2 because if < 2, go up :)
        self.wait_ratio.update_load(self.polling_interval)
    wait_ratio = self.wait_ratio.get_load()
    logger.debug("Wait ratio: %f", wait_ratio)
    statsmgr.incr('core.wait-ratio', wait_ratio)

    # We can wait more than 1s if needed,
    # no more than 5s, but no less than 1
    timeout = self.timeout * wait_ratio
    timeout = max(self.polling_interval, timeout)
    self.timeout = min(5 * self.polling_interval, timeout)
    # FIX: report the timeout we just computed; the previous code re-sent
    # wait_ratio under the 'core.timeout' metric (wait_ratio is already
    # reported above as 'core.wait-ratio').
    statsmgr.incr('core.timeout', self.timeout)

    # Maybe we do not have enough workers, we check for it
    # and launch the new ones if needed
    self.adjust_worker_number_by_load()

    # Manage all messages we've got in the last timeout
    # for queue in self.return_messages:
    while self.get_returns_queue_len() != 0:
        self.manage_action_return(self.get_returns_queue_item())

    # If we are passive, we do not initiate the check getting
    # and return
    if not self.passive:
        # Now we can get new actions from schedulers
        self.get_new_actions()

        # We send all finished checks
        # REF: doc/alignak-action-queues.png (6)
        self.manage_returns()

    # Get objects from our modules that are not worker based
    self.get_objects_from_from_queues()

    # Say to modules it's a new tick :)
    self.hook_point('tick')
# NOTE(review): this is a truncated fragment of a broker-side loop body — the
# enclosing `def` header is not visible here and the text breaks off after
# building `to_send` (presumably this continues by pushing `to_send` to the
# external modules' to_q queues — confirm against the full file). Left
# byte-identical; it cannot be safely reconstructed from this view.
if self.new_conf: self.setup_new_conf() # Maybe the last loop we raised some broks internally # we should integrate them in broks self.interger_internal_broks() # Also reap broks sent from the arbiters self.interger_arbiter_broks() # Main job, go get broks in our distants daemons types = ['scheduler', 'poller', 'reactionner', 'receiver'] for _type in types: _t0 = time.time() # And from schedulers self.get_new_broks(i_type=_type) statsmgr.incr('get-new-broks.%s' % _type, time.time() - _t0) # Sort the brok list by id self.broks.sort(sort_by_ids) # and for external queues # REF: doc/broker-modules.png (3) # We put to external queues broks that was not already send t00 = time.time() # We are sending broks as a big list, more efficient than one by one ext_modules = self.modules_manager.get_external_instances() to_send = [brok for brok in self.broks if getattr(brok, 'need_send_to_ext', True)] # Send our pack to all external modules to_q queue so they can get the wole packet # beware, the sub-process/queue can be die/close, so we put to restart the whole module # instead of killing ourself :)
def do_loop_turn(self):
    """Satellite main loop::

    * Setup new conf if necessary
    * Watch for new conf
    * Check and delete zombies actions / modules
    * Get returns from queues
    * Adjust worker number
    * Get new actions

    :return: None
    """
    logger.debug("Loop turn")
    # Maybe the arbiter ask us to wait for a new conf
    # If true, we must restart all...
    if self.cur_conf is None:
        # Clean previous run from useless objects
        # and close modules
        self.clean_previous_run()
        self.wait_for_initial_conf()
        # we may have been interrupted or so; then
        # just return from this loop turn
        if not self.new_conf:
            return
        self.setup_new_conf()

    # Now we check if arbiter speak to us.
    # If so, we listen to it
    # When it push a conf, we reinit connections
    # Sleep in waiting a new conf :)
    # TODO: manage the diff again.
    while self.timeout > 0:
        begin = time.time()
        self.watch_for_new_conf(self.timeout)
        end = time.time()
        if self.new_conf:
            self.setup_new_conf()
        self.timeout = self.timeout - (end - begin)

    logger.debug(" ======================== ")

    self.timeout = self.polling_interval

    # Check if zombies workers are among us :)
    # If so: KILL THEM ALL!!!
    self.check_and_del_zombie_workers()

    # But also modules
    self.check_and_del_zombie_modules()

    # Print stats for debug
    for sched_id in self.schedulers:
        sched = self.schedulers[sched_id]
        for mod in self.q_by_mod:
            # In workers we've got actions send to queue - queue size
            for (index, queue) in self.q_by_mod[mod].items():
                logger.debug(
                    "[%d][%s][%s] Stats: Workers:%d "
                    "(Queued:%d TotalReturnWait:%d)",
                    sched_id, sched['name'], mod, index,
                    queue.qsize(), self.get_returns_queue_len())
                # also update the stats module
                statsmgr.incr('core.worker-%s.queue-size' % mod, queue.qsize())

    # Before return or get new actions, see how we manage
    # old ones: are they still in queue(s)? If True, we
    # must wait more or at least have more workers
    wait_ratio = self.wait_ratio.get_load()
    total_q = 0
    for mod in self.q_by_mod:
        for queue in self.q_by_mod[mod].values():
            total_q += queue.qsize()
    if total_q != 0 and wait_ratio < 2 * self.polling_interval:
        logger.debug("I decide to up wait ratio")
        self.wait_ratio.update_load(wait_ratio * 2)
        # self.wait_ratio.update_load(self.polling_interval)
    else:
        # Go to self.polling_interval on normal run, if wait_ratio
        # was >2*self.polling_interval,
        # it make it come near 2 because if < 2, go up :)
        self.wait_ratio.update_load(self.polling_interval)
    wait_ratio = self.wait_ratio.get_load()
    logger.debug("Wait ratio: %f", wait_ratio)
    statsmgr.incr('core.wait-ratio', wait_ratio)

    # We can wait more than 1s if needed,
    # no more than 5s, but no less than 1
    timeout = self.timeout * wait_ratio
    timeout = max(self.polling_interval, timeout)
    self.timeout = min(5 * self.polling_interval, timeout)
    # FIX: report the timeout we just computed; the previous code re-sent
    # wait_ratio under the 'core.timeout' metric (wait_ratio is already
    # reported above as 'core.wait-ratio').
    statsmgr.incr('core.timeout', self.timeout)

    # Maybe we do not have enough workers, we check for it
    # and launch the new ones if needed
    self.adjust_worker_number_by_load()

    # Manage all messages we've got in the last timeout
    # for queue in self.return_messages:
    while self.get_returns_queue_len() != 0:
        self.manage_action_return(self.get_returns_queue_item())

    # If we are passive, we do not initiate the check getting
    # and return
    if not self.passive:
        # Now we can get new actions from schedulers
        self.get_new_actions()

        # We send all finished checks
        # REF: doc/alignak-action-queues.png (6)
        self.manage_returns()

    # Get objects from our modules that are not worker based
    self.get_objects_from_from_queues()

    # Say to modules it's a new tick :)
    self.hook_point('tick')