class SaltEventsDaemon(Daemon):
    '''
    The main daemon class where all the parsing, collecting and
    dumping takes place
    '''

    def __init__(self, config, log_level=None, log_file=None, daemonize=False):
        self.opts = SaltEventsdLoader(
            config=config,
            log_level=log_level,
            log_file=log_file,
            daemonize=daemonize,
        ).getopts()

        self._pre_startup(self.opts)

        if type(self.opts) is not dict:
            log.info("Received invalid config data, startup cancelled")
            sys.exit(1)

        self.config = self.opts['general']

        super(SaltEventsDaemon, self).__init__(self.config['pidfile'])

        # the map of events is stored here, loaded in _init_events()
        self.event_map = None
        self._init_events(self.opts['events'])

        self.backends = self._init_backends(self.config['backends'])

        # the socket to listen on for the events
        self.sock_dir = self.config['sock_dir']

        # two possible values for 'node': master and minion
        # they do the same thing, just on different sockets
        self.node = self.config['node']

        # the id, usually 'master'
        self.nodeid = self.config['id']

        # the statefile where we write the daemon status
        self.state_file = self.config['state_file']

        # the interval (in seconds) at which the status is updated
        self.state_timer = self.config['state_timer']

        # we don't know our pid (yet), it is updated in run()
        self.pid = None

        # the maximum number of parallel workers to start
        self.max_workers = self.config['max_workers']

        # the number of events to collect before starting a worker
        self.event_limit = self.config['event_limit']

        # a list to keep track of the currently running workers
        # this is mainly for debugging, to check whether all started
        # workers are correctly joined over time so we don't leak memory
        self.running_workers = []

        # set up some counters used for the status
        # the events we matched on
        self.events_han = 0
        self.events_rec = 0

        # the threads we have created and joined
        # if their numbers diverge, we have a memory leak
        self.threads_cre = 0
        self.threads_join = 0

        # used to keep track of the delta between two stat_timer intervals
        # to calculate events per second handled/received
        self.stat_rec_count = 0
        self.stat_hdl_count = 0

        # the timer that writes data to the database every x seconds
        # this is used to push data into the database even if
        # self.event_limit is not reached regularly
        self.ev_timer_ev = False
        self.ev_timer_intrvl = self.config['dump_timer']
        self.ev_timer = ResetTimer(self.ev_timer_intrvl, self, name='Event')

        # the timer that writes statistical data to the status-file
        self.state_timer_ev = False
        self.state_timer_intrvl = self.config['state_timer']
        self.state_timer = ResetTimer(self.state_timer_intrvl, self, name='Stat')

    def timer_event(self, source=None):
        '''
        This is called whenever one of the timers started in __init__()
        gets to the end of its counting loop
        '''
        if not source:
            return

        if source == 'Event':
            self.ev_timer_ev = True
        elif source == 'Stat':
            self._write_state()
        else:
            pass

    def stop(self, signal, frame):
        '''
        We override stop() to break our main loop and
        have a pretty log message
        '''
        log.info("Received signal {0}".format(signal))

        # if we have running workers, run through all and join() the ones
        # that have finished. if we still have running workers after that,
        # wait 5 secs for the rest and then exit.
        # Maybe we should improve this a little bit more.
        if len(self.running_workers) > 0:
            clean_workers = []

            for count in range(0, 2):
                for worker in self.running_workers:
                    if worker.isAlive():
                        clean_workers.append(worker)
                    else:
                        worker.join()
                        log.debug("Joined worker #{0}".format(worker.getName()))

                if len(clean_workers) > 0:
                    log.info("Waiting 5secs for remaining workers..")
                    time.sleep(5)
                else:
                    break

        log.info("salt-eventsd has shut down")

        # leave the cleanup to the super's stop()
        try:
            super(SaltEventsDaemon, self).stop(signal, frame)
        except (IOError, OSError):
            os._exit(0)

    def start(self):
        '''
        We override start() just for our log message
        '''
        log.info("Starting salt-eventsd daemon")
        # leave the startup to the super's daemon, that's where all
        # the daemonizing and double-forking takes place
        super(SaltEventsDaemon, self).start()

    def run(self):
        '''
        This method is automatically called by start() from
        our parent class
        '''
        log.info("Initializing event-listener")
        self.pid = self._get_pid()
        self._write_state()
        self.listen()

    def _pre_startup(self, opts):
        '''
        Does a startup-check if all needed parameters are found in the
        configfile. This is really important because we lose stdout in
        daemon mode and exceptions might not be seen by the user
        '''
        required_general = [
            'sock_dir',
            'node',
            'max_workers',
            'id',
            'event_limit',
            'pidfile',
            'state_file',
            'state_timer',
            'dump_timer',
        ]

        for field in required_general:
            if field not in opts['general']:
                log.critical("Missing parameter " +
                             "'{0}' in configfile".format(field))
                sys.exit(1)

    def listen(self):
        '''
        The main event loop where we receive the events and
        start the workers that dump our data into the database
        '''
        # log on to saltstack's event-bus
        event = salt.utils.event.SaltEvent(
            self.node,
            self.sock_dir,
        )

        # we store our events in a list, we don't really care about an order
        # or what kind of data is put in there. all that is configured with
        # the templates configured in the configfile
        event_queue = []

        # start our timers
        self.ev_timer.start()
        self.state_timer.start()

        # this is for logline chronology so the timer-message always comes
        # _before_ the actual startup-message of the listening loop below :-)
        time.sleep(1)

        log.info("Entering main event loop")
        log.info("Listening on: {0}".format(event.puburi))

        # read everything we can get our hands on
        while True:
            # the zmq-socket does not like ^C very much, make the error
            # a little more graceful. alright, alright, ignore the damn thing,
            # we're exiting anyways...
            try:
                ret = event.get_event(full=True)
            except zmq.ZMQError:
                pass
            except KeyboardInterrupt:
                log.info('Received CTRL+C, shutting down')
                self.stop(signal.SIGTERM, None)
            except AssertionError:
                # In case the master restarts, a reconnect needs to happen.
                event = salt.utils.event.SaltEvent(
                    self.node,
                    self.sock_dir,
                )
                ret = event.get_event(full=True)

            # if we have not received enough events to reach event_limit
            # and the timer has fired, dump the events collected so far
            # to the workers
            if self.ev_timer_ev:
                if (len(self.running_workers) < self.max_workers) and \
                   (len(event_queue) > 0):

                    self._init_worker(event_queue)

                    # reset our queue to prevent duplicate entries
                    del event_queue[:]

                    # we reset self.ev_timer_ev at the end of the loop
                    # so we can update the stats that are logged

            if ret is None:
                continue

            # filter only the events we're interested in. all events have a
            # tag we can filter them by. we match with a precompiled regex
            if 'tag' in ret:
                # filter out events with an empty tag. those are special
                if ret['tag'] != '':
                    # run through our configured events and try to match the
                    # current event's tag against the ones we're interested in
                    for key in self.event_map.keys():
                        if self.event_map[key]['tag'].match(ret['tag']):
                            log.debug("Matching on {0}:{1}".format(key, ret['tag']))

                            prio = self.event_map[key].get('prio', 0)

                            # push prio1-events directly into a worker
                            if prio > 0:
                                log.debug('Prio1 event found, pushing immediately!')
                                self.events_han += 1
                                self._init_worker([ret])
                            else:
                                event_queue.append(ret)
                                self.events_han += 1

                            # once we reach the event_limit, start a worker
                            # that writes that data into the database
                            if len(event_queue) >= self.event_limit:
                                # only start a worker if not too many
                                # workers are running already
                                if len(self.running_workers) < self.max_workers:
                                    self._init_worker(event_queue)
                                    # reset the timer
                                    self.ev_timer.reset()
                                    # reset our queue to prevent duplicate entries
                                    del event_queue[:]
                                else:
                                    # FIXME: we need to handle this situation somehow if
                                    # too many workers are running. just flush the events?
                                    # there really is no sane way except queueing more and
                                    # more until some sort of limit is reached and we care
                                    # more about our saltmaster than about the collected
                                    # events!
                                    log.critical("Too many workers running, losing data!!!")

            # a list for the workers that are still running
            clean_workers = []

            # run through all the workers and join() the ones
            # that have finished dumping their data and keep
            # the running ones on our list
            for worker in self.running_workers:
                if worker.isAlive():
                    clean_workers.append(worker)
                else:
                    worker.join()
                    log.debug("Joined worker #{0}".format(worker.getName()))
                    self.threads_join += 1

            # get rid of the old reference and set a new one
            # FIXME: is this really necessary?
            del self.running_workers
            self.running_workers = clean_workers

            self.events_rec += 1

            if self.ev_timer_ev:
                self.ev_timer_ev = False

        log.info("Listen loop ended...")

    def _get_pid(self):
        '''
        Get our current pid from the pidfile and fall back to
        os.getpid() if the pidfile is not present (in foreground mode)
        '''
        pid = None

        try:
            pidf = open(self.pidfile, 'r')
            pid = int(pidf.read().strip())
            pidf.close()
        except IOError:
            pid = os.getpid()

        return pid

    def _write_state(self):
        '''
        Writes the current status to the defined status-file. This includes
        the current pid, events received/handled and threads created/joined
        '''
        ev_hdl_per_s = (float(self.events_han - self.stat_hdl_count) /
                        float(self.state_timer_intrvl))
        ev_tot_per_s = (float(self.events_rec - self.stat_rec_count) /
                        float(self.state_timer_intrvl))

        if self.config['stat_worker']:
            stat_data = {
                'events_rec': self.events_rec,
                'events_hdl': self.events_han,
                'events_hdl_sec': round(ev_hdl_per_s, 2),
                'events_tot_sec': round(ev_tot_per_s, 2),
                'threads_created': self.threads_cre,
                'threads_joined': self.threads_join,
            }

            self.threads_cre += 1
            st_worker = SaltEventsdWorker(stat_data,
                                          self.threads_cre,
                                          None,
                                          self.backends,
                                          **self.opts)
            st_worker.start()

            try:
                self.running_workers.append(st_worker)
            except AttributeError:
                log.error('self is missing running_workers')
                try:
                    log.info(self)
                    log.info(dir(self))
                except Exception:
                    log.error('Failed to dump dir(self)')

        try:
            # write the info to the specified state-file
            statf = open(self.state_file, 'w')
            statf.writelines(
                json.dumps({
                    'events_rec': self.events_rec,
                    'events_hdl': self.events_han,
                    'events_hdl_sec': round(ev_hdl_per_s, 2),
                    'events_tot_sec': round(ev_tot_per_s, 2),
                    'threads_created': self.threads_cre,
                    'threads_joined': self.threads_join,
                }))

            # if we have the same pid as the pidfile, we are the running daemon
            # and also print the current counters to the logfile with 'info'
            if os.getpid() == self.pid:
                log.info("Running with pid {0}".format(self.pid))
                log.info("Events (han/recv): {0}/{1}".format(
                    self.events_han,
                    self.events_rec,
                ))
                log.info("Threads (cre/joi):{0}/{1}".format(
                    self.threads_cre,
                    self.threads_join,
                ))

            statf.write("\n")
            statf.close()
            sys.stdout.flush()
        except IOError as ioerr:
            log.critical("Failed to write state to {0}".format(self.state_file))
            log.exception(ioerr)
        except OSError as oserr:
            log.critical("Failed to write state to {0}".format(self.state_file))
            log.exception(oserr)

        self.stat_rec_count = self.events_rec
        self.stat_hdl_count = self.events_han

    def _init_backends(self, backends):
        '''
        Loads the backends from the workers dir defined in the config and
        falls back to the codebase dir in case they're not found there.
        '''
        backend_dirs = [
            os.path.dirname(os.path.realpath(__file__)) + '/workers/',
        ]

        if 'backend_dir' in self.config:
            backend_dirs.insert(0, self.config['backend_dir'])

        backend_mngr = BackendMngr(backend_dirs)

        return backend_mngr.load_plugins()

    def _init_events(self, events={}):
        '''
        Creates a dict of precompiled regexes for all defined
        events from the config for maximum performance.
        '''
        self.event_map = events

        # we precompile all regexes
        log.info("Initialising events...")

        for key in events.keys():
            # we compile the regex configured in the config
            self.event_map[key]['tag'] = compile(events[key]['tag'])
            log.info("Added event '{0}'".format(key))

            # if subevents are configured, also update them with a
            # regex-matching object
            if 'subs' in events[key]:
                for sub_ev in events[key]['subs'].keys():
                    try:
                        self.event_map[key]['subs'][sub_ev]['fun'] = compile(
                            events[key]['subs'][sub_ev]['fun'])
                    except KeyError:
                        pass

                    try:
                        self.event_map[key]['subs'][sub_ev]['tag'] = compile(
                            events[key]['subs'][sub_ev]['tag'])
                    except KeyError:
                        pass

                    log.info("Added sub-event '{0}->{1}'".format(key, sub_ev))

    def _init_worker(self, qdata):
        '''
        Dumps the collected data into a worker thread which handles
        pushing the data into the different backends.
        '''
        self.threads_cre += 1

        log.info("Starting worker #{0}".format(self.threads_cre))

        # make sure we pass a copy of the list
        worker = SaltEventsdWorker(list(qdata),
                                   self.threads_cre,
                                   self.event_map,
                                   self.backends,
                                   **self.opts)
        worker.start()

        self.running_workers.append(worker)
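
# ---------------------------------------------------------------------------
# Illustrative sketch (not part of the original module): the shape of the
# 'events' mapping that _init_events() above expects from the loader. Every
# entry needs a 'tag' regex string; 'prio' and the 'subs' sub-events with
# their own 'tag'/'fun' regexes are optional. The event names and regexes
# below are assumptions for demonstration only; the real mapping comes from
# the salt-eventsd config file and is never defined in this module.
# ---------------------------------------------------------------------------
EXAMPLE_EVENT_MAP = {
    'new_job': {
        # matched against the event tag with a precompiled regex
        'tag': 'salt/job/[0-9]+/new',
        # prio > 0 pushes the event into a worker immediately
        'prio': 0,
        'subs': {
            'job_return': {
                # sub-events may filter on their own tag and/or function name
                'tag': 'salt/job/[0-9]+/ret/.+',
                'fun': 'state\\.(apply|highstate)',
            },
        },
    },
}
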

class SaltEventsDaemon(salteventsd.daemon.Daemon):
    '''
    The main daemon class where all the parsing, collecting and
    dumping takes place
    '''

    def __init__(self):
        self.opts = SaltEventsdLoader().getopts()
        self._pre_startup(self.opts)

        if type(self.opts) is not dict:
            log.info("Received invalid config data, startup cancelled")
            sys.exit(1)

        self.config = self.opts['general']

        super(SaltEventsDaemon, self).__init__(self.config['pidfile'])

        # the map of events is stored here, loaded in _init_events()
        self.event_map = None
        self._init_events(self.opts['events'])

        self.backends = self._init_backends(self.config['backends'])
        log.info(self.backends)

        # the socket to listen on for the events
        self.sock_dir = self.config['sock_dir']

        # two possible values for 'node': master and minion
        # they do the same thing, just on different sockets
        self.node = self.config['node']

        # the id, usually 'master'
        self.nodeid = self.config['id']

        # the statefile where we write the daemon status
        self.state_file = self.config['state_file']

        # how many events to handle before updating the status
        self.state_upd = self.config['state_upd']

        # we don't know our pid (yet)
        self.pid = None

        # the maximum number of parallel workers to start
        self.max_workers = self.config['max_workers']

        # the number of events to collect before starting a worker
        self.event_limit = self.config['event_limit']

        # a list to keep track of the currently running workers
        # this is mainly for debugging, to check whether all started
        # workers are correctly joined over time so we don't leak memory
        self.running_workers = []

        # set up some counters used for the status
        self.events_han = 0
        self.events_rec = 0
        self.threads_cre = 0
        self.threads_join = 0

        # the timer that writes data to the database every x seconds
        # this is used to push data into the database even if
        # self.event_limit is not reached regularly
        self.ev_timer_ev = False
        self.ev_timer_intrvl = self.config['dump_timer']
        self.ev_timer = ResetTimer(self.ev_timer_intrvl, self)

    def timer_event(self):
        '''
        This is called whenever the timer started in __init__()
        gets to the end of its counting loop
        '''
        self.ev_timer_ev = True

    def stop(self, signal, frame):
        '''
        We override stop() to break our main loop and
        have a pretty log message
        '''
        log.info("received signal {0}".format(signal))

        # if we have running workers, run through all and join() the ones
        # that have finished. if we still have running workers after that,
        # wait 5 secs for the rest and then exit.
        # Maybe we should improve this a little bit more.
        if len(self.running_workers) > 0:
            clean_workers = []

            for count in range(0, 2):
                for worker in self.running_workers:
                    if worker.isAlive():
                        clean_workers.append(worker)
                    else:
                        worker.join()
                        log.debug("joined worker #{0}".format(worker.getName()))

                if len(clean_workers) > 0:
                    log.info("waiting 5secs for remaining workers..")
                    time.sleep(5)
                else:
                    break

        log.info("salt-eventsd has shut down")

        # leave the cleanup to the super's stop()
        super(SaltEventsDaemon, self).stop(signal, frame)

    def start(self):
        '''
        We override start() just for our log message
        '''
        log.info("starting salt-eventsd daemon")
        # leave the startup to the super's daemon, that's where all
        # the daemonizing and double-forking takes place
        super(SaltEventsDaemon, self).start()

    def run(self):
        '''
        The method automatically called by start() from our parent class
        '''
        log.info("initializing event listener")
        self.pid = self._get_pid()
        self._write_state()
        self.listen()

    def _pre_startup(self, opts):
        '''
        Does a startup-check if all needed parameters are found in the
        configfile.
        This is really important because we lose stdout in daemon mode
        and exceptions might not be seen by the user
        '''
        required_general = [
            'sock_dir',
            'node',
            'max_workers',
            'id',
            'event_limit',
            'pidfile',
            'state_file',
            'state_upd',
            'dump_timer',
        ]

        for field in required_general:
            if field not in opts['general']:
                log.critical("Missing parameter " +
                             "'{0}' in configfile".format(field))
                sys.exit(1)

    def listen(self):
        '''
        The main event loop where we receive the events and
        start the workers that dump our data into the database
        '''
        # log on to saltstack's event-bus
        event = salt.utils.event.SaltEvent(self.node, self.sock_dir)

        # we store our events in a list, we don't really care about an order
        # or what kind of data is put in there. all that is configured with
        # the sql-template configured in the configfile
        event_queue = []

        # start our dump_timer
        self.ev_timer.start()

        # this is for logline chronology so the timer-message always comes
        # _before_ the actual startup-message of the listening loop below :-)
        time.sleep(1)

        log.info("entering main event loop")
        log.info("listening on: {0}".format(event.puburi))

        # read everything we can get our hands on
        while True:
            # the zmq-socket does not like ^C very much, make the error
            # a little more graceful. alright, alright, ignore the damn thing,
            # we're exiting anyways...
            try:
                ret = event.get_event(full=True)
            except zmq.ZMQError:
                pass

            if ret is None:
                continue

            # if the timer has expired, we may have not received enough
            # events in the queue to reach event_limit, in that case we dump
            # the data anyway to have it in the database
            if self.ev_timer_ev:
                if (len(self.running_workers) < self.max_workers) and \
                   (len(event_queue) > 0):

                    self._init_worker(event_queue)

                    # reset our queue to prevent duplicate entries
                    del event_queue[:]

                    # we reset self.ev_timer_ev at the end of the loop
                    # so we can update the stats that are logged

            # filter only the events we're interested in. all events have a
            # tag we can filter them by. we match with a precompiled regex
            if 'tag' in ret:
                # filter out events with an empty tag. those are special
                if ret['tag'] != '':
                    # run through our configured events and try to match the
                    # current event's tag against the ones we're interested in
                    for key in self.event_map.keys():
                        if self.event_map[key]['tag'].match(ret['tag']):
                            log.debug("matching on {0}:{1}".format(key, ret['tag']))

                            prio = self.event_map[key].get('prio', 0)

                            # push prio1-events directly into a worker
                            if prio > 0:
                                log.debug('Prio1 event found, pushing immediately!')
                                self.events_han += 1
                                self._init_worker([ret])
                            else:
                                event_queue.append(ret)
                                self.events_han += 1

                            # once we reach the event_limit, start a worker
                            # that writes that data into the database
                            if len(event_queue) >= self.event_limit:
                                # only start a worker if not too many
                                # workers are running already
                                if len(self.running_workers) < self.max_workers:
                                    self._init_worker(event_queue)
                                    # reset the timer
                                    self.ev_timer.reset()
                                    # reset our queue to prevent duplicate entries
                                    del event_queue[:]
                                else:
                                    # FIXME: we need to handle this situation somehow if
                                    # too many workers are running. just flush the events?
                                    # there really is no sane way except queueing more and
                                    # more until some sort of limit is reached and we care
                                    # more about our saltmaster than about the collected
                                    # events!
                                    log.critical("too many workers running, losing data!!!")

            # a list for the workers that are still running
            clean_workers = []

            # run through all the workers and join() the ones
            # that have finished dumping their data and keep
            # the running ones on our list
            for worker in self.running_workers:
                if worker.isAlive():
                    clean_workers.append(worker)
                else:
                    worker.join()
                    log.debug("joined worker #{0}".format(worker.getName()))
                    self.threads_join += 1

            # get rid of the old reference and set a new one
            # FIXME: is this really necessary?
            del self.running_workers
            self.running_workers = clean_workers

            self.events_rec += 1

            # we update the stats every 'state_upd' received events,
            # or whenever we received a timer event from our ResetTimer
            if (self.events_rec % self.state_upd) == 0:
                self._write_state()
            elif self.ev_timer_ev:
                self._write_state()
                self.ev_timer_ev = False

        log.info("listen loop ended...")

    def _get_pid(self):
        '''
        Get our current pid from the pidfile,
        basically the same as os.getpid()
        '''
        try:
            pidf = open(self.pidfile, 'r')
            pid = int(pidf.read().strip())
            pidf.close()
            return pid
        except IOError:
            return None

    def _write_state(self):
        '''
        Writes the current status to the defined status-file. This includes
        the current pid, events received/handled and threads created/joined
        '''
        try:
            # write the info to the specified state-file
            statf = open(self.state_file, 'w')
            statf.writelines(simplejson.dumps({
                'events_received': self.events_rec,
                'events_handled': self.events_han,
                'threads_created': self.threads_cre,
                'threads_joined': self.threads_join,
            }))

            # if we have the same pid as the pidfile, we are the running daemon
            # and also print the current counters to the logfile with 'info'
            if os.getpid() == self.pid:
                log.info("running with pid {0}".format(self.pid))
                log.info("events (han/recv): {0}/{1}".format(self.events_han,
                                                             self.events_rec))
                log.info("threads (cre/joi):{0}/{1}".format(self.threads_cre,
                                                            self.threads_join))

            statf.write("\n")
            statf.close()
            sys.stdout.flush()
        except IOError as ioerr:
            log.critical("Failed to write state to {0}".format(self.state_file))
            log.exception(ioerr)
        except OSError as oserr:
            log.critical("Failed to write state to {0}".format(self.state_file))
            log.exception(oserr)

    def _init_backends(self, backends):
        '''
        Loads the backend plugins from the packaged plugin dir
        and the backend_dir defined in the config
        '''
        backend_mngr = BackendMngr(['/usr/share/pyshared/salteventsd/',
                                    self.config['backend_dir']])

        return backend_mngr.load_plugins()

    def _init_events(self, events={}):
        '''
        This is used to tell the class about the events it should handle.
        It has to be a dictionary with appropriate mappings in it. See the
        config file for examples on how to compose the dict. Each entry is
        converted to a precompiled regex for maximum flexibility
        '''
        self.event_map = events

        # we precompile all regexes
        log.info("initialising events...")

        for key in events.keys():
            # we compile the regex configured in the config
            self.event_map[key]['tag'] = compile(events[key]['tag'])
            log.info("Added event '{0}'".format(key))

            # if subevents are configured, also update them with a
            # regex-matching object
            if 'subs' in events[key]:
                for sub_ev in events[key]['subs'].keys():
                    try:
                        self.event_map[key]['subs'][sub_ev]['fun'] = compile(
                            events[key]['subs'][sub_ev]['fun'])
                    except KeyError:
                        pass

                    try:
                        self.event_map[key]['subs'][sub_ev]['tag'] = compile(
                            events[key]['subs'][sub_ev]['tag'])
                    except KeyError:
                        pass

                    log.info("Added sub-event '{0}->{1}'".format(key, sub_ev))

    # the method dumps the data into a worker thread which
    # handles pushing the data into the different backends
    def _init_worker(self, qdata):
        '''
        Write a collection of events to the database.
        Every invocation of this method creates its own thread
        that writes into the database
        '''
        self.threads_cre += 1

        log.info("starting worker #{0}".format(self.threads_cre))

        # make sure we pass a copy of the list
        worker = SaltEventsdWorker(list(qdata),
                                   self.threads_cre,
                                   self.event_map,
                                   self.backends,
                                   **self.opts)
        worker.start()

        self.running_workers.append(worker)
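
# ---------------------------------------------------------------------------
# Illustrative usage sketch (not part of the original module): how the daemon
# is typically driven from a start script. It is instantiated, start()
# daemonizes the process and eventually calls run(), which enters listen();
# stop() is written as a signal handler (signal, frame). The __main__ guard
# below is an assumption for demonstration; the real salt-eventsd start
# script may wire this up differently. Note that the variant of the class at
# the top of this file takes config/log_level/log_file/daemonize keyword
# arguments instead of reading everything from the default config location.
# ---------------------------------------------------------------------------
if __name__ == '__main__':
    daemon = SaltEventsDaemon()
    daemon.start()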