class PipelineManager(object):
    """Worker that runs ready streams through their configured pipelines.

    Ready streams are loaded from the database in batches.  Each stream is
    run through the fire or expire pipeline named by its trigger definition
    and is then purged or marked completed, or put into an error state when
    the trigger definition or pipeline is unknown or the pipeline fails.
    """

    @classmethod
    def config_description(cls):
        """Return TriggerManager's config items plus the pipeline ones."""
        configs = TriggerManager.config_description()
        configs.update(dict(
            pipeline_handlers=ConfigItem(
                required=True,
                help="dictionary of pipeline handlers to load "
                     "Classes specified with simport syntax. "
                     "simport docs for more info"),
            pipeline_worker_batch_size=ConfigItem(
                help="Number of streams for pipeline "
                     "worker(s) to load at a time",
                default=1000),
            pipeline_worker_delay=ConfigItem(
                help="Number of seconds for pipeline worker "
                     "to sleep when it finds no streams to "
                     "process",
                default=10),
            pipeline_config=ConfigItem(
                required=True,
                help="Name of pipeline config file "
                     "defining the handlers for each "
                     "pipeline."),
            purge_completed_streams=ConfigItem(
                help="Delete successfully proccessed "
                     "streams when finished?",
                default=True),
        ))
        return configs

    def __init__(self, config, db=None, pipeline_handlers=None,
                 pipeline_config=None, trigger_defs=None):
        """Build the manager, loading from config whatever is not passed in.

        :param config: configuration; wrapped with ConfigManager.wrap()
            against config_description() and checked.
        :param db: database interface; a DBInterface is built from
            config['database'] when omitted.
        :param pipeline_handlers: mapping of handler name to handler; loaded
            from config['pipeline_handlers'] when omitted.
        :param pipeline_config: mapping of pipeline name to a list of handler
            configs; loaded from the config['pipeline_config'] file when
            omitted.  Its values are replaced in place (see below).
        :param trigger_defs: list of trigger definitions; loaded from the
            config['trigger_definitions'] file when omitted.
        """
        logger.debug("PipelineManager: Using config: %s" % str(config))
        config = ConfigManager.wrap(config, self.config_description())
        self.config = config
        config.check_config()
        config.add_config_path(*config['config_path'])

        if db is not None:
            self.db = db
        else:
            self.db = DBInterface(config['database'])

        if pipeline_handlers is not None:
            self.pipeline_handlers = pipeline_handlers
        else:
            self.pipeline_handlers = self._load_plugins(
                config['pipeline_handlers'])
        logger.debug("Pipeline handlers: %s" % str(self.pipeline_handlers))

        if pipeline_config is not None:
            self.pipeline_config = pipeline_config
        else:
            self.pipeline_config = config.load_file(config['pipeline_config'])
        logger.debug("Pipeline config: %s" % str(self.pipeline_config))

        # Each handler config is swapped for whatever
        # Pipeline.check_handler_config() returns for it.  Only values of
        # existing keys are reassigned, so iterating items() is safe.
        for pipeline, handler_configs in self.pipeline_config.items():
            self.pipeline_config[pipeline] = [
                Pipeline.check_handler_config(conf, self.pipeline_handlers)
                for conf in handler_configs]

        if trigger_defs is not None:
            self.trigger_definitions = trigger_defs
        else:
            defs = config.load_file(config['trigger_definitions'])
            logger.debug("Loaded trigger definitions %s" % str(defs))
            self.trigger_definitions = [TriggerDefinition(conf, None)
                                        for conf in defs]
        # Streams carry the name of their trigger definition; index by it.
        self.trigger_map = {tdef.name: tdef
                            for tdef in self.trigger_definitions}
        self.trigger_manager = TriggerManager(
            self.config, db=self.db, trigger_defs=self.trigger_definitions)

        self.pipeline_worker_batch_size = config['pipeline_worker_batch_size']
        self.pipeline_worker_delay = config['pipeline_worker_delay']
        self.statistics_period = config['statistics_period']
        self.purge_completed_streams = config['purge_completed_streams']

        # Counters reported (and reset) by _log_statistics().
        self.streams_fired = 0
        self.streams_expired = 0
        self.streams_loaded = 0
        self.last_status = self.current_time()

    @classmethod
    def _load_plugins(cls, plug_map, defaults=None):
        """Load plugins named in plug_map with simport.

        :param plug_map: mapping of plugin name to simport class string.
        :param defaults: optional mapping copied into the result first;
            entries from plug_map with the same name override it.
        :returns: dict of plugin name to loaded object.  Plugins that fail
            to load are logged and left out.
        """
        plugins = dict()
        if defaults is not None:
            plugins.update(defaults)
        for name, cls_string in plug_map.items():
            try:
                plugins[name] = simport.load(cls_string)
            except simport.ImportFailed as e:
                logger.error("Could not load plugin %s: Import failed. %s" % (
                             name, e))
            except (simport.MissingMethodOrFunction,
                    simport.MissingModule,
                    simport.BadDirectory) as e:
                logger.error("Could not load plugin %s: Not found. %s" % (
                             name, e))
        return plugins

    def current_time(self):
        """Return the current time as a naive UTC datetime."""
        # here so it's easily overridden.
        return datetime.datetime.utcnow()

    def _statistics_due(self):
        """Return True once statistics_period seconds passed since last log."""
        elapsed = self.current_time() - self.last_status
        # total_seconds(), not .seconds: the latter is only the 0..86399
        # component and ignores whole days.
        return elapsed.total_seconds() > self.statistics_period

    def _log_statistics(self):
        """Log the stream counters, reset them and dump the debuggers."""
        logger.info("Loaded %s streams. Fired %s, Expired %s." % (
                    self.streams_loaded, self.streams_fired,
                    self.streams_expired))
        self.streams_fired = 0
        self.streams_expired = 0
        self.streams_loaded = 0
        self.last_status = self.current_time()
        self.trigger_manager.debug_manager.dump_debuggers()

    def add_new_events(self, events):
        """Hand each event in events to the trigger manager."""
        for event in events:
            self.trigger_manager.add_event(event)

    def _run_pipeline(self, stream, trigger_def, pipeline_name,
                      pipeline_config):
        """Run the stream's events through one pipeline.

        Events returned by the pipeline are fed back through
        add_new_events().

        :returns: True on success, False if the pipeline raised
            PipelineExecutionError.
        """
        events = self.db.get_stream_events(stream)
        debugger = trigger_def.debugger
        try:
            pipeline = Pipeline(pipeline_name, pipeline_config,
                                self.pipeline_handlers)
            new_events = pipeline.handle_events(events, debugger)
        except PipelineExecutionError:
            logger.error("Exception in pipeline %s handling stream %s" % (
                         pipeline_name, stream.id))
            return False
        if new_events:
            self.add_new_events(new_events)
        return True

    def _complete_stream(self, stream):
        """Purge the stream, or mark it completed when purging is off."""
        if self.purge_completed_streams:
            self.db.purge_stream(stream)
            return
        try:
            self.db.set_stream_state(stream, StreamState.completed)
        except LockError:
            logger.error(
                "Stream %s locked while trying to set 'complete' state! "
                "This should not happen." % stream.id)

    def _error_stream(self, stream):
        """Put the stream into the 'error' state (failed while firing)."""
        try:
            self.db.set_stream_state(stream, StreamState.error)
        except LockError:
            logger.error("Stream %s locked while trying to set 'error' state! "
                         "This should not happen." % stream.id)

    def _expire_error_stream(self, stream):
        """Put the stream into the 'expire_error' state."""
        try:
            self.db.set_stream_state(stream, StreamState.expire_error)
        except LockError:
            logger.error(
                "Stream %s locked while trying to set 'expire_error' state! "
                "This should not happen." % stream.id)

    def safe_get_debugger(self, trigger_def):
        """Return trigger_def's debugger, or the manager's for None."""
        if trigger_def is not None:
            return trigger_def.debugger
        return self.trigger_manager.debug_manager.get_debugger(None)

    def fire_stream(self, stream):
        """Run the stream's fire pipeline and complete the stream.

        :returns: True if the stream was fired and completed; False if it
            was locked, its trigger definition or pipeline is unknown, or
            the pipeline failed.
        """
        trigger_def = self.trigger_map.get(stream.name)
        debugger = self.safe_get_debugger(trigger_def)
        try:
            stream = self.db.set_stream_state(stream, StreamState.firing)
        except LockError:
            logger.debug("Stream %s locked. Moving on..." % stream.id)
            debugger.bump_counter("Locked")
            return False
        logger.debug("Firing Stream %s." % stream.id)

        if trigger_def is None:
            debugger.bump_counter("Unknown trigger def '%s'" % stream.name)
            logger.error("Stream %s has unknown trigger definition %s" % (
                         stream.id, stream.name))
            self._error_stream(stream)
            return False

        pipeline = trigger_def.fire_pipeline
        if pipeline is not None:
            pipe_config = self.pipeline_config.get(pipeline)
            if pipe_config is None:
                debugger.bump_counter("Unknown pipeline '%s'" % pipeline)
                logger.error("Trigger %s for stream %s has unknown "
                             "pipeline %s" % (stream.name, stream.id,
                                              pipeline))
                self._error_stream(stream)
                # Stop here: there is no config to build a pipeline from.
                return False
            if not self._run_pipeline(stream, trigger_def, pipeline,
                                      pipe_config):
                self._error_stream(stream)
                return False
        else:
            logger.debug("No fire pipeline for stream %s. Nothing to do." % (
                         stream.id))
            debugger.bump_counter("No fire pipeline for '%s'" % stream.name)

        self._complete_stream(stream)
        debugger.bump_counter("Streams fired")
        self.streams_fired += 1
        return True

    def expire_stream(self, stream):
        """Run the stream's expire pipeline and complete the stream.

        :returns: True if the stream was expired and completed; False if it
            was locked, its trigger definition or pipeline is unknown, or
            the pipeline failed.
        """
        trigger_def = self.trigger_map.get(stream.name)
        debugger = self.safe_get_debugger(trigger_def)
        try:
            stream = self.db.set_stream_state(stream, StreamState.expiring)
        except LockError:
            debugger.bump_counter("Locked")
            logger.debug("Stream %s locked. Moving on..." % stream.id)
            return False
        logger.debug("Expiring Stream %s." % stream.id)

        if trigger_def is None:
            debugger.bump_counter("Unknown trigger def '%s'" % stream.name)
            logger.error("Stream %s has unknown trigger definition %s" % (
                         stream.id, stream.name))
            self._expire_error_stream(stream)
            return False

        pipeline = trigger_def.expire_pipeline
        if pipeline is not None:
            pipe_config = self.pipeline_config.get(pipeline)
            if pipe_config is None:
                debugger.bump_counter("Unknown pipeline '%s'" % pipeline)
                logger.error(
                    "Trigger %s for stream %s has unknown pipeline %s" % (
                        stream.name, stream.id, pipeline))
                self._expire_error_stream(stream)
                # Stop here: there is no config to build a pipeline from.
                return False
            if not self._run_pipeline(stream, trigger_def, pipeline,
                                      pipe_config):
                self._expire_error_stream(stream)
                return False
        else:
            logger.debug("No expire pipeline for stream %s. Nothing to do." % (
                         stream.id))
            debugger.bump_counter("No expire pipeline for '%s'" % stream.name)

        self._complete_stream(stream)
        debugger.bump_counter("Streams expired")
        self.streams_expired += 1
        return True

    def process_ready_streams(self, batch_size, expire=False):
        """Load up to batch_size ready streams and fire (or expire) them.

        :param batch_size: maximum number of streams to load.
        :param expire: process streams ready to expire instead of to fire.
        :returns: number of streams loaded (not the number that succeeded).
        """
        streams = self.db.get_ready_streams(batch_size, self.current_time(),
                                            expire=expire)
        stream_ct = len(streams)
        if expire:
            logger.debug("Loaded %s streams to expire." % stream_ct)
        else:
            logger.debug("Loaded %s streams to fire." % stream_ct)

        # NOTE(review): presumably shuffled so that concurrent workers are
        # less likely to contend for the same streams -- confirm.
        random.shuffle(streams)
        handle = self.expire_stream if expire else self.fire_stream
        for stream in streams:
            handle(stream)
        self.streams_loaded += stream_ct
        return stream_ct

    def run(self):
        """Process ready streams forever, sleeping when there are none."""
        while True:
            fire_ct = self.process_ready_streams(
                self.pipeline_worker_batch_size)
            expire_ct = self.process_ready_streams(
                self.pipeline_worker_batch_size, expire=True)

            if self._statistics_due():
                self._log_statistics()

            if not fire_ct and not expire_ct:
                logger.debug("No streams to fire or expire. Sleeping...")
                time.sleep(self.pipeline_worker_delay)
class PipelineManager(object):
    """Worker that runs ready streams through their configured pipelines.

    Ready streams are loaded from the database in batches.  Each stream is
    run through the fire or expire pipeline named by its trigger definition
    and is then purged or marked completed, or put into an error state when
    the trigger definition or pipeline is unknown or the pipeline fails.
    """

    @classmethod
    def config_description(cls):
        """Return TriggerManager's config items plus the pipeline ones."""
        configs = TriggerManager.config_description()
        configs.update(
            dict(
                pipeline_handlers=ConfigItem(
                    required=True,
                    help="dictionary of pipeline handlers to load "
                    "Classes specified with simport syntax. "
                    "simport docs for more info"),
                pipeline_worker_batch_size=ConfigItem(
                    help="Number of streams for pipeline "
                    "worker(s) to load at a time",
                    default=1000),
                pipeline_worker_delay=ConfigItem(
                    help="Number of seconds for pipeline worker "
                    "to sleep when it finds no streams to "
                    "process",
                    default=10),
                pipeline_config=ConfigItem(
                    required=True,
                    help="Name of pipeline config file "
                    "defining the handlers for each "
                    "pipeline."),
                purge_completed_streams=ConfigItem(
                    help="Delete successfully proccessed "
                    "streams when finished?",
                    default=True),
            ))
        return configs

    def __init__(self,
                 config,
                 db=None,
                 pipeline_handlers=None,
                 pipeline_config=None,
                 trigger_defs=None):
        """Build the manager, loading from config whatever is not passed in.

        :param config: configuration; wrapped with ConfigManager.wrap()
            against config_description() and checked.
        :param db: database interface; a DBInterface is built from
            config['database'] when omitted.
        :param pipeline_handlers: mapping of handler name to handler; loaded
            from config['pipeline_handlers'] when omitted.
        :param pipeline_config: mapping of pipeline name to a list of handler
            configs; loaded from the config['pipeline_config'] file when
            omitted.  Its values are replaced in place (see below).
        :param trigger_defs: list of trigger definitions; loaded from the
            config['trigger_definitions'] file when omitted.
        """
        logger.debug("PipelineManager: Using config: %s" % str(config))
        config = ConfigManager.wrap(config, self.config_description())
        self.config = config
        config.check_config()
        config.add_config_path(*config['config_path'])

        if db is not None:
            self.db = db
        else:
            self.db = DBInterface(config['database'])

        if pipeline_handlers is not None:
            self.pipeline_handlers = pipeline_handlers
        else:
            self.pipeline_handlers = self._load_plugins(
                config['pipeline_handlers'])
        logger.debug("Pipeline handlers: %s" % str(self.pipeline_handlers))

        if pipeline_config is not None:
            self.pipeline_config = pipeline_config
        else:
            self.pipeline_config = config.load_file(config['pipeline_config'])
        logger.debug("Pipeline config: %s" % str(self.pipeline_config))

        # Each handler config is swapped for whatever
        # Pipeline.check_handler_config() returns for it.  Only values of
        # existing keys are reassigned, so iterating items() is safe.
        for pipeline, handler_configs in self.pipeline_config.items():
            self.pipeline_config[pipeline] = [
                Pipeline.check_handler_config(conf, self.pipeline_handlers)
                for conf in handler_configs
            ]

        if trigger_defs is not None:
            self.trigger_definitions = trigger_defs
        else:
            defs = config.load_file(config['trigger_definitions'])
            logger.debug("Loaded trigger definitions %s" % str(defs))
            self.trigger_definitions = [
                TriggerDefinition(conf, None) for conf in defs
            ]
        # Streams carry the name of their trigger definition; index by it.
        self.trigger_map = {
            tdef.name: tdef for tdef in self.trigger_definitions
        }
        self.trigger_manager = TriggerManager(
            self.config, db=self.db, trigger_defs=self.trigger_definitions)

        self.pipeline_worker_batch_size = config['pipeline_worker_batch_size']
        self.pipeline_worker_delay = config['pipeline_worker_delay']
        self.statistics_period = config['statistics_period']
        self.purge_completed_streams = config['purge_completed_streams']

        # Counters reported (and reset) by _log_statistics().
        self.streams_fired = 0
        self.streams_expired = 0
        self.streams_loaded = 0
        self.last_status = self.current_time()

    @classmethod
    def _load_plugins(cls, plug_map, defaults=None):
        """Load plugins named in plug_map with simport.

        :param plug_map: mapping of plugin name to simport class string.
        :param defaults: optional mapping copied into the result first;
            entries from plug_map with the same name override it.
        :returns: dict of plugin name to loaded object.  Plugins that fail
            to load are logged and left out.
        """
        plugins = dict()
        if defaults is not None:
            plugins.update(defaults)
        for name, cls_string in plug_map.items():
            try:
                plugins[name] = simport.load(cls_string)
            except simport.ImportFailed as e:
                logger.error("Could not load plugin %s: Import failed. %s" %
                             (name, e))
            except (simport.MissingMethodOrFunction, simport.MissingModule,
                    simport.BadDirectory) as e:
                logger.error("Could not load plugin %s: Not found. %s" %
                             (name, e))
        return plugins

    def current_time(self):
        """Return the current time as a naive UTC datetime."""
        # here so it's easily overridden.
        return datetime.datetime.utcnow()

    def _statistics_due(self):
        """Return True once statistics_period seconds passed since last log."""
        elapsed = self.current_time() - self.last_status
        # total_seconds(), not .seconds: the latter is only the 0..86399
        # component and ignores whole days.
        return elapsed.total_seconds() > self.statistics_period

    def _log_statistics(self):
        """Log the stream counters, reset them and dump the debuggers."""
        logger.info(
            "Loaded %s streams. Fired %s, Expired %s." %
            (self.streams_loaded, self.streams_fired, self.streams_expired))
        self.streams_fired = 0
        self.streams_expired = 0
        self.streams_loaded = 0
        self.last_status = self.current_time()
        self.trigger_manager.debug_manager.dump_debuggers()

    def add_new_events(self, events):
        """Hand each event in events to the trigger manager."""
        for event in events:
            self.trigger_manager.add_event(event)

    def _run_pipeline(self, stream, trigger_def, pipeline_name,
                      pipeline_config):
        """Run the stream's events through one pipeline.

        Events returned by the pipeline are fed back through
        add_new_events().

        :returns: True on success, False if the pipeline raised
            PipelineExecutionError.
        """
        events = self.db.get_stream_events(stream)
        debugger = trigger_def.debugger
        try:
            pipeline = Pipeline(pipeline_name, pipeline_config,
                                self.pipeline_handlers)
            new_events = pipeline.handle_events(events, debugger)
        except PipelineExecutionError:
            logger.error("Exception in pipeline %s handling stream %s" %
                         (pipeline_name, stream.id))
            return False
        if new_events:
            self.add_new_events(new_events)
        return True

    def _complete_stream(self, stream):
        """Purge the stream, or mark it completed when purging is off."""
        if self.purge_completed_streams:
            self.db.purge_stream(stream)
            return
        try:
            self.db.set_stream_state(stream, StreamState.completed)
        except LockError:
            logger.error(
                "Stream %s locked while trying to set 'complete' state! "
                "This should not happen." % stream.id)

    def _error_stream(self, stream):
        """Put the stream into the 'error' state (failed while firing)."""
        try:
            self.db.set_stream_state(stream, StreamState.error)
        except LockError:
            logger.error("Stream %s locked while trying to set 'error' state! "
                         "This should not happen." % stream.id)

    def _expire_error_stream(self, stream):
        """Put the stream into the 'expire_error' state."""
        try:
            self.db.set_stream_state(stream, StreamState.expire_error)
        except LockError:
            logger.error(
                "Stream %s locked while trying to set 'expire_error' state! "
                "This should not happen." % stream.id)

    def safe_get_debugger(self, trigger_def):
        """Return trigger_def's debugger, or the manager's for None."""
        if trigger_def is not None:
            return trigger_def.debugger
        return self.trigger_manager.debug_manager.get_debugger(None)

    def fire_stream(self, stream):
        """Run the stream's fire pipeline and complete the stream.

        :returns: True if the stream was fired and completed; False if it
            was locked, its trigger definition or pipeline is unknown, or
            the pipeline failed.
        """
        trigger_def = self.trigger_map.get(stream.name)
        debugger = self.safe_get_debugger(trigger_def)
        try:
            stream = self.db.set_stream_state(stream, StreamState.firing)
        except LockError:
            logger.debug("Stream %s locked. Moving on..." % stream.id)
            debugger.bump_counter("Locked")
            return False
        logger.debug("Firing Stream %s." % stream.id)

        if trigger_def is None:
            debugger.bump_counter("Unknown trigger def '%s'" % stream.name)
            logger.error("Stream %s has unknown trigger definition %s" %
                         (stream.id, stream.name))
            self._error_stream(stream)
            return False

        pipeline = trigger_def.fire_pipeline
        if pipeline is not None:
            pipe_config = self.pipeline_config.get(pipeline)
            if pipe_config is None:
                debugger.bump_counter("Unknown pipeline '%s'" % pipeline)
                logger.error("Trigger %s for stream %s has unknown "
                             "pipeline %s" %
                             (stream.name, stream.id, pipeline))
                self._error_stream(stream)
                # Stop here: there is no config to build a pipeline from.
                return False
            if not self._run_pipeline(stream, trigger_def, pipeline,
                                      pipe_config):
                self._error_stream(stream)
                return False
        else:
            logger.debug("No fire pipeline for stream %s. Nothing to do." %
                         (stream.id))
            debugger.bump_counter("No fire pipeline for '%s'" % stream.name)

        self._complete_stream(stream)
        debugger.bump_counter("Streams fired")
        self.streams_fired += 1
        return True

    def expire_stream(self, stream):
        """Run the stream's expire pipeline and complete the stream.

        :returns: True if the stream was expired and completed; False if it
            was locked, its trigger definition or pipeline is unknown, or
            the pipeline failed.
        """
        trigger_def = self.trigger_map.get(stream.name)
        debugger = self.safe_get_debugger(trigger_def)
        try:
            stream = self.db.set_stream_state(stream, StreamState.expiring)
        except LockError:
            debugger.bump_counter("Locked")
            logger.debug("Stream %s locked. Moving on..." % stream.id)
            return False
        logger.debug("Expiring Stream %s." % stream.id)

        if trigger_def is None:
            debugger.bump_counter("Unknown trigger def '%s'" % stream.name)
            logger.error("Stream %s has unknown trigger definition %s" %
                         (stream.id, stream.name))
            self._expire_error_stream(stream)
            return False

        pipeline = trigger_def.expire_pipeline
        if pipeline is not None:
            pipe_config = self.pipeline_config.get(pipeline)
            if pipe_config is None:
                debugger.bump_counter("Unknown pipeline '%s'" % pipeline)
                logger.error(
                    "Trigger %s for stream %s has unknown pipeline %s" %
                    (stream.name, stream.id, pipeline))
                self._expire_error_stream(stream)
                # Stop here: there is no config to build a pipeline from.
                return False
            if not self._run_pipeline(stream, trigger_def, pipeline,
                                      pipe_config):
                self._expire_error_stream(stream)
                return False
        else:
            logger.debug("No expire pipeline for stream %s. Nothing to do." %
                         (stream.id))
            debugger.bump_counter("No expire pipeline for '%s'" % stream.name)

        self._complete_stream(stream)
        debugger.bump_counter("Streams expired")
        self.streams_expired += 1
        return True

    def process_ready_streams(self, batch_size, expire=False):
        """Load up to batch_size ready streams and fire (or expire) them.

        :param batch_size: maximum number of streams to load.
        :param expire: process streams ready to expire instead of to fire.
        :returns: number of streams loaded (not the number that succeeded).
        """
        streams = self.db.get_ready_streams(
            batch_size, self.current_time(), expire=expire)
        stream_ct = len(streams)
        if expire:
            logger.debug("Loaded %s streams to expire." % stream_ct)
        else:
            logger.debug("Loaded %s streams to fire." % stream_ct)

        # NOTE(review): presumably shuffled so that concurrent workers are
        # less likely to contend for the same streams -- confirm.
        random.shuffle(streams)
        handle = self.expire_stream if expire else self.fire_stream
        for stream in streams:
            handle(stream)
        self.streams_loaded += stream_ct
        return stream_ct

    def run(self):
        """Process ready streams forever, sleeping when there are none."""
        while True:
            fire_ct = self.process_ready_streams(
                self.pipeline_worker_batch_size)
            expire_ct = self.process_ready_streams(
                self.pipeline_worker_batch_size, expire=True)

            if self._statistics_due():
                self._log_statistics()

            if not fire_ct and not expire_ct:
                logger.debug("No streams to fire or expire. Sleeping...")
                time.sleep(self.pipeline_worker_delay)
class PipelineManager(object):
    """Worker that runs ready streams through their configured pipelines.

    Ready streams are loaded from the database in batches.  Each stream is
    run through the fire or expire pipeline named by its trigger definition
    and is then purged or marked completed, or put into an error state when
    the trigger definition or pipeline is unknown or the pipeline fails.
    When enabled by config, old events are also trimmed from the database.
    """

    @classmethod
    def config_description(cls):
        """Return TriggerManager's config items plus the pipeline ones."""
        configs = TriggerManager.config_description()
        configs.update(dict(
            pipeline_handlers=ConfigItem(
                required=True,
                help="dictionary of pipeline handlers to load "
                     "Classes specified with simport syntax. "
                     "simport docs for more info"),
            pipeline_worker_batch_size=ConfigItem(
                help="Number of streams for pipeline "
                     "worker(s) to load at a time",
                default=1000),
            pipeline_worker_delay=ConfigItem(
                help="Number of seconds for pipeline worker "
                     "to sleep when it finds no streams to "
                     "process",
                default=10),
            pipeline_config=ConfigItem(
                required=True,
                help="Name of pipeline config file "
                     "defining the handlers for each "
                     "pipeline."),
            purge_completed_streams=ConfigItem(
                help="Delete successfully proccessed "
                     "streams when finished?",
                default=True),
            trim_events=ConfigItem(
                help="Delete events older than a configurable time.",
                default=False),
            trim_events_age=ConfigItem(
                help="Delete events older than this (timex expr).",
                default="$timestamp - 14d"),
            trim_events_batch_size=ConfigItem(
                help="Maximum number of events for pipeline "
                     "worker(s) to trim at a time",
                default=100),
        ))
        return configs

    def __init__(self, config, db=None, pipeline_handlers=None,
                 pipeline_config=None, trigger_defs=None, time_sync=None,
                 proc_name='pipeline_worker'):
        """Build the manager, loading from config whatever is not passed in.

        :param config: configuration; wrapped with ConfigManager.wrap()
            against config_description() and checked.
        :param db: database interface; a DBInterface is built from
            config['database'] when omitted.
        :param pipeline_handlers: mapping of handler name to handler; loaded
            from config['pipeline_handlers'] when omitted.
        :param pipeline_config: mapping of pipeline name to a list of handler
            configs; loaded from the config['pipeline_config'] file when
            omitted.  Its values are replaced in place (see below).
        :param trigger_defs: list of trigger definitions; when omitted they
            are loaded from the config['trigger_definitions'] file if that
            setting is present, otherwise the list stays empty.
        :param time_sync: time source used by current_time(); a
            ts.TimeSync() is created when omitted.
        :param proc_name: name used to distinguish worker processes in logs.
        """
        # name used to distinguish worker processes in logs
        self.proc_name = proc_name

        logger.debug("PipelineManager(%s): Using config: %s"
                     % (self.proc_name, str(config)))
        config = ConfigManager.wrap(config, self.config_description())
        self.config = config
        self.trigger_definitions = []
        config.check_config()
        config.add_config_path(*config['config_path'])

        if time_sync is None:
            time_sync = ts.TimeSync()
        self.time_sync = time_sync

        if db is not None:
            self.db = db
        else:
            self.db = DBInterface(config['database'])

        if pipeline_handlers is not None:
            self.pipeline_handlers = pipeline_handlers
        else:
            self.pipeline_handlers = self._load_plugins(
                config['pipeline_handlers'])
        logger.debug("Pipeline handlers: %s" % str(self.pipeline_handlers))

        if pipeline_config is not None:
            self.pipeline_config = pipeline_config
        else:
            self.pipeline_config = config.load_file(config['pipeline_config'])
        logger.debug("Pipeline config: %s" % str(self.pipeline_config))

        # Each handler config is swapped for whatever
        # Pipeline.check_handler_config() returns for it.  Only values of
        # existing keys are reassigned, so iterating items() is safe.
        for pipeline, handler_configs in self.pipeline_config.items():
            self.pipeline_config[pipeline] = [
                Pipeline.check_handler_config(conf, self.pipeline_handlers)
                for conf in handler_configs]

        if trigger_defs is not None:
            self.trigger_definitions = trigger_defs
        else:
            # trigger_definition config file is optional
            if config.contains('trigger_definitions'):
                defs = config.load_file(config['trigger_definitions'])
                logger.debug("Loaded trigger definitions %s" % str(defs))
                self.trigger_definitions = [
                    TriggerDefinition(conf, None) for conf in defs]

        self.trigger_manager = TriggerManager(
            self.config, db=self.db, trigger_defs=self.trigger_definitions,
            time_sync=time_sync)

        self.pipeline_worker_batch_size = config['pipeline_worker_batch_size']
        self.pipeline_worker_delay = config['pipeline_worker_delay']
        self.statistics_period = config['statistics_period']
        self.purge_completed_streams = config['purge_completed_streams']
        self.trim_events = config['trim_events']
        self.trim_events_batch_size = config['trim_events_batch_size']
        try:
            self.trim_events_age = timex.parse(str(config['trim_events_age']))
        except timex.TimexError:
            # A bad expression only turns trimming off; the worker still runs.
            logger.error("Invalid trim event expression: %s Event trimming "
                         "disabled." % config['trim_events_age'])
            self.trim_events_age = None
            self.trim_events = False

        # Counters reported (and reset) by _log_statistics().
        self.streams_fired = 0
        self.streams_expired = 0
        self.streams_loaded = 0
        self.last_status = self.current_time()

    @classmethod
    def _load_plugins(cls, plug_map, defaults=None):
        """Load plugins named in plug_map with simport.

        :param plug_map: mapping of plugin name to simport class string.
        :param defaults: optional mapping copied into the result first;
            entries from plug_map with the same name override it.
        :returns: dict of plugin name to loaded object.  Plugins that fail
            to load are logged and left out.
        """
        plugins = dict()
        if defaults is not None:
            plugins.update(defaults)
        for name, cls_string in plug_map.items():
            try:
                plugins[name] = simport.load(cls_string)
            except simport.ImportFailed as e:
                logger.error("Could not load plugin %s: Import failed. %s" % (
                             name, e))
            except (simport.MissingMethodOrFunction,
                    simport.MissingModule,
                    simport.BadDirectory) as e:
                logger.error("Could not load plugin %s: Not found. %s" % (
                             name, e))
        return plugins

    def current_time(self):
        """Return the current time as reported by the time_sync object."""
        # here so it's easily overridden.
        return self.time_sync.current_time()

    def _statistics_due(self):
        """Return True once statistics_period seconds passed since last log."""
        elapsed = self.current_time() - self.last_status
        # total_seconds(), not .seconds: the latter is only the 0..86399
        # component and ignores whole days.
        return elapsed.total_seconds() > self.statistics_period

    def _log_statistics(self):
        """Log the stream counters, reset them and dump the debuggers."""
        logger.info("Loaded %s streams. Fired %s, Expired %s." % (
                    self.streams_loaded, self.streams_fired,
                    self.streams_expired))
        self.streams_fired = 0
        self.streams_expired = 0
        self.streams_loaded = 0
        self.last_status = self.current_time()
        self.trigger_manager.debug_manager.dump_debuggers()

    def add_new_events(self, events):
        """Hand each event in events to the trigger manager."""
        for event in events:
            self.trigger_manager.add_event(event)

    def _run_pipeline(self, stream, trigger_def, pipeline_name,
                      pipeline_config):
        """Run the stream's events through one pipeline.

        Events returned by the pipeline are fed back through
        add_new_events().

        :returns: True on success, False if the pipeline raised
            PipelineExecutionError.
        """
        events = self.db.get_stream_events(stream)
        debugger = trigger_def.debugger
        try:
            pipeline = Pipeline(pipeline_name, pipeline_config,
                                self.pipeline_handlers)
            new_events = pipeline.handle_events(events, stream, debugger)
        except PipelineExecutionError:
            logger.error("Exception in pipeline %s handling stream %s" % (
                         pipeline_name, stream.id))
            return False
        if new_events:
            self.add_new_events(new_events)
        return True

    def _complete_stream(self, stream):
        """Purge the stream, or mark it completed when purging is off."""
        if self.purge_completed_streams:
            self.db.purge_stream(stream)
            return
        try:
            self.db.set_stream_state(stream, StreamState.completed)
        except LockError:
            logger.error(
                "Stream %s locked while trying to set 'complete' state! "
                "This should not happen." % stream.id)

    def _error_stream(self, stream):
        """Put the stream into the 'error' state (failed while firing)."""
        try:
            self.db.set_stream_state(stream, StreamState.error)
        except LockError:
            logger.error("Stream %s locked while trying to set 'error' state! "
                         "This should not happen." % stream.id)

    def _expire_error_stream(self, stream):
        """Put the stream into the 'expire_error' state."""
        try:
            self.db.set_stream_state(stream, StreamState.expire_error)
        except LockError:
            logger.error(
                "Stream %s locked while trying to set 'expire_error' state! "
                "This should not happen." % stream.id)

    def safe_get_debugger(self, trigger_def):
        """Return trigger_def's debugger, or the manager's for None."""
        if trigger_def is not None:
            return trigger_def.debugger
        return self.trigger_manager.debug_manager.get_debugger(None)

    def add_trigger_definition(self, list_of_triggerdefs):
        """Pass the trigger definitions on to the trigger manager."""
        self.trigger_manager.add_trigger_definition(list_of_triggerdefs)

    def delete_trigger_definition(self, trigger_def_name):
        """Ask the trigger manager to delete the named trigger definition."""
        self.trigger_manager.delete_trigger_definition(trigger_def_name)

    def fire_stream(self, stream):
        """Run the stream's fire pipeline and complete the stream.

        :returns: True if the stream was fired and completed; False if it
            was locked, its trigger definition or pipeline is unknown, or
            the pipeline failed.
        """
        trigger_def = self.trigger_manager.trigger_map.get(stream.name)
        debugger = self.safe_get_debugger(trigger_def)
        try:
            stream = self.db.set_stream_state(stream, StreamState.firing)
        except LockError:
            logger.debug("Stream %s locked. Moving on..." % stream.id)
            debugger.bump_counter("Locked")
            return False
        logger.debug("Firing Stream %s." % stream.id)

        if trigger_def is None:
            debugger.bump_counter("Unknown trigger def '%s'" % stream.name)
            logger.error("Stream %s has unknown trigger definition %s" % (
                         stream.id, stream.name))
            self._error_stream(stream)
            return False

        pipeline = trigger_def.fire_pipeline
        if pipeline is not None:
            pipe_config = self.pipeline_config.get(pipeline)
            if pipe_config is None:
                debugger.bump_counter("Unknown pipeline '%s'" % pipeline)
                logger.error("Trigger %s for stream %s has unknown "
                             "pipeline %s" % (stream.name, stream.id,
                                              pipeline))
                self._error_stream(stream)
                # Stop here: there is no config to build a pipeline from.
                return False
            if not self._run_pipeline(stream, trigger_def, pipeline,
                                      pipe_config):
                self._error_stream(stream)
                return False
        else:
            logger.debug("No fire pipeline for stream %s. Nothing to do." % (
                         stream.id))
            debugger.bump_counter("No fire pipeline for '%s'" % stream.name)

        self._complete_stream(stream)
        debugger.bump_counter("Streams fired")
        self.streams_fired += 1
        return True

    def expire_stream(self, stream):
        """Run the stream's expire pipeline and complete the stream.

        :returns: True if the stream was expired and completed; False if it
            was locked, its trigger definition or pipeline is unknown, or
            the pipeline failed.
        """
        trigger_def = self.trigger_manager.trigger_map.get(stream.name)
        debugger = self.safe_get_debugger(trigger_def)
        try:
            stream = self.db.set_stream_state(stream, StreamState.expiring)
        except LockError:
            debugger.bump_counter("Locked")
            logger.debug("Stream %s locked. Moving on..." % stream.id)
            return False
        logger.debug("Expiring Stream %s." % stream.id)

        if trigger_def is None:
            debugger.bump_counter("Unknown trigger def '%s'" % stream.name)
            logger.error("Stream %s has unknown trigger definition %s" % (
                         stream.id, stream.name))
            self._expire_error_stream(stream)
            return False

        pipeline = trigger_def.expire_pipeline
        if pipeline is not None:
            pipe_config = self.pipeline_config.get(pipeline)
            if pipe_config is None:
                debugger.bump_counter("Unknown pipeline '%s'" % pipeline)
                logger.error(
                    "Trigger %s for stream %s has unknown pipeline %s" % (
                        stream.name, stream.id, pipeline))
                self._expire_error_stream(stream)
                # Stop here: there is no config to build a pipeline from.
                return False
            if not self._run_pipeline(stream, trigger_def, pipeline,
                                      pipe_config):
                self._expire_error_stream(stream)
                return False
        else:
            logger.debug("No expire pipeline for stream %s. Nothing to do." % (
                         stream.id))
            debugger.bump_counter("No expire pipeline for '%s'" % stream.name)

        self._complete_stream(stream)
        debugger.bump_counter("Streams expired")
        self.streams_expired += 1
        return True

    def process_ready_streams(self, batch_size, expire=False):
        """Load up to batch_size ready streams and fire (or expire) them.

        :param batch_size: maximum number of streams to load.
        :param expire: process streams ready to expire instead of to fire.
        :returns: number of streams loaded (not the number that succeeded).
        """
        streams = self.db.get_ready_streams(batch_size, self.current_time(),
                                            expire=expire)
        stream_ct = len(streams)
        if expire:
            logger.debug("Loaded %s streams to expire." % stream_ct)
        else:
            logger.debug("Loaded %s streams to fire." % stream_ct)

        # NOTE(review): presumably shuffled so that concurrent workers are
        # less likely to contend for the same streams -- confirm.
        random.shuffle(streams)
        handle = self.expire_stream if expire else self.fire_stream
        for stream in streams:
            handle(stream)
        self.streams_loaded += stream_ct
        return stream_ct

    def process_trim_events(self):
        """Purge one batch of events older than the trim_events_age cutoff.

        :returns: number of event ids found and handed to purge_events().
        """
        # trim_events_age is the parsed timex expression; calling it yields
        # an object whose .timestamp is the cutoff.
        trim_date = self.trim_events_age().timestamp
        event_ids = self.db.find_older_events(trim_date,
                                              self.trim_events_batch_size)
        logger.debug("Trimming %s old events" % len(event_ids))
        self.db.purge_events(event_ids)
        return len(event_ids)

    def run(self):
        """Process streams (and trim events) forever.

        Sleeps pipeline_worker_delay seconds when a pass found nothing to
        do.  A DatabaseConnectionError is logged and retried after a short
        pause; any other exception is logged and re-raised.
        """
        while True:
            try:
                fire_ct = self.process_ready_streams(
                    self.pipeline_worker_batch_size)
                expire_ct = self.process_ready_streams(
                    self.pipeline_worker_batch_size,
                    expire=True)

                trim_ct = 0
                if self.trim_events:
                    trim_ct = self.process_trim_events()

                if self._statistics_due():
                    self._log_statistics()

                if not fire_ct and not expire_ct and not trim_ct:
                    logger.debug("No streams to fire or expire. Sleeping...")
                    time.sleep(self.pipeline_worker_delay)
            except DatabaseConnectionError:
                logger.warning("Database Connection went away. "
                               "Reconnecting...")
                time.sleep(5)
                # DB layer will reconnect automatically. We just need to
                # retry the operation. (mdragon)
            except Exception:
                logger.exception("Unknown Error in pipeline worker!")
                raise
class PipelineManager(object):
    """Worker that fires and expires ready streams through their pipelines.

    The manager loads ready streams from the database, looks up the trigger
    definition named by each stream, runs the configured fire/expire
    pipeline over the stream's events and then completes (or errors) the
    stream.  It can optionally trim old events on every pass of the loop.
    """

    @classmethod
    def config_description(cls):
        """Return the config schema: TriggerManager's items plus our own."""
        configs = TriggerManager.config_description()
        configs.update(
            dict(
                pipeline_handlers=ConfigItem(
                    required=True,
                    help="dictionary of pipeline handlers to load "
                         "Classes specified with simport syntax. "
                         "simport docs for more info"),
                pipeline_worker_batch_size=ConfigItem(
                    help="Number of streams for pipeline "
                         "worker(s) to load at a time",
                    default=1000),
                pipeline_worker_delay=ConfigItem(
                    help="Number of seconds for pipeline worker "
                         "to sleep when it finds no streams to "
                         "process",
                    default=10),
                pipeline_config=ConfigItem(
                    required=True,
                    help="Name of pipeline config file "
                         "defining the handlers for each "
                         "pipeline."),
                purge_completed_streams=ConfigItem(
                    help="Delete successfully proccessed "
                         "streams when finished?",
                    default=True),
                trim_events=ConfigItem(
                    help="Delete events older than a configurable time.",
                    default=False),
                trim_events_age=ConfigItem(
                    help="Delete events older than this (timex expr).",
                    default="$timestamp - 14d"),
                trim_events_batch_size=ConfigItem(
                    help="Maximum number of events for pipeline "
                         "worker(s) to trim at a time",
                    default=100),
            ))
        return configs

    def __init__(self, config, db=None, pipeline_handlers=None,
                 pipeline_config=None, trigger_defs=None, time_sync=None,
                 proc_name='pipeline_worker'):
        """Build the manager from ``config``.

        :param config: raw config; wrapped with ``ConfigManager.wrap``.
        :param db: optional database interface; when omitted a
            ``DBInterface`` is built from ``config['database']``.
        :param pipeline_handlers: optional mapping of handler name to
            handler; when omitted it is loaded from the config.
        :param pipeline_config: optional mapping of pipeline name to a list
            of handler configs; when omitted it is loaded from the file
            named by ``config['pipeline_config']``.
        :param trigger_defs: optional list of trigger definitions; when
            omitted they are loaded from the (optional) definitions file.
        :param time_sync: optional time source; defaults to
            ``ts.TimeSync()``.
        :param proc_name: name used to distinguish worker processes in logs.
        """
        # name used to distinguish worker processes in logs
        self.proc_name = proc_name

        logger.debug("PipelineManager(%s): Using config: %s",
                     self.proc_name, str(config))
        config = ConfigManager.wrap(config, self.config_description())
        self.config = config
        self.trigger_definitions = []
        config.check_config()
        config.add_config_path(*config['config_path'])

        if time_sync is None:
            time_sync = ts.TimeSync()
        self.time_sync = time_sync

        if db is not None:
            self.db = db
        else:
            self.db = DBInterface(config['database'])

        if pipeline_handlers is not None:
            self.pipeline_handlers = pipeline_handlers
        else:
            self.pipeline_handlers = self._load_plugins(
                config['pipeline_handlers'])
        logger.debug("Pipeline handlers: %s", str(self.pipeline_handlers))

        if pipeline_config is not None:
            self.pipeline_config = pipeline_config
        else:
            self.pipeline_config = config.load_file(config['pipeline_config'])
        logger.debug("Pipeline config: %s", str(self.pipeline_config))

        # Replace every handler config with its checked form.  Only existing
        # keys are reassigned, so iterating over items() here is safe.
        for pipeline, handler_configs in self.pipeline_config.items():
            self.pipeline_config[pipeline] = [
                Pipeline.check_handler_config(conf, self.pipeline_handlers)
                for conf in handler_configs
            ]

        if trigger_defs is not None:
            self.trigger_definitions = trigger_defs
        else:
            # trigger_definition config file is optional
            if config.contains('trigger_definitions'):
                defs = config.load_file(config['trigger_definitions'])
                logger.debug("Loaded trigger definitions %s", str(defs))
                self.trigger_definitions = [
                    TriggerDefinition(conf, None) for conf in defs
                ]

        self.trigger_manager = TriggerManager(
            self.config,
            db=self.db,
            trigger_defs=self.trigger_definitions,
            time_sync=time_sync)

        self.pipeline_worker_batch_size = config['pipeline_worker_batch_size']
        self.pipeline_worker_delay = config['pipeline_worker_delay']
        self.statistics_period = config['statistics_period']
        self.purge_completed_streams = config['purge_completed_streams']
        self.trim_events = config['trim_events']
        self.trim_events_batch_size = config['trim_events_batch_size']
        try:
            self.trim_events_age = timex.parse(str(config['trim_events_age']))
        except timex.TimexError:
            logger.error("Invalid trim event expression: %s Event trimming "
                         "disabled.", config['trim_events_age'])
            self.trim_events_age = None
            self.trim_events = False

        self.streams_fired = 0
        self.streams_expired = 0
        self.streams_loaded = 0
        self.last_status = self.current_time()

    @classmethod
    def _load_plugins(cls, plug_map, defaults=None):
        """Load each class string in ``plug_map`` with simport.

        Plugins that fail to load are logged and left out of the result.

        :param plug_map: mapping of plugin name to simport class string.
        :param defaults: optional mapping used to seed the result.
        :returns: dict of plugin name to loaded object.
        """
        plugins = dict()
        if defaults is not None:
            plugins.update(defaults)
        for name, cls_string in plug_map.items():
            try:
                plugins[name] = simport.load(cls_string)
            except simport.ImportFailed as e:
                logger.error("Could not load plugin %s: Import failed. %s",
                             name, e)
            except (simport.MissingMethodOrFunction,
                    simport.MissingModule,
                    simport.BadDirectory) as e:
                logger.error("Could not load plugin %s: Not found. %s",
                             name, e)
        return plugins

    def current_time(self):
        """Return the current time as reported by the time sync object."""
        # here so it's easily overridden.
        return self.time_sync.current_time()

    def _log_statistics(self):
        """Log the stream counters, reset them and dump the debuggers."""
        logger.info("Loaded %s streams. Fired %s, Expired %s.",
                    self.streams_loaded, self.streams_fired,
                    self.streams_expired)
        self.streams_fired = 0
        self.streams_expired = 0
        self.streams_loaded = 0
        self.last_status = self.current_time()
        self.trigger_manager.debug_manager.dump_debuggers()

    def add_new_events(self, events):
        """Feed every event in ``events`` to the trigger manager."""
        for event in events:
            self.trigger_manager.add_event(event)

    def _run_pipeline(self, stream, trigger_def, pipeline_name,
                      pipeline_config):
        """Run one pipeline over the events of ``stream``.

        :returns: False when the pipeline raised PipelineExecutionError,
            True otherwise.  Events produced by the pipeline are passed to
            ``add_new_events``.
        """
        events = self.db.get_stream_events(stream)
        debugger = trigger_def.debugger
        try:
            pipeline = Pipeline(pipeline_name, pipeline_config,
                                self.pipeline_handlers)
            new_events = pipeline.handle_events(events, stream, debugger)
        except PipelineExecutionError:
            logger.error("Exception in pipeline %s handling stream %s",
                         pipeline_name, stream.id)
            return False
        if new_events:
            self.add_new_events(new_events)
        return True

    def _complete_stream(self, stream):
        """Purge the stream, or mark it completed when purging is off."""
        if self.purge_completed_streams:
            self.db.purge_stream(stream)
            return
        try:
            self.db.set_stream_state(stream, StreamState.completed)
        except LockError:
            logger.error(
                "Stream %s locked while trying to set 'complete' state! "
                "This should not happen.", stream.id)

    def _error_stream(self, stream):
        """Put the stream into the 'error' state (lock errors are logged)."""
        try:
            self.db.set_stream_state(stream, StreamState.error)
        except LockError:
            logger.error("Stream %s locked while trying to set 'error' "
                         "state! This should not happen.", stream.id)

    def _expire_error_stream(self, stream):
        """Put the stream into the 'expire_error' state."""
        try:
            self.db.set_stream_state(stream, StreamState.expire_error)
        except LockError:
            logger.error(
                "Stream %s locked while trying to set 'expire_error' state! "
                "This should not happen.", stream.id)

    def safe_get_debugger(self, trigger_def):
        """Return the trigger's debugger, or the manager's fallback one."""
        if trigger_def is not None:
            return trigger_def.debugger
        return self.trigger_manager.debug_manager.get_debugger(None)

    def add_trigger_definition(self, list_of_triggerdefs):
        """Delegate to the trigger manager."""
        self.trigger_manager.add_trigger_definition(list_of_triggerdefs)

    def delete_trigger_definition(self, trigger_def_name):
        """Delegate to the trigger manager."""
        self.trigger_manager.delete_trigger_definition(trigger_def_name)

    def fire_stream(self, stream):
        """Run the fire pipeline for ``stream``.

        :returns: True when the stream was fired and completed; False when
            it was locked, had an unknown trigger definition or pipeline, or
            the pipeline failed.
        """
        trigger_def = self.trigger_manager.trigger_map.get(stream.name)
        debugger = self.safe_get_debugger(trigger_def)
        try:
            stream = self.db.set_stream_state(stream, StreamState.firing)
        except LockError:
            logger.debug("Stream %s locked. Moving on...", stream.id)
            debugger.bump_counter("Locked")
            return False
        logger.debug("Firing Stream %s.", stream.id)

        if trigger_def is None:
            debugger.bump_counter("Unknown trigger def '%s'" % stream.name)
            logger.error("Stream %s has unknown trigger definition %s",
                         stream.id, stream.name)
            self._error_stream(stream)
            return False

        pipeline = trigger_def.fire_pipeline
        if pipeline is not None:
            pipe_config = self.pipeline_config.get(pipeline)
            if pipe_config is None:
                debugger.bump_counter("Unknown pipeline '%s'" % pipeline)
                logger.error("Trigger %s for stream %s has unknown "
                             "pipeline %s",
                             stream.name, stream.id, pipeline)
                self._error_stream(stream)
                # The stream is already errored; do not try to run a
                # pipeline that has no configuration.
                return False
            if not self._run_pipeline(stream, trigger_def, pipeline,
                                      pipe_config):
                self._error_stream(stream)
                return False
        else:
            logger.debug("No fire pipeline for stream %s. Nothing to do.",
                         stream.id)
            debugger.bump_counter("No fire pipeline for '%s'" % stream.name)

        self._complete_stream(stream)
        debugger.bump_counter("Streams fired")
        self.streams_fired += 1
        return True

    def expire_stream(self, stream):
        """Run the expire pipeline for ``stream``.

        :returns: True when the stream was expired and completed; False when
            it was locked, had an unknown trigger definition or pipeline, or
            the pipeline failed.
        """
        trigger_def = self.trigger_manager.trigger_map.get(stream.name)
        debugger = self.safe_get_debugger(trigger_def)
        try:
            stream = self.db.set_stream_state(stream, StreamState.expiring)
        except LockError:
            debugger.bump_counter("Locked")
            logger.debug("Stream %s locked. Moving on...", stream.id)
            return False
        logger.debug("Expiring Stream %s.", stream.id)

        if trigger_def is None:
            debugger.bump_counter("Unknown trigger def '%s'" % stream.name)
            logger.error("Stream %s has unknown trigger definition %s",
                         stream.id, stream.name)
            self._expire_error_stream(stream)
            return False

        pipeline = trigger_def.expire_pipeline
        if pipeline is not None:
            pipe_config = self.pipeline_config.get(pipeline)
            if pipe_config is None:
                debugger.bump_counter("Unknown pipeline '%s'" % pipeline)
                logger.error(
                    "Trigger %s for stream %s has unknown pipeline %s",
                    stream.name, stream.id, pipeline)
                self._expire_error_stream(stream)
                # The stream is already errored; do not try to run a
                # pipeline that has no configuration.
                return False
            if not self._run_pipeline(stream, trigger_def, pipeline,
                                      pipe_config):
                self._expire_error_stream(stream)
                return False
        else:
            logger.debug("No expire pipeline for stream %s. Nothing to do.",
                         stream.id)
            debugger.bump_counter("No expire pipeline for '%s'" % stream.name)

        self._complete_stream(stream)
        debugger.bump_counter("Streams expired")
        self.streams_expired += 1
        return True

    def process_ready_streams(self, batch_size, expire=False):
        """Load up to ``batch_size`` ready streams and fire or expire them.

        :param batch_size: maximum number of streams to load.
        :param expire: when True expire the streams instead of firing them.
        :returns: number of streams loaded.
        """
        streams = self.db.get_ready_streams(batch_size, self.current_time(),
                                            expire=expire)
        stream_ct = len(streams)
        if expire:
            logger.debug("Loaded %s streams to expire.", stream_ct)
        else:
            logger.debug("Loaded %s streams to fire.", stream_ct)

        # Streams are handled in random order.
        random.shuffle(streams)
        handle = self.expire_stream if expire else self.fire_stream
        for stream in streams:
            handle(stream)
        self.streams_loaded += stream_ct
        return stream_ct

    def process_trim_events(self):
        """Purge one batch of events older than the trim age.

        :returns: number of events purged; 0 when no valid trim expression
            is available (``__init__`` sets it to None on a parse error).
        """
        if self.trim_events_age is None:
            return 0
        trim_date = self.trim_events_age().timestamp
        event_ids = self.db.find_older_events(trim_date,
                                              self.trim_events_batch_size)
        logger.debug("Trimming %s old events", len(event_ids))
        self.db.purge_events(event_ids)
        return len(event_ids)

    def run(self):
        """Loop forever firing, expiring and (optionally) trimming.

        Sleeps for ``pipeline_worker_delay`` seconds when a pass found no
        work.  Database connection errors are retried after a short pause;
        any other exception is logged and re-raised.
        """
        while True:
            try:
                fire_ct = self.process_ready_streams(
                    self.pipeline_worker_batch_size)
                expire_ct = self.process_ready_streams(
                    self.pipeline_worker_batch_size, expire=True)

                trim_ct = 0
                if self.trim_events:
                    trim_ct = self.process_trim_events()

                # NOTE: ``.seconds`` is only the seconds component of the
                # delta (days are ignored); kept as in the original.
                since_status = self.current_time() - self.last_status
                if since_status.seconds > self.statistics_period:
                    self._log_statistics()

                if not fire_ct and not expire_ct and not trim_ct:
                    logger.debug("No streams to fire or expire. Sleeping...")
                    time.sleep(self.pipeline_worker_delay)
            except DatabaseConnectionError:
                logger.warning(
                    "Database Connection went away. Reconnecting...")
                time.sleep(5)
                # DB layer will reconnect automatically. We just need to
                # retry the operation. (mdragon)
            except Exception:
                logger.exception("Unknown Error in pipeline worker!")
                raise
class TriggerManager(object):
    """Stores incoming events and groups them into streams per trigger.

    Events are saved to the database, matched against every trigger
    definition, added to (or used to create) the matching stream, and the
    stream is marked ready to fire once its trigger says so.
    """

    @classmethod
    def config_description(cls):
        """Return the config schema understood by the trigger manager."""
        return dict(
            config_path=ConfigItem(
                help="Path(s) to find additional config files",
                multiple=True,
                default='.'),
            distiller_config=ConfigItem(
                required=False,
                help="Name of distiller config file "
                     "describing what to extract from the "
                     "notifications"),
            distiller_trait_plugins=ConfigItem(
                help="dictionary of trait plugins to load "
                     "for stackdistiller. Classes specified with "
                     "simport syntax. See stackdistiller and "
                     "simport docs for more info",
                default=dict()),
            time_sync_endpoint=ConfigItem(
                help="URL of time sync service for use with"
                     " replying old events.",
                default=None),
            catch_all_notifications=ConfigItem(
                help="Store basic info for all notifications,"
                     " even if not listed in distiller config",
                default=False),
            statistics_period=ConfigItem(
                help="Emit stats on event counts, etc every "
                     "this many seconds",
                default=10),
            database=ConfigSection(
                help="Database connection info.",
                config_description=DBInterface.config_description()),
            trigger_definitions=ConfigItem(
                required=False,
                help="Name of trigger definitions file "
                     "defining trigger conditions and what events to "
                     "process for each stream"),
        )

    def __init__(self, config, db=None, stackdistiller=None,
                 trigger_defs=None, time_sync=None):
        """Build the manager from ``config``.

        :param config: raw config; wrapped with ``ConfigManager.wrap``.
        :param db: optional database interface; when omitted a
            ``DBInterface`` is built from ``config['database']``.
        :param stackdistiller: optional distiller; when omitted one is built
            from the (optional) ``distiller_config`` file.
        :param trigger_defs: optional list of trigger definitions; each gets
            this manager's debug manager set on it.  When omitted they are
            loaded from the (optional) definitions file.
        :param time_sync: optional time source; defaults to
            ``ts.TimeSync()``.
        """
        config = ConfigManager.wrap(config, self.config_description())
        self.config = config
        self.debug_manager = debugging.DebugManager()
        self.trigger_definitions = []
        config.check_config()
        config.add_config_path(*config['config_path'])

        if time_sync is None:
            time_sync = ts.TimeSync()
        self.time_sync = time_sync

        if db is not None:
            self.db = db
        else:
            self.db = DBInterface(config['database'])

        # Always define the attribute: both the distiller argument and the
        # distiller_config file are optional, so it may legitimately stay
        # None (convert_notification then cannot be used).
        self.distiller = None
        if stackdistiller is not None:
            self.distiller = stackdistiller
        elif config.contains('distiller_config'):
            # distiller_config is optional
            dist_config = config.load_file(config['distiller_config'])
            plugmap = self._load_plugins(config['distiller_trait_plugins'],
                                         distiller.DEFAULT_PLUGINMAP)
            self.distiller = distiller.Distiller(
                dist_config,
                trait_plugin_map=plugmap,
                catchall=config['catch_all_notifications'])

        if trigger_defs is not None:
            self.trigger_definitions = trigger_defs
            for t in self.trigger_definitions:
                t.set_debugger(self.debug_manager)
        else:
            # trigger_definition config file is optional
            if config.contains('trigger_definitions'):
                defs = config.load_file(config['trigger_definitions'])
                self.trigger_definitions = [
                    TriggerDefinition(conf, self.debug_manager)
                    for conf in defs
                ]

        # trigger_map is used to quickly access existing trigger_defs
        self.trigger_map = dict(
            (tdef.name, tdef) for tdef in self.trigger_definitions)
        self.saved_events = 0
        self.received = 0
        self.last_status = self.current_time()

    @classmethod
    def _load_plugins(cls, plug_map, defaults=None):
        """Load each class string in ``plug_map`` with simport.

        Plugins that fail to load are logged and left out of the result.

        :param plug_map: mapping of plugin name to simport class string.
        :param defaults: optional mapping used to seed the result.
        :returns: dict of plugin name to loaded object.
        """
        plugins = dict()
        if defaults is not None:
            plugins.update(defaults)
        for name, cls_string in plug_map.items():
            try:
                plugins[name] = simport.load(cls_string)
            except simport.ImportFailed as e:
                logger.error("Could not load plugin %s: Import failed. %s",
                             name, e)
            except (simport.MissingMethodOrFunction,
                    simport.MissingModule,
                    simport.BadDirectory) as e:
                logger.error("Could not load plugin %s: Not found. %s",
                             name, e)
        return plugins

    def current_time(self):
        """Return the current time as reported by the time sync object."""
        # here so it's easily overridden.
        return self.time_sync.current_time()

    def save_event(self, event):
        """Store ``event`` in the database.

        ``message_id``, ``timestamp`` and ``event_type`` are required keys;
        every other key with a non-None value is stored as a trait.

        :returns: True when the event was saved; False when a required key
            is missing or the event is a duplicate.
        """
        try:
            message_id = event['message_id']
            timestamp = event['timestamp']
            event_type = event['event_type']
        except KeyError as e:
            logger.warning("Received invalid event: %s", e)
            return False

        reserved = ('message_id', 'timestamp', 'event_type')
        traits = dict((key, val) for key, val in event.items()
                      if key not in reserved and val is not None)
        try:
            self.db.create_event(message_id, event_type, timestamp, traits)
        except DuplicateError:
            logger.info("Received duplicate event %s, Ignoring.", message_id)
            return False
        self.saved_events += 1
        return True

    def convert_notification(self, notification_body):
        """Distill a notification into an event.

        Requires a distiller to have been configured.

        :returns: the event, or None when the notification could not be
            converted or the resulting event failed validation.
        """
        cond = EventCondenser(self.db)
        cond.clear()
        self.received += 1
        if self.distiller.to_event(notification_body, cond):
            if cond.validate():
                return cond.get_event()
            logger.warning("Received invalid event")
        else:
            event_type = notification_body.get('event_type',
                                               '**no event_type**')
            message_id = notification_body.get('message_id', '**no id**')
            logger.info("Dropping unconverted %s notification %s",
                        event_type, message_id)
        return None

    def _log_statistics(self):
        """Log the event counters, reset them and dump the debuggers."""
        logger.info("Received %s notifications. Saved %s events.",
                    self.received, self.saved_events)
        self.received = 0
        self.saved_events = 0
        self.last_status = self.current_time()
        self.debug_manager.dump_debuggers()

    def _add_or_create_stream(self, trigger_def, event, dist_traits):
        """Add ``event`` to the active stream, creating one if needed.

        :returns: the stream the event was added to.
        """
        stream = self.db.get_active_stream(trigger_def.name, dist_traits,
                                           self.current_time())
        if stream is None:
            trigger_def.debugger.bump_counter("New stream")
            stream = self.db.create_stream(trigger_def.name, event,
                                           dist_traits,
                                           trigger_def.expiration)
            logger.debug("Created New stream %s for %s: distinguished by %s",
                         stream.id, trigger_def.name, str(dist_traits))
        else:
            self.db.add_event_stream(stream, event, trigger_def.expiration)
        return stream

    def _ready_to_fire(self, stream, trigger_def):
        """Mark ``stream`` ready to fire at the trigger's fire timestamp."""
        timestamp = trigger_def.get_fire_timestamp(self.current_time())
        self.db.stream_ready_to_fire(stream, timestamp)
        trigger_def.debugger.bump_counter("Ready to fire")
        logger.debug("Stream %s ready to fire at %s", stream.id, timestamp)

    def add_trigger_definition(self, list_of_triggerdefs, debugger=None):
        """Register new trigger definitions, skipping names already known.

        :param list_of_triggerdefs: iterable of definition dicts, each with
            a ``'name'`` key.
        :param debugger: debugger passed to each new TriggerDefinition;
            defaults to this manager's debug manager.
        """
        if debugger is None:
            debugger = self.debug_manager
        for td in list_of_triggerdefs:
            name = td['name']
            # Only add if name is unique
            if name not in self.trigger_map:
                tdef = TriggerDefinition(td, debugger)
                self.trigger_definitions.append(tdef)
                self.trigger_map[name] = tdef

    def delete_trigger_definition(self, trigger_def_name):
        """Remove the named trigger definition; unknown names are ignored."""
        if trigger_def_name in self.trigger_map:
            self.trigger_definitions.remove(
                self.trigger_map.get(trigger_def_name))
            del self.trigger_map[trigger_def_name]

    def add_event(self, event):
        """Save ``event`` and add it to the stream of every matching trigger.

        Nothing else happens when the event could not be saved.  A stream
        without a fire timestamp is marked ready to fire as soon as its
        trigger's ``should_fire`` accepts the stream's events.
        """
        if not self.save_event(event):
            return
        for trigger_def in self.trigger_definitions:
            matched_criteria = trigger_def.match(event)
            if not matched_criteria:
                continue
            dist_traits = trigger_def.get_distinguishing_traits(
                event, matched_criteria)
            stream = self._add_or_create_stream(trigger_def, event,
                                                dist_traits)
            trigger_def.debugger.bump_counter("Added events")
            if (stream.fire_timestamp is None and
                    trigger_def.should_fire(
                        self.db.get_stream_events(stream))):
                self._ready_to_fire(stream, trigger_def)

    def add_notification(self, notification_body):
        """Convert a notification and, if it yields an event, add it."""
        event = self.convert_notification(notification_body)
        if event:
            self.add_event(event)
class TriggerManager(object):
    """Stores incoming events and groups them into streams per trigger.

    Events are saved to the database, matched against every trigger
    definition, added to (or used to create) the matching stream, and the
    stream is marked ready to fire once its trigger says so.
    """

    @classmethod
    def config_description(cls):
        """Return the config schema understood by the trigger manager."""
        return dict(
            config_path=ConfigItem(
                help="Path(s) to find additional config files",
                multiple=True,
                default='.'),
            distiller_config=ConfigItem(
                required=False,
                help="Name of distiller config file "
                     "describing what to extract from the "
                     "notifications"),
            distiller_trait_plugins=ConfigItem(
                help="dictionary of trait plugins to load "
                     "for stackdistiller. Classes specified with "
                     "simport syntax. See stackdistiller and "
                     "simport docs for more info",
                default=dict()),
            time_sync_endpoint=ConfigItem(
                help="URL of time sync service for use with"
                     " replying old events.",
                default=None),
            catch_all_notifications=ConfigItem(
                help="Store basic info for all notifications,"
                     " even if not listed in distiller config",
                default=False),
            statistics_period=ConfigItem(
                help="Emit stats on event counts, etc every "
                     "this many seconds",
                default=10),
            database=ConfigSection(
                help="Database connection info.",
                config_description=DBInterface.config_description()),
            trigger_definitions=ConfigItem(
                required=False,
                help="Name of trigger definitions file "
                     "defining trigger conditions and what events to "
                     "process for each stream"),
        )

    def __init__(self, config, db=None, stackdistiller=None,
                 trigger_defs=None, time_sync=None):
        """Build the manager from ``config``.

        :param config: raw config; wrapped with ``ConfigManager.wrap``.
        :param db: optional database interface; when omitted a
            ``DBInterface`` is built from ``config['database']``.
        :param stackdistiller: optional distiller; when omitted one is built
            from the (optional) ``distiller_config`` file.
        :param trigger_defs: optional list of trigger definitions; each gets
            this manager's debug manager set on it.  When omitted they are
            loaded from the (optional) definitions file.
        :param time_sync: optional time source; defaults to
            ``ts.TimeSync()``.
        """
        config = ConfigManager.wrap(config, self.config_description())
        self.config = config
        self.debug_manager = debugging.DebugManager()
        self.trigger_definitions = []
        config.check_config()
        config.add_config_path(*config['config_path'])

        if time_sync is None:
            time_sync = ts.TimeSync()
        self.time_sync = time_sync

        if db is not None:
            self.db = db
        else:
            self.db = DBInterface(config['database'])

        # Always define the attribute: both the distiller argument and the
        # distiller_config file are optional, so it may legitimately stay
        # None (convert_notification then cannot be used).
        self.distiller = None
        if stackdistiller is not None:
            self.distiller = stackdistiller
        elif config.contains('distiller_config'):
            # distiller_config is optional
            dist_config = config.load_file(config['distiller_config'])
            plugmap = self._load_plugins(config['distiller_trait_plugins'],
                                         distiller.DEFAULT_PLUGINMAP)
            self.distiller = distiller.Distiller(
                dist_config,
                trait_plugin_map=plugmap,
                catchall=config['catch_all_notifications'])

        if trigger_defs is not None:
            self.trigger_definitions = trigger_defs
            for t in self.trigger_definitions:
                t.set_debugger(self.debug_manager)
        else:
            # trigger_definition config file is optional
            if config.contains('trigger_definitions'):
                defs = config.load_file(config['trigger_definitions'])
                self.trigger_definitions = [
                    TriggerDefinition(conf, self.debug_manager)
                    for conf in defs
                ]

        # trigger_map is used to quickly access existing trigger_defs
        self.trigger_map = dict(
            (tdef.name, tdef) for tdef in self.trigger_definitions)
        self.saved_events = 0
        self.received = 0
        self.last_status = self.current_time()

    @classmethod
    def _load_plugins(cls, plug_map, defaults=None):
        """Load each class string in ``plug_map`` with simport.

        Plugins that fail to load are logged and left out of the result.

        :param plug_map: mapping of plugin name to simport class string.
        :param defaults: optional mapping used to seed the result.
        :returns: dict of plugin name to loaded object.
        """
        plugins = dict()
        if defaults is not None:
            plugins.update(defaults)
        for name, cls_string in plug_map.items():
            try:
                plugins[name] = simport.load(cls_string)
            except simport.ImportFailed as e:
                logger.error("Could not load plugin %s: Import failed. %s",
                             name, e)
            except (simport.MissingMethodOrFunction,
                    simport.MissingModule,
                    simport.BadDirectory) as e:
                logger.error("Could not load plugin %s: Not found. %s",
                             name, e)
        return plugins

    def current_time(self):
        """Return the current time as reported by the time sync object."""
        # here so it's easily overridden.
        return self.time_sync.current_time()

    def save_event(self, event):
        """Store ``event`` in the database.

        ``message_id``, ``timestamp`` and ``event_type`` are required keys;
        every other key with a non-None value is stored as a trait.

        :returns: True when the event was saved; False when a required key
            is missing or the event is a duplicate.
        """
        try:
            message_id = event['message_id']
            timestamp = event['timestamp']
            event_type = event['event_type']
        except KeyError as e:
            logger.warning("Received invalid event: %s", e)
            return False

        reserved = ('message_id', 'timestamp', 'event_type')
        traits = dict((key, val) for key, val in event.items()
                      if key not in reserved and val is not None)
        try:
            self.db.create_event(message_id, event_type, timestamp, traits)
        except DuplicateError:
            logger.info("Received duplicate event %s, Ignoring.", message_id)
            return False
        self.saved_events += 1
        return True

    def convert_notification(self, notification_body):
        """Distill a notification into an event.

        Requires a distiller to have been configured.

        :returns: the event, or None when the notification could not be
            converted or the resulting event failed validation.
        """
        cond = EventCondenser(self.db)
        cond.clear()
        self.received += 1
        if self.distiller.to_event(notification_body, cond):
            if cond.validate():
                return cond.get_event()
            logger.warning("Received invalid event")
        else:
            event_type = notification_body.get('event_type',
                                               '**no event_type**')
            message_id = notification_body.get('message_id', '**no id**')
            logger.info("Dropping unconverted %s notification %s",
                        event_type, message_id)
        return None

    def _log_statistics(self):
        """Log the event counters, reset them and dump the debuggers."""
        logger.info("Received %s notifications. Saved %s events.",
                    self.received, self.saved_events)
        self.received = 0
        self.saved_events = 0
        self.last_status = self.current_time()
        self.debug_manager.dump_debuggers()

    def _add_or_create_stream(self, trigger_def, event, dist_traits):
        """Add ``event`` to the active stream, creating one if needed.

        :returns: the stream the event was added to.
        """
        stream = self.db.get_active_stream(trigger_def.name, dist_traits,
                                           self.current_time())
        if stream is None:
            trigger_def.debugger.bump_counter("New stream")
            stream = self.db.create_stream(trigger_def.name, event,
                                           dist_traits,
                                           trigger_def.expiration)
            logger.debug("Created New stream %s for %s: distinguished by %s",
                         stream.id, trigger_def.name, str(dist_traits))
        else:
            self.db.add_event_stream(stream, event, trigger_def.expiration)
        return stream

    def _ready_to_fire(self, stream, trigger_def):
        """Mark ``stream`` ready to fire at the trigger's fire timestamp."""
        timestamp = trigger_def.get_fire_timestamp(self.current_time())
        self.db.stream_ready_to_fire(stream, timestamp)
        trigger_def.debugger.bump_counter("Ready to fire")
        logger.debug("Stream %s ready to fire at %s", stream.id, timestamp)

    def add_trigger_definition(self, list_of_triggerdefs, debugger=None):
        """Register new trigger definitions, skipping names already known.

        :param list_of_triggerdefs: iterable of definition dicts, each with
            a ``'name'`` key.
        :param debugger: debugger passed to each new TriggerDefinition;
            defaults to this manager's debug manager.
        """
        if debugger is None:
            debugger = self.debug_manager
        for td in list_of_triggerdefs:
            name = td['name']
            # Only add if name is unique
            if name not in self.trigger_map:
                tdef = TriggerDefinition(td, debugger)
                self.trigger_definitions.append(tdef)
                self.trigger_map[name] = tdef

    def delete_trigger_definition(self, trigger_def_name):
        """Remove the named trigger definition; unknown names are ignored."""
        if trigger_def_name in self.trigger_map:
            self.trigger_definitions.remove(
                self.trigger_map.get(trigger_def_name))
            del self.trigger_map[trigger_def_name]

    def add_event(self, event):
        """Save ``event`` and add it to the stream of every matching trigger.

        Nothing else happens when the event could not be saved.  A stream
        without a fire timestamp is marked ready to fire as soon as its
        trigger's ``should_fire`` accepts the stream's events.
        """
        if not self.save_event(event):
            return
        for trigger_def in self.trigger_definitions:
            matched_criteria = trigger_def.match(event)
            if not matched_criteria:
                continue
            dist_traits = trigger_def.get_distinguishing_traits(
                event, matched_criteria)
            stream = self._add_or_create_stream(trigger_def, event,
                                                dist_traits)
            trigger_def.debugger.bump_counter("Added events")
            if (stream.fire_timestamp is None and
                    trigger_def.should_fire(
                        self.db.get_stream_events(stream))):
                self._ready_to_fire(stream, trigger_def)

    def add_notification(self, notification_body):
        """Convert a notification and, if it yields an event, add it."""
        event = self.convert_notification(notification_body)
        if event:
            self.add_event(event)