def __init__(self,
             config,
             db=None,
             pipeline_handlers=None,
             pipeline_config=None,
             trigger_defs=None):
    """Set up the pipeline manager from ``config``.

    Every optional argument that is not None is used as given instead of
    the value that would otherwise be built or loaded from ``config``.

    :param config: configuration; wrapped with ``ConfigManager.wrap``
        using ``config_description()`` and then checked.
    :param db: database interface; defaults to
        ``DBInterface(config['database'])``.
    :param pipeline_handlers: handler plugins; defaults to the result of
        ``_load_plugins(config['pipeline_handlers'])``.
    :param pipeline_config: mapping of pipeline name to an iterable of
        handler configs; defaults to the file named by
        ``config['pipeline_config']``.  Its values are replaced in place
        by the checked handler configs.
    :param trigger_defs: iterable of trigger definitions; defaults to the
        ones built from the file named by
        ``config['trigger_definitions']``.
    """
    logger.debug("PipelineManager: Using config: %s" % str(config))
    wrapped = ConfigManager.wrap(config, self.config_description())
    self.config = wrapped
    wrapped.check_config()
    wrapped.add_config_path(*wrapped['config_path'])

    self.db = db if db is not None else DBInterface(wrapped['database'])

    if pipeline_handlers is None:
        pipeline_handlers = self._load_plugins(
            wrapped['pipeline_handlers'])
    self.pipeline_handlers = pipeline_handlers
    logger.debug("Pipeline handlers: %s" % str(self.pipeline_handlers))

    if pipeline_config is None:
        pipeline_config = wrapped.load_file(wrapped['pipeline_config'])
    self.pipeline_config = pipeline_config
    logger.debug("Pipeline config: %s" % str(self.pipeline_config))

    # Swap each pipeline's handler configs for their checked form.
    for name, handler_confs in self.pipeline_config.items():
        checked = []
        for handler_conf in handler_confs:
            checked.append(
                Pipeline.check_handler_config(handler_conf,
                                              self.pipeline_handlers))
        self.pipeline_config[name] = checked

    if trigger_defs is None:
        raw_defs = wrapped.load_file(wrapped['trigger_definitions'])
        logger.debug("Loaded trigger definitions %s" % str(raw_defs))
        trigger_defs = [TriggerDefinition(raw, None) for raw in raw_defs]
    self.trigger_definitions = trigger_defs
    self.trigger_map = {
        tdef.name: tdef for tdef in self.trigger_definitions
    }

    self.trigger_manager = TriggerManager(
        self.config, db=self.db, trigger_defs=self.trigger_definitions)

    # Worker settings read from the checked config.
    self.pipeline_worker_batch_size = wrapped['pipeline_worker_batch_size']
    self.pipeline_worker_delay = wrapped['pipeline_worker_delay']
    self.statistics_period = wrapped['statistics_period']
    self.purge_completed_streams = wrapped['purge_completed_streams']

    # Counters reported (and reset) by the statistics logging.
    self.streams_fired = self.streams_expired = self.streams_loaded = 0
    self.last_status = self.current_time()
 def __init__(self, conf):
     """Initialize the processor and build its Winchester trigger manager.

     :param conf: configuration object.  ``conf.winchester.winchester_config``
         is handed to ``ConfigManager.load_config_file`` and
         ``conf.kafka.stream_def_group`` is stored as the group.
     """
     super(EventProcessor, self).__init__(conf)

     winchester_config = conf.winchester.winchester_config
     self._winchester_config = winchester_config

     config_mgr = ConfigManager.load_config_file(winchester_config)
     self._config_mgr = config_mgr
     self._trigger_manager = TriggerManager(config_mgr)

     self._group = conf.kafka.stream_def_group
     # Lock kept next to the trigger manager; its users are not visible here.
     self._tm_lock = threading.Lock()
Example #3
0
class WinchesterHandler(BaseHandler):
    """Handler that passes notification payloads to a TriggerManager."""

    CONFIG_SECTION = "winchester"
    AUTO_ACK = True

    def __init__(self, app=None, queue_name=None):
        """Build the trigger manager from the ``config_file`` setting.

        :param app: forwarded unchanged to the base handler.
        :param queue_name: forwarded unchanged to the base handler.
        """
        super(WinchesterHandler, self).__init__(app=app, queue_name=queue_name)
        winchester_config = ConfigManager.load_config_file(
            self.config_get("config_file"))
        self.trigger_manager = TriggerManager(winchester_config)

    def handle_messages(self, messages, env):
        """Add every payload found in ``messages`` as a notification."""
        for payload in self.iterate_payloads(messages, env):
            self.trigger_manager.add_notification(payload)

    def on_idle(self, num_messages, queue_name):
        """Have the trigger manager log its statistics.

        Both arguments are accepted but not used.
        """
        self.trigger_manager._log_statistics()
 def config_description(cls):
     """Return the config items understood by the pipeline manager.

     Starts from ``TriggerManager.config_description()`` and adds the
     pipeline-specific items on top of it.
     """
     configs = TriggerManager.config_description()
     pipeline_items = {
         'pipeline_handlers': ConfigItem(
             help="dictionary of pipeline handlers to load "
                  "Classes specified with simport syntax. "
                  "simport docs for more info",
             required=True),
         'pipeline_worker_batch_size': ConfigItem(
             default=1000,
             help="Number of streams for pipeline "
                  "worker(s) to load at a time"),
         'pipeline_worker_delay': ConfigItem(
             default=10,
             help="Number of seconds for pipeline worker "
                  "to sleep when it finds no streams to "
                  "process"),
         'pipeline_config': ConfigItem(
             help="Name of pipeline config file "
                  "defining the handlers for each "
                  "pipeline.",
             required=True),
         'purge_completed_streams': ConfigItem(
             default=True,
             help="Delete successfully proccessed "
                  "streams when finished?"),
     }
     configs.update(pipeline_items)
     return configs
Example #5
0
    def run(self):
        """Start the two consumer threads and block until both finish.

        A TriggerManager is built from ``self.config_mgr``.  It is handed,
        together with a new lock and ``self.kafka_config``, to
        ``stream_definition_consumer`` and ``event_consumer``, each of
        which runs in its own thread.

        Per the original author's notes: the stream-def-events kafka topic
        carries the addition/deletion of stream-defs from the API, and the
        transformed-events kafka topic carries distilled events.
        """
        # Initialization
        self.tm_lock = threading.Lock()
        self.trigger_manager = TriggerManager(self.config_mgr)

        # TODO read stream-definitions from DB at startup and add

        # Both consumers receive the same (config, lock, manager) arguments.
        consumer_args = (self.kafka_config,
                         self.tm_lock,
                         self.trigger_manager)

        self.stream_def_thread = threading.Thread(
            name='stream_defs',
            target=stream_definition_consumer,
            args=consumer_args)
        self.event_thread = threading.Thread(
            name='events',
            target=event_consumer,
            args=consumer_args)

        workers = (self.stream_def_thread, self.event_thread)
        for worker in workers:
            worker.start()
        for worker in workers:
            worker.join()
        log.debug('Exiting')
 def config_description(cls):
     """Describe the configuration accepted by the pipeline manager.

     The result is ``TriggerManager.config_description()`` updated with
     the pipeline-specific items defined here.
     """
     configs = TriggerManager.config_description()
     extra_items = {
         'pipeline_handlers': ConfigItem(
             required=True,
             help="dictionary of pipeline handlers to load "
                  "Classes specified with simport syntax. "
                  "simport docs for more info"),
         'pipeline_worker_batch_size': ConfigItem(
             help="Number of streams for pipeline "
                  "worker(s) to load at a time",
             default=1000),
         'pipeline_worker_delay': ConfigItem(
             help="Number of seconds for pipeline worker "
                  "to sleep when it finds no streams to "
                  "process",
             default=10),
         'pipeline_config': ConfigItem(
             required=True,
             help="Name of pipeline config file "
                  "defining the handlers for each "
                  "pipeline."),
         'purge_completed_streams': ConfigItem(
             help="Delete successfully proccessed "
                  "streams when finished?",
             default=True),
     }
     configs.update(extra_items)
     return configs
    def __init__(self, config, db=None, pipeline_handlers=None,
                 pipeline_config=None, trigger_defs=None):
        """Set up the pipeline manager from ``config``.

        Every optional argument that is not None is used as given instead
        of the value that would otherwise be built or loaded from
        ``config``.

        :param config: configuration; wrapped with ``ConfigManager.wrap``
            using ``config_description()`` and then checked.
        :param db: database interface; defaults to
            ``DBInterface(config['database'])``.
        :param pipeline_handlers: handler plugins; defaults to the result
            of ``_load_plugins(config['pipeline_handlers'])``.
        :param pipeline_config: mapping of pipeline name to an iterable of
            handler configs; defaults to the file named by
            ``config['pipeline_config']``.  Its values are replaced in
            place by the checked handler configs.
        :param trigger_defs: iterable of trigger definitions; defaults to
            the ones built from the file named by
            ``config['trigger_definitions']``.
        """
        logger.debug("PipelineManager: Using config: %s" % str(config))
        wrapped = ConfigManager.wrap(config, self.config_description())
        self.config = wrapped
        wrapped.check_config()
        wrapped.add_config_path(*wrapped['config_path'])

        self.db = db if db is not None else DBInterface(wrapped['database'])

        if pipeline_handlers is None:
            pipeline_handlers = self._load_plugins(
                wrapped['pipeline_handlers'])
        self.pipeline_handlers = pipeline_handlers
        logger.debug("Pipeline handlers: %s" % str(self.pipeline_handlers))

        if pipeline_config is None:
            pipeline_config = wrapped.load_file(wrapped['pipeline_config'])
        self.pipeline_config = pipeline_config
        logger.debug("Pipeline config: %s" % str(self.pipeline_config))

        # Swap each pipeline's handler configs for their checked form.
        for name, handler_confs in self.pipeline_config.items():
            checked = []
            for handler_conf in handler_confs:
                checked.append(
                    Pipeline.check_handler_config(handler_conf,
                                                  self.pipeline_handlers))
            self.pipeline_config[name] = checked

        if trigger_defs is None:
            raw_defs = wrapped.load_file(wrapped['trigger_definitions'])
            logger.debug("Loaded trigger definitions %s" % str(raw_defs))
            trigger_defs = [TriggerDefinition(raw, None) for raw in raw_defs]
        self.trigger_definitions = trigger_defs
        self.trigger_map = {
            tdef.name: tdef for tdef in self.trigger_definitions
        }

        self.trigger_manager = TriggerManager(
            self.config, db=self.db, trigger_defs=self.trigger_definitions)

        # Worker settings read from the checked config.
        self.pipeline_worker_batch_size = wrapped['pipeline_worker_batch_size']
        self.pipeline_worker_delay = wrapped['pipeline_worker_delay']
        self.statistics_period = wrapped['statistics_period']
        self.purge_completed_streams = wrapped['purge_completed_streams']

        # Counters reported (and reset) by the statistics logging.
        self.streams_fired = self.streams_expired = self.streams_loaded = 0
        self.last_status = self.current_time()
Example #8
0
class PipelineManager(object):
    @classmethod
    def config_description(cls):
        """Return the config items understood by the pipeline manager.

        Starts from ``TriggerManager.config_description()`` and adds the
        pipeline-worker and event-trimming items on top of it.
        """
        configs = TriggerManager.config_description()
        pipeline_items = {
            'pipeline_handlers': ConfigItem(
                help="dictionary of pipeline handlers to load "
                     "Classes specified with simport syntax. "
                     "simport docs for more info",
                required=True),
            'pipeline_worker_batch_size': ConfigItem(
                default=1000,
                help="Number of streams for pipeline "
                     "worker(s) to load at a time"),
            'pipeline_worker_delay': ConfigItem(
                default=10,
                help="Number of seconds for pipeline worker "
                     "to sleep when it finds no streams to "
                     "process"),
            'pipeline_config': ConfigItem(
                help="Name of pipeline config file "
                     "defining the handlers for each "
                     "pipeline.",
                required=True),
            'purge_completed_streams': ConfigItem(
                default=True,
                help="Delete successfully proccessed "
                     "streams when finished?"),
            'trim_events': ConfigItem(
                default=False,
                help="Delete events older than a configurable time."),
            'trim_events_age': ConfigItem(
                default="$timestamp - 14d",
                help="Delete events older than this (timex expr)."),
            'trim_events_batch_size': ConfigItem(
                default=100,
                help="Maximum number of events for pipeline "
                     "worker(s) to trim at a time"),
        }
        configs.update(pipeline_items)
        return configs

    def __init__(self,
                 config,
                 db=None,
                 pipeline_handlers=None,
                 pipeline_config=None,
                 trigger_defs=None,
                 time_sync=None,
                 proc_name='pipeline_worker'):
        """Set up the pipeline manager from ``config``.

        Every optional argument that is not None is used as given instead
        of the value that would otherwise be built or loaded from
        ``config``.

        :param config: configuration; wrapped with ``ConfigManager.wrap``
            using ``config_description()`` and then checked.
        :param db: database interface; defaults to
            ``DBInterface(config['database'])``.
        :param pipeline_handlers: handler plugins; defaults to the result
            of ``_load_plugins(config['pipeline_handlers'])``.
        :param pipeline_config: mapping of pipeline name to an iterable of
            handler configs; defaults to the file named by
            ``config['pipeline_config']``.  Its values are replaced in
            place by the checked handler configs.
        :param trigger_defs: trigger definitions; when None they are
            loaded from ``config['trigger_definitions']`` if the config
            contains that key, otherwise the list stays empty.
        :param time_sync: time source; defaults to ``ts.TimeSync()``.
        :param proc_name: name used to distinguish worker processes in
            logs.
        """
        self.proc_name = proc_name

        logger.debug("PipelineManager(%s): Using config: %s" %
                     (self.proc_name, str(config)))
        wrapped = ConfigManager.wrap(config, self.config_description())
        self.config = wrapped
        self.trigger_definitions = []
        wrapped.check_config()
        wrapped.add_config_path(*wrapped['config_path'])

        self.time_sync = time_sync = (
            ts.TimeSync() if time_sync is None else time_sync)

        self.db = db if db is not None else DBInterface(wrapped['database'])

        if pipeline_handlers is None:
            pipeline_handlers = self._load_plugins(
                wrapped['pipeline_handlers'])
        self.pipeline_handlers = pipeline_handlers
        logger.debug("Pipeline handlers: %s" % str(self.pipeline_handlers))

        if pipeline_config is None:
            pipeline_config = wrapped.load_file(wrapped['pipeline_config'])
        self.pipeline_config = pipeline_config
        logger.debug("Pipeline config: %s" % str(self.pipeline_config))

        # Swap each pipeline's handler configs for their checked form.
        for name, handler_confs in self.pipeline_config.items():
            checked = []
            for handler_conf in handler_confs:
                checked.append(
                    Pipeline.check_handler_config(handler_conf,
                                                  self.pipeline_handlers))
            self.pipeline_config[name] = checked

        if trigger_defs is not None:
            self.trigger_definitions = trigger_defs
        elif wrapped.contains('trigger_definitions'):
            # trigger_definition config file is optional
            raw_defs = wrapped.load_file(wrapped['trigger_definitions'])
            logger.debug("Loaded trigger definitions %s" % str(raw_defs))
            self.trigger_definitions = [
                TriggerDefinition(raw, None) for raw in raw_defs
            ]

        self.trigger_manager = TriggerManager(
            self.config,
            db=self.db,
            trigger_defs=self.trigger_definitions,
            time_sync=time_sync)

        # Worker settings read from the checked config.
        self.pipeline_worker_batch_size = wrapped['pipeline_worker_batch_size']
        self.pipeline_worker_delay = wrapped['pipeline_worker_delay']
        self.statistics_period = wrapped['statistics_period']
        self.purge_completed_streams = wrapped['purge_completed_streams']
        self.trim_events = wrapped['trim_events']
        self.trim_events_batch_size = wrapped['trim_events_batch_size']
        try:
            self.trim_events_age = timex.parse(
                str(wrapped['trim_events_age']))
        except timex.TimexError:
            # A bad expression turns trimming off instead of failing startup.
            logger.error("Invalid trim event expression: %s Event trimming "
                         "disabled." % wrapped['trim_events_age'])
            self.trim_events_age = None
            self.trim_events = False

        # Counters reported (and reset) by the statistics logging.
        self.streams_fired = self.streams_expired = self.streams_loaded = 0
        self.last_status = self.current_time()

    @classmethod
    def _load_plugins(cls, plug_map, defaults=None):
        plugins = dict()
        if defaults is not None:
            plugins.update(defaults)
        for name, cls_string in plug_map.items():
            try:
                plugins[name] = simport.load(cls_string)
            except simport.ImportFailed as e:
                logger.error("Could not load plugin %s: Import failed. %s" %
                             (name, e))
            except (simport.MissingMethodOrFunction, simport.MissingModule,
                    simport.BadDirectory) as e:
                logger.error("Could not load plugin %s: Not found. %s" %
                             (name, e))
        return plugins

    def current_time(self):
        # here so it's easily overridden.
        return self.time_sync.current_time()

    def _log_statistics(self):
        """Log the stream counters, reset them, and dump the debuggers.

        ``last_status`` is set to the current time after the counters are
        reset.
        """
        counts = (self.streams_loaded,
                  self.streams_fired,
                  self.streams_expired)
        logger.info("Loaded %s streams. Fired %s, Expired %s." % counts)

        self.streams_fired = self.streams_expired = self.streams_loaded = 0
        self.last_status = self.current_time()

        self.trigger_manager.debug_manager.dump_debuggers()

    def add_new_events(self, events):
        for event in events:
            self.trigger_manager.add_event(event)

    def _run_pipeline(self, stream, trigger_def, pipeline_name,
                      pipeline_config):
        """Run one pipeline over the events of ``stream``.

        :param stream: stream whose events are fetched from the database.
        :param trigger_def: trigger definition supplying the debugger.
        :param pipeline_name: name given to the ``Pipeline``.
        :param pipeline_config: handler configuration for the pipeline.
        :returns: False if the pipeline raised ``PipelineExecutionError``,
            True otherwise.  Any events the pipeline returned are added
            through ``add_new_events``.
        """
        stream_events = self.db.get_stream_events(stream)
        debugger = trigger_def.debugger
        try:
            runner = Pipeline(pipeline_name, pipeline_config,
                              self.pipeline_handlers)
            produced = runner.handle_events(stream_events, stream, debugger)
        except PipelineExecutionError:
            logger.error("Exception in pipeline %s handling stream %s" %
                         (pipeline_name, stream.id))
            return False

        if produced:
            self.add_new_events(produced)
        return True

    def _complete_stream(self, stream):
        if self.purge_completed_streams:
            self.db.purge_stream(stream)
        else:
            try:
                self.db.set_stream_state(stream, StreamState.completed)
            except LockError:
                logger.error(
                    "Stream %s locked while trying to set 'complete' state! "
                    "This should not happen." % stream.id)

    def _error_stream(self, stream):
        """Move ``stream`` to the ``error`` state; a lock failure is logged."""
        error_state = StreamState.error
        try:
            self.db.set_stream_state(stream, error_state)
        except LockError:
            logger.error("Stream %s locked while trying to set 'error' state! "
                         "This should not happen." % stream.id)

    def _expire_error_stream(self, stream):
        """Move ``stream`` to the ``expire_error`` state.

        A lock failure while doing so is only logged.
        """
        error_state = StreamState.expire_error
        try:
            self.db.set_stream_state(stream, error_state)
        except LockError:
            logger.error(
                "Stream %s locked while trying to set 'expire_error' state! "
                "This should not happen." % stream.id)

    def safe_get_debugger(self, trigger_def):
        return trigger_def.debugger if trigger_def is not None else \
            self.trigger_manager.debug_manager.get_debugger(None)

    def add_trigger_definition(self, list_of_triggerdefs):
        self.trigger_manager.add_trigger_definition(list_of_triggerdefs)

    def delete_trigger_definition(self, trigger_def_name):
        self.trigger_manager.delete_trigger_definition(trigger_def_name)

    def fire_stream(self, stream):
        """Fire a ready stream through its trigger's fire pipeline.

        The stream is first moved to the ``firing`` state; a locked stream
        is skipped.  A stream whose trigger definition or pipeline is
        unknown, or whose pipeline run fails, is moved to the ``error``
        state.  Otherwise the stream is completed and counted as fired.

        :param stream: stream to fire; ``stream.name`` selects the trigger
            definition from the trigger manager's ``trigger_map``.
        :returns: True if the stream was fired and completed, else False.
        """
        trigger_def = self.trigger_manager.trigger_map.get(stream.name)
        debugger = self.safe_get_debugger(trigger_def)
        try:
            stream = self.db.set_stream_state(stream, StreamState.firing)
        except LockError:
            logger.debug("Stream %s locked. Moving on..." % stream.id)
            debugger.bump_counter("Locked")
            return False
        logger.debug("Firing Stream %s." % stream.id)
        if trigger_def is None:
            debugger.bump_counter("Unknown trigger def '%s'" % stream.name)
            logger.error("Stream %s has unknown trigger definition %s" %
                         (stream.id, stream.name))
            self._error_stream(stream)
            return False
        pipeline = trigger_def.fire_pipeline
        if pipeline is not None:
            pipe_config = self.pipeline_config.get(pipeline)
            if pipe_config is None:
                debugger.bump_counter("Unknown pipeline '%s'" % pipeline)
                logger.error("Trigger %s for stream %s has unknown "
                             "pipeline %s" %
                             (stream.name, stream.id, pipeline))
                self._error_stream(stream)
                # Stop here.  Without this return the code went on to run
                # the pipeline with a None config and could then complete
                # a stream that had just been marked as errored.
                return False
            if not self._run_pipeline(stream, trigger_def, pipeline,
                                      pipe_config):
                self._error_stream(stream)
                return False
        else:
            logger.debug("No fire pipeline for stream %s. Nothing to do." %
                         (stream.id))
            debugger.bump_counter("No fire pipeline for '%s'" % stream.name)
        self._complete_stream(stream)
        debugger.bump_counter("Streams fired")
        self.streams_fired += 1
        return True

    def expire_stream(self, stream):
        """Expire a ready stream through its trigger's expire pipeline.

        The stream is first moved to the ``expiring`` state; a locked
        stream is skipped.  A stream whose trigger definition or pipeline
        is unknown, or whose pipeline run fails, is moved to the
        ``expire_error`` state.  Otherwise the stream is completed and
        counted as expired.

        :param stream: stream to expire; ``stream.name`` selects the
            trigger definition from the trigger manager's ``trigger_map``.
        :returns: True if the stream was expired and completed, else False.
        """
        trigger_def = self.trigger_manager.trigger_map.get(stream.name)
        debugger = self.safe_get_debugger(trigger_def)
        try:
            stream = self.db.set_stream_state(stream, StreamState.expiring)
        except LockError:
            debugger.bump_counter("Locked")
            logger.debug("Stream %s locked. Moving on..." % stream.id)
            return False
        logger.debug("Expiring Stream %s." % stream.id)
        if trigger_def is None:
            debugger.bump_counter("Unknown trigger def '%s'" % stream.name)
            logger.error("Stream %s has unknown trigger definition %s" %
                         (stream.id, stream.name))
            self._expire_error_stream(stream)
            return False
        pipeline = trigger_def.expire_pipeline
        if pipeline is not None:
            pipe_config = self.pipeline_config.get(pipeline)
            if pipe_config is None:
                debugger.bump_counter("Unknown pipeline '%s'" % pipeline)
                logger.error(
                    "Trigger %s for stream %s has unknown pipeline %s" %
                    (stream.name, stream.id, pipeline))
                self._expire_error_stream(stream)
                # Stop here.  Without this return the code went on to run
                # the pipeline with a None config and could then complete
                # a stream that had just been marked as errored.
                return False
            if not self._run_pipeline(stream, trigger_def, pipeline,
                                      pipe_config):
                self._expire_error_stream(stream)
                return False
        else:
            logger.debug("No expire pipeline for stream %s. Nothing to do." %
                         (stream.id))
            debugger.bump_counter("No expire pipeline for '%s'" % stream.name)
        self._complete_stream(stream)
        debugger.bump_counter("Streams expired")
        self.streams_expired += 1
        return True

    def process_ready_streams(self, batch_size, expire=False):
        """Load a batch of ready streams and fire or expire each of them.

        :param batch_size: passed to the database as the batch size.
        :param expire: when true the streams are loaded for, and passed
            to, ``expire_stream``; otherwise ``fire_stream``.
        :returns: the number of streams loaded.
        """
        ready = self.db.get_ready_streams(batch_size,
                                          self.current_time(),
                                          expire=expire)
        loaded = len(ready)
        message = ("Loaded %s streams to expire." if expire
                   else "Loaded %s streams to fire.")
        logger.debug(message % loaded)

        # Handle the batch in random order.
        random.shuffle(ready)
        handle = self.expire_stream if expire else self.fire_stream
        for ready_stream in ready:
            handle(ready_stream)

        self.streams_loaded += loaded
        return loaded

    def process_trim_events(self):
        """Purge one batch of events older than the trim age.

        :returns: the number of event ids that were found and purged.
        """
        cutoff = self.trim_events_age().timestamp
        stale_ids = self.db.find_older_events(cutoff,
                                              self.trim_events_batch_size)
        logger.debug("Trimming %s old events" % len(stale_ids))
        self.db.purge_events(stale_ids)
        return len(stale_ids)

    def run(self):
        """Run the pipeline worker loop; returns only by raising.

        Each pass fires ready streams, expires ready streams, trims old
        events when ``trim_events`` is set, and logs statistics once more
        than ``statistics_period`` seconds have passed since the last
        report.  When a pass finds nothing to do the worker sleeps for
        ``pipeline_worker_delay`` seconds.

        A ``DatabaseConnectionError`` is logged and the pass is retried
        after five seconds.  Any other exception is logged and re-raised.
        """
        while True:
            try:
                fire_ct = self.process_ready_streams(
                    self.pipeline_worker_batch_size)
                expire_ct = self.process_ready_streams(
                    self.pipeline_worker_batch_size, expire=True)

                trim_ct = 0
                if self.trim_events:
                    trim_ct = self.process_trim_events()

                # total_seconds() counts whole days too; the ``seconds``
                # attribute alone wraps around every 24 hours.
                elapsed = self.current_time() - self.last_status
                if elapsed.total_seconds() > self.statistics_period:
                    self._log_statistics()

                if not fire_ct and not expire_ct and not trim_ct:
                    logger.debug("No streams to fire or expire. Sleeping...")
                    time.sleep(self.pipeline_worker_delay)
            except DatabaseConnectionError:
                # ``warning`` replaces the deprecated ``warn`` alias.
                logger.warning(
                    "Database Connection went away. Reconnecting...")
                time.sleep(5)
                # DB layer will reconnect automatically. We just need to
                # retry the operation. (mdragon)
            except Exception:
                logger.exception("Unknown Error in pipeline worker!")
                raise
class PipelineManager(object):
    @classmethod
    def config_description(cls):
        """Describe the configuration accepted by the pipeline manager.

        The result is ``TriggerManager.config_description()`` updated with
        the pipeline-specific items defined here.
        """
        configs = TriggerManager.config_description()
        extra_items = {
            'pipeline_handlers': ConfigItem(
                help="dictionary of pipeline handlers to load "
                     "Classes specified with simport syntax. "
                     "simport docs for more info",
                required=True),
            'pipeline_worker_batch_size': ConfigItem(
                default=1000,
                help="Number of streams for pipeline "
                     "worker(s) to load at a time"),
            'pipeline_worker_delay': ConfigItem(
                default=10,
                help="Number of seconds for pipeline worker "
                     "to sleep when it finds no streams to "
                     "process"),
            'pipeline_config': ConfigItem(
                help="Name of pipeline config file "
                     "defining the handlers for each "
                     "pipeline.",
                required=True),
            'purge_completed_streams': ConfigItem(
                default=True,
                help="Delete successfully proccessed "
                     "streams when finished?"),
        }
        configs.update(extra_items)
        return configs

    def __init__(self,
                 config,
                 db=None,
                 pipeline_handlers=None,
                 pipeline_config=None,
                 trigger_defs=None):
        """Set up the pipeline manager from ``config``.

        Every optional argument that is not None is used as given instead
        of the value that would otherwise be built or loaded from
        ``config``.

        :param config: configuration; wrapped with ``ConfigManager.wrap``
            using ``config_description()`` and then checked.
        :param db: database interface; defaults to
            ``DBInterface(config['database'])``.
        :param pipeline_handlers: handler plugins; defaults to the result
            of ``_load_plugins(config['pipeline_handlers'])``.
        :param pipeline_config: mapping of pipeline name to an iterable of
            handler configs; defaults to the file named by
            ``config['pipeline_config']``.  Its values are replaced in
            place by the checked handler configs.
        :param trigger_defs: iterable of trigger definitions; defaults to
            the ones built from the file named by
            ``config['trigger_definitions']``.
        """
        logger.debug("PipelineManager: Using config: %s" % str(config))
        wrapped = ConfigManager.wrap(config, self.config_description())
        self.config = wrapped
        wrapped.check_config()
        wrapped.add_config_path(*wrapped['config_path'])

        self.db = db if db is not None else DBInterface(wrapped['database'])

        if pipeline_handlers is None:
            pipeline_handlers = self._load_plugins(
                wrapped['pipeline_handlers'])
        self.pipeline_handlers = pipeline_handlers
        logger.debug("Pipeline handlers: %s" % str(self.pipeline_handlers))

        if pipeline_config is None:
            pipeline_config = wrapped.load_file(wrapped['pipeline_config'])
        self.pipeline_config = pipeline_config
        logger.debug("Pipeline config: %s" % str(self.pipeline_config))

        # Swap each pipeline's handler configs for their checked form.
        for name, handler_confs in self.pipeline_config.items():
            checked = []
            for handler_conf in handler_confs:
                checked.append(
                    Pipeline.check_handler_config(handler_conf,
                                                  self.pipeline_handlers))
            self.pipeline_config[name] = checked

        if trigger_defs is None:
            raw_defs = wrapped.load_file(wrapped['trigger_definitions'])
            logger.debug("Loaded trigger definitions %s" % str(raw_defs))
            trigger_defs = [TriggerDefinition(raw, None) for raw in raw_defs]
        self.trigger_definitions = trigger_defs
        self.trigger_map = {
            tdef.name: tdef for tdef in self.trigger_definitions
        }

        self.trigger_manager = TriggerManager(
            self.config, db=self.db, trigger_defs=self.trigger_definitions)

        # Worker settings read from the checked config.
        self.pipeline_worker_batch_size = wrapped['pipeline_worker_batch_size']
        self.pipeline_worker_delay = wrapped['pipeline_worker_delay']
        self.statistics_period = wrapped['statistics_period']
        self.purge_completed_streams = wrapped['purge_completed_streams']

        # Counters reported (and reset) by the statistics logging.
        self.streams_fired = self.streams_expired = self.streams_loaded = 0
        self.last_status = self.current_time()

    @classmethod
    def _load_plugins(cls, plug_map, defaults=None):
        plugins = dict()
        if defaults is not None:
            plugins.update(defaults)
        for name, cls_string in plug_map.items():
            try:
                plugins[name] = simport.load(cls_string)
            except simport.ImportFailed as e:
                log.error("Could not load plugin %s: Import failed. %s" %
                          (name, e))
            except (simport.MissingMethodOrFunction, simport.MissingModule,
                    simport.BadDirectory) as e:
                log.error("Could not load plugin %s: Not found. %s" %
                          (name, e))
        return plugins

    def current_time(self):
        # here so it's easily overridden.
        return datetime.datetime.utcnow()

    def _log_statistics(self):
        """Log the stream counters, reset them, and dump the debuggers.

        ``last_status`` is set to the current time after the counters are
        reset.
        """
        counts = (self.streams_loaded,
                  self.streams_fired,
                  self.streams_expired)
        logger.info("Loaded %s streams. Fired %s, Expired %s." % counts)

        self.streams_fired = self.streams_expired = self.streams_loaded = 0
        self.last_status = self.current_time()

        self.trigger_manager.debug_manager.dump_debuggers()

    def add_new_events(self, events):
        for event in events:
            self.trigger_manager.add_event(event)

    def _run_pipeline(self, stream, trigger_def, pipeline_name,
                      pipeline_config):
        """Run one pipeline over the events of ``stream``.

        :param stream: stream whose events are fetched from the database.
        :param trigger_def: trigger definition supplying the debugger.
        :param pipeline_name: name given to the ``Pipeline``.
        :param pipeline_config: handler configuration for the pipeline.
        :returns: False if the pipeline raised ``PipelineExecutionError``,
            True otherwise.  Any events the pipeline returned are added
            through ``add_new_events``.
        """
        stream_events = self.db.get_stream_events(stream)
        debugger = trigger_def.debugger
        try:
            runner = Pipeline(pipeline_name, pipeline_config,
                              self.pipeline_handlers)
            produced = runner.handle_events(stream_events, debugger)
        except PipelineExecutionError:
            logger.error("Exception in pipeline %s handling stream %s" %
                         (pipeline_name, stream.id))
            return False

        if produced:
            self.add_new_events(produced)
        return True

    def _complete_stream(self, stream):
        if self.purge_completed_streams:
            self.db.purge_stream(stream)
        else:
            try:
                self.db.set_stream_state(stream, StreamState.completed)
            except LockError:
                logger.error(
                    "Stream %s locked while trying to set 'complete' state! "
                    "This should not happen." % stream.id)

    def _error_stream(self, stream):
        """Move ``stream`` to the ``error`` state; a lock failure is logged."""
        error_state = StreamState.error
        try:
            self.db.set_stream_state(stream, error_state)
        except LockError:
            logger.error("Stream %s locked while trying to set 'error' state! "
                         "This should not happen." % stream.id)

    def _expire_error_stream(self, stream):
        """Move ``stream`` to the ``expire_error`` state.

        A lock failure while doing so is only logged.
        """
        error_state = StreamState.expire_error
        try:
            self.db.set_stream_state(stream, error_state)
        except LockError:
            logger.error(
                "Stream %s locked while trying to set 'expire_error' state! "
                "This should not happen." % stream.id)

    def safe_get_debugger(self, trigger_def):
        return trigger_def.debugger if trigger_def is not None else \
            self.trigger_manager.debug_manager.get_debugger(None)

    def fire_stream(self, stream):
        """Fire a ready stream through its trigger's fire pipeline.

        The stream is first moved to the ``firing`` state; a locked stream
        is skipped.  A stream whose trigger definition or pipeline is
        unknown, or whose pipeline run fails, is moved to the ``error``
        state.  Otherwise the stream is completed and counted as fired.

        :param stream: stream to fire; ``stream.name`` selects the trigger
            definition from ``self.trigger_map``.
        :returns: True if the stream was fired and completed, else False.
        """
        trigger_def = self.trigger_map.get(stream.name)
        debugger = self.safe_get_debugger(trigger_def)
        try:
            stream = self.db.set_stream_state(stream, StreamState.firing)
        except LockError:
            logger.debug("Stream %s locked. Moving on..." % stream.id)
            debugger.bump_counter("Locked")
            return False
        logger.debug("Firing Stream %s." % stream.id)
        if trigger_def is None:
            debugger.bump_counter("Unknown trigger def '%s'" % stream.name)
            logger.error("Stream %s has unknown trigger definition %s" %
                         (stream.id, stream.name))
            self._error_stream(stream)
            return False
        pipeline = trigger_def.fire_pipeline
        if pipeline is not None:
            pipe_config = self.pipeline_config.get(pipeline)
            if pipe_config is None:
                debugger.bump_counter("Unknown pipeline '%s'" % pipeline)
                logger.error("Trigger %s for stream %s has unknown "
                             "pipeline %s" %
                             (stream.name, stream.id, pipeline))
                self._error_stream(stream)
                # Stop here.  Without this return the code went on to run
                # the pipeline with a None config and could then complete
                # a stream that had just been marked as errored.
                return False
            if not self._run_pipeline(stream, trigger_def, pipeline,
                                      pipe_config):
                self._error_stream(stream)
                return False
        else:
            logger.debug("No fire pipeline for stream %s. Nothing to do." %
                         (stream.id))
            debugger.bump_counter("No fire pipeline for '%s'" % stream.name)
        self._complete_stream(stream)
        debugger.bump_counter("Streams fired")
        self.streams_fired += 1
        return True

    def expire_stream(self, stream):
        """Run the expire pipeline for ``stream`` and mark it completed.

        The stream is first moved to ``StreamState.expiring``; if that raises
        ``LockError`` the stream is skipped.  A stream whose trigger
        definition or expire pipeline is unknown, or whose pipeline run
        fails, is put into the expire_error state.

        Returns True when the stream was expired (or had no expire pipeline)
        and completed, False in every other case.
        """
        trigger_def = self.trigger_map.get(stream.name)
        debugger = self.safe_get_debugger(trigger_def)
        try:
            stream = self.db.set_stream_state(stream, StreamState.expiring)
        except LockError:
            debugger.bump_counter("Locked")
            logger.debug("Stream %s locked. Moving on..." % stream.id)
            return False
        logger.debug("Expiring Stream %s." % stream.id)
        if trigger_def is None:
            debugger.bump_counter("Unknown trigger def '%s'" % stream.name)
            logger.error("Stream %s has unknown trigger definition %s" %
                         (stream.id, stream.name))
            self._expire_error_stream(stream)
            return False
        pipeline = trigger_def.expire_pipeline
        if pipeline is not None:
            pipe_config = self.pipeline_config.get(pipeline)
            if pipe_config is None:
                debugger.bump_counter("Unknown pipeline '%s'" % pipeline)
                logger.error(
                    "Trigger %s for stream %s has unknown pipeline %s" %
                    (stream.name, stream.id, pipeline))
                self._expire_error_stream(stream)
                # Stop here: without a config there is nothing to run, and
                # falling through would run the pipeline with None and could
                # then complete a stream already marked as errored.
                return False
            if not self._run_pipeline(stream, trigger_def, pipeline,
                                      pipe_config):
                self._expire_error_stream(stream)
                return False
        else:
            logger.debug("No expire pipeline for stream %s. Nothing to do." %
                         (stream.id))
            debugger.bump_counter("No expire pipeline for '%s'" % stream.name)
        self._complete_stream(stream)
        debugger.bump_counter("Streams expired")
        self.streams_expired += 1
        return True

    def process_ready_streams(self, batch_size, expire=False):
        """Load up to ``batch_size`` ready streams and fire or expire them.

        Streams are fetched from the database for the current time, shuffled,
        and each one is passed to ``expire_stream`` (when ``expire`` is true)
        or ``fire_stream``.  Returns the number of streams loaded, which is
        also added to ``self.streams_loaded``.
        """
        ready = self.db.get_ready_streams(batch_size,
                                          self.current_time(),
                                          expire=expire)
        loaded = len(ready)
        if expire:
            logger.debug("Loaded %s streams to expire." % loaded)
            handle = self.expire_stream
        else:
            logger.debug("Loaded %s streams to fire." % loaded)
            handle = self.fire_stream

        random.shuffle(ready)
        for ready_stream in ready:
            handle(ready_stream)
        self.streams_loaded += loaded
        return loaded

    def run(self):
        """Loop forever, firing and expiring ready streams.

        Each pass processes one batch of streams to fire and one batch to
        expire, logs statistics once more than ``statistics_period`` seconds
        have elapsed since ``last_status``, and sleeps for
        ``pipeline_worker_delay`` seconds when neither batch had any streams.
        """
        while True:
            fire_ct = self.process_ready_streams(
                self.pipeline_worker_batch_size)
            expire_ct = self.process_ready_streams(
                self.pipeline_worker_batch_size, expire=True)

            # Use total_seconds(): timedelta.seconds is only the seconds
            # component (0-86399) and ignores whole days.
            # NOTE(review): assumes current_time() values subtract to a
            # datetime.timedelta -- confirm.
            elapsed = self.current_time() - self.last_status
            if elapsed.total_seconds() > self.statistics_period:
                self._log_statistics()

            if not fire_ct and not expire_ct:
                logger.debug("No streams to fire or expire. Sleeping...")
                time.sleep(self.pipeline_worker_delay)
Example #10
0
 def __init__(self, app=None, queue_name=None):
     """Initialise the handler and create its TriggerManager.

     The path stored under the ``config_file`` setting is loaded with
     ``ConfigManager.load_config_file`` and the resulting config is shared
     by a publishing ``TimeSync`` and the ``TriggerManager``.
     """
     super(WinchesterHandler, self).__init__(app=app, queue_name=queue_name)
     config_path = self.config_get("config_file")
     winchester_config = ConfigManager.load_config_file(config_path)
     self.time_sync = time_sync.TimeSync(winchester_config, publishes=True)
     self.trigger_manager = TriggerManager(winchester_config,
                                           time_sync=self.time_sync)
Example #11
0
    def __init__(self, winchester_config):
        """Load ``winchester_config`` and build both managers from it.

        The loaded configuration is kept on ``self.config`` and passed to a
        new ``TriggerManager`` and a new ``PipelineManager``.
        """
        self.winchester_config = winchester_config

        loaded_config = ConfigManager.load_config_file(winchester_config)
        self.config = loaded_config
        self.trigger_manager = TriggerManager(loaded_config)
        self.pipe = PipelineManager(loaded_config)
Example #12
0
class TriggerTest():
    """  Trigger Test

    Adds stream definitions to the TriggerManager and PipelineManager classes.  Adds fake
    distilled events to the TriggerManager and checks that the fire and expire handlers get called.
    This test uses the winchester MySQL DB.
    """
    """ test data """

    trig_def_fc1 = [{
        'distinguished_by': ['instance_id'],
        'fire_criteria': [{
            'event_type': 'compute.instance.create.start'
        }, {
            'event_type': 'compute.instance.create.end'
        }],
        'match_criteria': [{
            'event_type': 'compute.instance.create.*'
        }],
        'name':
        'fc1_trigger',
        'debug_level':
        2,
        'expiration':
        '$last + 1h',
        'fire_pipeline':
        'test_pipeline',
        'expire_pipeline':
        'test_expire_pipeline'
    }]

    trig_def_fc1_tenant406904_filter = [{
        'distinguished_by': ['instance_id'],
        'fire_criteria': [{
            'event_type': 'compute.instance.create.start'
        }, {
            'event_type': 'compute.instance.create.end'
        }],
        'match_criteria': [{
            'traits': {
                'tenant_id': '406904'
            },
            'event_type': 'compute.instance.create.*'
        }],
        'name':
        'trig_def_fc1_406904',
        'debug_level':
        2,
        'expiration':
        '$first + 10s',
        'fire_pipeline':
        'test_pipeline',
        'expire_pipeline':
        'test_expire_pipeline'
    }]

    trig_def_fc1_tenant123456_filter = [{
        'distinguished_by': ['instance_id'],
        'fire_criteria': [{
            'event_type': 'compute.instance.create.start'
        }, {
            'event_type': 'compute.instance.create.end'
        }],
        'match_criteria': [{
            'traits': {
                'tenant_id': '123456'
            },
            'event_type': 'compute.instance.create.*'
        }],
        'name':
        'fc1_trigger_123456',
        'debug_level':
        2,
        'expiration':
        '$last + 24h',
        'fire_pipeline':
        'test_pipeline',
        'expire_pipeline':
        'test_expire_pipeline'
    }]
    """ test adding events to cause fire criteria """

    distilled_events_fc1_tenant_406904 = [{
        'os_distro':
        'com.ubuntu',
        'event_type':
        'compute.instance.create.start',
        'service':
        'publisher-302689',
        'instance_type':
        '512MB Standard Instance',
        'tenant_id':
        '406904',
        'instance_flavor_id':
        '2',
        'hostname':
        'server-462185',
        'host':
        'publisher-302689',
        'instance_flavor':
        '512MB Standard Instance',
        'instance_id':
        '111-3b0f-4057-b377-b65131e8532e',
        'os_version':
        '12.04',
        'state':
        'building',
        'os_architecture':
        'x64',
        'timestamp':
        datetime.utcnow(),
        'request_id':
        'req-d096b6de-f451-4d00-bff0-646a8c8a23c3',
        'message_id':
        '19701f6c-f51f-4ecb-85fb-7db40277627d'
    }, {
        'os_distro':
        'com.ubuntu',
        'message_id':
        '2ae21707-70ae-48a2-89c0-b08b11dc0b1a',
        'service':
        'publisher-302689',
        'instance_type':
        '512MB Standard Instance',
        'tenant_id':
        '406904',
        'instance_flavor_id':
        '2',
        'hostname':
        'server-462185',
        'host':
        'publisher-302689',
        'instance_flavor':
        '512MB Standard Instance',
        'instance_id':
        '111-3b0f-4057-b377-b65131e8532e',
        'os_version':
        '12.04',
        'state':
        'active',
        'os_architecture':
        'x64',
        'timestamp':
        datetime.utcnow(),
        'request_id':
        'req-d096b6de-f451-4d00-bff0-646a8c8a23c3',
        'launched_at':
        datetime.utcnow(),
        'event_type':
        'compute.instance.create.end'
    }]

    distilled_events_fc1_tenant_406904_missing_end = [{
        'os_distro':
        'com.ubuntu',
        'event_type':
        'compute.instance.create.start',
        'service':
        'publisher-302689',
        'instance_type':
        '512MB Standard Instance',
        'tenant_id':
        '406904',
        'instance_flavor_id':
        '2',
        'hostname':
        'server-462185',
        'host':
        'publisher-302689',
        'instance_flavor':
        '512MB Standard Instance',
        'instance_id':
        '333-3b0f-4057-b377-b65131e8532e',
        'os_version':
        '12.04',
        'state':
        'building',
        'os_architecture':
        'x64',
        'timestamp':
        datetime.utcnow(),
        'request_id':
        'req-d096b6de-f451-4d00-bff0-646a8c8a23c3',
        'message_id':
        '19701f6c-f51f-4ecb-85fb-7db40277627d'
    }]

    distilled_events_fc1_tenant_123456 = [{
        'os_distro':
        'com.ubuntu',
        'event_type':
        'compute.instance.create.start',
        'service':
        'publisher-302689',
        'instance_type':
        '512MB Standard Instance',
        'tenant_id':
        '123456',
        'instance_flavor_id':
        '2',
        'hostname':
        'server-462185',
        'host':
        'publisher-302689',
        'instance_flavor':
        '512MB Standard Instance',
        'instance_id':
        '456-3b0f-4057-b377-b65131e8532e',
        'os_version':
        '12.04',
        'state':
        'building',
        'os_architecture':
        'x64',
        'timestamp':
        datetime.utcnow(),
        'request_id':
        'req-d096b6de-f451-4d00-bff0-646a8c8a23c3',
        'message_id':
        '19701f6c-f51f-4ecb-85fb-7db40277627d'
    }, {
        'os_distro':
        'com.ubuntu',
        'message_id':
        '2ae21707-70ae-48a2-89c0-b08b11dc0b1a',
        'service':
        'publisher-302689',
        'instance_type':
        '512MB Standard Instance',
        'tenant_id':
        '123456',
        'instance_flavor_id':
        '2',
        'hostname':
        'server-462185',
        'host':
        'publisher-302689',
        'instance_flavor':
        '512MB Standard Instance',
        'instance_id':
        '456-3b0f-4057-b377-b65131e8532e',
        'os_version':
        '12.04',
        'state':
        'active',
        'os_architecture':
        'x64',
        'timestamp':
        datetime.utcnow(),
        'request_id':
        'req-d096b6de-f451-4d00-bff0-646a8c8a23c3',
        'launched_at':
        datetime.utcnow(),
        'event_type':
        'compute.instance.create.end'
    }]
    """ trigger defs for fire criteria 2 - looking for exists"""
    # Trigger definition that fires on a 'compute.instance.exists' event.
    # The second match criterion uses the 'map_distinguished_by' key, spelled
    # the same way as in trig_def_fc3_rackspace, so that the mapping of
    # 'timestamp' to 'audit_period_beginning' is not silently ignored.
    trig_def_fc2_rackspace_billing = [{
        'distinguished_by': ['instance_id', {
            'timestamp': 'day'
        }],
        'fire_criteria': [{
            'event_type': 'compute.instance.exists'
        }],
        'match_criteria': [{
            'event_type': ['compute.instance.*', '!compute.instance.exists']
        }, {
            'event_type': 'compute.instance.exists',
            'map_distinguished_by': {
                'timestamp': 'audit_period_beginning'
            }
        }],
        'name':
        'rackspace_billing',
        'debug_level':
        2,
        'expiration':
        '$last + 1h',
        'fire_pipeline':
        'test_pipeline',
        'expire_pipeline':
        'test_expire_pipeline'
    }]

    trig_def_fc3_rackspace = [{
        'distinguished_by': ['instance_id', {
            'timestamp': 'day'
        }],
        'fire_criteria': [{
            'traits': {
                'audit_period_ending': {
                    'datetime': '$audit_period_beginning + 1d'
                }
            },
            'event_type': 'compute.instance.exists'
        }],
        'match_criteria': [{
            'event_type': [
                'compute.instance.*', 'snapshot_instance', 'keypair.import.*',
                'rebuild_instance', 'compute.instance.*',
                '!compute.instance.exists', '!compute.instance.exists.failed',
                '!compute.instance.exists.warnings',
                '!compute.instance.exists.verified'
            ]
        }, {
            'event_type': 'compute.instance.exists',
            'map_distinguished_by': {
                'timestamp': 'audit_period_beginning'
            }
        }],
        'name':
        'rackspace_test_trigger',
        'debug_level':
        2,
        'expiration':
        '$last + 2d',
        'fire_pipeline':
        'test_fire_pipeline',
        'expire_pipeline':
        'test_expire_pipeline'
    }]

    distilled_events_fc2_tenant_222333 = [{
        'os_distro':
        'com.ubuntu',
        'event_type':
        'compute.instance.create.start',
        'service':
        'publisher-302689',
        'instance_type':
        '512MB Standard Instance',
        'tenant_id':
        '222333',
        'instance_flavor_id':
        '2',
        'hostname':
        'server-462185',
        'host':
        'publisher-302689',
        'instance_flavor':
        '512MB Standard Instance',
        'instance_id':
        '772b2f73-3b0f-4057-b377-b65131e8532e',
        'os_version':
        '12.04',
        'state':
        'building',
        'os_architecture':
        'x64',
        'timestamp':
        datetime.utcnow(),
        'request_id':
        'req-d096b6de-f451-4d00-bff0-646a8c8a23c3',
        'message_id':
        '19701f6c-f51f-4ecb-85fb-7db40277627d'
    }, {
        'os_distro':
        'com.ubuntu',
        'message_id':
        '2ae21707-70ae-48a2-89c0-b08b11dc0b1a',
        'service':
        'publisher-302689',
        'instance_type':
        '512MB Standard Instance',
        'tenant_id':
        '222333',
        'instance_flavor_id':
        '2',
        'hostname':
        'server-462185',
        'host':
        'publisher-302689',
        'instance_flavor':
        '512MB Standard Instance',
        'instance_id':
        '772b2f73-3b0f-4057-b377-b65131e8532e',
        'os_version':
        '12.04',
        'state':
        'active',
        'os_architecture':
        'x64',
        'timestamp':
        datetime.utcnow(),
        'request_id':
        'req-d096b6de-f451-4d00-bff0-646a8c8a23c3',
        'launched_at':
        datetime.utcnow(),
        'event_type':
        'compute.instance.create.end'
    }, {
        'os_distro':
        'com.ubuntu',
        'message_id':
        '2ae21707-70ae-48a2-89c0-b08b11dc0b1a',
        'service':
        'publisher-302689',
        'instance_type':
        '512MB Standard Instance',
        'tenant_id':
        '222333',
        'instance_flavor_id':
        '2',
        'hostname':
        'server-462185',
        'host':
        'publisher-302689',
        'instance_flavor':
        '512MB Standard Instance',
        'instance_id':
        '772b2f73-3b0f-4057-b377-b65131e8532e',
        'os_version':
        '12.04',
        'state':
        'active',
        'os_architecture':
        'x64',
        'timestamp':
        datetime.utcnow(),
        'request_id':
        'req-d096b6de-f451-4d00-bff0-646a8c8a23c3',
        'launched_at':
        datetime.utcnow(),
        'event_type':
        'compute.instance.exists'
    }]

    def __init__(self, winchester_config):
        """Load ``winchester_config`` and build both managers from it.

        The loaded configuration is kept on ``self.config`` and passed to a
        new ``TriggerManager`` and a new ``PipelineManager``.
        """
        self.winchester_config = winchester_config

        loaded_config = ConfigManager.load_config_file(winchester_config)
        self.config = loaded_config
        self.trigger_manager = TriggerManager(loaded_config)
        self.pipe = PipelineManager(loaded_config)

    def _add_unique_event(self, e):
        ''' make the static test data contain unique message id's '''
        e['message_id'] = uuid.uuid4()
        self.trigger_manager.add_event(e)

    def add_test_stream_definitions(self):
        self.trigger_manager.add_trigger_definition(
            TriggerTest.trig_def_fc1_tenant406904_filter)
        self.trigger_manager.add_trigger_definition(
            TriggerTest.trig_def_fc1_tenant123456_filter)

        self.pipe.add_trigger_definition(
            TriggerTest.trig_def_fc1_tenant406904_filter)
        self.pipe.add_trigger_definition(
            TriggerTest.trig_def_fc1_tenant123456_filter)

    def add_distilled_events_to_fire(self):
        for e in TriggerTest.distilled_events_fc1_tenant_406904:
            self._add_unique_event(e)
        for e in TriggerTest.distilled_events_fc1_tenant_123456:
            self._add_unique_event(e)

    def add_distilled_events_to_expire(self):
        for e in TriggerTest.distilled_events_fc1_tenant_406904_missing_end:
            self._add_unique_event(e)

    def add_distilled_events_with_no_match(self):
        for e in TriggerTest.distilled_events_fc2_tenant_222333:
            self._add_unique_event(e)

    def check_for_expired_streams(self):
        stream_count = self.pipe.process_ready_streams(
            self.pipe.pipeline_worker_batch_size, expire=True)
        return stream_count

    def check_for_fired_streams(self):
        stream_count = self.pipe.process_ready_streams(
            self.pipe.pipeline_worker_batch_size)
        return stream_count

    def test_no_match(self):
        """Submit non-matching events and report whether nothing was processed.

        Waits two seconds after adding the events, then prints success only
        when neither the fire nor the expire batch loaded any stream.
        """
        self.add_distilled_events_with_no_match()
        time.sleep(2)
        fired = self.check_for_fired_streams()
        expired = self.check_for_expired_streams()

        nothing_processed = (fired == 0 and expired == 0)
        print("test_no_match: Success" if nothing_processed
              else "test_no_match: Failed")

    def test_fired(self):
        """Submit complete event pairs and report whether two streams fired.

        Waits three seconds after adding the events, then prints success
        only when no stream was loaded to expire and exactly two were loaded
        to fire.
        """
        self.add_distilled_events_to_fire()
        time.sleep(3)
        fired = self.check_for_fired_streams()
        expired = self.check_for_expired_streams()

        both_fired = (expired == 0 and fired == 2)
        print("test_fired: Success" if both_fired else "test_fired: Failed")

    def test_expired(self):
        """Submit an unfinished event and report whether one stream expired.

        Waits eleven seconds after adding the event, then prints success
        only when exactly one stream was loaded to expire and none to fire.
        """
        self.add_distilled_events_to_expire()
        time.sleep(11)
        fired = self.check_for_fired_streams()
        expired = self.check_for_expired_streams()

        one_expired = (expired == 1 and fired == 0)
        print("test_expired: Success" if one_expired
              else "test_expired: Failed")
class PipelineManager(object):
    @classmethod
    def config_description(cls):
        """Return the config item descriptions for the pipeline manager.

        Starts from ``TriggerManager.config_description()`` and adds the
        pipeline-specific items to it.
        """
        configs = TriggerManager.config_description()
        pipeline_items = {
            'pipeline_handlers': ConfigItem(
                required=True,
                help="dictionary of pipeline handlers to load "
                     "Classes specified with simport syntax. "
                     "simport docs for more info"),
            'pipeline_worker_batch_size': ConfigItem(
                help="Number of streams for pipeline "
                     "worker(s) to load at a time",
                default=1000),
            'pipeline_worker_delay': ConfigItem(
                help="Number of seconds for pipeline worker "
                     "to sleep when it finds no streams to "
                     "process",
                default=10),
            'pipeline_config': ConfigItem(
                required=True,
                help="Name of pipeline config file "
                     "defining the handlers for each "
                     "pipeline."),
            'purge_completed_streams': ConfigItem(
                help="Delete successfully proccessed "
                     "streams when finished?",
                default=True),
            'trim_events': ConfigItem(
                help="Delete events older than a configurable time.",
                default=False),
            'trim_events_age': ConfigItem(
                help="Delete events older than this (timex expr).",
                default="$timestamp - 14d"),
            'trim_events_batch_size': ConfigItem(
                help="Maximum number of events for pipeline "
                     "worker(s) to trim at a time",
                default=100),
        }
        configs.update(pipeline_items)
        return configs

    def __init__(self, config, db=None, pipeline_handlers=None,
                 pipeline_config=None, trigger_defs=None, time_sync=None,
                 proc_name='pipeline_worker'):
        """Build a pipeline manager from ``config``.

        Each optional collaborator (``db``, ``pipeline_handlers``,
        ``pipeline_config``, ``trigger_defs``, ``time_sync``) is used as
        given when it is not ``None``; otherwise it is created or loaded
        from the wrapped configuration.  ``proc_name`` is the name used to
        distinguish worker processes in logs.
        """
        self.proc_name = proc_name

        logger.debug("PipelineManager(%s): Using config: %s"
                     % (self.proc_name, str(config)))
        config = ConfigManager.wrap(config, self.config_description())
        self.config = config
        self.trigger_definitions = []
        config.check_config()
        config.add_config_path(*config['config_path'])

        time_sync = ts.TimeSync() if time_sync is None else time_sync
        self.time_sync = time_sync

        self.db = db if db is not None else DBInterface(config['database'])

        if pipeline_handlers is None:
            pipeline_handlers = self._load_plugins(
                config['pipeline_handlers'])
        self.pipeline_handlers = pipeline_handlers
        logger.debug("Pipeline handlers: %s" % str(self.pipeline_handlers))

        if pipeline_config is None:
            pipeline_config = config.load_file(config['pipeline_config'])
        self.pipeline_config = pipeline_config

        logger.debug("Pipeline config: %s" % str(self.pipeline_config))
        # Replace every pipeline's handler configs with their checked form.
        for pipeline_name, handler_configs in self.pipeline_config.items():
            checked = []
            for handler_conf in handler_configs:
                checked.append(Pipeline.check_handler_config(
                    handler_conf, self.pipeline_handlers))
            self.pipeline_config[pipeline_name] = checked

        if trigger_defs is not None:
            self.trigger_definitions = trigger_defs
        elif config.contains('trigger_definitions'):
            # trigger_definition config file is optional
            defs = config.load_file(config['trigger_definitions'])
            logger.debug("Loaded trigger definitions %s" % str(defs))
            self.trigger_definitions = [
                TriggerDefinition(conf, None) for conf in defs]

        self.trigger_manager = TriggerManager(
            self.config,
            db=self.db,
            trigger_defs=self.trigger_definitions,
            time_sync=time_sync)

        self.pipeline_worker_batch_size = config['pipeline_worker_batch_size']
        self.pipeline_worker_delay = config['pipeline_worker_delay']
        self.statistics_period = config['statistics_period']
        self.purge_completed_streams = config['purge_completed_streams']
        self.trim_events = config['trim_events']
        self.trim_events_batch_size = config['trim_events_batch_size']
        try:
            self.trim_events_age = timex.parse(str(config['trim_events_age']))
        except timex.TimexError:
            # An unparsable expression turns event trimming off.
            logger.error("Invalid trim event expression: %s Event trimming "
                         "disabled." % config['trim_events_age'])
            self.trim_events_age = None
            self.trim_events = False

        self.streams_fired = 0
        self.streams_expired = 0
        self.streams_loaded = 0
        self.last_status = self.current_time()

    @classmethod
    def _load_plugins(cls, plug_map, defaults=None):
        """Load the plugins named in ``plug_map`` with ``simport.load``.

        The result starts from ``defaults`` (when given) and maps each name
        to its loaded object.  A plugin that fails to load is logged as an
        error and left out of the result.
        """
        loaded = {}
        if defaults is not None:
            loaded.update(defaults)
        for plugin_name, import_path in plug_map.items():
            try:
                plugin = simport.load(import_path)
            except simport.ImportFailed as err:
                logger.error("Could not load plugin %s: Import failed. %s" % (
                             plugin_name, err))
            except (simport.MissingMethodOrFunction,
                    simport.MissingModule,
                    simport.BadDirectory) as err:
                logger.error("Could not load plugin %s: Not found. %s" % (
                    plugin_name, err))
            else:
                loaded[plugin_name] = plugin
        return loaded

    def current_time(self):
        # here so it's easily overridden.
        return self.time_sync.current_time()

    def _log_statistics(self):
        """Log the stream counters, reset them, and dump the debuggers.

        ``last_status`` is set to the current time so the next statistics
        period starts from now.
        """
        counts = (self.streams_loaded, self.streams_fired,
                  self.streams_expired)
        logger.info("Loaded %s streams. Fired %s, Expired %s." % counts)
        self.streams_fired = self.streams_expired = self.streams_loaded = 0
        self.last_status = self.current_time()

        self.trigger_manager.debug_manager.dump_debuggers()

    def add_new_events(self, events):
        for event in events:
            self.trigger_manager.add_event(event)

    def _run_pipeline(self, stream, trigger_def, pipeline_name,
                      pipeline_config):
        """Run one pipeline over the events of ``stream``.

        Any events returned by the pipeline are fed back through
        ``add_new_events``.  Returns False when the pipeline raises
        ``PipelineExecutionError`` (which is logged), True otherwise.
        """
        stream_events = self.db.get_stream_events(stream)
        debugger = trigger_def.debugger
        try:
            runner = Pipeline(pipeline_name, pipeline_config,
                              self.pipeline_handlers)
            produced = runner.handle_events(stream_events, stream, debugger)
        except PipelineExecutionError:
            logger.error("Exception in pipeline %s handling stream %s" % (
                pipeline_name, stream.id))
            return False

        if produced:
            self.add_new_events(produced)
        return True

    def _complete_stream(self, stream):
        """Finish a successfully processed stream.

        The stream is purged when ``purge_completed_streams`` is set;
        otherwise it is moved to ``StreamState.completed``, with a
        ``LockError`` logged rather than raised.
        """
        if self.purge_completed_streams:
            self.db.purge_stream(stream)
            return

        try:
            self.db.set_stream_state(stream, StreamState.completed)
        except LockError:
            message = (
                "Stream %s locked while trying to set 'complete' state! "
                "This should not happen." % stream.id)
            logger.error(message)

    def _error_stream(self, stream):
        """Put ``stream`` into the ``StreamState.error`` state.

        A ``LockError`` from the database layer is logged as an error and
        not re-raised.
        """
        try:
            self.db.set_stream_state(stream, StreamState.error)
        except LockError:
            message = ("Stream %s locked while trying to set 'error' state! "
                       "This should not happen." % stream.id)
            logger.error(message)

    def _expire_error_stream(self, stream):
        """Put ``stream`` into the ``StreamState.expire_error`` state.

        A ``LockError`` from the database layer is logged as an error and
        not re-raised.
        """
        try:
            self.db.set_stream_state(stream, StreamState.expire_error)
        except LockError:
            message = (
                "Stream %s locked while trying to set 'expire_error' state! "
                "This should not happen." % stream.id)
            logger.error(message)

    def safe_get_debugger(self, trigger_def):
        return trigger_def.debugger if trigger_def is not None else \
            self.trigger_manager.debug_manager.get_debugger(None)

    def add_trigger_definition(self, list_of_triggerdefs):
        self.trigger_manager.add_trigger_definition(list_of_triggerdefs)

    def delete_trigger_definition(self, trigger_def_name):
        self.trigger_manager.delete_trigger_definition(trigger_def_name)

    def fire_stream(self, stream):
        """Run the fire pipeline for ``stream`` and mark it completed.

        The trigger definition is looked up by stream name in the trigger
        manager's ``trigger_map``.  The stream is first moved to
        ``StreamState.firing``; if that raises ``LockError`` the stream is
        skipped.  A stream whose trigger definition or fire pipeline is
        unknown, or whose pipeline run fails, is put into the error state.

        Returns True when the stream was fired (or had no fire pipeline) and
        completed, False in every other case.
        """
        trigger_def = self.trigger_manager.trigger_map.get(stream.name)
        debugger = self.safe_get_debugger(trigger_def)
        try:
            stream = self.db.set_stream_state(stream, StreamState.firing)
        except LockError:
            logger.debug("Stream %s locked. Moving on..." % stream.id)
            debugger.bump_counter("Locked")
            return False
        logger.debug("Firing Stream %s." % stream.id)
        if trigger_def is None:
            debugger.bump_counter("Unknown trigger def '%s'" % stream.name)
            logger.error("Stream %s has unknown trigger definition %s" % (
                         stream.id, stream.name))
            self._error_stream(stream)
            return False
        pipeline = trigger_def.fire_pipeline
        if pipeline is not None:
            pipe_config = self.pipeline_config.get(pipeline)
            if pipe_config is None:
                debugger.bump_counter("Unknown pipeline '%s'" % pipeline)
                logger.error("Trigger %s for stream %s has unknown "
                             "pipeline %s" % (stream.name, stream.id,
                                              pipeline))
                self._error_stream(stream)
                # Stop here: without a config there is nothing to run, and
                # falling through would run the pipeline with None and could
                # then complete a stream already marked as errored.
                return False
            if not self._run_pipeline(stream, trigger_def, pipeline,
                                      pipe_config):
                self._error_stream(stream)
                return False
        else:
            logger.debug("No fire pipeline for stream %s. Nothing to do." % (
                         stream.id))
            debugger.bump_counter("No fire pipeline for '%s'" % stream.name)
        self._complete_stream(stream)
        debugger.bump_counter("Streams fired")
        self.streams_fired += 1
        return True

    def expire_stream(self, stream):
        """Run the expire pipeline for ``stream`` and mark it completed.

        The trigger definition is looked up by stream name in the trigger
        manager's ``trigger_map``.  The stream is first moved to
        ``StreamState.expiring``; if that raises ``LockError`` the stream is
        skipped.  A stream whose trigger definition or expire pipeline is
        unknown, or whose pipeline run fails, is put into the expire_error
        state.

        Returns True when the stream was expired (or had no expire pipeline)
        and completed, False in every other case.
        """
        trigger_def = self.trigger_manager.trigger_map.get(stream.name)
        debugger = self.safe_get_debugger(trigger_def)
        try:
            stream = self.db.set_stream_state(stream, StreamState.expiring)
        except LockError:
            debugger.bump_counter("Locked")
            logger.debug("Stream %s locked. Moving on..." % stream.id)
            return False
        logger.debug("Expiring Stream %s." % stream.id)
        if trigger_def is None:
            debugger.bump_counter("Unknown trigger def '%s'" % stream.name)
            logger.error("Stream %s has unknown trigger definition %s" % (
                stream.id, stream.name))
            self._expire_error_stream(stream)
            return False
        pipeline = trigger_def.expire_pipeline
        if pipeline is not None:
            pipe_config = self.pipeline_config.get(pipeline)
            if pipe_config is None:
                debugger.bump_counter("Unknown pipeline '%s'" % pipeline)
                logger.error(
                    "Trigger %s for stream %s has unknown pipeline %s" % (
                        stream.name, stream.id, pipeline))
                self._expire_error_stream(stream)
                # Stop here: without a config there is nothing to run, and
                # falling through would run the pipeline with None and could
                # then complete a stream already marked as errored.
                return False
            if not self._run_pipeline(stream, trigger_def, pipeline,
                                      pipe_config):
                self._expire_error_stream(stream)
                return False
        else:
            logger.debug("No expire pipeline for stream %s. Nothing to do." % (
                stream.id))
            debugger.bump_counter("No expire pipeline for '%s'" % stream.name)
        self._complete_stream(stream)
        debugger.bump_counter("Streams expired")
        self.streams_expired += 1
        return True

    def process_ready_streams(self, batch_size, expire=False):
        """Load up to ``batch_size`` ready streams and fire or expire them.

        Streams are fetched from the database for the current time, shuffled,
        and each one is passed to ``expire_stream`` (when ``expire`` is true)
        or ``fire_stream``.  Returns the number of streams loaded, which is
        also added to ``self.streams_loaded``.
        """
        ready = self.db.get_ready_streams(batch_size, self.current_time(),
                                          expire=expire)
        loaded = len(ready)
        if expire:
            logger.debug("Loaded %s streams to expire." % loaded)
            handle = self.expire_stream
        else:
            logger.debug("Loaded %s streams to fire." % loaded)
            handle = self.fire_stream

        random.shuffle(ready)
        for ready_stream in ready:
            handle(ready_stream)
        self.streams_loaded += loaded
        return loaded

    def process_trim_events(self):
        """Purge one batch of events older than the configured trim age.

        The cutoff is the ``timestamp`` of the evaluated ``trim_events_age``
        expression; at most ``trim_events_batch_size`` event ids are looked
        up and purged.  Returns the number of event ids found.
        """
        cutoff = self.trim_events_age().timestamp
        stale_ids = self.db.find_older_events(cutoff,
                                              self.trim_events_batch_size)
        logger.debug("Trimming %s old events" % len(stale_ids))
        self.db.purge_events(stale_ids)
        return len(stale_ids)

    def run(self):
        """Loop forever firing, expiring and (optionally) trimming.

        Each pass processes one batch of streams to fire and one to expire,
        trims old events when ``trim_events`` is enabled, logs statistics
        once more than ``statistics_period`` seconds have elapsed since
        ``last_status``, and sleeps for ``pipeline_worker_delay`` seconds
        when the pass found no work.

        A ``DatabaseConnectionError`` is logged and the pass retried after
        five seconds; any other exception is logged and re-raised.
        """
        while True:
            try:
                fire_ct = self.process_ready_streams(
                    self.pipeline_worker_batch_size)
                expire_ct = self.process_ready_streams(
                    self.pipeline_worker_batch_size,
                    expire=True)

                trim_ct = 0
                if self.trim_events:
                    trim_ct = self.process_trim_events()

                # Use total_seconds(): timedelta.seconds is only the seconds
                # component (0-86399) and ignores whole days.
                # NOTE(review): assumes current_time() values subtract to a
                # datetime.timedelta -- confirm.
                elapsed = self.current_time() - self.last_status
                if elapsed.total_seconds() > self.statistics_period:
                    self._log_statistics()

                if not fire_ct and not expire_ct and not trim_ct:
                    logger.debug("No streams to fire or expire. Sleeping...")
                    time.sleep(self.pipeline_worker_delay)
            except DatabaseConnectionError:
                # logger.warn is a deprecated alias of logger.warning.
                logger.warning(
                    "Database Connection went away. Reconnecting...")
                time.sleep(5)
                # DB layer will reconnect automatically. We just need to
                # retry the operation. (mdragon)
            except Exception:
                logger.exception("Unknown Error in pipeline worker!")
                raise
    def __init__(self, config, db=None, pipeline_handlers=None,
                 pipeline_config=None, trigger_defs=None, time_sync=None,
                 proc_name='pipeline_worker'):
        """Set up the manager from ``config``, with optional overrides.

        :param config: configuration; wrapped with ``ConfigManager.wrap``
                       against ``self.config_description()`` and checked.
        :param db: database interface to use; when ``None`` a
                   ``DBInterface`` is built from ``config['database']``.
        :param pipeline_handlers: handler mapping to use; when ``None`` it
                   is loaded from ``config['pipeline_handlers']``.
        :param pipeline_config: pipeline configuration to use; when ``None``
                   it is loaded from the ``config['pipeline_config']`` file.
        :param trigger_defs: trigger definitions to use; when ``None`` they
                   are loaded from the optional ``trigger_definitions`` file.
        :param time_sync: time sync object; when ``None`` a ``ts.TimeSync()``
                   is created.
        :param proc_name: name used to distinguish worker processes in logs.
        """
        # name used to distinguish worker processes in logs
        self.proc_name = proc_name

        logger.debug("PipelineManager(%s): Using config: %s"
                     % (self.proc_name, str(config)))
        config = ConfigManager.wrap(config, self.config_description())
        self.config = config
        self.trigger_definitions = []
        config.check_config()
        config.add_config_path(*config['config_path'])

        self.time_sync = ts.TimeSync() if time_sync is None else time_sync
        self.db = DBInterface(config['database']) if db is None else db

        if pipeline_handlers is None:
            pipeline_handlers = self._load_plugins(
                config['pipeline_handlers'])
        self.pipeline_handlers = pipeline_handlers
        logger.debug("Pipeline handlers: %s" % str(self.pipeline_handlers))

        if pipeline_config is None:
            pipeline_config = config.load_file(config['pipeline_config'])
        self.pipeline_config = pipeline_config

        logger.debug("Pipeline config: %s" % str(self.pipeline_config))
        # Replace every handler config with the result of checking it
        # against the handlers that are available.
        for pipeline, handler_configs in self.pipeline_config.items():
            checked = []
            for conf in handler_configs:
                checked.append(
                    Pipeline.check_handler_config(conf,
                                                  self.pipeline_handlers))
            self.pipeline_config[pipeline] = checked

        if trigger_defs is not None:
            self.trigger_definitions = trigger_defs
        elif config.contains('trigger_definitions'):
            # trigger_definition config file is optional
            defs = config.load_file(config['trigger_definitions'])
            logger.debug("Loaded trigger definitions %s" % str(defs))
            self.trigger_definitions = [
                TriggerDefinition(conf, None) for conf in defs]

        self.trigger_manager = TriggerManager(
            self.config, db=self.db,
            trigger_defs=self.trigger_definitions,
            time_sync=self.time_sync)

        self.pipeline_worker_batch_size = config['pipeline_worker_batch_size']
        self.pipeline_worker_delay = config['pipeline_worker_delay']
        self.statistics_period = config['statistics_period']
        self.purge_completed_streams = config['purge_completed_streams']
        self.trim_events = config['trim_events']
        self.trim_events_batch_size = config['trim_events_batch_size']
        try:
            self.trim_events_age = timex.parse(str(config['trim_events_age']))
        except timex.TimexError:
            # An unparsable age expression turns event trimming off.
            logger.error("Invalid trim event expression: %s Event trimming "
                         "disabled." % config['trim_events_age'])
            self.trim_events_age = None
            self.trim_events = False

        # Counters reported by the periodic statistics log.
        self.streams_fired = 0
        self.streams_expired = 0
        self.streams_loaded = 0
        self.last_status = self.current_time()
 def __init__(self, app=None, queue_name=None):
     """Initialise the handler with a publishing time sync and a trigger manager.

     The Winchester configuration is loaded from the file named by this
     handler's "config_file" setting.
     """
     super(WinchesterHandler, self).__init__(app=app, queue_name=queue_name)
     config = ConfigManager.load_config_file(
         self.config_get("config_file"))
     self.time_sync = time_sync.TimeSync(config, publishes=True)
     # The trigger manager shares this handler's time sync object.
     self.trigger_manager = TriggerManager(config,
                                           time_sync=self.time_sync)
Example #16
0
 def __init__(self, app=None, queue_name=None):
     """Initialise the handler and build its trigger manager.

     The Winchester configuration is loaded from the file named by this
     handler's "config_file" setting.
     """
     super(WinchesterHandler, self).__init__(app=app, queue_name=queue_name)
     config = ConfigManager.load_config_file(
         self.config_get("config_file"))
     self.trigger_manager = TriggerManager(config)
class PipelineManager(object):
    """Fire and expire ready streams by running their configured pipelines.

    The manager loads ready streams from the database, looks up the trigger
    definition matching each stream's name, runs the definition's fire or
    expire pipeline over the stream's events, and then completes the stream
    (or marks it as errored).
    """

    @classmethod
    def config_description(cls):
        """Return the config items for this class.

        Starts from ``TriggerManager.config_description()`` and adds the
        pipeline-specific items.
        """
        configs = TriggerManager.config_description()
        configs.update(dict(
            pipeline_handlers=ConfigItem(
                required=True,
                help="dictionary of pipeline handlers to load "
                     "Classes specified with simport syntax. "
                     "simport docs for more info"),
            pipeline_worker_batch_size=ConfigItem(
                help="Number of streams for pipeline "
                     "worker(s) to load at a time",
                default=1000),
            pipeline_worker_delay=ConfigItem(
                help="Number of seconds for pipeline worker "
                     "to sleep when it finds no streams to "
                     "process",
                default=10),
            pipeline_config=ConfigItem(
                required=True,
                help="Name of pipeline config file "
                     "defining the handlers for each "
                     "pipeline."),
            purge_completed_streams=ConfigItem(
                help="Delete successfully proccessed "
                     "streams when finished?",
                default=True),
        ))
        return configs

    def __init__(self, config, db=None, pipeline_handlers=None,
                 pipeline_config=None, trigger_defs=None):
        """Set up the manager from ``config``, with optional overrides.

        :param config: configuration; wrapped with ``ConfigManager.wrap``
                       against ``self.config_description()`` and checked.
        :param db: database interface to use; when ``None`` a
                   ``DBInterface`` is built from ``config['database']``.
        :param pipeline_handlers: handler mapping to use; when ``None`` it
                   is loaded from ``config['pipeline_handlers']``.
        :param pipeline_config: pipeline configuration to use; when ``None``
                   it is loaded from the ``config['pipeline_config']`` file.
        :param trigger_defs: trigger definitions to use; when ``None`` they
                   are loaded from the ``config['trigger_definitions']``
                   file.
        """
        logger.debug("PipelineManager: Using config: %s" % str(config))
        config = ConfigManager.wrap(config, self.config_description())
        self.config = config
        config.check_config()
        config.add_config_path(*config['config_path'])
        if db is not None:
            self.db = db
        else:
            self.db = DBInterface(config['database'])

        if pipeline_handlers is not None:
            self.pipeline_handlers = pipeline_handlers
        else:
            self.pipeline_handlers = self._load_plugins(
                config['pipeline_handlers'])
        logger.debug("Pipeline handlers: %s" % str(self.pipeline_handlers))

        if pipeline_config is not None:
            self.pipeline_config = pipeline_config
        else:
            self.pipeline_config = config.load_file(config['pipeline_config'])

        logger.debug("Pipeline config: %s" % str(self.pipeline_config))
        # Replace every handler config with the result of checking it
        # against the handlers that are available.
        for pipeline, handler_configs in self.pipeline_config.items():
            self.pipeline_config[pipeline] = [
                Pipeline.check_handler_config(conf, self.pipeline_handlers)
                for conf in handler_configs]

        if trigger_defs is not None:
            self.trigger_definitions = trigger_defs
        else:
            defs = config.load_file(config['trigger_definitions'])
            logger.debug("Loaded trigger definitions %s" % str(defs))
            self.trigger_definitions = [
                TriggerDefinition(conf, None) for conf in defs]
        # Streams carry the name of their trigger definition; index by it.
        self.trigger_map = {
            tdef.name: tdef for tdef in self.trigger_definitions}

        self.trigger_manager = TriggerManager(
            self.config, db=self.db,
            trigger_defs=self.trigger_definitions)

        self.pipeline_worker_batch_size = config['pipeline_worker_batch_size']
        self.pipeline_worker_delay = config['pipeline_worker_delay']
        self.statistics_period = config['statistics_period']
        self.purge_completed_streams = config['purge_completed_streams']
        # Counters reported (and reset) by _log_statistics().
        self.streams_fired = 0
        self.streams_expired = 0
        self.streams_loaded = 0
        self.last_status = self.current_time()

    @classmethod
    def _load_plugins(cls, plug_map, defaults=None):
        """Load the plugin classes named in ``plug_map``.

        :param plug_map: mapping of plugin name to a class string that is
                         passed to ``simport.load``.
        :param defaults: optional mapping copied into the result before any
                         plugin is loaded.
        :returns: dict of plugin name to loaded object. A plugin that fails
                  to load is logged and left out of the result.
        """
        plugins = dict()
        if defaults is not None:
            plugins.update(defaults)
        for name, cls_string in plug_map.items():
            try:
                plugins[name] = simport.load(cls_string)
            except simport.ImportFailed as e:
                # Log through the module logger used by the rest of the
                # class.
                logger.error("Could not load plugin %s: Import failed. %s" % (
                             name, e))
            except (simport.MissingMethodOrFunction,
                    simport.MissingModule,
                    simport.BadDirectory) as e:
                logger.error("Could not load plugin %s: Not found. %s" % (
                             name, e))
        return plugins

    def current_time(self):
        """Return the current UTC time as a naive datetime."""
        # here so it's easily overridden.
        return datetime.datetime.utcnow()

    def _log_statistics(self):
        """Log the stream counters, reset them, and dump the debuggers."""
        logger.info("Loaded %s streams. Fired %s, Expired %s." % (
                    self.streams_loaded, self.streams_fired,
                    self.streams_expired))
        self.streams_fired = 0
        self.streams_expired = 0
        self.streams_loaded = 0
        self.last_status = self.current_time()

        self.trigger_manager.debug_manager.dump_debuggers()

    def add_new_events(self, events):
        """Pass each event in ``events`` to the trigger manager."""
        for event in events:
            self.trigger_manager.add_event(event)

    def _run_pipeline(self, stream, trigger_def, pipeline_name,
                      pipeline_config):
        """Run one pipeline over the events of ``stream``.

        Any events the pipeline returns are added through
        ``add_new_events``.

        :returns: ``True`` on success, ``False`` when the pipeline raised
                  ``PipelineExecutionError``.
        """
        events = self.db.get_stream_events(stream)
        debugger = trigger_def.debugger
        try:
            pipeline = Pipeline(pipeline_name, pipeline_config,
                                self.pipeline_handlers)
            new_events = pipeline.handle_events(events, debugger)
        except PipelineExecutionError:
            logger.error("Exception in pipeline %s handling stream %s" % (
                         pipeline_name, stream.id))
            return False
        if new_events:
            self.add_new_events(new_events)
        return True

    def _complete_stream(self, stream):
        """Purge the stream, or mark it completed when purging is off."""
        if self.purge_completed_streams:
            self.db.purge_stream(stream)
        else:
            try:
                self.db.set_stream_state(stream, StreamState.completed)
            except LockError:
                logger.error(
                    "Stream %s locked while trying to set 'complete' state! "
                    "This should not happen." % stream.id)

    def _error_stream(self, stream):
        """Put the stream into the ``error`` state."""
        try:
            self.db.set_stream_state(stream, StreamState.error)
        except LockError:
            logger.error("Stream %s locked while trying to set 'error' state! "
                         "This should not happen." % stream.id)

    def _expire_error_stream(self, stream):
        """Put the stream into the ``expire_error`` state."""
        try:
            self.db.set_stream_state(stream, StreamState.expire_error)
        except LockError:
            logger.error(
                "Stream %s locked while trying to set 'expire_error' state! "
                "This should not happen." % stream.id)

    def safe_get_debugger(self, trigger_def):
        """Return the definition's debugger, or the default one for ``None``."""
        return trigger_def.debugger if trigger_def is not None else \
            self.trigger_manager.debug_manager.get_debugger(None)

    def fire_stream(self, stream):
        """Fire one stream by running its trigger's fire pipeline.

        :returns: ``True`` when the stream was fired and completed. ``False``
                  when the stream is locked, its trigger definition or
                  pipeline is unknown, or the pipeline failed; in the last
                  three cases the stream is put into the error state.
        """
        trigger_def = self.trigger_map.get(stream.name)
        debugger = self.safe_get_debugger(trigger_def)
        try:
            stream = self.db.set_stream_state(stream, StreamState.firing)
        except LockError:
            logger.debug("Stream %s locked. Moving on..." % stream.id)
            debugger.bump_counter("Locked")
            return False
        logger.debug("Firing Stream %s." % stream.id)
        if trigger_def is None:
            debugger.bump_counter("Unknown trigger def '%s'" % stream.name)
            logger.error("Stream %s has unknown trigger definition %s" % (
                         stream.id, stream.name))
            self._error_stream(stream)
            return False
        pipeline = trigger_def.fire_pipeline
        if pipeline is not None:
            pipe_config = self.pipeline_config.get(pipeline)
            if pipe_config is None:
                debugger.bump_counter("Unknown pipeline '%s'" % pipeline)
                logger.error("Trigger %s for stream %s has unknown "
                             "pipeline %s" % (stream.name, stream.id,
                                              pipeline))
                self._error_stream(stream)
                # Stop here: there is no handler config to run, and the
                # stream must stay in the error state instead of being
                # completed below.
                return False
            if not self._run_pipeline(stream, trigger_def, pipeline,
                                      pipe_config):
                self._error_stream(stream)
                return False
        else:
            logger.debug("No fire pipeline for stream %s. Nothing to do." % (
                         stream.id))
            debugger.bump_counter("No fire pipeline for '%s'" % stream.name)
        self._complete_stream(stream)
        debugger.bump_counter("Streams fired")
        self.streams_fired += 1
        return True

    def expire_stream(self, stream):
        """Expire one stream by running its trigger's expire pipeline.

        :returns: ``True`` when the stream was expired and completed.
                  ``False`` when the stream is locked, its trigger definition
                  or pipeline is unknown, or the pipeline failed; in the last
                  three cases the stream is put into the expire_error state.
        """
        trigger_def = self.trigger_map.get(stream.name)
        debugger = self.safe_get_debugger(trigger_def)
        try:
            stream = self.db.set_stream_state(stream, StreamState.expiring)
        except LockError:
            debugger.bump_counter("Locked")
            logger.debug("Stream %s locked. Moving on..." % stream.id)
            return False
        logger.debug("Expiring Stream %s." % stream.id)
        if trigger_def is None:
            debugger.bump_counter("Unknown trigger def '%s'" % stream.name)
            logger.error("Stream %s has unknown trigger definition %s" % (
                         stream.id, stream.name))
            self._expire_error_stream(stream)
            return False
        pipeline = trigger_def.expire_pipeline
        if pipeline is not None:
            pipe_config = self.pipeline_config.get(pipeline)
            if pipe_config is None:
                debugger.bump_counter("Unknown pipeline '%s'" % pipeline)
                logger.error(
                    "Trigger %s for stream %s has unknown pipeline %s" % (
                        stream.name, stream.id, pipeline))
                self._expire_error_stream(stream)
                # Stop here: there is no handler config to run, and the
                # stream must stay in the expire_error state instead of
                # being completed below.
                return False
            if not self._run_pipeline(stream, trigger_def, pipeline,
                                      pipe_config):
                self._expire_error_stream(stream)
                return False
        else:
            logger.debug("No expire pipeline for stream %s. Nothing to do." % (
                         stream.id))
            debugger.bump_counter("No expire pipeline for '%s'" % stream.name)
        self._complete_stream(stream)
        debugger.bump_counter("Streams expired")
        self.streams_expired += 1
        return True

    def process_ready_streams(self, batch_size, expire=False):
        """Load one batch of ready streams and fire or expire each of them.

        :param batch_size: maximum number of streams requested from the
                           database layer.
        :param expire: when true the streams are expired, otherwise fired.
        :returns: the number of streams that were loaded.
        """
        streams = self.db.get_ready_streams(batch_size, self.current_time(),
                                            expire=expire)
        stream_ct = len(streams)
        if expire:
            logger.debug("Loaded %s streams to expire." % stream_ct)
        else:
            logger.debug("Loaded %s streams to fire." % stream_ct)

        # The batch is processed in random order.
        random.shuffle(streams)
        for stream in streams:
            if expire:
                self.expire_stream(stream)
            else:
                self.fire_stream(stream)
        self.streams_loaded += stream_ct
        return stream_ct

    def run(self):
        """Run the pipeline worker loop; this method does not return.

        Each iteration fires one batch of ready streams, expires one batch,
        and logs statistics once more than ``statistics_period`` seconds
        have passed since ``last_status``. When no streams were found the
        worker sleeps for ``pipeline_worker_delay`` seconds.
        """
        while True:
            fire_ct = self.process_ready_streams(
                self.pipeline_worker_batch_size)
            expire_ct = self.process_ready_streams(
                self.pipeline_worker_batch_size, expire=True)

            # total_seconds() covers the whole interval; the .seconds
            # attribute silently drops whole days and wraps around for
            # negative intervals.
            since_status = self.current_time() - self.last_status
            if since_status.total_seconds() > self.statistics_period:
                self._log_statistics()

            if not fire_ct and not expire_ct:
                logger.debug("No streams to fire or expire. Sleeping...")
                time.sleep(self.pipeline_worker_delay)
class TriggerTest():

    """  Trigger Test

    Adds Stream Definitions to the TriggerManager and PipelineManager classes.  Adds Fake
    distilled events to the TriggerManager and ensures the Fire and expire handlers will get called.
    This test uses the winchester mysql DB.
    """

    """ test data """

    trig_def_fc1 = [{'distinguished_by': ['instance_id'],
                     'fire_criteria': [{'event_type': 'compute.instance.create.start'},
                                       {'event_type': 'compute.instance.create.end'}],
                     'match_criteria': [{'event_type': 'compute.instance.create.*'}],
                     'name': 'fc1_trigger',
                     'debug_level': 2,
                     'expiration': '$last + 1h',
                     'fire_pipeline': 'test_pipeline',
                     'expire_pipeline': 'test_expire_pipeline'}]

    trig_def_fc1_tenant406904_filter = [{'distinguished_by': ['instance_id'],
                                         'fire_criteria': [{'event_type': 'compute.instance.create.start'}, {'event_type': 'compute.instance.create.end'}],
                                         'match_criteria': [{'traits': {'tenant_id': '406904'}, 'event_type': 'compute.instance.create.*'}],
                                         'name': 'trig_def_fc1_406904',
                                         'debug_level': 2,
                                         'expiration': '$first + 10s',
                                         'fire_pipeline': 'test_pipeline',
                                         'expire_pipeline': 'test_expire_pipeline'}]

    trig_def_fc1_tenant123456_filter = [{'distinguished_by': ['instance_id'],
                                         'fire_criteria': [{'event_type': 'compute.instance.create.start'}, {'event_type': 'compute.instance.create.end'}],
                                         'match_criteria': [{'traits': {'tenant_id': '123456'},
                                                             'event_type': 'compute.instance.create.*'}],
                                         'name': 'fc1_trigger_123456',
                                         'debug_level': 2,
                                         'expiration': '$last + 24h',
                                         'fire_pipeline': 'test_pipeline',
                                         'expire_pipeline': 'test_expire_pipeline'}]

    """ test adding events to cause fire criteria """

    distilled_events_fc1_tenant_406904 = [{'os_distro': 'com.ubuntu',
                                           'event_type': 'compute.instance.create.start',
                                           'service': 'publisher-302689',
                                           'instance_type': '512MB Standard Instance',
                                           'tenant_id': '406904',
                                           'instance_flavor_id': '2',
                                           'hostname': 'server-462185',
                                           'host': 'publisher-302689',
                                           'instance_flavor': '512MB Standard Instance',
                                           'instance_id': '111-3b0f-4057-b377-b65131e8532e',
                                           'os_version': '12.04',
                                           'state': 'building',
                                           'os_architecture': 'x64',
                                           'timestamp': datetime.utcnow(),
                                           'request_id': 'req-d096b6de-f451-4d00-bff0-646a8c8a23c3',
                                           'message_id': '19701f6c-f51f-4ecb-85fb-7db40277627d'},
                                          {'os_distro': 'com.ubuntu',
                                           'message_id': '2ae21707-70ae-48a2-89c0-b08b11dc0b1a',
                                           'service': 'publisher-302689',
                                           'instance_type': '512MB Standard Instance',
                                           'tenant_id': '406904',
                                           'instance_flavor_id': '2',
                                           'hostname': 'server-462185',
                                           'host': 'publisher-302689',
                                           'instance_flavor': '512MB Standard Instance',
                                           'instance_id': '111-3b0f-4057-b377-b65131e8532e',
                                           'os_version': '12.04',
                                           'state': 'active',
                                           'os_architecture': 'x64',
                                           'timestamp': datetime.utcnow(),
                                           'request_id': 'req-d096b6de-f451-4d00-bff0-646a8c8a23c3',
                                           'launched_at': datetime.utcnow(),
                                           'event_type': 'compute.instance.create.end'}]

    distilled_events_fc1_tenant_406904_missing_end = [{'os_distro': 'com.ubuntu',
                                                       'event_type': 'compute.instance.create.start',
                                                       'service': 'publisher-302689',
                                                       'instance_type': '512MB Standard Instance',
                                                       'tenant_id': '406904',
                                                       'instance_flavor_id': '2',
                                                       'hostname': 'server-462185',
                                                       'host': 'publisher-302689',
                                                       'instance_flavor': '512MB Standard Instance',
                                                       'instance_id': '333-3b0f-4057-b377-b65131e8532e',
                                                       'os_version': '12.04',
                                                       'state': 'building',
                                                       'os_architecture': 'x64',
                                                       'timestamp': datetime.utcnow(),
                                                       'request_id': 'req-d096b6de-f451-4d00-bff0-646a8c8a23c3',
                                                       'message_id': '19701f6c-f51f-4ecb-85fb-7db40277627d'}]

    distilled_events_fc1_tenant_123456 = [{'os_distro': 'com.ubuntu',
                                           'event_type': 'compute.instance.create.start',
                                           'service': 'publisher-302689',
                                           'instance_type': '512MB Standard Instance',
                                           'tenant_id': '123456',
                                           'instance_flavor_id': '2',
                                           'hostname': 'server-462185',
                                           'host': 'publisher-302689',
                                           'instance_flavor': '512MB Standard Instance',
                                           'instance_id': '456-3b0f-4057-b377-b65131e8532e',
                                           'os_version': '12.04',
                                           'state': 'building',
                                           'os_architecture': 'x64',
                                           'timestamp': datetime.utcnow(),
                                           'request_id': 'req-d096b6de-f451-4d00-bff0-646a8c8a23c3',
                                           'message_id': '19701f6c-f51f-4ecb-85fb-7db40277627d'},
                                          {'os_distro': 'com.ubuntu',
                                           'message_id': '2ae21707-70ae-48a2-89c0-b08b11dc0b1a',
                                           'service': 'publisher-302689',
                                           'instance_type': '512MB Standard Instance',
                                           'tenant_id': '123456',
                                           'instance_flavor_id': '2',
                                           'hostname': 'server-462185',
                                           'host': 'publisher-302689',
                                           'instance_flavor': '512MB Standard Instance',
                                           'instance_id': '456-3b0f-4057-b377-b65131e8532e',
                                           'os_version': '12.04',
                                           'state': 'active',
                                           'os_architecture': 'x64',
                                           'timestamp': datetime.utcnow(),
                                           'request_id': 'req-d096b6de-f451-4d00-bff0-646a8c8a23c3',
                                           'launched_at': datetime.utcnow(),
                                           'event_type': 'compute.instance.create.end'}]

    """ trigger defs for fire criteria 2 - looking for exists"""
    # The second match criterion remaps the 'timestamp' distinguishing trait
    # to 'audit_period_beginning' for exists events. The key is spelled
    # 'map_distinguished_by', the same as in trig_def_fc3_rackspace.
    trig_def_fc2_rackspace_billing = [{'distinguished_by': ['instance_id',
                                                            {'timestamp': 'day'}],
                                       'fire_criteria': [{'event_type': 'compute.instance.exists'}],
                                       'match_criteria': [{'event_type': ['compute.instance.*',
                                                                          '!compute.instance.exists']},
                                                          {'event_type': 'compute.instance.exists',
                                                           'map_distinguished_by': {'timestamp': 'audit_period_beginning'}}],
                                       'name': 'rackspace_billing',
                                       'debug_level': 2,
                                       'expiration': '$last + 1h',
                                       'fire_pipeline': 'test_pipeline',
                                       'expire_pipeline': 'test_expire_pipeline'}]

    trig_def_fc3_rackspace = [{'distinguished_by': ['instance_id', {'timestamp': 'day'}],
                               'fire_criteria': [{'traits': {'audit_period_ending': {'datetime': '$audit_period_beginning + 1d'}},
                                                  'event_type': 'compute.instance.exists'}],
                               'match_criteria': [{'event_type': ['compute.instance.*',
                                                                  'snapshot_instance',
                                                                  'keypair.import.*',
                                                                  'rebuild_instance',
                                                                  'compute.instance.*',
                                                                  '!compute.instance.exists',
                                                                  '!compute.instance.exists.failed',
                                                                  '!compute.instance.exists.warnings',
                                                                  '!compute.instance.exists.verified']},
                                                  {'event_type': 'compute.instance.exists',
                                                   'map_distinguished_by': {'timestamp': 'audit_period_beginning'}}],
                               'name': 'rackspace_test_trigger',
                               'debug_level': 2,
                               'expiration': '$last + 2d',
                               'fire_pipeline': 'test_fire_pipeline',
                               'expire_pipeline': 'test_expire_pipeline'}]

    distilled_events_fc2_tenant_222333 = [{'os_distro': 'com.ubuntu',
                                           'event_type': 'compute.instance.create.start',
                                           'service': 'publisher-302689',
                                           'instance_type': '512MB Standard Instance',
                                           'tenant_id': '222333',
                                           'instance_flavor_id': '2',
                                           'hostname': 'server-462185',
                                           'host': 'publisher-302689',
                                           'instance_flavor': '512MB Standard Instance',
                                           'instance_id': '772b2f73-3b0f-4057-b377-b65131e8532e',
                                           'os_version': '12.04',
                                           'state': 'building',
                                           'os_architecture': 'x64',
                                           'timestamp': datetime.utcnow(),
                                           'request_id': 'req-d096b6de-f451-4d00-bff0-646a8c8a23c3',
                                           'message_id': '19701f6c-f51f-4ecb-85fb-7db40277627d'},
                                          {'os_distro': 'com.ubuntu',
                                           'message_id': '2ae21707-70ae-48a2-89c0-b08b11dc0b1a',
                                           'service': 'publisher-302689',
                                           'instance_type': '512MB Standard Instance',
                                           'tenant_id': '222333',
                                           'instance_flavor_id': '2',
                                           'hostname': 'server-462185',
                                           'host': 'publisher-302689',
                                           'instance_flavor': '512MB Standard Instance',
                                           'instance_id': '772b2f73-3b0f-4057-b377-b65131e8532e',
                                           'os_version': '12.04',
                                           'state': 'active',
                                           'os_architecture': 'x64',
                                           'timestamp': datetime.utcnow(),
                                              'request_id': 'req-d096b6de-f451-4d00-bff0-646a8c8a23c3',
                                              'launched_at': datetime.utcnow(),
                                              'event_type': 'compute.instance.create.end'},
                                          {'os_distro': 'com.ubuntu',
                                           'message_id': '2ae21707-70ae-48a2-89c0-b08b11dc0b1a',
                                           'service': 'publisher-302689',
                                           'instance_type': '512MB Standard Instance',
                                           'tenant_id': '222333',
                                           'instance_flavor_id': '2',
                                           'hostname': 'server-462185',
                                           'host': 'publisher-302689',
                                           'instance_flavor': '512MB Standard Instance',
                                           'instance_id': '772b2f73-3b0f-4057-b377-b65131e8532e',
                                           'os_version': '12.04',
                                           'state': 'active',
                                           'os_architecture': 'x64',
                                           'timestamp': datetime.utcnow(),
                                              'request_id': 'req-d096b6de-f451-4d00-bff0-646a8c8a23c3',
                                              'launched_at': datetime.utcnow(),
                                              'event_type': 'compute.instance.exists'}]

    def __init__(self, winchester_config):
        """Create the trigger and pipeline managers used by the tests.

        :param winchester_config: winchester config file reference, passed
                                  to ConfigManager.load_config_file().
        """
        self.winchester_config = winchester_config

        # Load the configuration once and share it between both managers.
        loaded_config = ConfigManager.load_config_file(winchester_config)
        self.config = loaded_config
        self.trigger_manager = TriggerManager(loaded_config)
        self.pipe = PipelineManager(loaded_config)

    def _add_unique_event(self, e):
        ''' make the static test data contain unique message id's '''
        e['message_id'] = uuid.uuid4()
        self.trigger_manager.add_event(e)

    def add_test_stream_definitions(self):
        """Register both test trigger definitions with each manager.

        The trigger manager receives both definitions first, then the
        pipeline manager receives the same two.
        """
        definitions = (
            TriggerTest.trig_def_fc1_tenant406904_filter,
            TriggerTest.trig_def_fc1_tenant123456_filter,
        )
        for target in (self.trigger_manager, self.pipe):
            for definition in definitions:
                target.add_trigger_definition(definition)

    def add_distilled_events_to_fire(self):
        """Add the tenant 406904 events, then the tenant 123456 events.

        Each event goes through _add_unique_event(), so it gets a new
        message id before being added.
        """
        batches = (
            TriggerTest.distilled_events_fc1_tenant_406904,
            TriggerTest.distilled_events_fc1_tenant_123456,
        )
        for batch in batches:
            for event in batch:
                self._add_unique_event(event)

    def add_distilled_events_to_expire(self):
        """Add the tenant 406904 'missing end' events, one by one.

        Each event goes through _add_unique_event(), so it gets a new
        message id before being added.
        """
        events = TriggerTest.distilled_events_fc1_tenant_406904_missing_end
        for event in events:
            self._add_unique_event(event)

    def add_distilled_events_with_no_match(self):
        """Add the fc2 / tenant 222333 events, one by one.

        Each event goes through _add_unique_event(), so it gets a new
        message id before being added.
        """
        events = TriggerTest.distilled_events_fc2_tenant_222333
        for event in events:
            self._add_unique_event(event)

    def check_for_expired_streams(self):
        """Process ready streams with ``expire=True`` and return the result.

        The batch size is the pipeline manager's own
        ``pipeline_worker_batch_size``.

        :returns: whatever ``process_ready_streams`` returns (used by the
                  callers as a stream count).
        """
        pipeline = self.pipe
        batch_size = pipeline.pipeline_worker_batch_size
        return pipeline.process_ready_streams(batch_size, expire=True)

    def check_for_fired_streams(self):
        """Process ready streams without the expire flag and return the result.

        The batch size is the pipeline manager's own
        ``pipeline_worker_batch_size``.

        :returns: whatever ``process_ready_streams`` returns (used by the
                  callers as a stream count).
        """
        pipeline = self.pipe
        batch_size = pipeline.pipeline_worker_batch_size
        return pipeline.process_ready_streams(batch_size)

    def test_no_match(self):
        """Check that non-matching events neither fire nor expire a stream.

        Adds the no-match events, waits two seconds, then processes fired
        and expired streams. The outcome is printed rather than asserted.
        """
        self.add_distilled_events_with_no_match()
        time.sleep(2)
        fired_count = self.check_for_fired_streams()
        expired_count = self.check_for_expired_streams()

        nothing_processed = fired_count == 0 and expired_count == 0
        print("test_no_match: Success" if nothing_processed
              else "test_no_match: Failed")

    def test_fired(self):
        """Check that the 'fire' events produce two fired streams.

        Adds the events, waits three seconds, then processes fired and
        expired streams. Success means two fired and none expired; the
        outcome is printed rather than asserted.
        """
        self.add_distilled_events_to_fire()
        time.sleep(3)
        fired_count = self.check_for_fired_streams()
        expired_count = self.check_for_expired_streams()

        as_expected = expired_count == 0 and fired_count == 2
        print("test_fired: Success" if as_expected
              else "test_fired: Failed")

    def test_expired(self):
        """Check that the 'missing end' events produce one expired stream.

        Adds the events, waits eleven seconds, then processes fired and
        expired streams. Success means one expired and none fired; the
        outcome is printed rather than asserted.
        """
        self.add_distilled_events_to_expire()
        time.sleep(11)
        fired_count = self.check_for_fired_streams()
        expired_count = self.check_for_expired_streams()

        as_expected = expired_count == 1 and fired_count == 0
        print("test_expired: Success" if as_expected
              else "test_expired: Failed")
    def __init__(self, winchester_config):
        """Create the trigger and pipeline managers from one loaded config.

        :param winchester_config: winchester config file reference, passed
                                  to ConfigManager.load_config_file().
        """
        self.winchester_config = winchester_config

        # Both managers are built from the same loaded configuration.
        shared_config = ConfigManager.load_config_file(winchester_config)
        self.config = shared_config
        self.trigger_manager = TriggerManager(shared_config)
        self.pipe = PipelineManager(shared_config)
Example #20
0
    def __init__(self,
                 config,
                 db=None,
                 pipeline_handlers=None,
                 pipeline_config=None,
                 trigger_defs=None,
                 time_sync=None,
                 proc_name='pipeline_worker'):
        """Set up the manager's config, database, handlers and triggers.

        Every optional collaborator that is passed in is used as-is; any
        that is left as None is built or loaded from ``config``.

        :param config: configuration; wrapped with ConfigManager.wrap()
                       against self.config_description() and then checked
                       with check_config().
        :param db: database interface. Defaults to
                   DBInterface(config['database']).
        :param pipeline_handlers: handler plugins. Defaults to the result
                                  of self._load_plugins() on
                                  config['pipeline_handlers'].
        :param pipeline_config: mapping keyed by pipeline, each value an
                                iterable of handler configs. Defaults to
                                the contents of the file named by
                                config['pipeline_config']. Its values are
                                replaced in place by the results of
                                Pipeline.check_handler_config().
        :param trigger_defs: trigger definitions. When None they are loaded
                             from the file named by
                             config['trigger_definitions'] if the config
                             contains that key; otherwise the list stays
                             empty.
        :param time_sync: time-sync object handed on to the TriggerManager.
                          Defaults to a new ts.TimeSync().
        :param proc_name: name included in the startup debug log line.
        """
        # name used to distinguish worker processes in logs
        self.proc_name = proc_name

        logger.debug("PipelineManager(%s): Using config: %s" %
                     (self.proc_name, str(config)))
        config = ConfigManager.wrap(config, self.config_description())
        self.config = config
        # Default used when neither trigger_defs nor a definitions file
        # is supplied.
        self.trigger_definitions = []
        config.check_config()
        config.add_config_path(*config['config_path'])
        if time_sync is None:
            time_sync = ts.TimeSync()
        self.time_sync = time_sync

        if db is not None:
            self.db = db
        else:
            self.db = DBInterface(config['database'])

        if pipeline_handlers is not None:
            self.pipeline_handlers = pipeline_handlers
        else:
            self.pipeline_handlers = self._load_plugins(
                config['pipeline_handlers'])
        logger.debug("Pipeline handlers: %s" % str(self.pipeline_handlers))

        if pipeline_config is not None:
            self.pipeline_config = pipeline_config
        else:
            self.pipeline_config = config.load_file(config['pipeline_config'])

        logger.debug("Pipeline config: %s" % str(self.pipeline_config))
        # Replace each pipeline's handler configs with the checked versions.
        # Only values of existing keys are reassigned, so the mapping's size
        # does not change during iteration.
        for pipeline, handler_configs in self.pipeline_config.items():
            self.pipeline_config[pipeline] = [
                Pipeline.check_handler_config(conf, self.pipeline_handlers)
                for conf in handler_configs
            ]

        if trigger_defs is not None:
            self.trigger_definitions = trigger_defs
        else:
            # trigger_definition config file is optional
            if config.contains('trigger_definitions'):
                defs = config.load_file(config['trigger_definitions'])
                logger.debug("Loaded trigger definitions %s" % str(defs))
                self.trigger_definitions = [
                    TriggerDefinition(conf, None) for conf in defs
                ]

        # The trigger manager shares this manager's db, trigger definitions
        # and time_sync object.
        self.trigger_manager = TriggerManager(
            self.config,
            db=self.db,
            trigger_defs=self.trigger_definitions,
            time_sync=time_sync)

        # Worker and housekeeping settings copied from the config.
        self.pipeline_worker_batch_size = config['pipeline_worker_batch_size']
        self.pipeline_worker_delay = config['pipeline_worker_delay']
        self.statistics_period = config['statistics_period']
        self.purge_completed_streams = config['purge_completed_streams']
        self.trim_events = config['trim_events']
        self.trim_events_batch_size = config['trim_events_batch_size']
        try:
            self.trim_events_age = timex.parse(str(config['trim_events_age']))
        except timex.TimexError:
            # An unparsable age expression disables trimming instead of
            # aborting construction.
            logger.error("Invalid trim event expression: %s Event trimming "
                         "disabled." % config['trim_events_age'])
            self.trim_events_age = None
            self.trim_events = False
        # Counters start at zero; last_status records the construction time
        # as reported by self.current_time().
        self.streams_fired = 0
        self.streams_expired = 0
        self.streams_loaded = 0
        self.last_status = self.current_time()
class EventProcessor(EventProcessorBase):

    """EventProcessor

    The EventProcessor reads distilled events from the kafka
    'transformed-events' topic, and adds them to the stacktach winchester
    TriggerManager. Adding distilled events to the TriggerManager adds all
    distilled events into the Mysql DB.  The EventProcessor reads
    stream-definitions from the kafka 'stream-definitions'
    topic and adds them to the stacktach TriggerManager.
    The TriggerManager keeps temporary streams of events in Mysql -
    filtering by 'match criteria' and grouping by 'distinguished by'
    for each stream definition.  The streams are deleted when the fire
    criteria has been met.
    """

    def __init__(self, conf):
        """Load the winchester config and build the shared TriggerManager.

        :param conf: service configuration.  Reads
                     ``conf.winchester.winchester_config`` (the winchester
                     config file) and ``conf.kafka.stream_def_group``.
        """
        super(EventProcessor, self).__init__(conf)
        self._winchester_config = conf.winchester.winchester_config
        self._config_mgr = ConfigManager.load_config_file(self._winchester_config)
        self._trigger_manager = TriggerManager(self._config_mgr)
        self._group = conf.kafka.stream_def_group
        # Serializes TriggerManager access between the two consumer threads.
        self._tm_lock = threading.Lock()

    def event_consumer(self, conf, lock, trigger_manager):
        """Consume distilled events from kafka and add them to winchester.

        Runs until the consumer iterator ends.  A message that cannot be
        decoded, or an event that the TriggerManager fails to add, is
        logged and skipped so that one bad message does not stop the
        consumer thread.

        :param conf: service configuration; reads ``conf.kafka.url``,
                     ``conf.kafka.event_group`` and
                     ``conf.kafka.events_topic``.
        :param lock: lock held around every TriggerManager call.
        :param trigger_manager: TriggerManager the events are added to.
        """
        kafka_url = conf.kafka.url
        group = conf.kafka.event_group
        topic = conf.kafka.events_topic
        kafka = KafkaClient(kafka_url)
        consumer = SimpleConsumer(kafka, group, topic, auto_commit=True)

        # whence=2 positions the consumer relative to the latest offset,
        # i.e. only messages produced from now on are read.
        consumer.seek(0, 2)

        statsd = monascastatsd.Client(name="monasca", dimensions=self.dimensions)
        events_consumed = statsd.get_counter("events_consumed")
        events_persisted = statsd.get_counter("events_persisted")

        for item in consumer:
            log.debug("Received an event")
            events_consumed.increment()

            # Decoding is guarded separately: a malformed envelope (bad
            # JSON, missing "event" key, unparsable timestamp) is logged
            # and skipped instead of killing this thread.
            try:
                _offset, message = item
                envelope = json.loads(message.value)
                event = envelope["event"]

                if "timestamp" in event:
                    event["timestamp"] = iso8601.parse_date(event["timestamp"], default_timezone=None)
            except Exception as err:
                log.exception(err)
                continue

            with lock:
                try:
                    # should have add_event return True or False
                    prev_saved_events = trigger_manager.saved_events
                    trigger_manager.add_event(event)
                    if trigger_manager.saved_events > prev_saved_events:
                        events_persisted.increment()
                    else:
                        log.warning("Invalid or Duplicate Event. " "Could not add_event to mysql.")
                except Exception as err:
                    log.exception(err)

    def run(self):
        """Initialize and start threads.

        The Event Processor initializes the TriggerManager with
        Trigger Defs from the DB at startup.  It reads the
        stream-def-events kafka topic for the addition/deletion of
        stream-defs from the API.  It reads the transformed-events
        kafka topic for distilled event processing.

        Blocks until both consumer threads have finished.
        """

        # read stream-definitions from DB at startup and add
        stream_defs = self.stream_defs_from_database()
        if stream_defs:
            log.debug("Loading {} stream definitions from the DB at startup".format(len(stream_defs)))
            self._trigger_manager.add_trigger_definition(stream_defs)

        # start threads
        self.stream_def_thread = threading.Thread(
            name="stream_defs",
            target=self.stream_definition_consumer,
            args=(self.conf, self._tm_lock, self._group, self._trigger_manager),
        )

        self.event_thread = threading.Thread(
            name="events", target=self.event_consumer, args=(self.conf, self._tm_lock, self._trigger_manager)
        )

        log.debug("Starting stream_defs and events threads")
        self.stream_def_thread.start()
        self.event_thread.start()

        self.stream_def_thread.join()
        self.event_thread.join()
        log.debug("Exiting")