def process(self, event, publisher=None, manager=None, source=None, logger=None, **kwargs):
    """Process a topology-vertice task and propagate any state change.

    Delegates the event to the parent class processing, then, if the
    vertice state changed, pushes the new state onto all outgoing edges,
    publishes a refreshed event and persists itself.

    :param TopologyManager manager: topology manager; a singleton is
        resolved when None.
    :returns: result of the parent class ``process`` call.
    """
    if manager is None:
        manager = singleton_per_scope(TopologyManager)
    # save old state
    old_state = self.state
    # process task
    result = super(TopoVertice, self).process(
        event=event, publisher=publisher, manager=manager, source=source,
        logger=logger, **kwargs
    )
    # compare old state and new state
    if self.state != old_state:
        # update edges
        targets_by_edge = manager.get_targets(ids=self.id, add_edges=True)
        for edge_id in targets_by_edge:
            edge, _ = targets_by_edge[edge_id]
            # update edge state
            edge.state = self.state
            edge.save(manager=manager)
        # if not equal
        new_event = self.get_event(state=self.state, source=source)
        # publish a new event
        if publisher is not None:
            publish(event=new_event, publisher=publisher)
        # save self
        self.save(manager=manager)
    return result
def _publish_event(event):
    # Publish a single event on AMQP under its routing key.
    # NOTE(review): `self` is not a parameter here -- this function can only
    # work as a closure nested inside a method (or otherwise with `self` in
    # the enclosing scope); as a standalone function it raises NameError.
    # Confirm the enclosing context before moving/reusing it.
    rk = event.get('rk', get_routingkey(event))
    self.logger.info(u"Sending event {}".format(rk))
    self.logger.debug(event)
    publish(event=event, rk=rk, publisher=self.amqp)
def publish_sla_event(self, event, display_name):
    """Publish an SLA selector event on AMQP, then log the publication."""
    publisher = self.amqp
    publish(publisher=publisher, event=event)
    message = u'published event sla selector {}'.format(display_name)
    self.logger.debug(message)
def publish_event(self, selector, rk, selector_event, publish_ack):
    """Publish a selector event, tagging its ack status beforehand.

    When publish_ack is truthy, the event carries a canonical ack block;
    otherwise any previous ack information is reset.
    """
    selector_event['selector_id'] = selector._id
    self.logger.info(
        u'Publish event: selector={} state={}'.format(
            selector.display_name, selector_event['state']
        )
    )
    if not publish_ack:
        # Define or reset ack key for selector generated event
        selector_event['ack'] = {}
        self.logger.debug(u'Event not acknowleged')
    else:
        # Define a clean ack information to the event
        selector_event['ack'] = {
            'timestamp': int(time()),
            'rk': rk,
            'author': 'canopsis',
            'comment': 'All matched event are acknowleged',
            'isAck': True
        }
        self.logger.debug(
            'Event acknowleged because all matched events are acknowleged'
        )
    publish(publisher=self.amqp, event=selector_event, rk=rk)
    self.logger.debug(u'Event sent')
def store_log(self, event, store_new_event=True): """ Stores events in events_log collection Logged events are no more in event collection at the moment """ # Ensure event Id exists from rk key event['_id'] = event['rk'] # Prepare log event collection async insert log_event = deepcopy(event) self.events_log_buffer.append({ 'event': log_event, 'collection': 'events_log' }) bulk_modulo = len(self.events_log_buffer) % self.log_bulk_amount elapsed_time = time() - self.last_bulk_insert_date if bulk_modulo == 0 or elapsed_time > self.log_bulk_delay: self.archiver.process_insert_operations( self.events_log_buffer ) self.events_log_buffer = [] self.last_bulk_insert_date = time() # Event to Alert event['event_id'] = event['rk'] publish( publisher=self.amqp, event=event, rk=event['rk'], exchange=self.amqp.exchange_name_alerts )
def publish_event(self, selector, rk, selector_event, publish_ack):
    """Publish a selector-generated event, marking its ack status first."""
    selector_event['selector_id'] = selector._id
    self.logger.info(
        u'Ready to publish selector {} event with state {}'.format(
            selector.display_name, selector_event['state']
        )
    )
    if not publish_ack:
        # Define or reset ack key for selector generated event
        selector_event['ack'] = {}
        self.logger.debug('Selector event is NOT ack')
    else:
        # Define a clean ack information to the event
        selector_event['ack'] = {
            'timestamp': int(time()),
            'rk': rk,
            'author': 'canopsis',
            'comment': 'All matched event are acknowleged',
            'isAck': True
        }
        self.logger.debug(
            'Selector event is ack because all matched NOK event are ack'
        )
    publish(publisher=self.amqp, event=selector_event, rk=rk)
    self.logger.debug(u'published event selector {}'.format(
        selector.display_name
    ))
def do_job(self, job):
    """Publish a job's parameters to its task queue and stamp last_execution.

    The storage update only bumps last_execution when it is unset or not
    already in the future.
    """
    self.logger.info(u'Execute job: {0}'.format(job))
    if "params" not in job:
        job["params"] = {}
    params = job['params']
    params['jobid'] = job['_id']
    params['jobctx'] = job.get('context', {})
    # task name encodes the routing key after its 4-char prefix
    task_rk = 'task_{0}'.format(job['task'][4:])
    publish(
        publisher=self.amqp,
        event=params,
        rk=task_rk,
        exchange='amq.direct'
    )
    now = int(time())
    query = {'$and': [
        {'_id': job['_id']},
        {'$or': [
            {'last_execution': {'$lte': now}},
            {'last_execution': None},
        ]}
    ]}
    update = {'$set': {'last_execution': now}}
    self.storage.get_backend().update(query, update)
def send_stat_event(self):
    """Send AMQP Event for drop and pass metrics, then reset the counters."""
    dropped_msg = '{} event dropped since {}'.format(
        self.drop_event_count, self.beat_interval)
    passed_msg = '{} event passed since {}'.format(
        self.pass_event_count, self.beat_interval)
    metrics = [
        {'metric': 'pass_event',
         'value': self.pass_event_count,
         'type': 'GAUGE'},
        {'metric': 'drop_event',
         'value': self.drop_event_count,
         'type': 'GAUGE'},
    ]
    event = forger(
        connector='Engine',
        connector_name='engine',
        event_type='check',
        source_type='resource',
        resource=self.amqp_queue + '_data',
        state=0,
        state_type=1,
        output=dropped_msg,
        perf_data_array=metrics)
    self.logger.debug(dropped_msg)
    self.logger.debug(passed_msg)
    publish(publisher=self.amqp, event=event)
    self.drop_event_count = 0
    self.pass_event_count = 0
def send_stat_event(self):
    """Send AMQP Event for drop and pass metrics, then reset the counters."""
    dropped = self.drop_event_count
    passed = self.pass_event_count
    message_dropped = '{} event dropped since {}'.format(
        dropped, self.beat_interval
    )
    message_passed = '{} event passed since {}'.format(
        passed, self.beat_interval
    )
    perf_data = [
        {'metric': name, 'value': count, 'type': 'GAUGE'}
        for name, count in (('pass_event', passed), ('drop_event', dropped))
    ]
    event = forger(
        connector='Engine',
        connector_name='engine',
        event_type='check',
        source_type='resource',
        resource=self.amqp_queue + '_data',
        state=0,
        state_type=1,
        output=message_dropped,
        perf_data_array=perf_data)
    self.logger.debug(message_dropped)
    self.logger.debug(message_passed)
    publish(publisher=self.amqp, event=event)
    self.drop_event_count = 0
    self.pass_event_count = 0
def beat(self):
    """Drain collected event durations and publish min/max/avg metrics."""
    samples = []
    while True:
        try:
            samples.append(self.durations.pop())
        except IndexError:
            break
    # Nothing measured since the last beat: publish nothing
    if not samples:
        return
    event = {
        "connector": "Engine",
        "connector_name": self.etype,
        "event_type": "perf",
        "source_type": "component",
        "component": "__canopsis__",
        "perf_data_array": [
            {"metric": "cps_evt_duration_min",
             "value": min(samples), "unit": "s", "type": "GAUGE"},
            {"metric": "cps_evt_duration_max",
             "value": max(samples), "unit": "s", "type": "GAUGE"},
            {"metric": "cps_evt_duration_avg",
             "value": sum(samples) / len(samples), "unit": "s",
             "type": "GAUGE"},
        ],
    }
    publish(publisher=self.amqp, event=event)
def a_snooze(self, event, action, name):
    """Snooze event checks.

    :param dict event: event to be snoozed
    :param dict action: action
    :param str name: name of the rule
    :returns: True if a snooze has been sent, False otherwise
    :rtype: boolean
    """
    # Only non-OK check events may trigger an auto-snooze.
    # (A 'snooze' event is not a 'check', so it is rejected here as well.)
    if event.get('event_type') != 'check' or event.get('state') == 0:
        return False

    # Alerts manager caching
    if not hasattr(self, 'am'):
        self.am = Alerts(*Alerts.provide_default_basics())
    # Context manager caching
    if not hasattr(self, 'cm'):
        self.cm = ContextGraph(self.logger)

    entity_id = self.cm.get_id(event)
    # An existing alarm means the event is already handled: do not snooze
    if self.am.get_current_alarm(entity_id) is not None:
        return False

    snooze = {
        'connector': event.get('connector', ''),
        'connector_name': event.get('connector_name', ''),
        'source_type': event.get('source_type', ''),
        'component': event.get('component', ''),
        'event_type': 'snooze',
        'duration': action['duration'],
        'author': 'event_filter',
        'output': 'Auto snooze generated by rule "{}"'.format(name),
        'timestamp': int(time.time())
    }
    if event.get('resource', ''):
        snooze['resource'] = event['resource']
    publish(event=snooze, publisher=self.amqp,
            rk='Engine_event_filter', exchange='amq.direct')
    return True
def a_exec_job(self, event, action, name):
    """Schedule the job referenced by the action with the event as context."""
    query = {'crecord_type': 'job', '_id': action['job']}
    for record in self.storage.find(query):
        job = record.dump()
        job['context'] = event
        publish(publisher=self.amqp, event=job,
                rk='Engine_scheduler', exchange='amq.direct')
    return True
def store_check(self, event):
    """Archive a check event and forward it to the alerts exchange."""
    archived_id = self.archiver.check_event(event['rk'], event)
    if archived_id:
        event['_id'] = archived_id
    event['event_id'] = event['rk']
    # Event to Alert
    publish(
        publisher=self.amqp,
        event=event,
        rk=event['rk'],
        exchange=self.amqp.exchange_name_alerts
    )
def fire_events():
    """Reset every topology graph to state 0 and publish matching events."""
    manager = TopologyManager()
    amqp = Amqp()
    for graph in manager.get_graphs():
        graph.state = 0
        publish(publisher=amqp, event=graph.get_event(state=0))
def a_snooze(self, event, action, name):
    """Snooze event checks.

    :param dict event: event to be snoozed
    :param dict action: action
    :param str name: name of the rule
    :returns: True if a snooze has been sent, False otherwise
    :rtype: boolean
    """
    # Only a non-OK check event can trigger an auto-snooze
    # (direct indexing intentionally raises KeyError on malformed events)
    if event['event_type'] != 'check' or event['state'] == 0:
        return False

    # Alerts manager caching
    if not hasattr(self, 'am'):
        self.am = Alerts()
    # Context manager caching
    if not hasattr(self, 'cm'):
        self.cm = Context()

    entity = self.cm.get_entity(event)
    entity_id = self.cm.get_entity_id(entity)
    # An existing alarm means the event is already handled: do not snooze
    if self.am.get_current_alarm(entity_id) is not None:
        return False

    snooze = {
        'connector': event.get('connector', ''),
        'connector_name': event.get('connector_name', ''),
        'source_type': event.get('source_type', ''),
        'component': event.get('component', ''),
        'event_type': 'snooze',
        'duration': action['duration'],
        'author': 'event_filter',
        'output': 'Auto snooze generated by rule "{}"'.format(name),
    }
    if 'resource' in event:
        snooze['resource'] = event['resource']
    publish(event=snooze, publisher=self.amqp)
    return True
def a_exec_job(self, event, action, name):
    """Schedule the job referenced by the action with the event as context."""
    cursor = self.collection.find({
        'crecord_type': 'job',
        '_id': action['job']
    })
    for record in cursor:
        job = copy.deepcopy(record)
        job['context'] = event
        publish(publisher=self.amqp, event=job,
                rk='Engine_scheduler', exchange='amq.direct')
        # throttle successive job publications
        time.sleep(1)
    return True
def a_baseline(self, event, actions, name):
    """a_baseline

    Forward the event to the baseline engine, carrying the baseline
    configuration from the event-filter action.

    :param event:
    :param actions: baseline conf in event filter
    :param name:
    """
    for key in ('baseline_name', 'check_frequency'):
        event[key] = actions[key]
    publish(event=event, publisher=self.amqp,
            rk='Engine_baseline', exchange='amq.direct')
def publish_states(self):
    """Publish this engine's accumulated perf statistics as one event."""
    event = forger(
        connector="engine",
        connector_name="engine",
        event_type="perf",
        source_type="resource",
        resource="Engine_stats",
        state=0,
        perf_data_array=self.perf_data_array,
    )
    self.logger.debug("Publishing {}".format(event))
    publish(publisher=self.amqp, event=event)
def beat_processing(engine, manager=None, logger=None, **_):
    """Engine beat processing task."""
    serie_manager = manager if manager is not None else \
        singleton_per_scope(Serie)
    with engine.Lock(engine, 'serie_fetching') as lock:
        # only the lock owner fetches, to avoid duplicate publication
        if not lock.own():
            return
        for serie in serie_manager.get_series(time()):
            publish(
                publisher=engine.amqp,
                event=serie,
                rk=engine.amqp_queue,
                exchange='amq.direct',
                logger=logger
            )
def beat(self):
    """Dump every selector and publish each one to this engine's queue."""
    with self.Lock(self, 'selector_processing') as lock:
        # only the lock owner publishes, to avoid duplicate work
        if not lock.own():
            return
        dumped = [selector.dump() for selector in self.get_selectors()]
        for selector_event in dumped:
            publish(
                publisher=self.amqp,
                event=selector_event,
                rk=self.amqp_queue,
                exchange='amq.direct',
                logger=self.logger
            )
def store_check(self, event):
    """Archive a check event, adjusting its state-change timestamp when it
    occurred during a downtime, then forward it to the alerts exchange."""
    archived_id = self.archiver.check_event(event['rk'], event)
    if event.get('downtime', False):
        end_date = self.cdowntime.get_downtime_end_date(
            event['component'], event.get('resource', ''))
        event['previous_state_change_ts'] = end_date
    if archived_id:
        event['_id'] = archived_id
    event['event_id'] = event['rk']
    # Event to Alert
    publish(
        publisher=self.amqp,
        event=event,
        rk=event['rk'],
        exchange=self.amqp.exchange_name_alerts
    )
def beat(self):
    """Drain queued event durations and publish min/max/avg perf metrics."""
    samples = []
    while True:
        try:
            samples.append(self.durations.pop())
        except IndexError:
            break
    # nothing measured since the last beat: publish nothing
    if not samples:
        return
    measures = [
        ('cps_evt_duration_min', min(samples)),
        ('cps_evt_duration_max', max(samples)),
        ('cps_evt_duration_avg', sum(samples) / len(samples)),
    ]
    event = {
        'connector': 'Engine',
        'connector_name': self.etype,
        'event_type': 'perf',
        'source_type': 'component',
        'component': '__canopsis__',
        'perf_data_array': [
            {'metric': metric, 'value': value, 'unit': 's', 'type': 'GAUGE'}
            for metric, value in measures
        ]
    }
    publish(publisher=self.amqp, event=event)
def consume_dispatcher(self, event, *args, **kargs):
    """Consolidate metrics for the serie record carried by *event*.

    Fetches the serie's points over a (currently hard-coded) window,
    stores them, publishes one perf event per point, then marks the
    crecord task complete.

    :param dict event: serie crecord; must carry an '_id'.
    """
    self.logger.debug("Start metrics consolidation")
    t_serie = event.copy()
    self.logger.debug('\n\n\n\n----->serie: {}'.format(t_serie))

    if not t_serie:
        # BUGFIX: previously only logged and fell through, crashing below
        # on t_serie['_id']; abort when there is nothing to process.
        self.logger.error('No record found.')
        return

    # Test Settings
    # NOTE(review): hard-coded window -- presumably meant to come from the
    # serie/UI configuration; confirm before relying on it in production.
    _from = 1425394522
    _to = 1425402296

    perf_data_array = []
    _, points = self.fetch(t_serie, _from, _to)
    # This method allow us to update an metric or a list of metrics
    self.manager.put(metric_id=t_serie['_id'], points=points)

    # Publish the consolidation metrics, one event per point
    for t, v in points:
        perf_data_array.append(
            {
                'metric': t_serie['_id'],
                'value': v,
                'unit': t_serie['_id'],
                'min': None,
                'max': None,
                'warn': None,
                'crit': None,
                'type': 'GAUGE'
            }
        )
        conso_event = forger(
            timestamp=t,
            component='conso',
            connector='Engine',
            connector_name='consolidation',
            event_type='perf',
            source_type='component',
            perf_data_array=perf_data_array
        )
        self.logger.debug('Publishing {}'.format(conso_event))
        publish(publisher=self.amqp, event=conso_event)
        # reset the perf_data_array data
        perf_data_array = []

    # Update crecords informations
    event_id = t_serie['_id']
    self.crecord_task_complete(event_id)
def store_check(self, event):
    """Archive a check event; when it is in downtime, stamp the pbehavior
    downtime end date, then forward the event to the alerts exchange."""
    archived_id = self.archiver.check_event(event['rk'], event)
    if event.get('downtime', False):
        entity = self.context.get_entity(event)
        entity_id = self.context.get_entity_id(entity)
        event['previous_state_change_ts'] = self.pbehavior.getending(
            source=entity_id,
            behaviors='downtime'
        )
    if archived_id:
        event['_id'] = archived_id
    event['event_id'] = event['rk']
    # Event to Alert
    publish(
        publisher=self.amqp,
        event=event,
        rk=event['rk'],
        exchange=self.amqp.exchange_name_alerts
    )
def set_derogation_state(self, derogation, active):
    """Persist a derogation's active flag and publish a log event when the
    flag actually changed."""
    was_active = derogation.get('active', False)
    name = derogation.get('crecord_name', None)
    notify = False
    state = 0
    if active and not was_active:
        self.logger.info("%s (%s) is now active" % (
            derogation['crecord_name'], derogation['_id'])
        )
        self.storage.update(derogation['_id'], {'active': True})
        notify = True
    elif not active and was_active:
        self.logger.info("%s (%s) is now inactive" % (
            derogation['crecord_name'], derogation['_id'])
        )
        self.storage.update(derogation['_id'], {'active': False})
        notify = True
    # no transition: nothing to publish
    if not notify:
        return
    if active:
        output = "Derogation '%s' is now active" % name
        state = 1
    else:
        output = "Derogation '%s' is now inactive" % name
    event = forger(
        connector="Engine",
        connector_name="engine",
        event_type="log",
        source_type="component",
        component=self.etype,
        state=state,
        output=output,
        long_output=derogation.get('description', None)
    )
    publish(publisher=self.amqp, event=event)
def publish_record(self, event, crecord_type):
    """Publish a crecord event on the 'media' exchange under its dispatcher
    routing key; return True on success, False otherwise."""
    try:
        # 'serie' records are handled by the consolidation dispatcher
        target = 'consolidation' if crecord_type == 'serie' else crecord_type
        rk = 'dispatcher.{0}'.format(target)
        self.amqp.get_exchange('media')
        publish(publisher=self.amqp, event=event, rk=rk, exchange='media')
        return True
    except Exception as e:
        # Will be reloaded on next beat
        self.logger.error('Unable to send crecord {} error : {}'.format(
            crecord_type, e
        ))
        return False
def send_perfdata(self, uuid, time, updated, deleted): """Send stat about the import through a perfdata event. :param str uuid: the import uuid :param float time: the execution time of the import :param int updated: the number of updated entities during the import :param int deleted: the number of deleted entities during the import """ # define the state according to the duration of the import if time > self._thd_crit_s: state = ST_WARNING elif time > self._thd_warn_s: state = ST_MINOR else: state = ST_INFO perf_data_array[0]["value"] = time perf_data_array[1]["value"] = updated perf_data_array[2]["value"] = deleted output = "execution : {0} sec, updated ent :"\ " {1}, deleted ent : {2}".format(time, updated, deleted) self.logger.critical("AMQP queue = {0}".format(self.amqp_queue)) # create a perfdata event event = forger(connector="Taskhandler", connector_name=self.etype, component=uuid, event_type="check", source_type="resource", resource="task_importctx/report", state=state, state_type=1, output=output, perf_data_array=perf_data_array) publish(event, self.amqp)
def do_job(self, job):
    """Publish a job's parameters to its task queue and stamp last_execution.

    :param dict job: job crecord; must carry '_id' and 'task'; a 'params'
        dict is created when absent.
    """
    self.logger.info(u'Execute job: {0}'.format(job))
    # Robustness fix: jobs without a 'params' dict used to raise KeyError
    # here; the sibling do_job implementation already guards this case.
    if 'params' not in job:
        job['params'] = {}
    job['params']['jobid'] = job['_id']
    job['params']['jobctx'] = job.get('context', {})
    publish(
        publisher=self.amqp,
        event=job['params'],
        rk='task_{0}'.format(job['task'][4:]),
        exchange='amq.direct'
    )
    now = int(time())
    # Only bump last_execution when it is unset or not in the future
    self.storage.get_backend().update({'$and': [
        {'_id': job['_id']},
        {'$or': [
            {'last_execution': {'$lte': now}},
            {'last_execution': None},
        ]}
    ]}, {
        '$set': {
            'last_execution': now
        }
    })
def work(self, event, *args, **kargs):
    """Process ack / ackremove events and keep acknowledgement state in sync.

    Handles four cases:
    - 'ackremove' events: record ack-removal info on the referenced event;
    - 'ack' events: acknowledge the referenced event (optionally cascading
      to a component's not-OK resources), store the ack, and emit metrics;
    - events back to normal (state 0): mark a pending ack as solved;
    - events in problem state: reset the solved flag of their ack.

    :param dict event: incoming event; must carry 'event_type'.
    :returns: the (possibly annotated) event.
    """
    logevent = None
    ackremove = False
    state = event.get('state', 0)
    state_type = event.get('state_type', 1)
    if event['event_type'] == 'ackremove':
        # remove ack from event
        # Ack remove information exists when ack is just removed
        # And deleted if event is ack again
        rk = event['ref_rk']
        self.events_collection.update(
            {'_id': rk},
            {
                '$set': {
                    'ack_remove': {
                        'author': event['author'],
                        'comment': event['output'],
                        'timestamp': time()
                    },
                    'ack': ''
                },
                '$unset': {
                    'ticket_declared_author': '',
                    'ticket_declared_date': '',
                    'ticket': '',
                    'ticket_date': ''
                }
            }
        )
        ackremove = True

    # If event is of type ack, then ack reference event
    if event['event_type'] == 'ack':
        self.logger.debug(u'Ack event found, will proceed ack.')
        rk = event.get('referer', event.get('ref_rk', None))

        if event.get("source_type") == "component" and\
                event.get("ack_resources") in [True, "true", "True"]:
            # fetch not ok component's resources
            component = event.get("component")
            sub_res_query = {"component": component,
                             "state": {"$ne": "0"},
                             "source_type": "resource"}
            result_cur = self.events_collection.find(sub_res_query)
            # cascade the ack onto every not-OK resource of the component
            for resource in result_cur:
                sub_ack_event = {
                    "ref_rk": resource.get("_id"),
                    "author": event.get("author"),
                    "output": event.get("output"),
                    "authkey": event.get("authkey"),
                    "connector": resource.get("connector"),
                    "connector_name": resource.get("connector_name"),
                    "event_type": "ack",
                    "source_type": "resource",
                    "component": component,
                    "resource": resource.get("resource")
                }
                if "ticket" in event:
                    sub_ack_event["ticket"] = event.get("ticket")
                publish(
                    publisher=self.amqp,
                    event=sub_ack_event,
                    exchange=self.acknowledge_on
                )

        author = event['author']
        self.logger.debug(dumps(event, indent=2))
        if not rk:
            self.logger.error(
                'Cannot get acknowledged event, missing referer or ref_rk'
            )
            return event
        for comment in self.comments:
            if comment['comment'] in event['output']:
                # An ack comment is contained into a defined comment
                # Then let save referer key to the comment
                # Set referer rk to last update date
                self.objects_backend.update(
                    {'_id': comment['_id']},
                    {"$addToSet": {'referer_event_rks': {'rk': rk}}},
                    upsert=True)
                self.logger.info(
                    'Added a referer rk to the comment {}'.format(
                        comment['comment']
                    )
                )
        ackts = int(time())
        ack_info = {
            'timestamp': event['timestamp'],
            'ackts': ackts,
            'rk': rk,
            'author': author,
            'comment': event['output']
        }
        # add rk to acknowledged rks
        # NOTE(review): `response` is never read afterwards
        response = self.stbackend.find_and_modify(
            query={'rk': rk, 'solved': False},
            update={'$set': ack_info},
            upsert=True,
            full_response=True,
            new=True
        )
        self.logger.debug(
            u'Updating event {} with author {} and comment {}'.format(
                rk, author, ack_info['comment']
            )
        )
        ack_info['isAck'] = True
        # Useless information for event ack data
        del ack_info['ackts']
        # clean eventual previous ack remove information
        self.events_collection.update(
            {
                '_id': rk
            },
            {
                '$set': {
                    'ack': ack_info,
                },
                '$unset': {
                    'ack_remove': '',
                }
            }
        )
        # When an ack status is changed
        # Emit an event log
        referer_event = self.storage.find_one(
            mfilter={'_id': rk},
            namespace='events'
        )
        if referer_event:
            referer_event = referer_event.dump()
            # Duration between event last state and acknolegement date
            duration = ackts - referer_event.get(
                'last_state_change', event['timestamp']
            )
            logevent = forger(
                connector="Engine",
                connector_name=self.etype,
                event_type="log",
                source_type=referer_event['source_type'],
                component=referer_event['component'],
                resource=referer_event.get('resource', None),
                state=0,
                state_type=1,
                ref_rk=event['rk'],
                output=u'Event {0} acknowledged by {1}'.format(
                    rk, author),
                long_output=event['output'],
            )
        # Now update counters
        ackhost = is_host_acknowledged(event)
        # Cast response to ! 0|1
        cvalues = int(not ackhost)
        ack_event = deepcopy(self.ack_event)
        ack_event['component'] = author
        # NOTE(review): `duration` is only bound when referer_event exists;
        # if the referer is missing this raises NameError below -- confirm.
        ack_event['perf_data_array'] = [
            {
                'metric': 'alerts_by_host',
                'value': cvalues,
                'type': 'COUNTER'
            },
            {
                'metric': 'alerts_count{}'.format(
                    self.get_metric_name_adp(event)
                ),
                'value': 1,
                'type': 'COUNTER'
            },
            {
                'metric': 'delay',
                'value': duration,
                'type': 'COUNTER'
            }
        ]
        publish(
            publisher=self.amqp,
            event=ack_event,
            exchange=self.acknowledge_on
        )
        self.logger.debug(u'Ack internal metric sent. {}'.format(
            dumps(ack_event['perf_data_array'], indent=2)
        ))
        # one 'alerts' counter event per hostgroup the event belongs to
        for hostgroup in event.get('hostgroups', []):
            ack_event = deepcopy(self.ack_event)
            ack_event['perf_data_array'] = [
                {
                    'metric': 'alerts',
                    'value': cvalues,
                    'type': 'COUNTER'
                }
            ]
            publish(
                publisher=self.amqp,
                event=ack_event,
                exchange=self.acknowledge_on
            )
        self.logger.debug(u'Reloading ack cache')
        self.reload_ack_cache()

    # If event is acknowledged, and went back to normal, remove the ack
    # This test concerns most of case
    # And could not perform query for each event
    elif state == 0 and state_type == 1:
        solvedts = int(time())
        if event['rk'] in self.cache_acks:
            self.logger.debug(
                'Ack exists for this event, and has to be recovered.'
            )
            # We have an ack to process for this event
            query = {
                'rk': event['rk'],
                'solved': False,
                'ackts': {'$gt': -1}
            }
            ack = self.stbackend.find_one(query)
            if ack:
                ackts = ack['ackts']
                self.stbackend.update(
                    query,
                    {
                        '$set': {
                            'solved': True,
                            'solvedts': solvedts
                        }
                    }
                )
                logevent = forger(
                    connector="Engine",
                    connector_name=self.etype,
                    event_type="log",
                    source_type=event['source_type'],
                    component=event['component'],
                    resource=event.get('resource', None),
                    state=0,
                    state_type=1,
                    ref_rk=event['rk'],
                    output=u'Acknowledgement removed for event {0}'.format(
                        event['rk']),
                    long_output=u'Everything went back to normal'
                )
                logevent['acknowledged_connector'] = event['connector']
                logevent['acknowledged_source'] = event['connector_name']
                logevent['acknowledged_at'] = ackts
                logevent['solved_at'] = solvedts
                # Metric for solved alarms
                ack_event = deepcopy(self.ack_event)
                ack_event['component'] = 'solved_alert'
                ack_event['perf_data_array'] = [
                    {
                        'metric': 'delay',
                        'value': solvedts - ackts,
                        'unit': 's'
                    },
                    {
                        'metric': 'count',
                        'value': 1,
                        'type': 'COUNTER'
                    }
                ]
                publish(
                    publisher=self.amqp,
                    event=ack_event,
                    exchange=self.acknowledge_on
                )
    # If the event is in problem state,
    # update the solved state of acknowledgement
    elif ackremove or (state != 0 and state_type == 1):
        self.logger.debug(u'Alert on event, preparing ACK statement.')
        self.stbackend.find_and_modify(
            query={'rk': event['rk'], 'solved': True},
            update={'$set': {
                'solved': False,
                'solvedts': -1,
                'ackts': -1,
                'timestamp': -1,
                'author': '',
                'comment': ''
            }}
        )
    if logevent:
        self.logger.debug(u'publishing log event {}'.format(
            dumps(logevent, indent=2)
        ))
        publish(
            publisher=self.amqp,
            event=logevent,
            exchange=self.acknowledge_on
        )
    return event
def work(self, event, *args, **kwargs):
    """Handle ticket events.

    - 'declareticket': schedule the configured job with the referenced
      event as context and stamp the declaration on the stored event;
    - 'ack'/'assocticket' carrying a 'ticket': associate the ticket with
      the referenced stored event.

    :param dict event: incoming event; must carry 'event_type' and
        'ref_rk'.
    :returns: the unchanged event.
    """
    # rrule cannot be carried in the job payload; drop it from the config
    if 'job' in self.config:
        if 'rrule' in self.config['job']:
            del self.config['job']['rrule']
    if event['event_type'] == 'declareticket':
        self.logger.debug(u'Declare Ticket')
        try:
            refevt = self.store.get(event['ref_rk'], namespace='events')
            refevt = refevt.dump()
        except KeyError:
            refevt = {}
        # The referenced event may not be acknowledged yet:
        # wait a bit and retry once
        if refevt.get('ack', {}) == {}:
            sleep(2)
            try:
                refevt = self.store.get(event['ref_rk'], namespace='events')
                refevt = refevt.dump()
            except KeyError:
                refevt = {}
        job = deepcopy(self.config['job'])
        job['_id'] = self.config['_id']
        job['context'] = refevt
        publish(publisher=self.amqp, event=job, rk='Engine_scheduler',
                exchange='amq.direct')
        self.logger.info('Setting ticked received for {}'.format(
            event['ref_rk']))
        self.store.get_backend('events').update(
            {'rk': event['ref_rk']},
            {
                '$set': {
                    'ticket_declared_author': event['author'],
                    'ticket_declared_date': int(time()),
                }
            })
    elif (event['event_type'] in ['ack', 'assocticket']
            and 'ticket' in event):
        self.logger.info('Associate ticket for event type {}'.format(
            event['event_type']))
        events = self.store.get_backend('events')
        self.logger.info('Update events with rk {0}'.format(
            event['ref_rk']))
        events.update({'rk': event['ref_rk']}, {
            '$set': {
                'ticket': event['ticket'],
                'ticket_date': int(time())
            }
        })
    return event
def work(self, event, *args, **kwargs):
    """Record downtime events and flag other events occurring in downtime.

    For a 'downtime' event: store a downtime record, publish a log event,
    mark matching stored events as in downtime and reload the downtime
    cache. For any other event: set its 'downtime' flag from the cache.

    :param dict event: incoming event.
    :returns: the (possibly annotated) event.
    """
    # If the event is a downtime event,
    # add entry to the downtime collection
    if event['event_type'] == 'downtime':
        self.logger.debug(
            'Event downtime received: {0}'.format(event['rk']))
        # Build entry, so we know there is a downtime on the component
        record = Record({
            '_expire': event['start'] + event['duration'],
            'connector': event['connector'],
            'source': event['connector_name'],
            'component': event['component'],
            'resource': event.get('resource', None),
            'start': event['start'],
            'end': event['end'],
            'fixed': event['fixed'],
            'timestamp': event['entry'],
            'author': event['author'],
            'comment': event['output']
        })
        # Save record, and log the action
        record.save(self.storage)
        logevent = forger(
            connector="Engine",
            connector_name=self.etype,
            event_type="log",
            source_type=event['source_type'],
            component=event['component'],
            resource=event.get('resource', None),
            state=0,
            state_type=1,
            output=u'Downtime scheduled by {0} from {1} to {2}'.format(
                event['author'], event['start'], event['end']
            ),
            long_output=event['output']
        )
        logevent['downtime_connector'] = event['connector']
        logevent['downtime_source'] = event['connector_name']
        publish(publisher=self.amqp, event=logevent)
        # Set downtime for events already in database
        self.evt_backend.update(
            {
                'connector': event['connector'],
                'connector_name': event['connector_name'],
                'component': event['component'],
                'resource': event.get('resource', None)
            },
            {
                '$set': {
                    'downtime': True
                }
            },
            multi=True
        )
        # Takes care of the new downtime
        self.cdowntime.reload(delta_beat=self.beat_interval)
    # For every other case, check if the event is in downtime
    else:
        event['downtime'] = False
        if (self.cdowntime.is_downtime(
                event.get('component', ''),
                event.get('resource', ''))):
            event['downtime'] = True
        self.logger.debug(
            'Received event: {0}, and set downtime to {1}'.format(
                event['rk'], event['downtime']))
    return event
def work(self, event, *args, **kargs):
    """Process ack / ackremove events and keep acknowledgement state in sync.

    Handles four cases:
    - 'ackremove' events: record ack-removal info on the referenced event;
    - 'ack' events: acknowledge the referenced event, store the ack, and
      emit ack metrics;
    - events back to normal (state 0): mark a pending ack as solved;
    - events in problem state: reset the solved flag of their ack.

    :param dict event: incoming event; must carry 'event_type'.
    :returns: the (possibly annotated) event.
    """
    logevent = None
    ackremove = False
    state = event.get('state', 0)
    state_type = event.get('state_type', 1)
    if event['event_type'] == 'ackremove':
        # remove ack from event
        # Ack remove information exists when ack is just removed
        # And deleted if event is ack again
        rk = event['ref_rk']
        self.events_collection.update(
            {'_id': rk},
            {
                '$set': {
                    'ack_remove': {
                        'author': event['author'],
                        'comment': event['output'],
                        'timestamp': time()
                    }
                },
                '$unset': {
                    'ack': '',
                    'ticket_declared_author': '',
                    'ticket_declared_date': '',
                    'ticket': '',
                    'ticket_date': ''
                }
            }
        )
        ackremove = True

    # If event is of type ack, then ack reference event
    if event['event_type'] == 'ack':
        self.logger.debug('Ack event found, will proceed ack.')
        rk = event.get('referer', event.get('ref_rk', None))
        author = event['author']
        self.logger.debug(dumps(event, indent=2))
        if not rk:
            self.logger.error(
                'Cannot get acknowledged event, missing referer or ref_rk'
            )
            return event
        for comment in self.comments:
            if comment['comment'] in event['output']:
                # An ack comment is contained into a defined comment
                # Then let save referer key to the comment
                # Set referer rk to last update date
                self.objects_backend.update(
                    {'_id': comment['_id']},
                    {"$addToSet": {'referer_event_rks': {'rk': rk}}},
                    upsert=True)
                self.logger.info(
                    'Added a referer rk to the comment {}'.format(
                        comment['comment']
                    )
                )
        ackts = int(time())
        ack_info = {
            'timestamp': event['timestamp'],
            'ackts': ackts,
            'rk': rk,
            'author': author,
            'comment': event['output']
        }
        # add rk to acknowledged rks
        # NOTE(review): `response` is never read afterwards
        response = self.stbackend.find_and_modify(
            query={'rk': rk, 'solved': False},
            update={'$set': ack_info},
            upsert=True,
            full_response=True,
            new=True
        )
        self.logger.debug(
            u'Updating event {} with author {} and comment {}'.format(
                rk, author, ack_info['comment']
            )
        )
        ack_info['isAck'] = True
        # Useless information for event ack data
        del ack_info['ackts']
        # clean eventual previous ack remove information
        self.events_collection.update(
            {
                '_id': rk
            },
            {
                '$set': {
                    'ack': ack_info,
                },
                '$unset': {
                    'ack_remove': '',
                }
            }
        )
        # When an ack status is changed
        # Emit an event log
        referer_event = self.storage.find_one(
            mfilter={'_id': rk},
            namespace='events'
        )
        if referer_event:
            referer_event = referer_event.dump()
            # Duration between event last state and acknolegement date
            duration = ackts - referer_event.get(
                'last_state_change', event['timestamp']
            )
            logevent = forger(
                connector="Engine",
                connector_name=self.etype,
                event_type="log",
                source_type=referer_event['source_type'],
                component=referer_event['component'],
                resource=referer_event.get('resource', None),
                state=0,
                state_type=1,
                ref_rk=event['rk'],
                output=u'Event {0} acknowledged by {1}'.format(
                    rk, author),
                long_output=event['output'],
            )
        # Now update counters
        ackhost = is_host_acknowledged(event)
        # Cast response to ! 0|1
        cvalues = int(not ackhost)
        ack_event = deepcopy(self.ack_event)
        ack_event['component'] = author
        # NOTE(review): `duration` is only bound when referer_event exists;
        # if the referer is missing this raises NameError below -- confirm.
        ack_event['perf_data_array'] = [
            {
                'metric': 'alerts_by_host',
                'value': cvalues,
                'type': 'COUNTER'
            },
            {
                'metric': 'alerts_count{}'.format(
                    self.get_metric_name_adp(event)
                ),
                'value': 1,
                'type': 'COUNTER'
            },
            {
                'metric': 'delay',
                'value': duration,
                'type': 'COUNTER'
            }
        ]
        publish(
            publisher=self.amqp,
            event=ack_event,
            exchange=self.acknowledge_on
        )
        self.logger.debug('Ack internal metric sent. {}'.format(
            dumps(ack_event['perf_data_array'], indent=2)
        ))
        # one 'alerts' counter event per hostgroup the event belongs to
        for hostgroup in event.get('hostgroups', []):
            ack_event = deepcopy(self.ack_event)
            ack_event['perf_data_array'] = [
                {
                    'metric': 'alerts',
                    'value': cvalues,
                    'type': 'COUNTER'
                }
            ]
            publish(
                publisher=self.amqp,
                event=ack_event,
                exchange=self.acknowledge_on
            )
        self.logger.debug('Reloading ack cache')
        self.reload_ack_cache()

    # If event is acknowledged, and went back to normal, remove the ack
    # This test concerns most of case
    # And could not perform query for each event
    elif state == 0 and state_type == 1:
        solvedts = int(time())
        if event['rk'] in self.cache_acks:
            self.logger.debug(
                'Ack exists for this event, and has to be recovered.'
            )
            # We have an ack to process for this event
            query = {
                'rk': event['rk'],
                'solved': False,
                'ackts': {'$gt': -1}
            }
            ack = self.stbackend.find_one(query)
            if ack:
                ackts = ack['ackts']
                self.stbackend.update(
                    query,
                    {
                        '$set': {
                            'solved': True,
                            'solvedts': solvedts
                        }
                    }
                )
                logevent = forger(
                    connector="Engine",
                    connector_name=self.etype,
                    event_type="log",
                    source_type=event['source_type'],
                    component=event['component'],
                    resource=event.get('resource', None),
                    state=0,
                    state_type=1,
                    ref_rk=event['rk'],
                    output=u'Acknowledgement removed for event {0}'.format(
                        event['rk']),
                    long_output=u'Everything went back to normal'
                )
                logevent['acknowledged_connector'] = event['connector']
                logevent['acknowledged_source'] = event['connector_name']
                logevent['acknowledged_at'] = ackts
                logevent['solved_at'] = solvedts
                # Metric for solved alarms
                ack_event = deepcopy(self.ack_event)
                ack_event['component'] = 'solved_alert'
                ack_event['perf_data_array'] = [
                    {
                        'metric': 'delay',
                        'value': solvedts - ackts,
                        'unit': 's'
                    },
                    {
                        'metric': 'count',
                        'value': 1,
                        'type': 'COUNTER'
                    }
                ]
                publish(
                    publisher=self.amqp,
                    event=ack_event,
                    exchange=self.acknowledge_on
                )
    # If the event is in problem state,
    # update the solved state of acknowledgement
    elif ackremove or (state != 0 and state_type == 1):
        self.logger.debug('Alert on event, preparing ACK statement.')
        self.stbackend.find_and_modify(
            query={'rk': event['rk'], 'solved': True},
            update={'$set': {
                'solved': False,
                'solvedts': -1,
                'ackts': -1,
                'timestamp': -1,
                'author': '',
                'comment': ''
            }}
        )
    if logevent:
        self.logger.debug('publishing log event {}'.format(
            dumps(logevent, indent=2)
        ))
        publish(
            publisher=self.amqp,
            event=logevent,
            exchange=self.acknowledge_on
        )
    return event
def on_collectd_event(self, body, msg):
    """Parse a collectd ``PUTVAL`` line and republish it as a perf event.

    Expected input looks like
    ``PUTVAL "host/resource/metric" interval=10 <ts>:<v1>[:<v2>...]``.

    :param str body: raw collectd message body.
    :param msg: AMQP message envelope (unused; kept for handler signature).
    :returns: None. Side effects: publishes a perf event on ``self.amqp``
        and updates the engine counters.
    """
    start = time()
    error = False

    # Group 3 captures a quoted node path, group 4 an unquoted one.
    # Raw string so \s is a real regex class, not a dubious str escape.
    prog = re.compile(r'^(PUTVAL) ("(.+)"|([^\s]+)) (interval=.+) ([^\s]+)$')
    match = prog.match(body)

    if match:
        cnode = match.group(3) if match.group(3) else match.group(4)

        collectd_info = [match.group(1), cnode, match.group(5), match.group(6)]

        self.logger.debug(body)

        action = collectd_info[0]
        self.logger.debug(" + Action: %s" % action)

        if len(collectd_info) == 4 and action == "PUTVAL":
            cnode = collectd_info[1].split("/")
            component = cnode[0]
            resource = cnode[1]
            metric = cnode[2]
            options = collectd_info[2]
            values = collectd_info[3]

            self.logger.debug(" + Options: %s" % options)
            self.logger.debug(" + Component: %s" % component)
            self.logger.debug(" + Resource: %s" % resource)
            self.logger.debug(" + Metric: %s" % metric)
            self.logger.debug(" + Raw Values: %s" % values)

            values = values.split(":")

            perf_data_array = []
            ctype = None

            # Resolve the collectd data type of the metric. Metrics may be
            # plain ("load") or typed ("<type>-<instance>", e.g. "df-root"),
            # in which case the type is the part before the first dash.
            parts = metric.split('-')
            try:
                # Known metric
                ctype = types[metric]
            except KeyError:
                try:
                    ctype = types[parts[0]]
                    metric = parts[1]
                except (KeyError, IndexError) as err:
                    self.logger.error(
                        "Invalid format '%s' (%s)" % (body, err))
                    return None

            try:
                timestamp = int(float(values[0]))
                values = values[1:]

                self.logger.debug(" + Timestamp: %s" % timestamp)
                self.logger.debug(" + Values: %s" % values)

            except (ValueError, IndexError) as err:
                self.logger.error(
                    "Impossible to get timestamp or values (%s)" % err)
                return None

            self.logger.debug(" + metric: %s" % metric)
            self.logger.debug(" + ctype: %s" % ctype)

            if ctype:
                try:
                    # One perf data point per value, described by the
                    # matching entry of the collectd type definition.
                    for i, value in enumerate(values):
                        name = ctype[i]['name']
                        unit = ctype[i]['unit']
                        vmin = ctype[i]['min']
                        vmax = ctype[i]['max']

                        # 'U' means "unbounded" in collectd's types.db
                        if vmin == 'U':
                            vmin = None

                        if vmax == 'U':
                            vmax = None

                        if name == "value":
                            name = metric

                        if metric != name:
                            name = "%s-%s" % (metric, name)

                        data_type = ctype[i]['type']

                        value = float(value)

                        self.logger.debug(" + %s" % name)
                        self.logger.debug(
                            " -> %s (%s)" % (value, data_type))

                        perf_data_array.append(
                            {
                                'metric': name,
                                'value': value,
                                'type': data_type,
                                'unit': unit,
                                'min': vmin,
                                'max': vmax
                            }
                        )

                except Exception as err:
                    self.logger.error(
                        "Impossible to parse values '%s' (%s)" % (
                            values, err))

            if perf_data_array:
                self.logger.debug(
                    ' + perf_data_array: %s', perf_data_array)

                event = forger(
                    connector='collectd',
                    connector_name='collectd2event',
                    component=component,
                    resource=resource,
                    timestamp=timestamp,
                    source_type='resource',
                    event_type='perf',
                    perf_data_array=perf_data_array
                )

                self.logger.debug("Send Event: %s" % event)

                # send event on amqp
                publish(publisher=self.amqp, event=event)

        else:
            error = True
            self.logger.error("Invalid collectd Action (%s)" % body)

    else:
        self.logger.error("Invalid collectd Message (%s)" % body)

    if error:
        self.counter_error += 1

    self.counter_event += 1
    self.counter_worktime += time() - start
def work(self, event, *args, **kargs):
    """Maintain acknowledgement state from incoming events.

    Handles three cases:
    - ``ackremove`` events: clear ack/ticket data on the referenced event;
    - ``ack`` events: persist the ack, log it and emit ack metrics;
    - plain check events: solve or re-open existing acks depending on state.

    :param dict event: the incoming event to process.
    :returns: the input event (possibly consulted, never replaced).
    """
    logevent = None
    ackremove = False
    state = event.get("state", 0)
    state_type = event.get("state_type", 1)

    if event["event_type"] == "ackremove":
        # remove ack from event
        # Ack remove information exists when ack is just removed
        # And deleted if event is ack again
        rk = event["ref_rk"]

        self.events_collection.update(
            {"_id": rk},
            {
                "$set": {
                    "ack_remove": {"author": event["author"], "comment": event["output"], "timestamp": time()}
                },
                "$unset": {
                    "ack": "",
                    "ticket_declared_author": "",
                    "ticket_declared_date": "",
                    "ticket": "",
                    "ticket_date": "",
                },
            },
        )
        ackremove = True

    # If event is of type ack, then ack reference event
    if event["event_type"] == "ack":
        self.logger.debug(u"Ack event found, will proceed ack.")

        # The acknowledged event is designated by "referer" or "ref_rk".
        rk = event.get("referer", event.get("ref_rk", None))
        author = event["author"]

        self.logger.debug(dumps(event, indent=2))

        if not rk:
            self.logger.error("Cannot get acknowledged event, missing referer or ref_rk")
            return event

        # Link this ack to every configured comment it matches.
        for comment in self.comments:
            if comment["comment"] in event["output"]:
                # An ack comment is contained into a defined comment
                # Then let save referer key to the comment
                # Set referer rk to last update date
                self.objects_backend.update(
                    {"_id": comment["_id"]},
                    {"$addToSet": {"referer_event_rks": {"rk": rk}}},
                    upsert=True
                )
                self.logger.info("Added a referer rk to the comment {}".format(comment["comment"]))

        ackts = int(time())
        ack_info = {
            "timestamp": event["timestamp"],
            "ackts": ackts,
            "rk": rk,
            "author": author,
            "comment": event["output"],
        }

        # add rk to acknowledged rks
        response = self.stbackend.find_and_modify(
            query={"rk": rk, "solved": False},
            update={"$set": ack_info},
            upsert=True,
            full_response=True,
            new=True
        )

        self.logger.debug(
            u"Updating event {} with author {} and comment {}".format(rk, author, ack_info["comment"])
        )

        ack_info["isAck"] = True
        # Useless information for event ack data
        del ack_info["ackts"]

        # clean eventual previous ack remove information
        self.events_collection.update(
            {"_id": rk},
            {"$set": {"ack": ack_info}, "$unset": {"ack_remove": ""}}
        )

        # When an ack status is changed
        # Emit an event log
        referer_event = self.storage.find_one(mfilter={"_id": rk}, namespace="events")

        # NOTE(review): the counter/metric section below is nested under
        # `if referer_event:` because it reads `duration`, which is only
        # computed when the referer event exists — confirm nesting upstream.
        if referer_event:
            referer_event = referer_event.dump()

            # Duration between event last state and acknolegement date
            duration = ackts - referer_event.get("last_state_change", event["timestamp"])

            logevent = forger(
                connector="Engine",
                connector_name=self.etype,
                event_type="log",
                source_type=referer_event["source_type"],
                component=referer_event["component"],
                resource=referer_event.get("resource", None),
                state=0,
                state_type=1,
                ref_rk=event["rk"],
                output=u"Event {0} acknowledged by {1}".format(rk, author),
                long_output=event["output"],
            )

            # Now update counters
            ackhost = is_host_acknowledged(event)
            # Cast response to ! 0|1
            cvalues = int(not ackhost)

            ack_event = deepcopy(self.ack_event)
            ack_event["component"] = author
            ack_event["perf_data_array"] = [
                {"metric": "alerts_by_host", "value": cvalues, "type": "COUNTER"},
                {"metric": "alerts_count{}".format(self.get_metric_name_adp(event)), "value": 1, "type": "COUNTER"},
                {"metric": "delay", "value": duration, "type": "COUNTER"},
            ]
            publish(publisher=self.amqp, event=ack_event, exchange=self.acknowledge_on)

            self.logger.debug(u"Ack internal metric sent. {}".format(dumps(ack_event["perf_data_array"], indent=2)))

            # One "alerts" counter metric per hostgroup of the acked event.
            for hostgroup in event.get("hostgroups", []):
                ack_event = deepcopy(self.ack_event)
                ack_event["perf_data_array"] = [{"metric": "alerts", "value": cvalues, "type": "COUNTER"}]
                publish(publisher=self.amqp, event=ack_event, exchange=self.acknowledge_on)

        self.logger.debug(u"Reloading ack cache")
        self.reload_ack_cache()

    # If event is acknowledged, and went back to normal, remove the ack
    # This test concerns most of case
    # And could not perform query for each event
    elif state == 0 and state_type == 1:
        solvedts = int(time())

        if event["rk"] in self.cache_acks:
            self.logger.debug("Ack exists for this event, and has to be recovered.")

            # We have an ack to process for this event
            query = {"rk": event["rk"], "solved": False, "ackts": {"$gt": -1}}

            ack = self.stbackend.find_one(query)

            if ack:
                ackts = ack["ackts"]

                # Mark the ack as solved with the resolution timestamp.
                self.stbackend.update(query, {"$set": {"solved": True, "solvedts": solvedts}})

                logevent = forger(
                    connector="Engine",
                    connector_name=self.etype,
                    event_type="log",
                    source_type=event["source_type"],
                    component=event["component"],
                    resource=event.get("resource", None),
                    state=0,
                    state_type=1,
                    ref_rk=event["rk"],
                    output=u"Acknowledgement removed for event {0}".format(event["rk"]),
                    long_output=u"Everything went back to normal",
                )
                logevent["acknowledged_connector"] = event["connector"]
                logevent["acknowledged_source"] = event["connector_name"]
                logevent["acknowledged_at"] = ackts
                logevent["solved_at"] = solvedts

                # Metric for solved alarms
                ack_event = deepcopy(self.ack_event)
                ack_event["component"] = "solved_alert"
                ack_event["perf_data_array"] = [
                    {"metric": "delay", "value": solvedts - ackts, "unit": "s"},
                    {"metric": "count", "value": 1, "type": "COUNTER"},
                ]
                publish(publisher=self.amqp, event=ack_event, exchange=self.acknowledge_on)

    # If the event is in problem state,
    # update the solved state of acknowledgement
    elif ackremove or (state != 0 and state_type == 1):
        self.logger.debug(u"Alert on event, preparing ACK statement.")

        # Re-open the ack: reset solved/ack timestamps and author data.
        self.stbackend.find_and_modify(
            query={"rk": event["rk"], "solved": True},
            update={
                "$set": {"solved": False, "solvedts": -1, "ackts": -1, "timestamp": -1, "author": "", "comment": ""}
            },
        )

    # Emit the log event produced by either the ack or the solve branch.
    if logevent:
        self.logger.debug(u"publishing log event {}".format(dumps(logevent, indent=2)))
        publish(publisher=self.amqp, event=logevent, exchange=self.acknowledge_on)

    return event
def beat_processing(
    engine,
    sessionmgr=None, eventmgr=None, usermgr=None, alertsmgr=None,
    logger=None,
    **kwargs
):
    """Periodic beat task: compute ack/solve statistics on resolved alarms.

    Collects session-duration metric events, then (under a cluster-wide
    lock) walks every resolved alarm not yet tagged ``stats``, derives
    ack/solve delay metrics, tags the alarm, and finally publishes every
    collected metric event on the engine's AMQP publisher.

    :param engine: engine providing ``Lock`` and the ``amqp`` publisher.
    :param sessionmgr: Session manager (resolved via singleton when None).
    :param eventmgr: EventMetricProducer (resolved via singleton when None).
    :param usermgr: UserMetricProducer (resolved via singleton when None).
    :param alertsmgr: Alerts manager (resolved via singleton when None).
    :param logger: optional logger forwarded to ``publish``.
    """
    if sessionmgr is None:
        sessionmgr = singleton_per_scope(Session)

    if eventmgr is None:
        eventmgr = singleton_per_scope(EventMetricProducer)

    if usermgr is None:
        usermgr = singleton_per_scope(UserMetricProducer)

    if alertsmgr is None:
        alertsmgr = singleton_per_scope(Alerts)

    storage = alertsmgr[alertsmgr.ALARM_STORAGE]

    # Start from session duration metric events; alarm stats are appended.
    events = sessionmgr.duration()

    # Only one engine instance at a time may compute the alarm stats.
    with engine.Lock(engine, 'alarm_stats_computation') as l:
        if l.own():
            # Resolved alarms that were not already counted (tag 'stats').
            resolved_alarms = alertsmgr.get_alarms(
                resolved=True,
                exclude_tags='stats'
            )

            for data_id in resolved_alarms:
                for docalarm in resolved_alarms[data_id]:
                    docalarm[storage.DATA_ID] = data_id
                    alarm = docalarm[storage.VALUE]
                    alarm_ts = docalarm[storage.TIMESTAMP]
                    alarm_events = alertsmgr.get_events(docalarm)

                    # Delay between alarm creation and its resolution.
                    solved_delay = alarm['resolved'] - alarm_ts
                    events.append(eventmgr.alarm_solved_delay(solved_delay))

                    if alarm['ack'] is not None:
                        ack_ts = alarm['ack']['t']

                        # If the ack was preceded by an ackremove, measure
                        # the ack delay from that step, not from creation.
                        ackremove = get_previous_step(
                            alarm,
                            'ackremove',
                            ts=ack_ts
                        )
                        ts = alarm_ts if ackremove is None else ackremove['t']

                        ack_delay = ack_ts - ts
                        events.append(eventmgr.alarm_ack_delay(ack_delay))
                        events.append(
                            eventmgr.alarm_ack_solved_delay(
                                solved_delay - ack_delay
                            )
                        )
                        events.append(usermgr.alarm_ack_delay(
                            alarm['ack']['a'],
                            ack_delay
                        ))

                    if len(alarm_events) > 0:
                        # First event of the alarm feeds the alarm counter.
                        events.append(eventmgr.alarm(alarm_events[0]))

                        for event in alarm_events:
                            if event['event_type'] == 'ack':
                                events.append(eventmgr.alarm_ack(event))
                                events.append(
                                    usermgr.alarm_ack(event, event['author'])
                                )

                            elif event['timestamp'] == alarm['resolved']:
                                # Event that resolved the alarm.
                                events.append(eventmgr.alarm_solved(event))

                                if alarm['ack'] is not None:
                                    events.append(
                                        eventmgr.alarm_ack_solved(event)
                                    )
                                    events.append(
                                        usermgr.alarm_ack_solved(
                                            alarm['ack']['a'],
                                            alarm['resolved'] - alarm['ack']['t']
                                        )
                                    )
                                    events.append(
                                        usermgr.alarm_solved(
                                            alarm['ack']['a'],
                                            alarm['resolved'] - alarm_ts
                                        )
                                    )

                    # Tag the alarm so it is never counted twice.
                    alertsmgr.update_current_alarm(
                        docalarm,
                        alarm,
                        tags='stats'
                    )

    # NOTE(review): publishing happens outside the lock — session metrics
    # are emitted even when another instance holds the stats lock.
    for event in events:
        publish(publisher=engine.amqp, event=event, logger=logger)
def work(self, event, *args, **kwargs):
    """Handle ticket-related events.

    - ``declareticket``: schedule the configured job with the referenced
      event as context, and record the declaring author/date.
    - ``ack``/``assocticket`` carrying a ``ticket`` field: attach the
      ticket number and association date to the referenced event.

    :param dict event: incoming event.
    :returns: the input event, unchanged.
    """
    # A job triggered by an event must run exactly once: strip any
    # recurrence rule inherited from the configured job template.
    if 'job' in self.config:
        if 'rrule' in self.config['job']:
            del self.config['job']['rrule']

    if event['event_type'] == 'declareticket':
        self.logger.debug('Declare Ticket')

        # Fetch the referenced event to hand it to the job as context;
        # fall back to an empty context when it cannot be found.
        try:
            refevt = self.store.get(
                event['ref_rk'],
                namespace='events'
            )
            refevt = refevt.dump()

        except KeyError:
            refevt = {}

        job = deepcopy(self.config['job'])
        job['_id'] = self.config['_id']
        job['context'] = refevt

        # Hand the job over to the scheduler engine.
        publish(
            publisher=self.amqp,
            event=job,
            rk='Engine_scheduler',
            exchange='amq.direct'
        )

        # Fixed typo in log message ("ticked" -> "ticket").
        self.logger.info(
            'Setting ticket received for {}'
            .format(event['ref_rk'])
        )

        # Record who declared the ticket, and when, on the source event.
        self.store.get_backend('events').update({
            'rk': event['ref_rk']
        }, {
            '$set': {
                'ticket_declared_author': event['author'],
                'ticket_declared_date': int(time()),
            }
        })

    elif (event['event_type'] in ['ack', 'assocticket']
          and 'ticket' in event):
        self.logger.info(
            'Associate ticket for event type {}'
            .format(event['event_type'])
        )

        events = self.store.get_backend('events')

        self.logger.info(
            'Update events with rk {0}'
            .format(event['ref_rk'])
        )

        # Attach the ticket number and association date to the event.
        events.update({
            'rk': event['ref_rk']
        }, {
            '$set': {
                'ticket': event['ticket'],
                'ticket_date': int(time())
            }
        })

    return event