def escalate_event(event):
    """
    Evaluates the escalation rules registered for the event's type and system and logs an incident for
    the first rule (highest nth_event first) whose occurrence threshold has been reached.

    @param event: A logged event to be checked for escalation
    @type event: Event
    @return: The incident logging response when a rule is triggered, otherwise a response code in a
    dictionary indicating whether the check completed successfully
    @rtype: dict
    """
    try:
        rules = EscalationRuleService().filter(
            event_type=event.event_type, system=event.system).order_by("-nth_event")
        window_end = timezone.now()
        for rule in rules:
            # Look back over the rule's configured duration, ending at the moment of the check.
            window_start = window_end - timedelta(seconds=rule.duration.total_seconds())
            recent_events = EventService().filter(
                event_type=event.event_type, date_created__range=(window_start, window_end))
            # A rule only fires for a positive threshold that the recent events have reached.
            if not (recent_events.count() >= rule.nth_event > 0):
                continue
            return IncidentAdministrator.log_incident(
                name=rule.name,
                incident_type="Realtime",
                system=event.system.id,
                state="Investigating",
                escalated_events=recent_events,
                escalation_level=rule.escalation_level.id,
                event_type=event.event_type.name,
                description=rule.description,
                priority_level=event.event_type.priority_level())
        return {"code": "800.200.001"}
    except Exception as ex:
        lgr.exception("Event Logger exception %s " % ex)
        return {'code': '800.400.001'}
def test_filter(self):
    """
    Test Event filter service: an unfiltered query returns every blended Event.
    """
    mixer.cycle(3).blend('core.Event')
    events = EventService().filter()
    # The message previously referred to SystemMonitor objects, which this test never creates.
    assert len(events) == 3, 'Should return 3 Event objects'
def get_events(system_id):
    """
    Retrieves the active events logged for an active system, most recent first.

    @param system_id: Id of the system
    @type system_id: str
    @return: Response code indicating status and, on success, the logged events under 'data'
    @rtype: dict
    """
    try:
        active_system = SystemService().get(pk=system_id, state__name='Active')
        if not active_system:
            return {'code': '800.400.200'}
        active_events = EventService().filter(system=active_system, state__name='Active')
        rows = active_events.values(
            'id', 'date_created', 'interface', 'method', 'request', 'response', 'stack_trace',
            'description', 'code',
            status=F('state__name'),
            system_name=F('system__name'),
            eventtype=F('event_type__name'))
        return {'code': '800.200.001', 'data': list(rows.order_by('-date_created'))}
    except Exception as ex:
        lgr.exception("Get events Exception %s" % ex)
        return {'code': '800.400.001'}
def get_event(event_id, system_id):
    """
    Retrieves a single active event logged for an active system.

    @param event_id: Id of the event
    @type event_id: str
    @param system_id: Id of the system
    @type system_id: str
    @return: Response code indicating status and, on success, the logged event under 'data'. When the
    system or the event cannot be found the response carries the requested event id under 'event'.
    @rtype: dict
    """
    try:
        not_found = {'code': '800.400.200', 'event': str(event_id)}
        system = SystemService().get(pk=system_id, state__name='Active')
        if system is None:
            # No active system: skip the event query altogether instead of querying with system=None.
            return not_found
        event = EventService().filter(
            pk=event_id, system=system, state__name='Active').values(
            'id', 'date_created', 'interface', 'method', 'request', 'response', 'stack_trace',
            'description', 'code',
            status=F('state__name'),
            system_name=F('system__name'),
            eventtype=F('event_type__name')).first()
        if event is None:
            return not_found
        return {'code': '800.200.001', 'data': event}
    except Exception as ex:
        lgr.exception("Get event Exception %s" % ex)
        return {'code': '800.400.001'}
def test_update(self):
    """
    Test Event update service: updating a field returns the Event carrying the new value.
    """
    event = mixer.blend('core.Event')
    updated_event = EventService().update(event.id, response='response2')
    # Messages previously described creating a System Monitor object, which is not what is tested.
    assert updated_event is not None, 'Should return the updated Event object'
    assert updated_event.response == 'response2', 'Response should be equal to response2'
def dashboard_widgets_data(system, date_from=None, date_to=None):
    """
    Retrieves event and incident counts for the dashboard widgets of a system within a date range.
    When either limit is missing the range defaults to the current day (midnight to midnight).

    @param system: Id of the system the data is retrieved for
    @type system: str
    @param date_from: Start date limit applied
    @type date_from: str | None
    @param date_to: End date limit to be applied
    @type date_to: str | None
    @return: Response code and, on success, the counts of reported events, open incidents, closed
    incidents and scheduled incidents under 'data'
    @rtype: dict
    """
    try:
        system = SystemService().get(pk=system, state__name='Active')
        if not system:
            return {'code': '800.400.002'}
        if date_from and date_to:
            range_start = dateutil.parser.parse(date_from)
            range_end = dateutil.parser.parse(date_to)
            # The limits used to be applied the wrong way round (created <= date_from and
            # created >= date_to), so a natural start < end range never matched anything.
            # Normalising the order fixes that while still serving callers that pass them reversed.
            if range_start > range_end:
                range_start, range_end = range_end, range_start
        else:
            range_start = datetime.combine(datetime.now(), datetime.min.time())
            range_end = range_start + timedelta(days=1)
        in_range = dict(system=system, date_created__gte=range_start, date_created__lte=range_end)

        reported_events = EventService().filter(**in_range).count()
        open_incidents = IncidentService().filter(
            incident_type__name='Realtime', **in_range).exclude(state__name='Resolved').count()
        closed_incidents = IncidentService().filter(
            incident_type__name='Realtime', state__name='Resolved', **in_range).count()
        scheduled_incidents = IncidentService().filter(
            incident_type__name='Scheduled', **in_range).exclude(state__name='Completed').count()

        data = {
            'reported_events': reported_events,
            'open_incidents': open_incidents,
            'closed_incidents': closed_incidents,
            'scheduled_incidents': scheduled_incidents
        }
        return {'code': '800.200.001', 'data': data}
    except Exception as ex:
        lgr.exception("Get incidents exception %s" % ex)
        return {'code': '800.400.001'}
def test_create(self):
    """
    Test Event create service: creating an Event from blended related objects yields an object.
    """
    related = {
        'interface': mixer.blend('core.Interface'),
        'event_type': mixer.blend('base.EventType'),
        'system': mixer.blend('core.System'),
        'state': mixer.blend('base.State'),
    }
    created = EventService().create(**related)
    assert created is not None, 'Should create an Event Object'
def test_get(self):
    """
    Test Event get service: an Event blended for a system can be fetched back by that system's id.
    """
    system = mixer.blend('core.System')
    event_fields = dict(
        system=system,
        interface=mixer.blend('core.Interface'),
        event_type=mixer.blend('base.EventType'),
        state=mixer.blend('base.State'),
        method='Some',
        response='response',
        code='200')
    mixer.blend('core.Event', **event_fields)
    fetched = EventService().get(system=system.id)
    assert fetched is not None, 'Should get a created Event object'
def get_error_rate(system_id, start_date, end_date):
    """
    Calculates and returns the error rate of a system based on logged 'Error' events.

    The length of the requested period only selects the granularity of the graph; the counted
    windows are always anchored at the current time:
    - up to 1 day, or more than 365 days: the last 24 hourly windows
    - 2 to 7 days: 7 daily windows
    - 8 to 31 days: 31 daily windows
    - 32 to 365 days: 12 monthly windows

    @param system_id: Id of the system
    @type system_id: str
    @param start_date: Start point of the data to be presented
    @type start_date: str
    @param end_date: End date of the period for which the data is to be extracted
    @type end_date: str
    @return: Response code indicating status and, on success, the error rate graph data under 'data'
    @rtype: dict
    """
    try:
        system = SystemService().get(pk=system_id, state__name='Active')
        if not system:
            return {'code': '800.400.200'}
        now = timezone.now()
        start_date = dateutil.parser.parse(start_date)
        end_date = dateutil.parser.parse(end_date)
        # The period used to be computed as start - end, which is negative for any natural range and
        # therefore always selected the hourly graph. The absolute span picks the intended
        # granularity and keeps working for callers that pass the two dates reversed.
        span_days = abs(end_date - start_date).days
        series = []

        def count_errors(window_start, window_end):
            # Number of 'Error' events of the system created within the inclusive window.
            return EventService().filter(
                system=system, event_type__name='Error', date_created__lte=window_end,
                date_created__gte=window_start).count()

        if 1 < span_days <= 31:
            points = 7 if span_days <= 7 else 31
            for i in range(points):
                window_start = now - timedelta(days=i)
                window_end = window_start + timedelta(days=1)
                current_errors = count_errors(window_start, window_end)
                # NOTE(review): the first window (i == 0) lies entirely in the future and each point
                # is labelled with the day its window ends on - kept as in the original.
                series.append(dict(value=current_errors, name=window_end.replace(hour=0, minute=0)))
        elif 31 < span_days <= 365:
            # Start from midnight of the last day of the current month and walk back month by month.
            month_end = now.replace(day=1, hour=0, minute=0, second=0, microsecond=0)
            month_end += timedelta(days=calendar.monthrange(month_end.year, month_end.month)[1] - 1)
            for _ in range(12):
                days_in_month = calendar.monthrange(month_end.year, month_end.month)[1]
                window_end = month_end
                window_start = month_end - timedelta(days=days_in_month - 1)
                # Step back to the last day of the previous month (also crosses year boundaries).
                month_end = month_end - timedelta(days=days_in_month)
                current_errors = count_errors(window_start, window_end)
                # NOTE(review): the point is labelled with the already decremented date, i.e. the
                # last day of the month preceding the counted window, and the window stops at 00:00
                # of the month's last day - kept as in the original; confirm this is intended.
                series.append(dict(value=current_errors, name=month_end))
        else:
            for i in range(1, 25):
                window_start = now - timedelta(hours=i)
                window_end = window_start + timedelta(hours=1)
                current_errors = count_errors(window_start, window_end)
                series.append(dict(value=current_errors, name=window_start.replace(minute=0)))

        dataset = [{
            "name": "Number of errors",
            "color": "#E44D25",
            "series": series,
            "yAxisValue": "Number of Errors Occurred"
        }]
        return {'code': '800.200.001', 'data': dataset}
    except Exception as ex:
        lgr.exception("Get Error rate Exception %s" % ex)
        # Return the plain failure code like every other function here; the exception text used to be
        # appended to the code itself, which breaks code comparisons and exposes internals.
        return {'code': '800.400.001'}
def log_event(event_type, system, interface=None, method=None, response=None, request=None, code=None,
              description=None, stack_trace=None, **kwargs):
    """
    Logs an event that is being reported from an external system or a health check, then checks
    whether the new event has to be escalated. A failed escalation is only logged; it does not fail
    the event logging itself.

    @param event_type: Type of the event to be logged
    @type event_type: str
    @param system: The system where the event occurred
    @type system: str
    @param interface: Specific interface in a system where the event occurred
    @type interface: str | None
    @param method: Specific method within an interface where the event occurred
    @type method: str | None
    @param response: Response body, if any, of the reported event occurrence
    @type response: str | None
    @param request: Request body, if any, of the reported event occurrence
    @type request: str | None
    @param code: Response code of the event
    @type code: str | None
    @param description: Detailed information on the event occurrence
    @type description: str | None
    @param stack_trace: Stack trace from the event occurrence
    @type stack_trace: str | None
    @param kwargs: Extra key=>value arguments to be passed for the event logging (currently unused)
    @return: Response code in a dictionary indicating if the event is created successfully or not and,
    on success, the created event under 'data'
    @rtype: dict
    """
    try:
        system = SystemService().get(pk=system, state__name="Active")
        event_type = EventTypeService().get(name=event_type, state__name="Active")
        if system is None or event_type is None:
            return {"code": "800.400.002"}
        event = EventService().create(
            event_type=event_type, system=system, method=method, response=response, request=request,
            code=code, description=description, state=StateService().get(name="Active"),
            interface=InterfaceService().get(name=interface, state__name="Active", system=system),
            stack_trace=stack_trace)
        if event is None:
            # Creation failed without raising: report the failure explicitly instead of falling
            # through the function.
            return {'code': '800.400.001'}
        escalation = EventLog.escalate_event(event)
        if escalation.get('code') != '800.200.001':
            lgr.error('%s event escalation Failed' % event_type)
        created_event = EventService().filter(id=event.id).values(
            'id', 'event_type', 'state__id', 'system__id', 'method', 'response', 'request', 'code',
            'description', 'interface__id', 'stack_trace').first()
        return {'code': '800.200.001', 'data': created_event}
    except Exception as ex:
        lgr.exception('Event processor exception %s' % ex)
    # Reached only after an exception has been logged; every path returns a response dictionary.
    return {'code': '800.400.001'}