def test_filter(self):
    """
    Verify that the EventType filter service returns every stored EventType.
    """
    # Seed three EventType rows, then fetch them back without any filter.
    mixer.cycle(3).blend('base.EventType')
    fetched_event_types = EventTypeService().filter()
    fetched_count = len(fetched_event_types)
    assert fetched_count == 3, 'Should return 3 EventType objects'
def create_rule(name, description, system, event_type, nth_event,
                escalation_level, duration, **kwargs):
    """
    Creates an escalation rule for a selected system.

    @param name: Name of the escalation rule to be created
    @type name: str
    @param description: Details on the escalation rule
    @type description: str
    @param system: Primary key of the system the rule applies to
    @type system: str
    @param event_type: Primary key of the event type affected by the rule
    @type event_type: str
    @param nth_event: Number of events of the given type that must be
        logged to raise an escalation
    @type nth_event: str
    @param escalation_level: Primary key of the escalation level the rule
        escalates to
    @type escalation_level: str
    @param duration: Time period, in seconds, within which the events must
        occur to trigger an escalation
    @type duration: int
    @param kwargs: Extra key-value arguments; not used by this function
    @return: Response code dictionary; on success it also carries the
        created rule under the 'data' key
    @rtype: dict
    """
    try:
        # Only active related records may be attached to a new rule.
        system = SystemService().get(pk=system, state__name="Active")
        escalation_level = EscalationLevelService().get(
            pk=escalation_level, state__name="Active")
        event_type = EventTypeService().get(
            pk=event_type, state__name='Active')
        required = (system, escalation_level, event_type)
        if any(item is None for item in required):
            return {"code": "800.400.002"}

        created_rule = EscalationRuleService().create(
            name=name,
            description=description,
            system=system,
            nth_event=int(nth_event),
            duration=timedelta(seconds=duration),
            state=StateService().get(name='Active'),
            escalation_level=escalation_level,
            event_type=event_type)
        if created_rule is not None:
            # Re-read the rule as a plain dict so it can be returned as data.
            rule_query = EscalationRuleService().filter(
                pk=created_rule.id, system=system)
            rule = rule_query.values(
                'id', 'name', 'description', 'duration', 'date_created',
                'date_modified', 'nth_event',
                system_id=F('system'),
                escalation_level_name=F('escalation_level__name'),
                state_name=F('state__name'),
                event_type_name=F('event_type__name')).first()
            # Expose the duration as a number of seconds, not a timedelta.
            duration_seconds = timedelta.total_seconds(rule.get('duration'))
            rule.update(duration=duration_seconds)
            return {'code': '800.200.001', 'data': rule}
    except Exception as ex:
        lgr.exception("Escalation Rule Creation exception %s" % ex)
    # Reached when the rule could not be created or an error was logged.
    return {"code": "800.400.001"}
def test_update(self):
    """
    Test EventType update service
    """
    event_type = mixer.blend('base.EventType')
    event_type = EventTypeService().update(
        event_type.id, description='response2')
    # The failure messages describe the update being tested, not a creation.
    assert event_type is not None, 'Should update an EventType object'
    assert event_type.description == 'response2', \
        'EventType description should be updated to response2'
def test_create(self):
    """
    Verify that the EventType create service stores the supplied description.
    """
    blended_state = mixer.blend('base.State')
    service = EventTypeService()
    created = service.create(state=blended_state, description='Incident1')
    assert created is not None, 'Should create an EventType Object'
    stored_description = created.description
    assert stored_description == 'Incident1', ' EventType description is equals to Incident1'
def test_get(self):
    """
    Verify that the EventType get service finds a record by its state.
    """
    blended_state = mixer.blend('base.State')
    # Create the record that the lookup below is expected to find.
    mixer.blend('base.EventType', state=blended_state)
    lookup = {'state': blended_state.id}
    found = EventTypeService().get(**lookup)
    assert found is not None, 'Should get a created EventType object'
def get_look_up_data():
    """
    Collects the id/name (or id/username) pairs of the reference records
    used as look-up data.

    @return: a dictionary containing a success code and, under 'data', one
        list of dictionaries per look-up category; on failure an error code
        and a message
    @rtype: dict
    """

    def _id_name_pairs(queryset):
        # Reduce a queryset to a plain list of {'id', 'name'} dictionaries.
        return list(queryset.values('id', 'name'))

    def _states_named(first_name, *other_names):
        # OR together one name condition per requested state name.
        condition = Q(name=first_name)
        for state_name in other_names:
            condition = condition | Q(name=state_name)
        return _id_name_pairs(StateService().filter(condition))

    try:
        state = _id_name_pairs(StateService().filter())
        notification_type = _id_name_pairs(NotificationTypeService().filter())
        escalation_level = _id_name_pairs(EscalationLevelService().filter())
        event_type = _id_name_pairs(EventTypeService().filter())
        endpoint_type = _id_name_pairs(EndpointTypeService().filter())
        incident_type = _id_name_pairs(IncidentTypeService().filter())
        user = list(User.objects.all().values('id', 'username'))
        system = _id_name_pairs(SystemService().filter())
        endpoint_states = _states_named(
            'Operational', 'Minor Outage', 'Major Outage',
            'Under Maintenance', 'Degraded Performance')
        realtime_incident_states = _states_named(
            'Investigating', 'Identified', 'Monitoring', 'Resolved')
        scheduled_incident_states = _states_named(
            'Scheduled', 'InProgress', 'Completed')

        lookups = dict(
            states=state,
            incident_types=incident_type,
            escalation_levels=escalation_level,
            notification_types=notification_type,
            endpoint_types=endpoint_type,
            event_types=event_type,
            users=user,
            systems=system,
            realtime_incident_states=realtime_incident_states,
            endpoint_states=endpoint_states,
            scheduled_incident_states=scheduled_incident_states,
        )
        return {"code": "800.200.001", "data": lookups}
    except Exception as ex:
        lgr.exception("Look up interface Exception: %s" % ex)
        failure_message = "Error while fetching data %s" % str(ex)
        return {"code": "800.400.001", "message": failure_message}
def perform_health_check():
    """
    Requests every queried endpoint of the active systems, records the
    outcome through SystemMonitorService and logs an event for slow or
    failing endpoints.

    An endpoint answering with status 200 within its optimal response time
    is recorded as 'Operational'; a slower 200 answer is recorded as
    'Degraded Performance' with a 'Warning' event; any other status code is
    recorded as 'Major Outage' with a 'Critical' event. An endpoint whose
    request raises is logged and skipped.

    @return: Systems: a dictionary containing a success code and a list of
        dictionaries containing system status data, or an error code and
        message if the health check itself fails
    @rtype: dict
    """
    systems = []
    try:
        for endpoint in EndpointService().filter(
                system__state__name="Active",
                endpoint_type__is_queried=True):
            try:
                health_state = requests.get(endpoint.url)
            except requests.RequestException as e:
                # RequestException is the base of ConnectionError, Timeout,
                # InvalidURL, etc.; one unreachable or misconfigured
                # endpoint must not abort the check of the remaining ones.
                lgr.exception('Endpoint health check failed: %s' % e)
                continue

            # Defaults describe a healthy endpoint; the branches below
            # override them. State and event type are kept as names here
            # and resolved where they are used.
            state_name = 'Operational'
            response_time_speed = 'Normal'
            event_type_name = None
            description = None
            if health_state.status_code != 200:
                response_time_speed = None
                event_type_name = 'Critical'
                description = 'The system is not accessible'
                state_name = 'Major Outage'
            elif health_state.elapsed > endpoint.optimal_response_time:
                response_time_speed = 'Slow'
                event_type_name = 'Warning'
                description = 'Response time is not within the expected time'
                state_name = 'Degraded Performance'

            # The elapsed time is known for every response, including
            # non-200 ones, so it is always recorded (whole seconds, as
            # before).
            response_time = timedelta(
                seconds=int(health_state.elapsed.total_seconds()))

            system_status = SystemMonitorService().create(
                system=endpoint.system,
                response_time=response_time,
                response_time_speed=response_time_speed,
                state=StateService().get(name=state_name),
                response_body=health_state.content,
                endpoint=endpoint,
                response_code=health_state.status_code)
            if system_status is not None:
                systems.append({
                    "system": system_status.system.name,
                    "status": system_status.state.name,
                    "endpoint": endpoint.url
                })
            else:
                # Nothing was stored, so report from the endpoint itself.
                systems.append({
                    "system": endpoint.system.name,
                    "status": "failed",
                    "endpoint": endpoint.url
                })

            if event_type_name is not None:
                # log_event resolves the system by primary key and the
                # event type by name.
                event = EventLog.log_event(
                    event_type=event_type_name,
                    system=endpoint.system.id,
                    description=description,
                    response=health_state.content,
                    request=health_state.request)
                if not event or event.get('code') != "800.200.001":
                    lgr.warning("Event creation failed %s" % event)
        return {"code": "800.200.001", "data": {"systems": systems}}
    except Exception as ex:
        lgr.exception("Health Status exception: %s" % ex)
        return {
            "code": "800.400.001",
            "message": "Error while performing health check"
        }
def log_incident(incident_type, system, escalation_level, name, description,
                 priority_level, event_type=None, state="Investigating",
                 escalated_events=None, scheduled_for=None,
                 scheduled_until=None, **kwargs):
    """
    Creates a realtime incident based on escalated events, or a scheduled
    incident based on user reports.

    For a realtime incident with an event type, an existing unresolved
    incident of the same event type and system is escalated (its priority
    level is raised through IncidentAdministrator.update_incident) instead
    of creating a new incident, as long as the supplied priority level is
    below 5.

    @param incident_type: Name of the type of the incident to be created
    @type incident_type: str
    @param system: Primary key of the system the incident is associated with
    @type system: str
    @param escalation_level: Primary key of the level at which an escalation
        is configured with a set of recipients
    @type escalation_level: str
    @param name: Title of the incident
    @type name: str
    @param description: Details on the incident
    @type description: str
    @param priority_level: The level of importance to be assigned to the
        incident.
    @type priority_level: str
    @param event_type: Name of the type of the event(s) that triggered
        creation of the incident, if it is event driven.
    @type event_type: str | None
    @param state: Initial resolution state of the incident, as a state id
        (UUID string) or a state name. Defaults to Investigating
    @type state: str
    @param escalated_events: One or more events in the escalation if the
        incident is event driven.
    @type escalated_events: list | None
    @param scheduled_for: Time the scheduled maintenance should begin if
        the incident is scheduled
    @type scheduled_for: str | None
    @param scheduled_until: Time the scheduled maintenance should end if
        the incident is scheduled
    @type scheduled_until: str | None
    @param kwargs: Extra key-value arguments; not used by this function
    @return: Response code dictionary to indicate if the incident was
        created or not
    @rtype: dict
    """
    try:
        system = SystemService().get(pk=system, state__name="Active")
        incident_type = IncidentTypeService().get(
            name=incident_type, state__name="Active")
        escalation_level = EscalationLevelService().get(
            pk=escalation_level, state__name="Active")
        # Validate before incident_type.name is dereferenced below.
        if system is None or incident_type is None \
                or escalation_level is None:
            return {"code": "800.400.002"}

        try:
            # A valid UUID string is treated as the id of the state.
            state = StateService().get(pk=uuid.UUID(state))
        except ValueError:
            # Otherwise it is a state name; scheduled incidents always
            # start in the 'Scheduled' state.
            state_name = state if incident_type.name == 'Realtime' \
                else 'Scheduled'
            state = StateService().get(name=state_name)

        if incident_type.name == "Realtime" and event_type is not None:
            # The two states must be OR-ed: positional Q arguments to
            # exclude() are AND-ed, which would exclude nothing here.
            incident = IncidentService().filter(
                event_type__name=event_type, system=system).exclude(
                Q(state__name='Resolved') | Q(state__name='Completed')
            ).order_by('-date_created').first()
            if incident and int(priority_level) < 5:
                priority_level = incident.priority_level + 1
                return IncidentAdministrator().update_incident(
                    incident_id=incident.id,
                    escalation_level=escalation_level.name,
                    name=incident.name,
                    state=incident.state.id,
                    priority_level=str(priority_level),
                    description="Priority level of %s incident changed to %s"
                    % (incident.name, priority_level))

        if incident_type.name == 'Scheduled':
            scheduled_for = dateutil.parser.parse(scheduled_for)
            scheduled_until = dateutil.parser.parse(scheduled_until)

        # `state` is already a resolved State object at this point, so it
        # is passed on as-is instead of being looked up again by name.
        incident = IncidentService().create(
            name=name,
            description=description,
            state=state,
            system=system,
            incident_type=incident_type,
            scheduled_for=scheduled_for,
            scheduled_until=scheduled_until,
            event_type=EventTypeService().get(name=event_type),
            priority_level=int(priority_level))
        incident_log = IncidentLogService().create(
            description=description,
            incident=incident,
            priority_level=priority_level,
            state=state,
            escalation_level=escalation_level)

        if incident is not None and incident_log is not None:
            if escalated_events:
                # Same state for every link, so resolve it once.
                active_state = StateService().get(name="Active")
                for event in escalated_events:
                    incident_event = IncidentEventService().create(
                        event=event, incident=incident, state=active_state)
                    if not incident_event:
                        lgr.error("Error creating incident-events")

            email_system_recipients = SystemRecipientService().filter(
                escalation_level=escalation_level,
                system=incident.system,
                state__name='Active',
                notification_type__name='Email').values('recipient__id')
            sms_system_recipients = SystemRecipientService().filter(
                escalation_level=escalation_level,
                system=incident.system,
                state__name='Active',
                notification_type__name='Sms').values('recipient__id')

            sms_notification = NotificationLogger().send_notification(
                message=incident.description,
                message_type="Sms",
                system_id=incident.system.id,
                recipients=[
                    str(recipient["phone_number"])
                    for recipient in User.objects.filter(
                        id__in=sms_system_recipients,
                        is_active=True).values("phone_number")
                ])
            # values('email') yields dictionaries keyed by 'email'.
            email_notification = NotificationLogger().send_notification(
                message=incident.description,
                message_type="Email",
                system_id=incident.system.id,
                recipients=[
                    str(recipient['email'])
                    for recipient in User.objects.filter(
                        id__in=email_system_recipients,
                        is_active=True).values('email')
                ])
            if sms_notification.get('code') != '800.200.001' \
                    or email_notification.get('code') != '800.200.001':
                # Not inside an exception handler, so there is no
                # traceback to attach: log a plain error.
                lgr.error("Notification sending failed")
            return {'code': '800.200.001'}
    except Exception as ex:
        lgr.exception("Incident Logger exception %s" % ex)
    # Reached when the incident or its log could not be created, or an
    # error was logged above.
    return {"code": "800.400.001"}
def update_rule(rule_id, name=None, description=None, nth_event=None,
                escalation_level=None, duration=None, event_type=None,
                **kwargs):
    """
    Updates an active escalation rule. Every argument left as None keeps
    the value currently stored on the rule.

    @param rule_id: The id of the rule to be updated
    @type rule_id: str
    @param name: New name of the escalation rule
    @type name: str | None
    @param description: Details on the escalation rule
    @type description: str | None
    @param nth_event: Number of events of a certain type that need to be
        logged to raise an escalation
    @type nth_event: str | None
    @param escalation_level: Primary key of the level at which an
        escalation is configured with a set of recipients
    @type escalation_level: str | None
    @param duration: Time period, in seconds, within which certain events
        must occur to trigger an escalation.
    @type duration: int | None
    @param event_type: Primary key of the event type to be applied for an
        escalation with the rule.
    @type event_type: str | None
    @param kwargs: Extra key-value arguments; not used by this function
    @return: Response code dictionary; on success it also carries the
        updated rule under the 'data' key
    @rtype: dict
    """
    try:
        escalation_rule = EscalationRuleService().filter(
            pk=rule_id, state__name='Active').first()
        if escalation_rule is None:
            return {"code": "800.400.002"}

        if name is None:
            name = escalation_rule.name
        if description is None:
            description = escalation_rule.description
        nth_event = int(nth_event) if nth_event is not None \
            else escalation_rule.nth_event
        duration = timedelta(seconds=duration) if duration is not None \
            else escalation_rule.duration

        if escalation_level is not None:
            escalation_level = EscalationLevelService().filter(
                pk=escalation_level, state__name='Active').first()
            if escalation_level is None:
                # An unknown or inactive level must not silently clear the
                # rule's escalation level.
                return {"code": "800.400.002"}
        else:
            escalation_level = escalation_rule.escalation_level

        if event_type is not None:
            event_type = EventTypeService().filter(
                pk=event_type, state__name='Active').first()
            if event_type is None:
                # Same for an unknown or inactive event type.
                return {"code": "800.400.002"}
        else:
            event_type = escalation_rule.event_type

        updated_escalation_rule = EscalationRuleService().update(
            pk=escalation_rule.id,
            name=name,
            description=description,
            nth_event=nth_event,
            duration=duration,
            state=escalation_rule.state,
            escalation_level=escalation_level,
            event_type=event_type)
        if updated_escalation_rule is not None:
            # Re-read the rule as a plain dict so it can be returned as data.
            rule = EscalationRuleService().filter(
                pk=escalation_rule.id).values(
                'id', 'name', 'description', 'duration', 'date_created',
                'date_modified', 'nth_event',
                system_id=F('system'),
                escalation_level_name=F('escalation_level__name'),
                state_name=F('state__name'),
                event_type_name=F('event_type__name')).first()
            # Expose the duration as a number of seconds, not a timedelta.
            rule.update(
                duration=timedelta.total_seconds(rule.get('duration')))
            return {'code': '800.200.001', 'data': rule}
    except Exception as ex:
        lgr.exception("Escalation Rule Update exception %s" % ex)
    # Reached when the update failed or an error was logged above.
    return {"code": "800.400.001"}
def log_event(event_type, system, interface=None, method=None, response=None,
              request=None, code=None, description=None, stack_trace=None,
              **kwargs):
    """
    Logs an event reported by an external system or by a health check, then
    hands the stored event to EventLog.escalate_event.

    @param event_type: Name of the type of the event to be logged
    @type event_type: str
    @param system: Primary key of the system where the event occurred
    @type system: str
    @param interface: Name of the interface in the system where the event
        occurred
    @type interface: str | None
    @param method: Specific method within an interface where the event
        occurred
    @type method: str | None
    @param response: Response body, if any, of the reported event occurrence
    @type response: str | None
    @param request: Request body, if any, of the reported event occurrence
    @type request: str | None
    @param code: Response code of the event
    @type code: str | None
    @param description: Detailed information on the event occurrence
    @type description: str | None
    @param stack_trace: Stack trace from the event occurrence
    @type stack_trace: str | None
    @param kwargs: Extra key=>value arguments; not used by this function
    @return: Response code in a dictionary indicating if the event is
        created successfully or not; on success the stored event is
        included under the 'data' key
    @rtype: dict
    """
    try:
        system = SystemService().get(pk=system, state__name="Active")
        event_type = EventTypeService().get(
            name=event_type, state__name="Active")
        if None is system or None is event_type:
            return {"code": "800.400.002"}

        active_state = StateService().get(name="Active")
        source_interface = InterfaceService().get(
            name=interface, state__name="Active", system=system)
        event = EventService().create(
            event_type=event_type,
            system=system,
            method=method,
            response=response,
            request=request,
            code=code,
            description=description,
            state=active_state,
            interface=source_interface,
            stack_trace=stack_trace)
        if event is not None:
            escalation = EventLog.escalate_event(event)
            escalated = escalation.get('code') == '800.200.001'
            if not escalated:
                # A failed escalation is logged; the event is still returned.
                lgr.error('%s event escalation Failed' % event_type)
            reported_fields = (
                'id', 'event_type', 'state__id', 'system__id', 'method',
                'response', 'request', 'code', 'description',
                'interface__id', 'stack_trace')
            created_event = EventService().filter(
                id=event.id).values(*reported_fields).first()
            return {'code': '800.200.001', 'data': created_event}
    except Exception as ex:
        lgr.exception('Event processor exception %s' % ex)
    # Reached when the event could not be stored or an error was logged.
    return {'code': '800.400.001'}