Beispiel #1
0
 def test_filter(self):
     """
     Blend three EventType rows and check that an unfiltered
     EventTypeService.filter() call returns three results.
     """
     expected_count = 3
     mixer.cycle(expected_count).blend('base.EventType')
     found = EventTypeService().filter()
     assert len(found) == expected_count, 'Should return 3 EventType objects'
    def create_rule(name, description, system, event_type, nth_event,
                    escalation_level, duration, **kwargs):
        """
        Register a new escalation rule against an active system.

        @param name: Name given to the new escalation rule
        @type name: str
        @param description: Details on the escalation rule
        @type description: str
        @param system: Primary key of the system the rule applies to (looked up in the Active state)
        @type system: str
        @param event_type: Primary key of the event type the rule covers (looked up in the Active state)
        @type event_type: str
        @param nth_event: Number of events of the given type that must be logged to raise an escalation;
            converted with int()
        @type nth_event: str
        @param escalation_level: Primary key of the escalation level (looked up in the Active state)
        @type escalation_level: str
        @param duration: Length, in seconds, of the period within which the events must occur
        @type duration: int
        @param kwargs: Extra key-value arguments; not read by this method
        @return: {'code': '800.200.001', 'data': rule} on success, {'code': '800.400.002'} when the system,
            escalation level or event type cannot be resolved, {'code': '800.400.001'} otherwise
        @rtype: dict
        """
        try:
            target_system = SystemService().get(pk=system, state__name="Active")
            level = EscalationLevelService().get(
                pk=escalation_level, state__name="Active")
            covered_type = EventTypeService().get(
                pk=event_type, state__name='Active')
            if any(found is None
                   for found in (target_system, level, covered_type)):
                return {"code": "800.400.002"}

            rule_fields = dict(
                name=name,
                description=description,
                system=target_system,
                nth_event=int(nth_event),
                duration=timedelta(seconds=duration),
                state=StateService().get(name='Active'),
                escalation_level=level,
                event_type=covered_type,
            )
            created = EscalationRuleService().create(**rule_fields)
            if created is None:
                # Same outcome as falling through to the generic failure below.
                return {"code": "800.400.001"}

            plain_columns = ('id', 'name', 'description', 'duration',
                             'date_created', 'date_modified', 'nth_event')
            rule = EscalationRuleService().filter(
                pk=created.id, system=target_system).values(
                    *plain_columns,
                    system_id=F('system'),
                    escalation_level_name=F('escalation_level__name'),
                    state_name=F('state__name'),
                    event_type_name=F('event_type__name')).first()
            # Replace the timedelta with its length in seconds in the returned row.
            rule.update(
                duration=timedelta.total_seconds(rule.get('duration')))
            return {'code': '800.200.001', 'data': rule}
        except Exception as ex:
            lgr.exception("Escalation Rule Creation exception %s" % ex)
        return {"code": "800.400.001"}
Beispiel #3
0
 def test_update(self):
     """
     Test EventType update service
     """
     event_type = mixer.blend('base.EventType')
     event_type = EventTypeService().update(event_type.id, description='response2')
     # The failure message previously talked about creation although this test exercises update().
     assert event_type is not None, 'Should update an EventType object'
     assert event_type.description == 'response2', 'EventType description has been updated to response2'
Beispiel #4
0
 def test_create(self):
     """
     Create an EventType through EventTypeService.create() and check that
     the object comes back carrying the requested description.
     """
     blended_state = mixer.blend('base.State')
     service = EventTypeService()
     created = service.create(state=blended_state, description='Incident1')
     assert created is not None, 'Should create an EventType Object'
     assert created.description == 'Incident1', ' EventType description is equals to Incident1'
Beispiel #5
0
 def test_get(self):
     """
     Blend an EventType attached to a fresh State and check that
     EventTypeService.get() finds it when queried by that state's id.
     """
     blended_state = mixer.blend('base.State')
     mixer.blend('base.EventType', state=blended_state)
     fetched = EventTypeService().get(state=blended_state.id)
     assert fetched is not None, 'Should get a created EventType object'
    def get_look_up_data():
        """
        Collects the id/name (id/username for users) pairs used to populate look up lists.
        @return: a dictionary containing a success code and, under 'data', a dictionary of lists keyed by
            'states', 'incident_types', 'escalation_levels', 'notification_types', 'endpoint_types',
            'event_types', 'users', 'systems', 'realtime_incident_states', 'endpoint_states' and
            'scheduled_incident_states'; on failure a dictionary with an error code and a message
        @rtype:dict
        """
        try:
            state = list(StateService().filter().values('id', 'name'))
            notification_type = list(NotificationTypeService().filter().values(
                'id', 'name'))
            escalation_level = list(EscalationLevelService().filter().values(
                'id', 'name'))
            event_type = list(EventTypeService().filter().values('id', 'name'))
            endpoint_type = list(EndpointTypeService().filter().values(
                'id', 'name'))
            incident_type = list(IncidentTypeService().filter().values(
                'id', 'name'))
            user = list(User.objects.all().values('id', 'username'))
            system = list(SystemService().filter().values('id', 'name'))
            # The three lists below are subsets of the states, selected by name.
            endpoint_states = list(StateService().filter(
                Q(name='Operational') | Q(name='Minor Outage')
                | Q(name='Major Outage') | Q(name='Under Maintenance')
                | Q(name='Degraded Performance')).values('id', 'name'))
            realtime_incident_states = list(StateService().filter(
                Q(name='Investigating') | Q(name='Identified')
                | Q(name='Monitoring') | Q(name='Resolved')).values(
                    'id', 'name'))
            scheduled_incident_states = list(StateService().filter(
                Q(name='Scheduled') | Q(name='InProgress')
                | Q(name='Completed')).values('id', 'name'))
            lookups = {
                'states': state,
                'incident_types': incident_type,
                'escalation_levels': escalation_level,
                'notification_types': notification_type,
                'endpoint_types': endpoint_type,
                'event_types': event_type,
                'users': user,
                'systems': system,
                'realtime_incident_states': realtime_incident_states,
                'endpoint_states': endpoint_states,
                'scheduled_incident_states': scheduled_incident_states
            }

            return {"code": "800.200.001", "data": lookups}

        except Exception as ex:
            lgr.exception("Look up interface Exception:  %s" % ex)
            # Build the error response inside the handler: Python 3 unbinds `ex`
            # as soon as the except clause ends, so using it afterwards raises.
            return {
                "code": "800.400.001",
                "message": "Error while fetching data %s" % str(ex)
            }
Beispiel #7
0
 def perform_health_check():
     """
     Requests every queried endpoint of the active systems, records the outcome through
     SystemMonitorService and logs an event when the endpoint is slow or does not answer with 200.

     An endpoint whose request raises a requests exception is logged and skipped; the remaining
     endpoints are still checked.

     @return: a dictionary containing a success code and, under data['systems'], a list of dictionaries
         with the 'system', 'status' and 'endpoint' of every endpoint that was checked; a dictionary
         with an error code and message if the check fails as a whole
     @rtype:dict
     """
     systems = []
     try:
         for endpoint in EndpointService().filter(
                 system__state__name="Active",
                 endpoint_type__is_queried=True):
             try:
                 health_state = requests.get(endpoint.url)
                 # 'state' always holds a state *name*; it is resolved to a State object once,
                 # when the monitor entry is created. 'response_time' is recorded for every
                 # response so a non-200 answer no longer breaks the timedelta conversion below.
                 monitor_data = {
                     'system': endpoint.system.name,
                     'endpoint': endpoint.name,
                     'response_body': health_state.content,
                     'response_code': health_state.status_code,
                     'response_time': health_state.elapsed.total_seconds(),
                     'state': 'Operational',
                 }
                 if health_state.status_code == 200:
                     if health_state.elapsed > endpoint.optimal_response_time:
                         monitor_data.update({
                             "response_time_speed": 'Slow',
                             "event_type":
                             EventTypeService().get(name='Warning'),
                             "description":
                             'Response time is not within the expected time',
                             "state": 'Degraded Performance',
                         })
                     else:
                         monitor_data.update({'response_time_speed': 'Normal'})
                 else:
                     monitor_data.update({
                         "response_time_speed": None,
                         "event_type":
                         EventTypeService().get(name='Critical'),
                         "description": 'The system is not accessible',
                         "state": 'Major Outage',
                     })
                 system_status = SystemMonitorService().create(
                     system=SystemService().get(
                         name=monitor_data.get('system')),
                     response_time=timedelta(
                         seconds=int(monitor_data.get('response_time'))),
                     response_time_speed=monitor_data.get(
                         "response_time_speed"),
                     state=StateService().get(
                         name=monitor_data.get('state')),
                     response_body=monitor_data.get("response_body"),
                     endpoint=EndpointService().get(
                         name=monitor_data.get("endpoint")),
                     response_code=monitor_data.get("response_code"))
                 if system_status is not None:
                     systems.append({
                         "system": system_status.system.name,
                         "status": system_status.state.name,
                         "endpoint": endpoint.url
                     })
                 else:
                     # system_status is None here, so report from the endpoint itself.
                     systems.append({
                         "system": endpoint.system.name,
                         "status": "failed",
                         "endpoint": endpoint.url
                     })
                 if monitor_data.get("event_type") is not None:
                     # NOTE(review): the system *name* is passed here; confirm that
                     # EventLog.log_event resolves systems by name rather than by primary key.
                     event = EventLog.log_event(
                         event_type=monitor_data.get("event_type").name,
                         system=monitor_data.get("system"),
                         description=monitor_data.get("description"),
                         response=monitor_data.get('response_body'),
                         request=health_state.request)
                     if event['code'] != "800.200.001":
                         lgr.warning("Event creation failed %s" % event)
             except requests.RequestException as e:
                 # Covers ConnectionError as well as the other request failures,
                 # so one bad endpoint does not abort the whole health check.
                 lgr.exception('Endpoint health check failed:  %s' % e)
         return {"code": "800.200.001", "data": {"systems": systems}}
     except Exception as ex:
         lgr.exception("Health Status exception:  %s" % ex)
     return {
         "code": "800.400.001",
         "message": "Error while performing health check"
     }
Beispiel #8
0
    def log_incident(incident_type,
                     system,
                     escalation_level,
                     name,
                     description,
                     priority_level,
                     event_type=None,
                     state="Investigating",
                     escalated_events=None,
                     scheduled_for=None,
                     scheduled_until=None,
                     **kwargs):
        """
        Creates a realtime incident based on escalated events or a scheduled incident based on user reports,
        then notifies the Sms and Email recipients configured for the system and escalation level.

        For a 'Realtime' incident type with an event type, an existing incident for the same event type and
        system that is neither Resolved nor Completed is updated with a raised priority level (when the
        supplied priority level is below 5) instead of creating a new incident.

        @param incident_type: Name of the type of the incident to be created
        @type incident_type: str
        @param system: Primary key of the system which the incident will be associated with
        @type system: str
        @param escalation_level: Primary key of the level at which an escalation is configured with a set
            of recipients
        @type escalation_level: str
        @param name: Title of the incident
        @type name: str
        @param description: Details on the incident
        @type description: str
        @param priority_level: The level of importance to be assigned to the incident.
        @type priority_level: str
        @param event_type: Name of the type of the event(s) that triggered creation of the incident, if it
            is event driven.
        @type event_type: str | None
        @param state: Initial resolution state of the incident, as a state id (UUID string) or a state name.
            When it is not a UUID and the incident type is not 'Realtime', the 'Scheduled' state is used.
            Defaults to Investigating if left blank
        @type state: str
        @param escalated_events: One or more events in the escalation if the incident is event driven.
        @type escalated_events: list | None
        @param scheduled_for: Time the scheduled maintenance should begin if the incident is scheduled
        @type scheduled_for: str | None
        @param scheduled_until: Time the scheduled maintenance should end if the incident is scheduled
        @type scheduled_until: str | None
        @param kwargs: Extra key-value arguments to pass for incident logging
        @return: Response code dictionary to indicate if the incident was created or not; the result of
            IncidentAdministrator.update_incident when an existing incident is updated instead
        @rtype: dict
        """
        try:
            system = SystemService().get(pk=system, state__name="Active")
            incident_type = IncidentTypeService().get(name=incident_type,
                                                      state__name="Active")
            escalation_level = EscalationLevelService().get(
                pk=escalation_level, state__name="Active")
            # Validate before incident_type.name is read below.
            if system is None or incident_type is None or escalation_level is None:
                return {"code": "800.400.002"}
            try:
                state = StateService().get(pk=uuid.UUID(state))
            except ValueError:
                # Not a UUID: treat the value as a state name.
                state_name = state if incident_type.name == 'Realtime' else 'Scheduled'
                state = StateService().get(name=state_name)
            if incident_type.name == "Realtime" and event_type is not None:
                # The two conditions must be OR-ed: exclude(Q, Q) would AND them and only drop
                # incidents that are Resolved and Completed at once, i.e. none.
                incident = IncidentService().filter(
                    event_type__name=event_type, system=system).exclude(
                        Q(state__name='Resolved')
                        | Q(state__name='Completed')).order_by(
                            '-date_created').first()
                if incident and int(priority_level) < 5:
                    priority_level = incident.priority_level + 1
                    return IncidentAdministrator().update_incident(
                        incident_id=incident.id,
                        escalation_level=escalation_level.name,
                        name=incident.name,
                        state=incident.state.id,
                        priority_level=str(priority_level),
                        description=
                        "Priority level of %s incident changed to %s" %
                        (incident.name, priority_level))
            if incident_type.name == 'Scheduled':
                scheduled_for = dateutil.parser.parse(scheduled_for)
                scheduled_until = dateutil.parser.parse(scheduled_until)
            # `state` is already a resolved State object at this point, so it is passed as is.
            incident = IncidentService().create(
                name=name,
                description=description,
                state=state,
                system=system,
                incident_type=incident_type,
                scheduled_for=scheduled_for,
                scheduled_until=scheduled_until,
                event_type=EventTypeService().get(name=event_type),
                priority_level=int(priority_level))
            incident_log = IncidentLogService().create(
                description=description,
                incident=incident,
                priority_level=priority_level,
                state=state,
                escalation_level=escalation_level)
            if incident is not None and incident_log is not None:
                if escalated_events:
                    for event in escalated_events:
                        incident_event = IncidentEventService().create(
                            event=event,
                            incident=incident,
                            state=StateService().get(name="Active"))
                        if not incident_event:
                            lgr.error("Error creating incident-events")
                email_system_recipients = SystemRecipientService().filter(
                    escalation_level=escalation_level,
                    system=incident.system,
                    state__name='Active',
                    notification_type__name='Email').values('recipient__id')
                sms_system_recipients = SystemRecipientService().filter(
                    escalation_level=escalation_level,
                    system=incident.system,
                    state__name='Active',
                    notification_type__name='Sms').values('recipient__id')
                sms_notification = NotificationLogger().send_notification(
                    message=incident.description,
                    message_type="Sms",
                    system_id=incident.system.id,
                    recipients=[
                        str(recipient["phone_number"])
                        for recipient in User.objects.filter(
                            id__in=sms_system_recipients,
                            is_active=True).values("phone_number")
                    ])
                email_notification = NotificationLogger().send_notification(
                    message=incident.description,
                    message_type="Email",
                    system_id=incident.system.id,
                    recipients=[
                        # values('email') yields rows keyed by 'email'
                        str(recipient['email']) for recipient in
                        User.objects.filter(id__in=email_system_recipients,
                                            is_active=True).values('email')
                    ])
                if sms_notification.get(
                        'code') != '800.200.001' or email_notification.get(
                            'code') != '800.200.001':
                    # No exception is being handled here, so log a plain error.
                    lgr.error("Notification sending failed")
                return {'code': '800.200.001'}
        except Exception as ex:
            lgr.exception("Incident Logger exception %s" % ex)
        return {"code": "800.400.001"}
    def update_rule(rule_id,
                    name=None,
                    description=None,
                    nth_event=None,
                    escalation_level=None,
                    duration=None,
                    event_type=None,
                    **kwargs):
        """
        Updates an active escalation rule. Every argument left as None keeps the rule's current value.
        @param rule_id: The id of the rule to be updated
        @type rule_id: str
        @param name: New name of the escalation rule
        @type name: str | None
        @param description: New details on the Escalation Rule
        @type description: str | None
        @param nth_event: Number of event of a certain type that need to be logged to raise an escalation
        @type nth_event: str | None
        @param escalation_level: Primary key of the active escalation level to apply to the rule
        @type escalation_level: str | None
        @param duration: Time period, in seconds, within which certain events must occur to trigger an
            escalation.
        @type duration: int | None
        @param event_type: Primary key of the active event type to be applied for an escalation with the rule.
        @type event_type: str | None
        @param kwargs: Extra key-value arguments; not read by this method
        @return: {'code': '800.200.001', 'data': rule} on success, {'code': '800.400.002'} when the rule, or
            a supplied escalation level or event type, cannot be resolved, {'code': '800.400.001'} otherwise
        @rtype: dict
        """
        try:
            escalation_rule = EscalationRuleService().filter(
                pk=rule_id, state__name='Active').first()
            if escalation_rule is None:
                return {"code": "800.400.002"}
            name = name if name is not None else escalation_rule.name
            description = description if description is not None else escalation_rule.description
            nth_event = int(
                nth_event
            ) if nth_event is not None else escalation_rule.nth_event
            duration = timedelta(
                seconds=duration
            ) if duration is not None else escalation_rule.duration
            if escalation_level is not None:
                escalation_level = EscalationLevelService().filter(
                    pk=escalation_level, state__name='Active').first()
                if escalation_level is None:
                    # A supplied but unknown level is rejected rather than saved as None.
                    return {"code": "800.400.002"}
            else:
                escalation_level = escalation_rule.escalation_level
            if event_type is not None:
                event_type = EventTypeService().filter(
                    pk=event_type, state__name='Active').first()
                if event_type is None:
                    # A supplied but unknown event type is rejected rather than saved as None.
                    return {"code": "800.400.002"}
            else:
                event_type = escalation_rule.event_type
            state = escalation_rule.state

            updated_escalation_rule = EscalationRuleService().update(
                pk=escalation_rule.id,
                name=name,
                description=description,
                nth_event=nth_event,
                duration=duration,
                state=state,
                escalation_level=escalation_level,
                event_type=event_type)
            if updated_escalation_rule is not None:
                rule = EscalationRuleService().filter(
                    pk=escalation_rule.id).values(
                        'id',
                        'name',
                        'description',
                        'duration',
                        'date_created',
                        'date_modified',
                        'nth_event',
                        system_id=F('system'),
                        escalation_level_name=F('escalation_level__name'),
                        state_name=F('state__name'),
                        event_type_name=F('event_type__name')).first()
                # Replace the timedelta with its length in seconds in the returned row.
                rule.update(
                    duration=timedelta.total_seconds(rule.get('duration')))
                return {'code': '800.200.001', 'data': rule}
        except Exception as ex:
            lgr.exception("Escalation Rule Update exception %s" % ex)
        return {"code": "800.400.001"}
Beispiel #10
0
    def log_event(event_type,
                  system,
                  interface=None,
                  method=None,
                  response=None,
                  request=None,
                  code=None,
                  description=None,
                  stack_trace=None,
                  **kwargs):
        """
        Stores an event reported for a system and hands it to EventLog.escalate_event.

        @param event_type: Name of the type of the event to be logged (looked up in the Active state)
        @type event_type: str
        @param system: Primary key of the system where the event occurred (looked up in the Active state)
        @type system: str
        @param interface: Name of the interface of the system where the event occurred
        @type interface: str | None
        @param method: Specific method within an interface where the event occurred
        @type method: str | None
        @param response: Response body, if any, of the reported event occurrence
        @type response: str | None
        @param request: Request body, if any, of the reported event occurrence
        @type request: str | None
        @param code: Response code of the event
        @type code: str | None
        @param description: Detailed information on the event occurrence
        @type description: str | None
        @param stack_trace: Stack trace from the event occurrence
        @type stack_trace: str | None
        @param kwargs: Extra key=>value arguments; not read by this method
        @return: {'code': '800.200.001', 'data': created_event} on success, {'code': '800.400.002'} when the
            system or event type cannot be resolved, {'code': '800.400.001'} otherwise
        @rtype: dict
        """
        try:
            source_system = SystemService().get(pk=system, state__name="Active")
            resolved_type = EventTypeService().get(name=event_type,
                                                   state__name="Active")
            if source_system is None or resolved_type is None:
                return {"code": "800.400.002"}

            active_state = StateService().get(name="Active")
            source_interface = InterfaceService().get(name=interface,
                                                      state__name="Active",
                                                      system=source_system)
            event = EventService().create(
                event_type=resolved_type,
                system=source_system,
                method=method,
                response=response,
                request=request,
                code=code,
                description=description,
                state=active_state,
                interface=source_interface,
                stack_trace=stack_trace)
            if event is None:
                # Same outcome as falling through to the generic failure below.
                return {'code': '800.400.001'}

            # A failed escalation is only logged; the event itself is still reported as created.
            escalation = EventLog.escalate_event(event)
            if escalation.get('code') != '800.200.001':
                lgr.error('%s event escalation Failed' % resolved_type)

            reported_columns = ('id', 'event_type', 'state__id', 'system__id',
                                'method', 'response', 'request', 'code',
                                'description', 'interface__id', 'stack_trace')
            created_event = EventService().filter(id=event.id).values(
                *reported_columns).first()
            return {'code': '800.200.001', 'data': created_event}
        except Exception as ex:
            lgr.exception('Event processor exception %s' % ex)
        return {'code': '800.400.001'}