Beispiel #1
0
    def _resolve_incident(self, incident):
        """Send a 'resolve' event to PagerDuty for ``incident``.

        An incident whose ``reported`` flag is falsy is only counted under
        ``resolved_local``; no HTTP request is made for it.

        :param incident: dict read for the keys ``reported``,
            ``incident_key``, ``description`` and ``details`` (an iterable
            of objects exposing ``summary()``).
        :returns: ``True`` for an un-reported incident, otherwise the
            success flag handed back by ``do_request``.
        """
        if not incident['reported']:
            skip_note = "no need to resolve an un-reported incident: {0}"
            self._info(skip_note.format(incident['incident_key']))
            self._summary['resolved_local'] += 1
            return True

        def _send_resolve_event(count):
            ## ``count`` comes from do_request -- presumably the zero-based
            ## attempt index (it is logged as count + 1); confirm there.
            alert_summaries = [
                entry.summary() for entry in incident['details']
            ]
            payload = {
                'service_key': self._service_api_key,
                'incident_key': incident['incident_key'],
                'event_type': 'resolve',
                'description': 'RESOLVED! ' + incident['description'],
                'details': alert_summaries,
            }
            body = json.dumps(payload)
            reply = http_post(
                'https://events.pagerduty.com/generic/2010-04-15/create_event.json',
                body=body,
                **self._http_base_params())

            log_line = "resolved incident: {0} on try #{2} resp: {1}"
            self._info(
                log_line.format(incident['incident_key'], reply, count + 1))
            self._summary['resolved_api'] += 1

        ok, failures = do_request(_send_resolve_event, self._retry_total,
                                  self._retry_interval)

        ## surface every collected failure, numbered from 1
        for attempt_no, err in enumerate(failures, 1):
            self._warn("try #{1}: failed to resolve incident: {0}".format(
                err, attempt_no))

        return ok
Beispiel #2
0
    def _report_incident(self, incident, only_if_not_acknowledged):
        """Send a 'trigger' event to PagerDuty for ``incident``.

        Nothing is sent (and ``None`` is returned) when any of these holds:

        * ``self._reportable(incident)`` is falsy;
        * ``only_if_not_acknowledged`` is truthy and the status lookup
          returns a non-empty ``incidents`` list for this incident key
          (the first entry is merged into ``incident``);
        * ``self._incident_app_reported(incident)`` returns a truthy value.

        A failure of the status lookup is only logged; reporting continues.

        :param incident: dict read for ``incident_key``, ``description``,
            ``details``, ``app`` and ``time_occurred``; it may be updated
            in place (``reported``, ``time_created``, looked-up fields).
        :param only_if_not_acknowledged: when truthy, look up the incident's
            acknowledged status before triggering.
        :returns: ``None`` on the skip paths above, otherwise the success
            flag handed back by ``do_request``.
        """
        if not self._reportable(incident):
            self._summary['grace_period_applied'] += 1
            return

        self._debug("reporting incident: {0}".format(incident))

        if only_if_not_acknowledged:
            ## look up the incident status so that incidents which were
            ## already acknowledged are not triggered again
            try:
                query = {
                    'fields': 'incident_number,status',
                    'incident_key': incident['incident_key'],
                    'status': 'acknowledged'
                }
                lookup_url = self._get_request_url + urllib.urlencode(query)
                found = http_get(lookup_url, **self._http_base_params())
                if found and found['incidents']:
                    acked = found['incidents'][0]
                    self._info(
                        "incident: {1} ALREADY ACKnowledged: {0}!!!".format(
                            acked, incident['incident_key']))

                    incident.update(acked)
                    self._summary['incidents_not_triggered_ins_processed'] += 1
                    return

            except Exception as err:
                self._warn("failed to grab incident status: {0}".format(err))

        reported_by_app = self._incident_app_reported(incident)
        if reported_by_app:
            incident['time_created'] = incident['time_occurred']
            skip_note = (
                "NOT reporting incident since app '{0}' already done so: {1}")
            self._info(skip_note.format(incident['app'], reported_by_app))
            self._summary['incidents_not_triggered_app_processed'] += 1
            return

        ## none of the skip conditions applied: fire the trigger event
        def _send_trigger_event(count):
            ## ``count`` comes from do_request -- presumably the zero-based
            ## attempt index (it is logged as count + 1); confirm there.
            alert_summaries = [
                entry.summary() for entry in incident['details']
            ]
            payload = {
                'service_key': self._service_api_key,
                'incident_key': incident['incident_key'],
                'event_type': 'trigger',
                'description': incident['description'],
                'details': alert_summaries,
            }
            body = json.dumps(payload)
            reply = http_post(
                'https://events.pagerduty.com/generic/2010-04-15/create_event.json',
                body=body,
                **self._http_base_params())

            log_line = "reported incident: {0} on try #{2} resp: {1}"
            self._info(
                log_line.format(incident['incident_key'], reply, count + 1))
            self._summary['incidents_triggered'] += 1

            ## flag the incident only after the post went through
            incident['reported'] = True

        ok, failures = do_request(_send_trigger_event, self._retry_total,
                                  self._retry_interval)

        ## surface every collected failure, numbered from 1
        for attempt_no, err in enumerate(failures, 1):
            self._warn("try #{1}: failed to report incident: {0}".format(
                err, attempt_no))

        return ok