Esempio n. 1
0
    def process_set_alarm_state_msg(self, metric_key, message):
        """Apply a set-alarm-state message to the cached alarm and the database.

        The alarm is located by the ``project_id`` / ``alarm_name`` pair carried
        in ``message``.  Its ``state_reason``, ``state_value`` and
        ``state_reason_data`` are overwritten in the in-memory alarm and then
        written through ``self.cass.put_metric_alarm``.

        metric_key:
            key of the metric the alarm belongs to; a ``MetricMonitor`` is
            created and cached in ``self.metrics`` when none exists yet.
        message:
            mapping read with ``.get()`` for ``project_id``, ``alarm_name``,
            ``state_reason``, ``state_value`` and ``state_reason_data``.

        Returns None.  When the alarm key or the alarm itself cannot be found
        the problem is logged and nothing is modified.
        """
        project_id = message.get("project_id")
        alarm_name = message.get("alarm_name")
        state_reason = message.get("state_reason")
        state_value = message.get("state_value")
        state_reason_data = message.get("state_reason_data")

        if metric_key not in self.metrics:
            self.metrics[metric_key] = MetricMonitor(metric_key, self.cass)

        metric = self.metrics[metric_key]

        alarm_key = self.cass.get_metric_alarm_key(project_id, alarm_name)
        if not alarm_key:
            # There is no key to report on this path, so identify the alarm by
            # name.  (Formatting the key here used to fail because it was
            # never assigned when the lookup came back empty.)
            storm.log("alarm key for alarm [%s] is not found." % alarm_name)
            return

        try:
            metricalarm = metric.alarms[alarm_key]
        except KeyError:
            storm.log("alarm key [%s] is found, but alarm is not found." % alarm_key)
            return

        metricalarm["state_reason"] = state_reason
        metricalarm["state_value"] = state_value
        metricalarm["state_reason_data"] = state_reason_data

        # write into database; state_reason_data is only persisted when set
        alarm_columns = {"state_reason": state_reason, "state_value": state_value}
        if state_reason_data:
            alarm_columns["state_reason_data"] = state_reason_data

        self.cass.put_metric_alarm(alarm_key, alarm_columns)
Esempio n. 2
0
    def put_metric_data(self, metric_key, timestamp, value, unit=None):
        """Fold one datapoint into the per-minute statistics and store them.

        ``timestamp`` is truncated to the minute to form the statistics index.
        Datapoints older than ``self.STATISTICS_TTL`` seconds are logged and
        skipped.  Otherwise the current statistics for that minute are read
        from ``self.df`` (falling back to the database on ``KeyError``),
        updated with ``value`` and written with ``self.cass.insert_stat``
        using the remaining time-to-live.

        metric_key:
            key used for the database fallback lookup.
        timestamp:
            datetime of the datapoint.
        value:
            the datapoint; converted with ``utils.to_default_unit``.
        unit:
            unit of ``value``, passed to ``utils.to_default_unit``.

        Returns None.
        """
        nan = float("nan")

        def get_stats(tmp_stat):
            """Build a column -> value dict from the database rows.

            The first value of every row is paired with ``self.COLUMNS``;
            ``None`` values are replaced by NaN.  When a row is empty
            (``IndexError``) a dict of NaN statistics is returned instead.
            """
            try:
                first_values = [list(x.values())[0] for x in tmp_stat]
            except IndexError:
                storm.log("index %s is not in DB." % time_idx)
                return {
                    "SampleCount": nan,
                    "Sum": nan,
                    "Average": nan,
                    "Minimum": nan,
                    "Maximum": nan,
                }
            # The previous loop iterated the dict's keys and only rebound a
            # local, so None values were never actually replaced.
            return dict(
                (column, nan if v is None else v)
                for column, v in zip(self.COLUMNS, first_values)
            )

        time_idx = timestamp.replace(second=0, microsecond=0)
        time_diff = utils.utcnow() - time_idx

        if timedelta(seconds=self.STATISTICS_TTL) < time_diff:
            msg = "index %s is older than TTL. It doesn't need to insert DB"
            storm.log(msg % time_idx)
            return

        if time_idx not in self.df.index:
            self._reindex()

        value = utils.to_default_unit(value, unit)

        try:
            # A loop that rebound a local for every None in the row used to
            # follow this lookup; it had no effect and was removed.  Missing
            # values are handled by the isnull() checks below.
            # NOTE(review): isnull is presumably pandas.isnull, which treats
            # None as missing -- confirm against the file's imports.
            stat = self.df.ix[time_idx]
        except KeyError:
            stat = self.cass.get_metric_statistics_for_key(metric_key, time_idx)
            stat = get_stats(stat)

        stat["SampleCount"] = 1.0 if isnull(stat["SampleCount"]) else stat["SampleCount"] + 1.0
        stat["Sum"] = value if isnull(stat["Sum"]) else stat["Sum"] + value
        stat["Average"] = stat["Sum"] / stat["SampleCount"]
        stat["Minimum"] = value if (isnull(stat["Minimum"]) or stat["Minimum"] > value) else stat["Minimum"]
        stat["Maximum"] = value if (isnull(stat["Maximum"]) or stat["Maximum"] < value) else stat["Maximum"]

        # insert into DB
        stat_dict = {
            "SampleCount": {time_idx: stat["SampleCount"]},
            "Sum": {time_idx: stat["Sum"]},
            "Average": {time_idx: stat["Average"]},
            "Minimum": {time_idx: stat["Minimum"]},
            "Maximum": {time_idx: stat["Maximum"]},
        }

        # the row expires when the datapoint's minute reaches STATISTICS_TTL
        ttl = self.STATISTICS_TTL - time_diff.total_seconds()
        self.cass.insert_stat(self.metric_key, stat_dict, ttl)
        storm.log("metric data inserted %s" % (self.metric_key))
Esempio n. 3
0
 def emit(self, record):
     try:
         msg = self.format(record)
         storm.log(msg)
     except (KeyboardInterrupt, SystemExit):
         raise
     except:
         self.handleError(record)
Esempio n. 4
0
 def emit(self, record):
     try:
         msg = self.format(record)
         storm.log(msg)
     except (KeyboardInterrupt, SystemExit):
         raise
     except:
         self.handleError(record)
Esempio n. 5
0
 def get_stats(tmp_stat):
     """Build a column -> value dict from the database rows in *tmp_stat*.

     The first value of every row is paired with ``self.COLUMNS``; ``None``
     values are replaced by NaN.  When a row is empty (``IndexError``) the
     miss is logged and a dict of NaN statistics is returned instead.

     NOTE(review): ``self`` and ``time_idx`` are not parameters; they must be
     supplied by an enclosing scope that is not visible here.
     """
     nan = float("nan")
     try:
         first_values = [list(x.values())[0] for x in tmp_stat]
     except IndexError:
         storm.log("index %s is not in DB." % time_idx)
         return {
             "SampleCount": nan,
             "Sum": nan,
             "Average": nan,
             "Minimum": nan,
             "Maximum": nan,
         }
     # The previous loop iterated the dict's keys and only rebound a local,
     # so None values were never actually replaced.
     return dict(
         (column, nan if v is None else v)
         for column, v in zip(self.COLUMNS, first_values)
     )
Esempio n. 6
0
    def do_alarm_action(self, alarmkey, alarm, new_state, old_state, query_date):
        """
        Emit an alarm-action tuple describing a state transition.

        The emitted tuple is ``[str(alarmkey), <json message>]`` where the
        message holds the new state value, a subject line, a body and the
        query date.

        parameter example:

        alarmkey: f459c0e0-f927-481f-9158-deb8abe102a2
        alarm: mapping of alarm columns; only 'alarm_name' is read here, e.g.
               OrderedDict([('actions_enabled', False),
                            ('alarm_name', u'TEST_\uc54c\ub78c_02'),
                            ('comparison_operator', u'LessThanThreshold'),
                            ('evaluation_periods', 2),
                            ('period', 300),
                            ('state_value', 'OK'),
                            ('statistic', u'Average'),
                            ('threshold', 2.0),
                            ('unit', u'Percent'),
                            ...])
        new_state: {'stateReason': u'Threshold Crossed: 3 datapoints were not less than the threshold(2.000000). The most recent datapoints: [75.0, 80.0, 67.625].',
                    'stateValue': 'OK',
                    'stateReasonData': {'startDate': '2012-08-25T11:37:00.000000', 'period': 300, 'threshold': 2.0, 'version': '1.0', 'statistic': u'Average', 'recentDatapoints': [75.0, 80.0, 67.625], 'queryDate': '2012-08-25T11:39:49.657449'}}
        old_state: {'stateReason': u'Insufficient Data: 1 datapoints were unknown.',
                    'stateReasonData': {...},
                    'stateValue': 'INSUFFICIENT_DATA'}
        query_date: the query time, included in the subject, body and message
        """
        current_value = new_state["stateValue"]

        subject = "%s state has been changed from %s to %s at %s" % (
            alarm["alarm_name"],
            old_state["stateValue"],
            current_value,
            query_date,
        )
        body = "%s at %s" % (new_state["stateReason"], query_date)

        msg = {
            "state": current_value,
            "subject": subject,
            "body": body,
            "query_date": query_date,
        }

        storm.log("emit to Alarm Action: %s %s" % (alarmkey, msg))
        payload = json.dumps(msg)
        storm.emit([str(alarmkey), payload])
Esempio n. 7
0
    def delete_metric_alarm(self, alarmkey):
        """
        Remove an alarm from the in-memory cache and from the database.

        alarmkey:
            key of the alarm to drop; should be a UUID

        When the key is not cached, the miss is logged and nothing else
        happens.  Otherwise the alarm is deleted from the database, a history
        entry is recorded through ``alarm_history_delete`` and the left
        offset is recomputed from the remaining alarms.
        """
        try:
            removed = self.alarms.pop(alarmkey)
        except KeyError:
            storm.log("alarmkey %s doesn't exist" % alarmkey)
        else:
            self.cass.delete_metric_alarm(alarmkey)
            self.alarm_history_delete(alarmkey, removed)

            deleted_msg = "delete alarm %s for metric %s" % (str(alarmkey), self.metric_key)
            storm.log(deleted_msg)

            self.update_left_offset(self.alarms)
Esempio n. 8
0
    def process(self, tup):
        """Route an incoming message to the stream keyed by its metric.

        ``tup.values[0]`` is a JSON document carrying at least ``message_id``
        and ``message_uuid``.  Depending on ``message_id`` the metric key is
        resolved and ``[metric_key, <json message>]`` is emitted:

        * PUT_METRIC_DATA_MSG_ID: key from ``self.get_metric_key(message)``.
        * PUT_METRIC_ALARM_MSG_ID: key taken from the message itself.
        * DELETE_ALARMS_MSG_ID: one emit per entry of ``alarmkeys``, with the
          entry stored under ``alarmkey`` in the emitted message.
        * SET_ALARM_STATE_MSG_ID: key looked up through the alarm named in
          the message; nothing is emitted when no alarm key is found.

        Messages with any other id are only logged.
        """
        message_buf = tup.values[0]
        message = json.loads(message_buf)

        message_id = message['message_id']
        message_uuid = message['message_uuid']
        self.log("start processing msg[%s:%s]" % (message_id, message_uuid))

        if message_id == PUT_METRIC_DATA_MSG_ID:
            metric_key = str(self.get_metric_key(message))
            storm.emit([metric_key, message_buf])
        elif message_id == PUT_METRIC_ALARM_MSG_ID:
            metric_key = message.get('metric_key')
            storm.emit([metric_key, message_buf])
        elif message_id == DELETE_ALARMS_MSG_ID:
            # Not used below; the lookup is kept so that a message lacking
            # 'project_id' fails exactly as it did before.
            project_id = message['project_id']
            alarmkeys = message['alarmkeys']
            for alarmkey in alarmkeys:
                try:
                    alarmkey_uuid = UUID(alarmkey)
                    metric_key = self.get_alarm_metric_key(alarmkey_uuid)
                    # Test before converting: str(None) is the truthy string
                    # "None", which made this check always pass.
                    if metric_key:
                        message['alarmkey'] = alarmkey
                        storm.emit([str(metric_key), json.dumps(message)])
                except Exception:
                    storm.log("Alarm %s does not exists" % alarmkey)
                    # format_exc() takes an optional frame limit, not the
                    # exception; it formats the exception being handled.
                    storm.log(traceback.format_exc())
        elif message_id == SET_ALARM_STATE_MSG_ID:
            project_id = message['project_id']
            alarm_name = message['alarm_name']
            alarm_key = self.cass.get_metric_alarm_key(project_id,
                                                       alarm_name)
            if alarm_key:
                alarm = self.cass.get_metric_alarm(alarm_key)
                metric_key = str(alarm['metric_key'])
                storm.emit([metric_key, json.dumps(message)])
Esempio n. 9
0
    def alarm_history_state_update(self, alarmkey, alarm, notification_message):
        """Record an 'Action' history entry for a notification that was sent.

        alarmkey:
            alarm key as a string; converted with ``UUID(alarmkey)``.
        alarm:
            alarm mapping; 'project_id' and 'alarm_name' are read.
        notification_message:
            mapping that must provide 'subject' and 'method' for the summary
            and is stored as JSON in the history row, e.g.::

                {
                    'method': "email",
                    'receivers': email_receivers,
                    'subject': message['subject'],
                    'body': message['body']
                }
        """
        owner_project = alarm['project_id']
        summary_text = "Message '%(subject)s' is sent via %(method)s" % notification_message
        created_at = utils.utcnow()
        entry_key = uuid4()

        entry = {
            'project_id': owner_project,
            'alarm_key': UUID(alarmkey),
            'alarm_name': alarm['alarm_name'],
            'history_data': json.dumps(notification_message),
            'history_item_type': 'Action',
            'history_summary': summary_text,
            'timestamp': created_at,
        }

        self.cass.insert_alarm_history(entry_key, entry)
        storm.log("alarm history \n %s" % summary_text)
Esempio n. 10
0
    def alarm_history_state_update(self, alarmkey, alarm, new_state, old_state):
        """Record a 'StateUpdate' history entry for an alarm state transition.

        alarmkey:
            key of the alarm, stored as-is in the history row.
        alarm:
            alarm mapping; 'project_id' and 'alarm_name' are read.
        new_state / old_state:
            state mappings; their 'stateValue' (defaulting to
            'INSUFFICIENT_DATA') forms the summary, and both are stored as
            JSON together with a version marker.
        """
        owner_project = alarm["project_id"]

        previous_value = old_state.get("stateValue", "INSUFFICIENT_DATA")
        current_value = new_state.get("stateValue", "INSUFFICIENT_DATA")
        summary = "Alarm updated from %s to %s" % (previous_value, current_value)

        created_at = utils.utcnow()
        payload = {"newState": new_state, "oldState": old_state, "version": "1.0"}
        entry_key = uuid.uuid4()

        entry = {
            "project_id": owner_project,
            "alarm_key": alarmkey,
            "alarm_name": alarm["alarm_name"],
            "history_data": json.dumps(payload),
            "history_item_type": "StateUpdate",
            "history_summary": summary,
            "timestamp": created_at,
        }

        self.cass.insert_alarm_history(entry_key, entry)
        storm.log("alarm history \n %s" % summary)
Esempio n. 11
0
 def put_alarm(self, project_id, metricalarm):
     """Load an alarm from the database into the in-memory alarm cache.

     project_id:
         project owning the alarm.
     metricalarm:
         mapping providing 'alarm_name'; only the name is read here, the
         alarm contents are fetched from the database.

     The alarm key is resolved from the project and alarm name.  When both
     the key and the alarm are found the alarm is cached under its key and
     the left offset is recomputed; otherwise the miss is logged.
     """
     alarm_name = metricalarm.get("alarm_name")
     alarm_key = self.cass.get_metric_alarm_key(project_id, alarm_name)
     if not alarm_key:
         # The key is empty on this path, so logging it said nothing about
         # which alarm was missing; report the lookup inputs instead.
         storm.log("no alarm key for alarm [%s] in project [%s]" % (alarm_name, project_id))
         return

     ret = self.cass.get_metric_alarm(alarm_key)
     if not ret:
         storm.log("alarm key [%s] is found, but alarm is not found." % alarm_key)
         return

     self.alarms[alarm_key] = ret
     storm.log("alarm key is [%s]" % alarm_key)
     self.update_left_offset(self.alarms)
Esempio n. 12
0
 def log(self, msg):
     """Write *msg* to the Storm log, prefixed with the bolt name and pid."""
     prefix = "[%s:%d]" % (self.BOLT_NAME, self.pid)
     storm.log("%s %s" % (prefix, msg))
Esempio n. 13
0
    def _check_alarm(self, alarmkey, alarm, query_time=None):
        """Evaluate one alarm against recent statistics and record transitions.

        alarmkey:
            key of the alarm being evaluated.
        alarm:
            alarm mapping; 'period' (seconds), 'evaluation_periods',
            'statistic', 'threshold', 'comparison_operator', 'unit' and
            'state_value' are required, 'reason' and 'reason_data' optional.
        query_time:
            evaluation time; defaults to ``utils.utcnow()``.

        The most recent minute with data (looking back at most three minutes)
        ends the evaluation window.  The alarm's statistic is aggregated per
        period over ``evaluation_periods`` periods and compared with the
        threshold:

        * fewer datapoints than ``evaluation_periods`` -> INSUFFICIENT_DATA
        * every datapoint satisfies the comparison     -> ALARM
        * otherwise                                    -> OK

        Only when the resulting state differs from the alarm's current
        ``state_value`` is the new state stored (in memory and in the
        database), written to the alarm history and emitted as an alarm
        action.  Returns None.
        """
        period = int(alarm["period"] / 60)
        evaluation_periods = alarm["evaluation_periods"]
        statistic = alarm["statistic"]
        threshold = alarm["threshold"]
        com_op = alarm["comparison_operator"]
        cmp_op = self.CMP_MAP[com_op]
        unit = alarm["unit"]
        state_value = alarm["state_value"]
        time_difference_buffer_min = 3

        query_time = query_time if query_time else utils.utcnow()

        # Step back minute by minute until a minute with data is found; the
        # last candidate is used when none of them has data.
        for i in range(time_difference_buffer_min):
            end_idx = query_time.replace(second=0, microsecond=0) - (i + 1) * datetools.Minute()
            if not isnull(self.df[statistic].ix[end_idx]):
                break

        start_idx = end_idx - (period * evaluation_periods) * datetools.Minute()
        # one extra period of history feeds the first rolling window
        start_ana_idx = start_idx - datetools.Minute() * period

        func = self.ROLLING_FUNC_MAP[statistic]
        window = self.df[statistic].ix[start_ana_idx:end_idx]
        rolled = func(window, period, min_periods=0)
        # keep one value per period and drop the sample at start_idx itself
        data = rolled.ix[start_idx:end_idx:period][1:]
        recent_datapoints = list(data)

        # Compare by value: ``is not`` tests object identity, which is true
        # for any string that is not the very same object as the literal, so
        # SampleCount data was unit-converted as well.
        if unit and statistic != "SampleCount":
            data = data / utils.UNIT_CONV_MAP[unit]
            threshold = threshold / utils.UNIT_CONV_MAP[unit]

        data = data.dropna()

        query_date = utils.strtime(query_time)
        reason_data = {
            "period": alarm["period"],
            "queryDate": query_date,
            "recentDatapoints": recent_datapoints,
            "startDate": utils.strtime(start_idx),
            "statistic": statistic,
            "threshold": threshold,
            "version": "1.0",
        }
        old_state = {
            "stateReason": alarm.get("reason", ""),
            "stateValue": alarm.get("state_value", "INSUFFICIENT_DATA"),
            "stateReasonData": json.loads(alarm.get("reason_data", "{}")),
        }
        json_reason_data = json.dumps(reason_data)

        def change_state(new_value, reason):
            """Store, record and announce the transition to *new_value*."""
            new_state = {"stateReason": reason, "stateReasonData": reason_data, "stateValue": new_value}
            self.update_alarm_state(alarmkey, new_value, reason, json_reason_data, query_time)
            self.cass.update_alarm_state(alarmkey, new_value, reason, json_reason_data, query_time)
            self.alarm_history_state_update(alarmkey, alarm, new_state, old_state)
            self.do_alarm_action(alarmkey, alarm, new_state, old_state, query_date)
            storm.log("%s alarm" % new_value)

        if len(data) < evaluation_periods:
            if state_value != "INSUFFICIENT_DATA":
                template = _("Insufficient Data: %d datapoints were unknown.")
                reason = template % (evaluation_periods - len(data))
                change_state("INSUFFICIENT_DATA", reason)
            return

        # the alarm fires only when every datapoint satisfies the comparison
        crossed = reduce(operator.and_, cmp_op(data, threshold))

        if crossed:
            template = _(
                "Threshold Crossed: %d datapoints were %s "
                + "the threshold(%f). "
                + "The most recent datapoints: %s."
            )
            reason = template % (len(data), self.CMP_STR_MAP[com_op], threshold, recent_datapoints)
            if state_value != "ALARM":
                change_state("ALARM", reason)
        else:
            template = _(
                "Threshold Crossed: %d datapoints were not %s "
                + "the threshold(%f). "
                + "The most recent datapoints: %s."
            )
            reason = template % (len(data), self.CMP_STR_MAP[com_op], threshold, recent_datapoints)
            if state_value != "OK":
                change_state("OK", reason)
Esempio n. 14
0
 def load_alarms(self):
     """Fetch this metric's alarms from the database as a dict, log and return them."""
     loaded = dict(self.cass.load_alarms(self.metric_key))
     summary = "load_alarms %s for metric %s" % (str(loaded), self.metric_key)
     storm.log(summary)
     return loaded