class ActionBolt(storm.BasicBolt):
    """Storm bolt that turns alarm state-change tuples into notifications.

    For each tuple the bolt loads the alarm from Cassandra, selects the
    action list that belongs to the new state, pushes one notification
    message per enabled method (email / SMS) through a ZeroMQ PUSH socket
    and records an 'Action' item in the alarm history.
    """
    BOLT_NAME = "ActionBolt"
    # Feature switches and the server address are read from FLAGS once, when
    # the class body is evaluated.
    ENABLE_SEND_MAIL = FLAGS.get('enable_send_mail')
    ENABLE_SEND_SMS = FLAGS.get('enable_send_sms')
    NOTIFICATION_SERVER = FLAGS.get('notification_server_addr')

    # State value carried by the message -> alarm column holding the
    # JSON-encoded list of actions for that state.
    _STATE_ACTION_COLUMNS = {
        'OK': 'ok_actions',
        'INSUFFICIENT_DATA': 'insufficient_data_actions',
        'ALARM': 'alarm_actions',
    }

    def initialize(self, stormconf, context):
        """Open the Cassandra handle and connect the ZeroMQ PUSH socket."""
        self.cass = Cassandra()
        self.ctx = zmq.Context()
        self.sock = self.ctx.socket(zmq.PUSH)
        self.sock.connect(self.NOTIFICATION_SERVER)

    def log(self, msg):
        """Write `msg` to the Storm log, prefixed with the bolt name."""
        storm.log("[%s] %s" % (self.BOLT_NAME, msg))

    def tracelog(self, e):
        """Log the traceback of the exception currently being handled.

        `e` is accepted for backward compatibility but is not used:
        the only positional argument of `traceback.format_exc` is `limit`,
        so the exception object must not be passed to it.
        """
        msg = traceback.format_exc()
        for line in msg.splitlines():
            self.log("TRACE: " + line)

    def get_action_type(self, action):
        """Classify an action string.

        Returns "email" or "SMS"; returns None when the action is neither a
        valid email address nor a valid international phone number.
        """
        if validate_email(action):
            return "email"
        elif validate_international_phonenumber(action):
            return "SMS"

    def do_action(self, action, message):
        """Dispatch a single action to the sender matching its type."""
        # NOTE(review): send_email / send_sms are not defined in the visible
        # part of this class - confirm they exist before calling do_action.
        action_type = self.get_action_type(action)
        if action_type == "email":
            self.send_email(action, message)
        elif action_type == "SMS":
            self.send_sms(action, message)

    def alarm_history_state_update(self, alarmkey, alarm,
                                   notification_message):
        """Insert an 'Action' history item describing a sent notification.

        notification_message = {
            'method': "email",
            'receivers': email_receivers,
            'subject': message['subject'],
            'body': message['body']
        }
        """
        item_type = 'Action'
        project_id = alarm['project_id']
        history_summary = ("Message '%(subject)s' is sent via %(method)s" %
                           notification_message)
        timestamp = utils.utcnow()

        history_key = uuid4()
        column = {'project_id': project_id,
                  'alarm_key': UUID(alarmkey),
                  'alarm_name': alarm['alarm_name'],
                  'history_data': json.dumps(notification_message),
                  'history_item_type': item_type,
                  'history_summary': history_summary,
                  'timestamp': timestamp}

        self.cass.insert_alarm_history(history_key, column)
        storm.log("alarm history \n %s" % history_summary)

    def _notify(self, method, alarm_key, alarm, actions, message):
        """Send one notification for `method` and record it in the history.

        Nothing is sent (and no history row is written) when none of the
        actions is of type `method`; otherwise the history would claim that
        a message was sent although there was no receiver.
        """
        receivers = [action for action in actions
                     if self.get_action_type(action) == method]
        if not receivers:
            self.log("no %s receivers, notification skipped" % method)
            return

        notification_message = {
            'method': method,
            'receivers': receivers,
            'subject': message['subject'],
            'body': message['body']
        }
        self.sock.send_pyobj(notification_message)
        self.log("notify: %s " % notification_message)
        self.alarm_history_state_update(alarm_key, alarm,
                                        notification_message)

    def process_action(self, tup):
        """Handle one state-change tuple.

        tup.values[0] is the alarm key (UUID string) and tup.values[1] the
        JSON-encoded message, for example

        msg = {
            'state': new_state['stateValue'],
            'subject': "%s state has been changed from %s to %s" %
                (alarm['alarm_name'], old_state['stateValue'],
                 new_state['stateValue']),
            'body': new_state['stateReason']
        }

        Returns False when the alarm cannot be found or the state is not
        one of OK / INSUFFICIENT_DATA / ALARM; returns None otherwise.
        """
        alarm_key = tup.values[0]
        message_buf = tup.values[1]
        message = json.loads(message_buf)
        self.log("message received: %s " % message_buf)

        alarm = self.cass.get_metric_alarm(UUID(alarm_key))

        try:
            actions_enabled = alarm['actions_enabled']
        except TypeError:
            # Subscripting raised TypeError, i.e. the lookup did not return
            # a mapping for this key.
            msg = "alarm is not found [" + alarm_key + "]"
            self.log(msg)
            return False

        # An unexpected state used to leave `actions` unbound and crash on
        # the log statement below; reject it explicitly instead.
        column = self._STATE_ACTION_COLUMNS.get(message['state'])
        if column is None:
            self.log("unknown state [%s], no action taken" %
                     message['state'])
            return False
        actions = json.loads(alarm[column])

        self.log("actions enabled: %s actions: %s " % (actions_enabled,
                                                      actions))
        if actions_enabled and actions:
            if self.ENABLE_SEND_MAIL:
                self._notify("email", alarm_key, alarm, actions, message)
            if self.ENABLE_SEND_SMS:
                self._notify("SMS", alarm_key, alarm, actions, message)

    def process(self, tup):
        """Storm entry point: delegate to process_action."""
        self.process_action(tup)
class API(object):
    """Facade used by the service layer.

    Read operations go straight to Cassandra; state-changing operations are
    sent as RPC messages (delete alarms, set alarm state, put metric alarm,
    put metric data) or written to Cassandra directly.
    """
    # Statistic name -> rolling window function applied to the time series.
    ROLLING_FUNC_MAP = {
        'Average': rolling_mean,
        'Minimum': rolling_min,
        'Maximum': rolling_max,
        'SampleCount': rolling_sum,
        'Sum': rolling_sum,
    }

    def __init__(self):
        self.cass = Cassandra()
        self.rpc = rpc.RemoteProcedureCall()

    def delete_alarms(self, project_id, alarm_names):
        """Send a DELETE_ALARMS message for the named alarms.

        Raises ResourceNotFound as soon as one of the names has no alarm
        key; in that case no message is sent.
        """
        alarmkeys = []
        for alarm_name in alarm_names:
            k = self.cass.get_metric_alarm_key(project_id, alarm_name)
            if not k:
                raise ResourceNotFound("Alarm %s does not exists." %
                                       alarm_name)
            alarmkeys.append(str(k))

        body = {'project_id': project_id, 'alarmkeys': alarmkeys}  # UUID str
        self.rpc.send_msg(rpc.DELETE_ALARMS_MSG_ID, body)
        LOG.info("DELETE_ALARMS_MSG sent")

    def describe_alarms(self, project_id, action_prefix=None,
                        alarm_name_prefix=None, alarm_names=None,
                        max_records=None, next_token=None, state_value=None):
        """Return the alarms of a project that match the given filters.

        params:
            project_id: string
            action_prefix: TODO: not implemented yet.
            alarm_name_prefix: string
            alarm_names: string list
            max_records: integer
            next_token: string (uuid type)
            state_value: string (OK | ALARM | INSUFFICIENT_DATA)
        """
        alarms = self.cass.describe_alarms(project_id, action_prefix,
                                           alarm_name_prefix, alarm_names,
                                           max_records, next_token,
                                           state_value)
        return alarms

    def describe_alarms_for_metric(self, project_id, namespace, metric_name,
                                   dimensions=None, period=None,
                                   statistic=None, unit=None):
        """Return the alarms attached to one metric.

        params:
            project_id: string
            metric_name: string
            namespace: string
            dimensions: dict
            period: integer
            statistic: string (SampleCount | Average | Sum | Minimum |
                       Maximum)
            unit: string
        """
        alarms = self.cass.describe_alarms_for_metric(project_id, namespace,
                                                      metric_name,
                                                      dimensions=dimensions,
                                                      period=period,
                                                      statistic=statistic,
                                                      unit=unit)
        return alarms

    def describe_alarm_history(self, project_id, alarm_name=None,
                               end_date=None, history_item_type=None,
                               max_records=None, next_token=None,
                               start_date=None):
        """Return alarm history items matching the given filters."""
        histories = self.cass.describe_alarm_history(
            alarm_name=alarm_name, end_date=end_date,
            history_item_type=history_item_type,
            max_records=max_records, next_token=next_token,
            start_date=start_date, project_id=project_id
        )
        return histories

    def set_alarm_actions(self, project_id, alarm_names, enabled):
        """Enable or disable the actions of the named alarms.

        All names are resolved before anything is written, so an unknown
        name raises ResourceNotFound without modifying any alarm. (The key
        used to be passed to Cassandra unchecked, even when the lookup had
        found nothing.)
        """
        alarm_keys = []
        for alarm_name in alarm_names:
            alarm_key = self.cass.get_metric_alarm_key(project_id,
                                                       alarm_name)
            if not alarm_key:
                raise ResourceNotFound("Alarm %s does not exists." %
                                       alarm_name)
            alarm_keys.append(alarm_key)

        for alarm_key in alarm_keys:
            self.cass.put_metric_alarm(alarm_key,
                                       {'actions_enabled': enabled})

    def set_alarm_state(self, project_id, alarm_name, state_reason,
                        state_value, state_reason_data=None):
        """Send a SET_ALARM_STATE message for one alarm.

        Raises ResourceNotFound when the alarm name has no alarm key.
        """
        k = self.cass.get_metric_alarm_key(project_id, alarm_name)
        if not k:
            raise ResourceNotFound("Alarm %s does not exists." % alarm_name)

        body = {'project_id': project_id, 'alarm_name': alarm_name,
                'state_reason': state_reason, 'state_value': state_value,
                'state_reason_data': state_reason_data}
        self.rpc.send_msg(rpc.SET_ALARM_STATE_MSG_ID, body)
        LOG.info("SET_ALARM_STATE_MSG sent")

    def get_metric_statistics(self, project_id, end_time, metric_name,
                              namespace, period, start_time, statistics,
                              unit=None, dimensions=None):
        """Return the statistics of the metric matching the given conditions.

        The result is a sequence of (index, datapoint) pairs, one per
        minute of the requested range that has at least one non-NaN
        statistic. `period` is given in seconds.
        """
        def to_datapoint(df, idx):
            # Rows that are entirely NaN yield None and are filtered out
            # by the caller.
            datapoint = df.ix[idx].dropna()
            if len(datapoint):
                return idx, datapoint

        # Work on whole minutes; the analysis range starts one period
        # earlier than the requested range so the first rolling windows
        # have data to look back on.
        end_idx = end_time.replace(second=0, microsecond=0)
        start_idx = start_time.replace(second=0, microsecond=0)
        start_ana_idx = start_idx - datetools.Minute() * (period / 60)
        daterange = DateRange(start_idx, end_idx, offset=datetools.Minute())
        daterange_ana = DateRange(start_ana_idx, end_idx,
                                  offset=datetools.Minute())

        # load default unit for metric from database
        # NOTE(review): `unit` is resolved here but not used or returned
        # afterwards in this method.
        if unit == "None" or not unit:
            metric_key = self.cass.get_metric_key(
                project_id=project_id, namespace=namespace,
                metric_name=metric_name, dimensions=dimensions
            )

            if metric_key:
                unit = self.cass.get_metric_unit(metric_key)
            else:
                unit = "None"

        # load statistics data from database
        stats = self.cass.get_metric_statistics(
            project_id=project_id, namespace=namespace,
            metric_name=metric_name, start_time=start_ana_idx,
            end_time=end_time, period=period, statistics=statistics,
            dimensions=dimensions
        )

        period = period / 60  # convert sec to min
        stat = DataFrame(index=daterange)

        for statistic, series in zip(statistics, stats):
            func = self.ROLLING_FUNC_MAP[statistic]
            if statistic == 'SampleCount':
                # Missing sample counts are treated as zero samples.
                ts = TimeSeries(series, index=daterange_ana).fillna(0)
            else:
                ts = TimeSeries(series, index=daterange_ana)
            stat[statistic] = func(ts, period, min_periods=0)

        ret = filter(None, (to_datapoint(stat, i) for i in stat.index))
        return ret

    def list_metrics(self, project_id, next_token=None, dimensions=None,
                     metric_name=None, namespace=None):
        """Return the list of metrics matching the given conditions."""
        metrics = self.cass.list_metrics(project_id, namespace, metric_name,
                                         dimensions, next_token)
        return metrics

    def put_metric_alarm(self, project_id, metricalarm):
        """Store the alarm in the database and return an empty dictionary.

        The metric is looked up (and created when missing), the alarm is
        inserted or updated, a ConfigurationUpdate history item is written
        and a PUT_METRIC_ALARM message is sent.

        Raises InvalidRequest when an existing alarm would be moved to a
        different metric.
        """
        def metricalarm_for_json(metricalarm):
            # Build the camel-cased representation stored in history_data.
            alarm_for_json = {
                'actionEnabled': metricalarm.get('actions_enabled', False),
                'alarmActions': metricalarm.get('alarm_actions', []),
                'alarmArn': metricalarm.get('alarm_arn'),
                'alarmConfigurationUpdatedTimestamp':
                    metricalarm.get('alarm_configuration_updated_timestamp'),
                'alarmDescription': metricalarm.get('alarm_description'),
                'alarmName': metricalarm.get('alarm_name'),
                'comparisonOperator': metricalarm.get('comparison_operator'),
                'dimensions': metricalarm.get('dimensions'),
                'evaluationPeriods': metricalarm.get('evaluation_periods'),
                'insufficientDataActions':
                    metricalarm.get('insufficient_data_actions', []),
                'metricName': metricalarm.get('metric_name'),
                'namespace': metricalarm.get('namespace'),
                'okactions': metricalarm.get('ok_actions', []),
                'statistic': metricalarm.get('statistic'),
                'threshold': metricalarm.get('threshold'),
                'unit': metricalarm.get('unit'),
            }
            return alarm_for_json

        now = utils.utcnow()
        metricalarm = metricalarm.to_columns()

        # check whether the metric exists (create it otherwise)
        metric_key = self.cass.get_metric_key_or_create(
            project_id=project_id,
            namespace=metricalarm['namespace'],
            metric_name=metricalarm['metric_name'],
            dimensions=json.loads(metricalarm['dimensions']),
            unit=metricalarm['unit'],
        )

        metricalarm['project_id'] = project_id
        metricalarm['metric_key'] = metric_key
        metricalarm['alarm_arn'] = "arn:spcs:synaps:%s:alarm:%s" % (
            project_id, metricalarm['alarm_name']
        )
        metricalarm['alarm_configuration_updated_timestamp'] = now

        # check whether the alarm exists
        alarm_key = self.cass.get_metric_alarm_key(
            project_id=project_id, alarm_name=metricalarm['alarm_name']
        )

        if alarm_key:
            history_type = 'Update'
            before_alarm = self.cass.get_metric_alarm(alarm_key)
            if before_alarm['metric_key'] != metricalarm['metric_key']:
                raise InvalidRequest("Metric cannot be changed.")

            # An update keeps the current state of the alarm.
            metricalarm['state_updated_timestamp'] = (
                before_alarm['state_updated_timestamp'])
            metricalarm['state_reason'] = before_alarm['state_reason']
            metricalarm['state_reason_data'] = (
                before_alarm['state_reason_data'])
            metricalarm['state_value'] = before_alarm['state_value']
        else:
            history_type = "Create"
            alarm_key = uuid.uuid4()
            metricalarm['state_updated_timestamp'] = utils.utcnow()
            metricalarm['state_reason'] = "Unchecked: Initial alarm creation"
            metricalarm['state_reason_data'] = json.dumps({})
            metricalarm['state_value'] = "INSUFFICIENT_DATA"

        # insert alarm into database
        self.cass.put_metric_alarm(alarm_key, metricalarm)
        LOG.debug("metric alarm inserted alarm key: %s" % (alarm_key))

        # to make json, convert datetime type into str
        metricalarm['state_updated_timestamp'] = utils.strtime(
            metricalarm['state_updated_timestamp']
        )
        metricalarm['alarm_configuration_updated_timestamp'] = utils.strtime(
            metricalarm['alarm_configuration_updated_timestamp']
        )
        metricalarm['metric_key'] = str(metric_key)

        if history_type == "Update":
            history_data = json.dumps({
                'updatedAlarm': metricalarm_for_json(metricalarm),
                'type': history_type,
                'version': '1.0'
            })
            summary = "Alarm %s updated" % metricalarm['alarm_name']
        else:
            history_data = json.dumps({
                'createdAlarm': metricalarm_for_json(metricalarm),
                'type': history_type,
                'version': '1.0'
            })
            summary = "Alarm %s created" % metricalarm['alarm_name']

        history_key = uuid.uuid4()
        history_column = {
            'project_id': project_id,
            'alarm_key': alarm_key,
            'alarm_name': metricalarm['alarm_name'],
            'history_data': history_data,
            'history_item_type': 'ConfigurationUpdate',
            'history_summary': summary,
            'timestamp': utils.utcnow()
        }

        self.cass.insert_alarm_history(history_key, history_column)

        message = {'project_id': project_id,
                   'metric_key': str(metric_key),
                   'metricalarm': metricalarm}
        self.rpc.send_msg(rpc.PUT_METRIC_ALARM_MSG_ID, message)
        LOG.info("PUT_METRIC_ALARM_MSG sent")

        return {}

    def put_metric_data(self, project_id, namespace, metric_name, dimensions,
                        value, unit, timestamp, is_admin=False):
        """Put the metric data on the message queue; return an empty dict.

        Raises AdminRequired when a non-admin caller writes into a
        namespace starting with "SPCS/".
        """
        if namespace.startswith("SPCS/") and not is_admin:
            raise AdminRequired()

        message = {'project_id': project_id, 'namespace': namespace,
                   'metric_name': metric_name, 'dimensions': dimensions,
                   'value': value, 'unit': unit, 'timestamp': timestamp}
        self.rpc.send_msg(rpc.PUT_METRIC_DATA_MSG_ID, message)
        LOG.info("PUT_METRIC_DATA_MSG sent")

        return {}
class PutMetricBolt(storm.BasicBolt):
    """Storm bolt that applies metric and alarm messages.

    Every message is dispatched on its 'message_id'. Metrics are kept in
    memory as MetricMonitor objects (self.metrics, keyed by metric key) and
    written through to Cassandra.
    """
    BOLT_NAME = "PutMetricBolt"

    def initialize(self, stormconf, context):
        """Create the Cassandra handle, metric cache and memcache client."""
        self.pid = os.getpid()
        self.cass = Cassandra()
        self.metrics = {}
        self.mc = memcache.Client(FLAGS.memcached_servers, debug=0)

    def process_put_metric_data_msg(self, metric_key, message):
        """
        Put metric data into both memory and database
        """
        # Load statistics data in memory
        if metric_key not in self.metrics:
            max_retries = 3
            # The else branch below returns once i + 1 reaches max_retries,
            # so at most max_retries attempts are made.
            for i in range(max_retries + 1):
                try:
                    self.metrics[metric_key] = MetricMonitor(metric_key,
                                                             self.cass)
                    break
                except ResourceNotFound:
                    if i + 1 < max_retries:
                        LOG.warn("Metric %s is not in the database. "
                                 "retry... %d", metric_key, i + 1)
                        time.sleep(1)
                    else:
                        LOG.error("Metric %s is not in the database.",
                                  metric_key)
                        return

        timestamp = utils.parse_strtime(message['timestamp'])

        self.metrics[metric_key].put_metric_data(metric_key,
                                                 timestamp=timestamp,
                                                 value=message['value'],
                                                 unit=message['unit'])

    def process_put_metric_alarm_msg(self, metric_key, message):
        """Create or update an alarm in the database and in memory.

        An existing alarm keeps its state columns and project id; a new
        alarm starts in INSUFFICIENT_DATA. Either way a ConfigurationUpdate
        history item is written.
        """
        def get_alarm_key(project_id, alarm_name):
            key = self.cass.get_metric_alarm_key(project_id, alarm_name)
            return key

        def metricalarm_for_json(metricalarm):
            # Build the camel-cased representation stored in history_data.
            cut = metricalarm.get('alarm_configuration_updated_timestamp')

            alarm_for_json = {
                'actionEnabled': metricalarm.get('actions_enabled', False),
                'alarmActions': metricalarm.get('alarm_actions', []),
                'alarmArn': metricalarm.get('alarm_arn'),
                'alarmConfigurationUpdatedTimestamp': utils.strtime(cut),
                'alarmDescription': metricalarm.get('alarm_description'),
                'alarmName': metricalarm.get('alarm_name'),
                'comparisonOperator': metricalarm.get('comparison_operator'),
                'dimensions': metricalarm.get('dimensions'),
                'evaluationPeriods': metricalarm.get('evaluation_periods'),
                'insufficientDataActions':
                    metricalarm.get('insufficient_data_actions', []),
                'metricName': metricalarm.get('metric_name'),
                'namespace': metricalarm.get('namespace'),
                'okactions': metricalarm.get('ok_actions', []),
                'statistic': metricalarm.get('statistic'),
                'threshold': metricalarm.get('threshold'),
                'unit': metricalarm.get('unit'),
            }
            return alarm_for_json

        if metric_key not in self.metrics:
            self.metrics[metric_key] = MetricMonitor(metric_key, self.cass)
        project_id = message['project_id']
        metricalarm = message['metricalarm']

        # build metricalarm column, alarmhistory column
        alarm_key = get_alarm_key(project_id, metricalarm['alarm_name'])
        history_type = 'Update' if alarm_key else 'Create'
        now = utils.utcnow()
        if history_type == 'Update':
            original_alarm = self.cass.get_metric_alarm(alarm_key)
            # Carry the current state of the stored alarm over.
            for dict_key in ['state_updated_timestamp', 'state_reason',
                             'state_reason_data', 'state_value',
                             'project_id']:
                metricalarm[dict_key] = original_alarm[dict_key]
            metricalarm['alarm_configuration_updated_timestamp'] = now
            history_data = json.dumps({
                'updatedAlarm': metricalarm_for_json(metricalarm),
                'type': history_type,
                'version': '1.0'
            })
            summary = "Alarm %s updated" % metricalarm['alarm_name']
        else:
            alarm_key = uuid.uuid4()
            state_reason = "Unchecked: Initial alarm creation"
            metricalarm.update({'state_updated_timestamp': now,
                                'alarm_configuration_updated_timestamp': now,
                                'state_reason': state_reason,
                                'state_reason_data': json.dumps({}),
                                'state_value': "INSUFFICIENT_DATA",
                                'project_id': project_id})
            history_data = json.dumps({
                'createdAlarm': metricalarm_for_json(metricalarm),
                'type': history_type,
                'version': '1.0'
            })
            summary = "Alarm %s created" % metricalarm['alarm_name']

        # Set only after history_data has been serialized above.
        metricalarm['metric_key'] = metric_key

        history_key = uuid.uuid4()
        history_column = {
            'project_id': project_id,
            'alarm_key': alarm_key,
            'alarm_name': metricalarm['alarm_name'],
            'history_data': history_data,
            'history_item_type': 'ConfigurationUpdate',
            'history_summary': summary,
            'timestamp': utils.utcnow()
        }

        self.cass.put_metric_alarm(alarm_key, metricalarm)
        self.cass.insert_alarm_history(history_key, history_column)
        LOG.info("metric alarm inserted: %s %s", alarm_key, metricalarm)

        # load metric in memory
        self.metrics[metric_key].put_alarm(alarm_key, metricalarm)

    def process_delete_metric_alarms_msg(self, metric_key, message):
        """Delete the alarm named by message['alarmkey'] from its metric."""
        alarmkey = UUID(message['alarmkey'])
        LOG.debug("Metric keys %s", self.metrics.keys())
        if metric_key not in self.metrics:
            self.metrics[metric_key] = MetricMonitor(metric_key, self.cass)
        self.metrics[metric_key].delete_metric_alarm(alarmkey)

    def process_set_alarm_state_msg(self, metric_key, message):
        """Overwrite the state of an alarm in memory and in the database.

        The message is dropped with a warning when the alarm name has no
        alarm key or the key is not among the alarms of the metric.
        """
        project_id = message.get('project_id')
        alarm_name = message.get('alarm_name')
        state_reason_data = message.get('state_reason_data')

        if metric_key not in self.metrics:
            self.metrics[metric_key] = MetricMonitor(metric_key, self.cass)
        metric = self.metrics[metric_key]

        alarm_key = self.cass.get_metric_alarm_key(project_id, alarm_name)
        if not alarm_key:
            # Log the alarm name: there is no key to report here. (The
            # previous code referenced a variable that was only bound in
            # the other branch and crashed instead of warning.)
            LOG.warn("alarm key is not found for alarm [%s].", alarm_name)
            return

        try:
            metricalarm = metric.alarms[alarm_key]
        except KeyError:
            LOG.warn("alarm key [%s] is found, but alarm is not found.",
                     alarm_key)
            return

        metricalarm['state_reason'] = message.get('state_reason')
        metricalarm['state_value'] = message.get('state_value')
        metricalarm['state_reason_data'] = message.get('state_reason_data')

        # write into database
        alarm_columns = {'state_reason': message.get('state_reason'),
                         'state_value': message.get('state_value')}
        if state_reason_data:
            alarm_columns['state_reason_data'] = state_reason_data

        alarm_columns['project_id'] = project_id

        self.cass.put_metric_alarm(alarm_key, alarm_columns)

    def process_check_metric_alarms_msg(self, message):
        """Evaluate alarms of live metrics and drop stale metrics.

        Alarms are only checked when the message carries a truthy
        'ready_to_evaluate'; stale metrics are removed in any case.
        """
        query_time = datetime.utcnow()
        stale_metrics = []

        ready_to_evaluate = message.get('ready_to_evaluate')

        for key, metric in self.metrics.iteritems():
            is_stale = metric.is_stale()
            if is_stale:
                # Collected first; the dict must not change size while it
                # is being iterated.
                stale_metrics.append(key)
            if (not is_stale) and ready_to_evaluate:
                metric.check_alarms(query_time)

        for key in stale_metrics:
            try:
                metric = self.metrics.pop(key)
                metric.delete()
                LOG.audit("Stale metric(%s) is deleted", str(key))
            except KeyError:
                LOG.error("KeyError occurred when delete stale metric(%s)",
                          str(key))

    def process(self, tup):
        """Storm entry point: decode the tuple and dispatch on message_id.

        tup.values[0] is the metric key (UUID string, may be empty) and
        tup.values[1] the JSON-encoded message.
        """
        message = json.loads(tup.values[1])
        message_id = message['message_id']
        message_uuid = message.get('message_uuid', None)
        LOG.info("start processing msg[%s:%s]", message_id, message_uuid)

        try:
            metric_key = UUID(tup.values[0]) if tup.values[0] else None
        except ValueError:
            LOG.error("badly formed hexadecimal UUID string - %s",
                      tup.values[0])
            return

        if message_id == PUT_METRIC_DATA_MSG_ID:
            # message deduplicate
            # NOTE(review): a put-metric-data message without a
            # message_uuid is neither processed nor logged here - confirm
            # that the sender always sets it.
            if message_uuid:
                mckey = "%s_message_uuid" % message_uuid
                if not self.mc.get(mckey):
                    # 300 seconds TTL
                    self.mc.set(mckey, 1, 300)
                    LOG.info("process put_metric_data_msg (%s)", message)
                    self.process_put_metric_data_msg(metric_key, message)
                else:
                    LOG.info("Message duplicated. %s", message_uuid)

        elif message_id == PUT_METRIC_ALARM_MSG_ID:
            LOG.info("process put_metric_alarm_msg (%s)", message)
            self.process_put_metric_alarm_msg(metric_key, message)

        elif message_id == DELETE_ALARMS_MSG_ID:
            LOG.info("process delete_alarms_msg (%s)", message)
            self.process_delete_metric_alarms_msg(metric_key, message)

        elif message_id == SET_ALARM_STATE_MSG_ID:
            LOG.info("process set_alarm_state_msg (%s)", message)
            self.process_set_alarm_state_msg(metric_key, message)

        elif message_id == CHECK_METRIC_ALARM_MSG_ID:
            LOG.info("process check_metric_alarm_msg (%s)", message)
            self.process_check_metric_alarms_msg(message)

        else:
            LOG.error("unknown message")
class API(object):
    """Facade used by the service layer.

    Read operations go straight to Cassandra; most state-changing
    operations are sent as RPC messages that carry the request context.
    """
    # Statistic name -> rolling window function applied to the time series.
    ROLLING_FUNC_MAP = {
        'Average': rolling_mean,
        'Minimum': rolling_min,
        'Maximum': rolling_max,
        'SampleCount': rolling_sum,
        'Sum': rolling_sum,
    }

    def __init__(self):
        self.cass = Cassandra()
        self.rpc = rpc.RemoteProcedureCall()

    def delete_alarms(self, context, project_id, alarm_names):
        """Send a DELETE_ALARMS message for the named alarms.

        Raises ResourceNotFound as soon as one of the names has no alarm
        key; in that case no message is sent.
        """
        alarmkeys = []
        for alarm_name in alarm_names:
            k = self.cass.get_metric_alarm_key(project_id, alarm_name)
            if not k:
                raise ResourceNotFound("Alarm %s does not exists." %
                                       alarm_name)
            alarmkeys.append(str(k))

        body = {'project_id': project_id, 'alarmkeys': alarmkeys,
                'context': context.to_dict()}  # UUID str
        self.rpc.send_msg(rpc.DELETE_ALARMS_MSG_ID, body)
        LOG.info("DELETE_ALARMS_MSG sent")

    def describe_alarms(self, project_id, action_prefix=None,
                        alarm_name_prefix=None, alarm_names=None,
                        max_records=None, next_token=None, state_value=None):
        """Return the alarms of a project that match the given filters.

        params:
            project_id: string
            action_prefix: TODO: not implemented yet.
            alarm_name_prefix: string
            alarm_names: string list
            max_records: integer
            next_token: string (uuid type)
            state_value: string (OK | ALARM | INSUFFICIENT_DATA)
        """
        alarms = self.cass.describe_alarms(project_id, action_prefix,
                                           alarm_name_prefix, alarm_names,
                                           max_records, next_token,
                                           state_value)
        return alarms

    def describe_alarms_for_metric(self, project_id, namespace, metric_name,
                                   dimensions=None, period=None,
                                   statistic=None, unit=None):
        """Return the alarms attached to one metric.

        params:
            project_id: string
            metric_name: string
            namespace: string
            dimensions: dict
            period: integer
            statistic: string (SampleCount | Average | Sum | Minimum |
                       Maximum)
            unit: string
        """
        alarms = self.cass.describe_alarms_for_metric(project_id, namespace,
                                                      metric_name,
                                                      dimensions=dimensions,
                                                      period=period,
                                                      statistic=statistic,
                                                      unit=unit)
        return alarms

    def describe_alarm_history(self, project_id, alarm_name=None,
                               end_date=None, history_item_type=None,
                               max_records=None, next_token=None,
                               start_date=None):
        """Return alarm history items matching the given filters."""
        histories = self.cass.describe_alarm_history(
            alarm_name=alarm_name, end_date=end_date,
            history_item_type=history_item_type,
            max_records=max_records, next_token=next_token,
            start_date=start_date, project_id=project_id
        )
        return histories

    def set_alarm_actions(self, context, project_id, alarm_names, enabled):
        """Enable or disable the actions of the named alarms.

        All names are resolved before anything is written, so an unknown
        name raises InvalidParameterValue without modifying any alarm. For
        every alarm a ConfigurationUpdate history item is recorded.
        """
        # Resolve each key once and reuse it for the update; looking the
        # key up a second time could yield a different result than the one
        # that was validated.
        alarm_keys = []
        for alarm_name in alarm_names:
            alarm_key = self.cass.get_metric_alarm_key(project_id,
                                                       alarm_name)
            if not alarm_key:
                raise InvalidParameterValue("Alarm %s does not exist" %
                                            alarm_name)
            alarm_keys.append(alarm_key)

        for alarm_name, alarm_key in zip(alarm_names, alarm_keys):
            history_data = {'actions_enabled': enabled,
                            'project_id': project_id}
            self.cass.put_metric_alarm(alarm_key, history_data)

            if enabled:
                summary = "Alarm actions for %s are enabled" % alarm_name
            else:
                summary = "Alarm actions for %s are disabled" % alarm_name
            history_key = uuid.uuid4()
            history_column = {
                'project_id': project_id,
                'alarm_key': alarm_key,
                'alarm_name': alarm_name,
                'history_data': json.dumps(history_data),
                'history_item_type': 'ConfigurationUpdate',
                'history_summary': summary,
                'timestamp': utils.utcnow()
            }
            self.cass.insert_alarm_history(history_key, history_column)

    def set_alarm_state(self, context, project_id, alarm_name, state_reason,
                        state_value, state_reason_data=None):
        """Send a SET_ALARM_STATE message for one alarm.

        Raises ResourceNotFound when the alarm name has no alarm key.
        """
        k = self.cass.get_metric_alarm_key(project_id, alarm_name)
        if not k:
            raise ResourceNotFound("Alarm %s does not exists." % alarm_name)

        body = {'project_id': project_id, 'alarm_name': alarm_name,
                'state_reason': state_reason, 'state_value': state_value,
                'state_reason_data': state_reason_data,
                'context': context.to_dict()}
        self.rpc.send_msg(rpc.SET_ALARM_STATE_MSG_ID, body)
        LOG.info("SET_ALARM_STATE_MSG sent")

    def get_metric_statistics(self, project_id, end_time, metric_name,
                              namespace, period, start_time, statistics,
                              unit=None, dimensions=None):
        """Return the statistics of the metric matching the given conditions.

        Returns a pair (datapoints, unit). `datapoints` holds one
        (index, datapoint) pair per row that has at least one non-NaN
        statistic; `unit` is the requested unit or, when none was given,
        the unit stored for the metric ("None" if the metric is unknown).
        `period` is given in seconds.
        """
        def to_datapoint(df, idx):
            # Rows that are entirely NaN yield None and are filtered out
            # by the caller.
            datapoint = df.ix[idx].dropna()
            if len(datapoint):
                return idx, datapoint

        # Work on whole minutes; the analysis range starts one period
        # earlier than the requested range so the first rolling windows
        # have data to look back on.
        end_idx = end_time.replace(second=0, microsecond=0)
        start_idx = start_time.replace(second=0, microsecond=0)
        start_ana_idx = start_idx - datetools.Minute() * (period / 60)
        daterange = DateRange(start_idx, end_idx, offset=datetools.Minute())
        daterange_ana = DateRange(start_ana_idx, end_idx,
                                  offset=datetools.Minute())

        # load default unit for metric from database
        if unit == "None" or not unit:
            metric_key = self.cass.get_metric_key(
                project_id=project_id, namespace=namespace,
                metric_name=metric_name, dimensions=dimensions
            )

            if metric_key:
                unit = self.cass.get_metric_unit(metric_key)
            else:
                unit = "None"

        # load statistics data from database
        stats = self.cass.get_metric_statistics(
            project_id=project_id, namespace=namespace,
            metric_name=metric_name, start_time=start_ana_idx,
            end_time=end_time, period=period, statistics=statistics,
            dimensions=dimensions
        )

        period = period / 60  # convert sec to min
        stat = DataFrame(index=daterange)

        for statistic, series in zip(statistics, stats):
            func = self.ROLLING_FUNC_MAP[statistic]
            ts = TimeSeries(series, index=daterange_ana)
            rolled_ts = func(ts, period, min_periods=0)
            # Keep every `period`-th rolled value only.
            stat[statistic] = rolled_ts.ix[::period]
            LOG.debug("stat %s\n%s" % (statistic, stat[statistic]))

        ret = filter(None, (to_datapoint(stat, i) for i in stat.index))
        return ret, unit

    def list_metrics(self, project_id, next_token=None, dimensions=None,
                     metric_name=None, namespace=None):
        """
        List Metrics
        """
        metrics = self.cass.list_metrics(project_id, namespace, metric_name,
                                         dimensions, next_token)
        return metrics

    def put_metric_alarm(self, context, project_id, metricalarm):
        """
        Send put metric alarm message to Storm

        Before the message is sent, group notification actions are
        validated, the metric is looked up (and created when missing) and,
        for a new alarm, the per-project and per-metric quotas are checked.

        Raises InvalidNotificationGroup, InvalidRequest,
        ProjectAlarmQuotaExceeded or MetricAlarmQuotaExceeded accordingly.
        Returns an empty dictionary.
        """
        def _validate_actions(alarm):
            # Every group notification action must name an existing group.
            for actions in (alarm.ok_actions,
                            alarm.insufficient_data_actions,
                            alarm.alarm_actions):
                for action in actions:
                    if utils.validate_groupnotification_action(action):
                        group = utils.parse_groupnotification_action(action)
                        if not self.cass.get_notification_group(group):
                            raise InvalidNotificationGroup()

        now = utils.utcnow()
        _validate_actions(metricalarm)
        metricalarm = metricalarm.to_columns()
        alarm_name = metricalarm['alarm_name']
        namespace = metricalarm['namespace']
        metric_name = metricalarm['metric_name']
        dimensions = json.loads(metricalarm['dimensions'])

        # check if we have metric in database
        metric_key = self.cass.get_metric_key_or_create(
            project_id=project_id, namespace=namespace,
            metric_name=metric_name, dimensions=dimensions,
            unit=metricalarm['unit'])

        update_data = {
            'project_id': project_id,
            'metric_key': str(metric_key),
            'alarm_arn': "arn:spcs:synaps:%s:alarm:%s" % (project_id,
                                                          alarm_name),
            'alarm_configuration_updated_timestamp': utils.strtime(now)
        }
        metricalarm.update(update_data)

        # check if metric is changed
        alarm_key = self.cass.get_metric_alarm_key(project_id=project_id,
                                                   alarm_name=alarm_name)

        if alarm_key:
            original_alarm = self.cass.get_metric_alarm(alarm_key)
            # Compared as strings: the new key was stringified above.
            if (str(original_alarm['metric_key']) !=
                    str(metricalarm['metric_key'])):
                raise InvalidRequest("Metric cannot be changed. "
                                     "Delete alarm and retry.")
        else:
            # If alarm is newly added, check quotas
            # check alarm quota per project
            project_quota = FLAGS.get('alarm_quota_per_project')
            alarms_in_project = self.cass.get_alarm_count(project_id)
            if alarms_in_project >= project_quota:
                LOG.info("Too many alarms(%d) in the project %s",
                         alarms_in_project, project_id)
                raise ProjectAlarmQuotaExceeded()

            # check alarm quota per metric
            metric_quota = FLAGS.get('alarm_quota_per_metric')
            alarms_per_metric = self.cass.get_alarms_per_metric_count(
                project_id, namespace, metric_name, dimensions)
            if alarms_per_metric >= metric_quota:
                LOG.info("Too many alarms(%d) for this metric",
                         alarms_per_metric)
                raise MetricAlarmQuotaExceeded()

        message = {'project_id': project_id,
                   'metric_key': str(metric_key),
                   'metricalarm': metricalarm,
                   'context': context.to_dict()}
        self.rpc.send_msg(rpc.PUT_METRIC_ALARM_MSG_ID, message)
        LOG.info("PUT_METRIC_ALARM_MSG sent")

        return {}

    def put_metric_data(self, context, project_id, namespace, metric_name,
                        dimensions, value, unit, timestamp=None,
                        is_admin=False):
        """Put the metric data on the message queue; return an empty dict.

        When no timestamp is given the current UTC time is used. Raises
        AdminRequired when a non-admin caller writes into a namespace that
        starts with the configured 'admin_namespace'.
        """
        admin_namespace = FLAGS.get('admin_namespace')
        if namespace.startswith(admin_namespace) and not is_admin:
            raise AdminRequired()

        timestamp = timestamp or utils.strtime(utils.utcnow())

        message = {'project_id': project_id, 'namespace': namespace,
                   'metric_name': metric_name, 'dimensions': dimensions,
                   'value': value, 'unit': unit, 'timestamp': timestamp,
                   'context': context.to_dict()}

        self.rpc.send_msg(rpc.PUT_METRIC_DATA_MSG_ID, message)
        LOG.info("PUT_METRIC_DATA_MSG sent")

        return {}
class ActionBolt(storm.BasicBolt):
    """Storm bolt that carries out the actions attached to an alarm.

    For every incoming tuple the bolt loads the alarm from Cassandra, selects
    the action list that matches the reported alarm state and dispatches SMS,
    e-mail and instance actions (each only when its feature flag is set).
    Every dispatch is written to the alarm history and, when it did not fail,
    metered through ``API.put_metric_data``.
    """

    BOLT_NAME = "ActionBolt"

    # Maps the alarm state carried by a message to the alarm column that
    # holds the JSON-encoded action list for that state.
    _ACTION_COLUMNS = {
        'OK': 'ok_actions',
        'INSUFFICIENT_DATA': 'insufficient_data_actions',
        'ALARM': 'alarm_actions',
    }

    def initialize(self, stormconf, context):
        """Cache the pid, a Cassandra handle, config flags and an API client.

        ``stormconf`` and ``context`` are accepted for the Storm interface
        but are not read here.
        """
        self.pid = os.getpid()
        self.cass = Cassandra()
        self.enable_send_mail = FLAGS.get('enable_send_mail')
        self.enable_send_sms = FLAGS.get('enable_send_sms')
        self.enable_instance_action = FLAGS.get('enable_instance_action')
        self.notification_server = FLAGS.get('notification_server_addr')
        self.statistics_ttl = FLAGS.get('statistics_ttl')
        self.smtp_server = FLAGS.get('smtp_server')
        self.mail_sender = FLAGS.get('mail_sender')
        self.sms_sender = FLAGS.get('sms_sender')
        self.sms_db_host = FLAGS.get('sms_database_host')
        self.sms_db_port = FLAGS.get('sms_database_port')
        self.sms_db = FLAGS.get('sms_database')
        self.sms_db_username = FLAGS.get('sms_db_username')
        self.sms_db_password = FLAGS.get('sms_db_password')
        self.nova_auth_url = FLAGS.get('nova_auth_url')
        self.nova_admin_tenant_name = FLAGS.get('admin_tenant_name')
        self.nova_admin_user = FLAGS.get('admin_user')
        self.nova_admin_password = FLAGS.get('admin_password')
        self.region = FLAGS.get('region')
        self.lms_template = FLAGS.get('lms_template')
        self.email_body_template = FLAGS.get('email_body_template')
        self.email_subject_template = FLAGS.get('email_subject_template')
        self.api = API()

    def get_action_type(self, action):
        """Classify an action string.

        Returns "email", "SMS", "InstanceAction" or
        "GroupNotificationAction" according to the first validator that
        accepts ``action``; returns None when none of them does.
        """
        if validate_email(action):
            return "email"
        elif validate_international_phonenumber(action):
            return "SMS"
        elif validate_instance_action(action):
            return "InstanceAction"
        elif validate_groupnotification_action(action):
            return "GroupNotificationAction"

    def meter_sms_actions(self, project_id, receivers):
        """Record how many SMS actions were sent for ``project_id``.

        Receivers starting with "+82" are counted as local, all others as
        international; each count is put as its own metric.
        """
        ctxt = get_admin_context()
        local_receivers = [r for r in receivers if r.startswith("+82")]
        international_receivers = [r for r in receivers
                                   if not r.startswith("+82")]

        self.api.put_metric_data(ctxt, project_id, namespace="SPCS/SYNAPS",
                                 metric_name="LocalSMSActionCount",
                                 dimensions={},
                                 value=len(local_receivers), unit="Count",
                                 timestamp=utils.strtime(utils.utcnow()),
                                 is_admin=True)

        self.api.put_metric_data(ctxt, project_id, namespace="SPCS/SYNAPS",
                                 metric_name="InternationalSMSActionCount",
                                 dimensions={},
                                 value=len(international_receivers),
                                 unit="Count",
                                 timestamp=utils.strtime(utils.utcnow()),
                                 is_admin=True)

        LOG.audit("Meter SMS: %s %s %s", project_id, len(receivers),
                  receivers)

    def meter_email_actions(self, project_id, receivers):
        """Record the number of e-mail receivers as "EmailActionCount"."""
        ctxt = get_admin_context()
        self.api.put_metric_data(ctxt, project_id, namespace="SPCS/SYNAPS",
                                 metric_name="EmailActionCount",
                                 dimensions={}, value=len(receivers),
                                 unit="Count",
                                 timestamp=utils.strtime(utils.utcnow()),
                                 is_admin=True)
        LOG.audit("Meter Email: %s %s %s", project_id, len(receivers),
                  receivers)

    def meter_instance_actions(self, project_id, receivers):
        """Record the number of instance actions as "InstanceActionCount"."""
        ctxt = get_admin_context()
        self.api.put_metric_data(ctxt, project_id, namespace="SPCS/SYNAPS",
                                 metric_name="InstanceActionCount",
                                 dimensions={}, value=len(receivers),
                                 unit="Count",
                                 timestamp=utils.strtime(utils.utcnow()),
                                 is_admin=True)
        LOG.audit("Meter InstanceAction: %s %s %s", project_id,
                  len(receivers), receivers)

    def alarm_history_state_update(self, alarmkey, alarm,
                                   notification_message):
        """
        update alarm history based on notification message

        notification_message = {
            'method': "email",
            'receivers': email_receivers,
            'subject': message['subject'],
            'body': message['body'],
            'state': "ok" | "failed"
        }

        Raises ValueError when 'method' is not one of "email", "SMS" or
        "InstanceAction".
        """
        item_type = 'Action'
        project_id = alarm['project_id']
        method = notification_message.get("method")
        # A missing 'state' is treated as success.
        succeeded = notification_message.get('state', 'ok') == 'ok'

        if method in ("email", "SMS"):
            if succeeded:
                template = "Message '%(subject)s' is sent via %(method)s"
            else:
                template = ("Failed to send a message '%(subject)s' via"
                            " %(method)s")
        elif method == "InstanceAction":
            # Compared with ==: the previous `in ("InstanceAction")` was a
            # substring test against a plain string, not a tuple lookup.
            if succeeded:
                template = "%(method)s %(receivers)s is invoked."
            else:
                template = "Failed to invoke %(method)s %(receivers)s."
        else:
            raise ValueError("Unsupported notification method: %r"
                             % (method,))

        history_summary = template % notification_message
        timestamp = utils.utcnow()
        history_key = uuid4()
        column = {
            'project_id': project_id,
            'alarm_key': UUID(alarmkey),
            'alarm_name': alarm['alarm_name'],
            'history_data': json.dumps(notification_message),
            'history_item_type': item_type,
            'history_summary': history_summary,
            'timestamp': timestamp
        }

        self.cass.insert_alarm_history(history_key, column,
                                       ttl=self.statistics_ttl)
        LOG.info("History updated. %s", history_summary)

    def _expand_group_actions(self, actions):
        """Replace group-notification actions with the group's members.

        Actions that are not group notifications are kept as they are.
        """
        expanded = []
        for action in actions:
            if validate_groupnotification_action(action):
                groupname = parse_groupnotification_action(action)
                members = self.cass.get_notification_group(groupname)
                # presumably a missing group yields None/empty -- treat it
                # as "no receivers" instead of failing on list(None).
                expanded.extend(members or [])
            else:
                expanded.append(action)
        return expanded

    def process_action(self, tup):
        """
        Dispatch the actions configured for the alarm state in ``tup``.

        ``tup.values[0]`` is the alarm key (UUID string) and
        ``tup.values[1]`` the JSON-encoded message.

        message example

        msg = {
            'state': new_state['stateValue'],
            'subject': "%s state has been changed from %s to %s" %
                (alarm['alarm_name'], old_state['stateValue'],
                 new_state['stateValue']),
            'body': new_state['stateReason']
        }

        Returns False when the alarm cannot be found or the state is not
        recognised; otherwise returns None.
        """
        alarm_key = tup.values[0]
        message_buf = tup.values[1]
        message = json.loads(message_buf)
        LOG.info("start processing tup %s", tup)

        alarm = self.cass.get_metric_alarm(UUID(alarm_key))

        try:
            actions_enabled = alarm['actions_enabled']
        except TypeError:
            # Subscripting failed, i.e. no alarm mapping was returned.
            LOG.debug("Alarm(%s) is not found", alarm_key)
            return False

        column = self._ACTION_COLUMNS.get(message['state'])
        if column is None:
            # Previously this fell through with `actions` unbound.
            LOG.warn("Unknown alarm state %s for alarm(%s)",
                     message['state'], alarm_key)
            return False

        actions = self._expand_group_actions(json.loads(alarm[column]))

        if actions_enabled and actions:
            if self.enable_send_sms:
                self.process_sms_action(alarm_key, alarm, message, actions)
            if self.enable_send_mail:
                self.process_email_action(alarm_key, alarm, message, actions)
            if self.enable_instance_action:
                self.process_instance_action(alarm_key, alarm, message,
                                             actions)

    def do_instance_action(self, alarm_key, alarm, instance_actions):
        """Invoke each instance action through the nova client.

        "Migrate" calls ``server.migrate()`` and "Reboot" calls
        ``server.reboot('HARD')``; other action types are ignored.
        ``alarm_key`` and ``alarm`` are not used here.
        """
        nc = utils.get_python_novaclient()
        for action in instance_actions:
            action_type, vm_uuid = parse_instance_action(action)
            server = nc.servers.get(vm_uuid)
            if action_type == "Migrate":
                server.migrate()
                LOG.info("instance action %s invoked for %s", action_type,
                         server)
            elif action_type == "Reboot":
                server.reboot('HARD')
                LOG.info("instance action %s invoked for %s", action_type,
                         server)

    def process_instance_action(self, alarm_key, alarm, message, actions):
        """Run the instance actions in ``actions``, then log and meter them.

        Nothing happens when ``actions`` holds no instance action. A failure
        is recorded in the history with state 'failed' and is not metered.
        """
        instance_actions = [action for action in actions
                            if self.get_action_type(action)
                            == "InstanceAction"]
        instance_action_message = {
            'method': "InstanceAction",
            'receivers': instance_actions,
            'subject': message['subject'],
            'body': message['body'],
            'state': 'ok'
        }

        if instance_actions:
            try:
                self.do_instance_action(alarm_key, alarm, instance_actions)
            except Exception as e:
                instance_action_message['state'] = 'failed'
                LOG.exception(e)

            LOG.audit("InstanceAction is invoked. %s",
                      instance_action_message)
            self.alarm_history_state_update(alarm_key, alarm,
                                            instance_action_message)
            if instance_action_message['state'] != 'failed':
                self.meter_instance_actions(alarm['project_id'],
                                            instance_actions)

    def process_email_action(self, alarm_key, alarm, message, actions):
        """Mail the de-duplicated e-mail receivers, then log and meter.

        Nothing happens when ``actions`` holds no e-mail address. A failure
        is recorded in the history with state 'failed' and is not metered.
        """
        email_receivers = list(set(action for action in actions
                                   if self.get_action_type(action)
                                   == "email"))
        notification_message = {
            'method': "email",
            'receivers': email_receivers,
            'subject': message['subject'],
            'body': message['body'],
            'state': 'ok',
            'alarm_description': message['alarm_description']
        }

        if email_receivers:
            try:
                self.send_email(notification_message)
            except Exception as e:
                notification_message['state'] = 'failed'
                LOG.exception(e)

            LOG.audit("Email sent. %s", notification_message)
            self.alarm_history_state_update(alarm_key, alarm,
                                            notification_message)
            if notification_message['state'] != 'failed':
                self.meter_email_actions(alarm['project_id'],
                                         email_receivers)

    def process_sms_action(self, alarm_key, alarm, message, actions):
        """Text the de-duplicated SMS receivers, then log and meter.

        Nothing happens when ``actions`` holds no phone number. A failure
        is recorded in the history with state 'failed' and is not metered.
        """
        sms_receivers = list(set(action for action in actions
                                 if self.get_action_type(action) == "SMS"))
        notification_message = {
            'method': "SMS",
            'receivers': sms_receivers,
            'subject': message['subject'],
            'body': message['body'],
            'state': 'ok',
            'alarm_description': message['alarm_description']
        }

        if sms_receivers:
            try:
                self.send_sms(notification_message)
            except Exception as e:
                notification_message['state'] = 'failed'
                LOG.exception(e)

            LOG.audit("SMS sent. %s", notification_message)
            self.alarm_history_state_update(alarm_key, alarm,
                                            notification_message)
            if notification_message['state'] != 'failed':
                self.meter_sms_actions(alarm['project_id'], sms_receivers)

    def send_sms(self, message):
        """Queue one SMS per receiver by inserting rows into the SMS DB.

        Receivers with national code 82 get a row in MMS_SEND whose text is
        rendered from ``self.lms_template``; all others get a row in
        SMS_SEND carrying the subject (cut to 80 characters) and the
        national code. The inserts are committed together; the cursor and
        the connection are closed even when an insert fails.

        message example.

        {'body': u'Threshold Crossed: 3 datapoints were greater than the
                   threshold(50.000000). The most recent datapoints:
                   [110.0, 110.0, 60.0]. at 2012-08-28T10:17:50.494902',
         'receivers': [u'+82 1093145616'],
         'method': 'SMS',
         'subject': u'AlarmActionTest state has been changed from OK to
                      ALARM at 2012-08-28T10:17:50.494902',
         'alarm_description': u''}
        """
        Q_LOCAL = """insert into MMS_SEND(REG_TIME, MSG_SEQ, MSG_KEY,
        RECEIVER, SENDER, SUBJECT, MESSAGE)
        values (now()+0, %s, %s, %s, %s, %s, %s)
        """

        Q_NAT = """insert into SMS_SEND(REG_TIME, MSG_KEY, RECEIVER,
        SENDER, MESSAGE, NAT_CODE)
        values (now()+0, %s, %s, %s, %s, %s)
        """

        def parse_number(no):
            # Expects "<national code> <number>", e.g. "+82 1093145616".
            nat, local_no = no.split(' ', 1)
            if nat.startswith("+"):
                nat = int(nat[1:])
            else:
                nat = int(nat)

            if nat == 82:  # Korean national code
                nat = None
                local_no = '0' + local_no.replace(' ', '')
            else:
                local_no = local_no.replace(' ', '')

            return nat, local_no

        def build_query(receiver, message):
            nat, local_no = parse_number(receiver)
            subject = message['subject']
            body = message['body']
            description = message['alarm_description']
            # random integer for msg_key
            msg_key = randint(1, 10 ** 15)

            if nat is None:
                text = self.lms_template % {
                    'region': self.region,
                    'subject': subject,
                    'reason': body,
                    'description': description
                }
                ret = Q_LOCAL
                params = (msg_key, msg_key, local_no, self.sms_sender,
                          subject, text)
            else:
                if len(subject) > 80:
                    subject = subject[:77] + "..."
                ret = Q_NAT
                params = (msg_key, local_no, self.sms_sender, subject, nat)

            return ret, params

        LOG.debug("Connect to mysql db %s@%s:%s %s", self.sms_db_username,
                  self.sms_db_host, self.sms_db_port, self.sms_db)

        conn = db.connect(host=self.sms_db_host, port=self.sms_db_port,
                          db=self.sms_db, user=self.sms_db_username,
                          passwd=self.sms_db_password, connect_timeout=30,
                          charset='utf8')
        try:
            c = conn.cursor()
            try:
                for receiver in message['receivers']:
                    q, params = build_query(receiver, message)
                    c.execute(q, params)
            finally:
                c.close()
            # Only reached when every insert succeeded.
            conn.commit()
        finally:
            conn.close()

    def send_email(self, message):
        """Send ``message`` as an HTML mail to all of its receivers.

        Subject and body are rendered from the configured templates with
        the escaped region, body, subject and alarm description. The SMTP
        session is closed even when sending fails.
        """
        msg_dict = {
            'region': escape(self.region),
            'reason': escape(message['body']),
            'subject': escape(message['subject']),
            'description': escape(message['alarm_description'])
        }

        body = self.email_body_template % msg_dict
        msg = MIMEText(body, 'html', 'utf8')
        msg['Subject'] = self.email_subject_template % msg_dict
        msg['From'] = self.mail_sender
        msg['To'] = ", ".join(message['receivers'])

        s = smtplib.SMTP(self.smtp_server, timeout=30)
        try:
            s.sendmail(self.mail_sender, message['receivers'],
                       msg.as_string())
        finally:
            s.quit()

    def process(self, tup):
        """Storm entry point; delegates to ``process_action``."""
        self.process_action(tup)
class PutMetricBolt(storm.BasicBolt):
    """Storm bolt that applies metric and alarm messages.

    It keeps one ``MetricMonitor`` per metric key in ``self.metrics`` and
    routes each incoming message to the handler for its ``message_id``.
    """

    BOLT_NAME = "PutMetricBolt"

    def initialize(self, stormconf, context):
        """Set up the pid, Cassandra handle, monitor map and memcache client.

        ``stormconf`` and ``context`` are accepted for the Storm interface
        but are not read here.
        """
        self.pid = os.getpid()
        self.cass = Cassandra()
        self.metrics = {}
        self.mc = memcache.Client(FLAGS.memcached_servers, debug=0)

    def _ensure_monitor(self, metric_key):
        """Return the monitor for ``metric_key``, creating it if missing."""
        if metric_key not in self.metrics:
            self.metrics[metric_key] = MetricMonitor(metric_key, self.cass)
        return self.metrics[metric_key]

    def process_put_metric_data_msg(self, metric_key, message):
        """
        Put metric data into both memory and database

        When the metric is not loaded yet, loading is attempted up to three
        times with a one second pause in between; if the metric is still
        not found the message is dropped.
        """
        # Load statistics data in memory
        if metric_key not in self.metrics:
            max_attempts = 3
            for attempt in range(1, max_attempts + 1):
                try:
                    self.metrics[metric_key] = MetricMonitor(metric_key,
                                                             self.cass)
                    break
                except ResourceNotFound:
                    if attempt < max_attempts:
                        LOG.warn("Metric %s is not in the database. "
                                 "retry... %d", metric_key, attempt)
                        time.sleep(1)
                    else:
                        LOG.error("Metric %s is not in the database.",
                                  metric_key)
                        return

        timestamp = utils.parse_strtime(message['timestamp'])

        self.metrics[metric_key].put_metric_data(metric_key,
                                                 timestamp=timestamp,
                                                 value=message['value'],
                                                 unit=message['unit'])

    def process_put_metric_alarm_msg(self, metric_key, message):
        """Create or update the alarm described by ``message``.

        An alarm whose name already has a key in the project is updated and
        keeps its stored state columns; otherwise a new key is generated
        and the alarm starts in INSUFFICIENT_DATA. The alarm and a
        'ConfigurationUpdate' history item are written to Cassandra and the
        alarm is registered with the in-memory monitor.
        """
        def metricalarm_for_json(metricalarm):
            # JSON-friendly view of the alarm that is stored in the history.
            cut = metricalarm.get('alarm_configuration_updated_timestamp')

            alarm_for_json = {
                'actionEnabled': metricalarm.get('actions_enabled', False),
                'alarmActions': metricalarm.get('alarm_actions', []),
                'alarmArn': metricalarm.get('alarm_arn'),
                'alarmConfigurationUpdatedTimestamp': utils.strtime(cut),
                'alarmDescription': metricalarm.get('alarm_description'),
                'alarmName': metricalarm.get('alarm_name'),
                'comparisonOperator':
                    metricalarm.get('comparison_operator'),
                'dimensions': metricalarm.get('dimensions'),
                'evaluationPeriods': metricalarm.get('evaluation_periods'),
                'insufficientDataActions':
                    metricalarm.get('insufficient_data_actions', []),
                'metricName': metricalarm.get('metric_name'),
                'namespace': metricalarm.get('namespace'),
                'okactions': metricalarm.get('ok_actions', []),
                'statistic': metricalarm.get('statistic'),
                'threshold': metricalarm.get('threshold'),
                'unit': metricalarm.get('unit'),
            }
            return alarm_for_json

        monitor = self._ensure_monitor(metric_key)
        project_id = message['project_id']
        metricalarm = message['metricalarm']

        # build metricalarm column, alarmhistory column
        alarm_key = self.cass.get_metric_alarm_key(
            project_id, metricalarm['alarm_name'])
        history_type = 'Update' if alarm_key else 'Create'
        now = utils.utcnow()

        if history_type == 'Update':
            original_alarm = self.cass.get_metric_alarm(alarm_key)
            # The stored state is carried over from the original alarm.
            for dict_key in ['state_updated_timestamp', 'state_reason',
                             'state_reason_data', 'state_value',
                             'project_id']:
                metricalarm[dict_key] = original_alarm[dict_key]
            metricalarm['alarm_configuration_updated_timestamp'] = now
            history_data = json.dumps({
                'updatedAlarm': metricalarm_for_json(metricalarm),
                'type': history_type,
                'version': '1.0'
            })
            summary = "Alarm %s updated" % metricalarm['alarm_name']
        else:
            alarm_key = uuid.uuid4()
            state_reason = "Unchecked: Initial alarm creation"
            metricalarm.update({
                'state_updated_timestamp': now,
                'alarm_configuration_updated_timestamp': now,
                'state_reason': state_reason,
                'state_reason_data': json.dumps({}),
                'state_value': "INSUFFICIENT_DATA",
                'project_id': project_id
            })
            history_data = json.dumps({
                'createdAlarm': metricalarm_for_json(metricalarm),
                'type': history_type,
                'version': '1.0'
            })
            summary = "Alarm %s created" % metricalarm['alarm_name']

        # Set after the history JSON has been built, as before.
        metricalarm['metric_key'] = metric_key

        history_key = uuid.uuid4()
        history_column = {
            'project_id': project_id,
            'alarm_key': alarm_key,
            'alarm_name': metricalarm['alarm_name'],
            'history_data': history_data,
            'history_item_type': 'ConfigurationUpdate',
            'history_summary': summary,
            'timestamp': utils.utcnow()
        }

        self.cass.put_metric_alarm(alarm_key, metricalarm)
        self.cass.insert_alarm_history(history_key, history_column)
        LOG.info("metric alarm inserted: %s %s", alarm_key, metricalarm)

        # load metric in memory
        monitor.put_alarm(alarm_key, metricalarm)

    def process_delete_metric_alarms_msg(self, metric_key, message):
        """Delete the alarm named by ``message['alarmkey']`` from its monitor."""
        alarmkey = UUID(message['alarmkey'])
        LOG.debug("Metric keys %s", self.metrics.keys())
        self._ensure_monitor(metric_key).delete_metric_alarm(alarmkey)

    def process_set_alarm_state_msg(self, metric_key, message):
        """Apply a state change to an alarm in memory and in the database.

        The message is dropped with a warning when the alarm name has no
        key in the project or the key is not loaded in the monitor.
        """
        project_id = message.get('project_id')
        alarm_name = message.get('alarm_name')
        state_reason_data = message.get('state_reason_data')

        metric = self._ensure_monitor(metric_key)

        alarm_key = self.cass.get_metric_alarm_key(project_id, alarm_name)
        if not alarm_key:
            # Logs the alarm name: there is no key to report in this branch
            # (the old code referenced an unassigned local here).
            LOG.warn("alarm key is not found for alarm [%s].", alarm_name)
            return

        try:
            metricalarm = metric.alarms[alarm_key]
        except KeyError:
            LOG.warn("alarm key [%s] is found, but alarm is not found.",
                     alarm_key)
            return

        metricalarm['state_reason'] = message.get('state_reason')
        metricalarm['state_value'] = message.get('state_value')
        metricalarm['state_reason_data'] = state_reason_data

        # write into database
        alarm_columns = {
            'state_reason': message.get('state_reason'),
            'state_value': message.get('state_value')
        }
        if state_reason_data:
            alarm_columns['state_reason_data'] = state_reason_data

        alarm_columns['project_id'] = project_id

        self.cass.put_metric_alarm(alarm_key, alarm_columns)

    def process_check_metric_alarms_msg(self, message):
        """Evaluate alarms of live metrics and drop stale metrics.

        Alarms are only checked when ``message['ready_to_evaluate']`` is
        truthy; stale metrics are removed from memory and deleted either
        way.
        """
        query_time = datetime.utcnow()
        stale_metrics = []
        ready_to_evaluate = message.get('ready_to_evaluate')

        for key, metric in self.metrics.iteritems():
            is_stale = metric.is_stale()
            if is_stale:
                stale_metrics.append(key)
            if (not is_stale) and ready_to_evaluate:
                metric.check_alarms(query_time)

        # Removal happens after the loop so the dict is not modified while
        # it is being iterated.
        for key in stale_metrics:
            try:
                metric = self.metrics.pop(key)
                metric.delete()
                LOG.audit("Stale metric(%s) is deleted", str(key))
            except KeyError:
                LOG.error("KeyError occurred when delete stale metric(%s)",
                          str(key))

    def process(self, tup):
        """Decode the tuple and route the message by its ``message_id``.

        ``tup.values[0]`` is the metric key (UUID string, may be empty) and
        ``tup.values[1]`` the JSON-encoded message. Put-metric-data
        messages are de-duplicated through memcache on ``message_uuid``.
        """
        message = json.loads(tup.values[1])
        message_id = message['message_id']
        message_uuid = message.get('message_uuid', None)
        LOG.info("start processing msg[%s:%s]", message_id, message_uuid)

        try:
            metric_key = UUID(tup.values[0]) if tup.values[0] else None
        except ValueError:
            LOG.error("badly formed hexadecimal UUID string - %s",
                      tup.values[0])
            return

        if message_id == PUT_METRIC_DATA_MSG_ID:
            # message deduplicate
            # NOTE(review): a message without message_uuid is not processed
            # at all -- confirm this is intended.
            if message_uuid:
                mckey = "%s_message_uuid" % message_uuid
                if not self.mc.get(mckey):
                    # 300 seconds TTL
                    self.mc.set(mckey, 1, 300)
                    LOG.info("process put_metric_data_msg (%s)", message)
                    self.process_put_metric_data_msg(metric_key, message)
                else:
                    LOG.info("Message duplicated. %s", message_uuid)

        elif message_id == PUT_METRIC_ALARM_MSG_ID:
            LOG.info("process put_metric_alarm_msg (%s)", message)
            self.process_put_metric_alarm_msg(metric_key, message)

        elif message_id == DELETE_ALARMS_MSG_ID:
            LOG.info("process delete_alarms_msg (%s)", message)
            self.process_delete_metric_alarms_msg(metric_key, message)

        elif message_id == SET_ALARM_STATE_MSG_ID:
            LOG.info("process set_alarm_state_msg (%s)", message)
            self.process_set_alarm_state_msg(metric_key, message)

        elif message_id == CHECK_METRIC_ALARM_MSG_ID:
            LOG.info("process check_metric_alarm_msg (%s)", message)
            self.process_check_metric_alarms_msg(message)

        else:
            LOG.error("unknown message")
class API(object):
    """Facade over Cassandra reads and RPC messages for metrics and alarms.

    Read operations query Cassandra directly; most mutating operations
    validate their input and then send an RPC message.
    """

    # Maps a statistic name to the rolling-window function applied to its
    # per-minute series in get_metric_statistics.
    ROLLING_FUNC_MAP = {
        'Average': rolling_mean,
        'Minimum': rolling_min,
        'Maximum': rolling_max,
        'SampleCount': rolling_sum,
        'Sum': rolling_sum,
    }

    def __init__(self):
        self.cass = Cassandra()
        self.rpc = rpc.RemoteProcedureCall()

    def delete_alarms(self, context, project_id, alarm_names):
        """Send a delete message for the named alarms.

        Raises ResourceNotFound when any name has no alarm key in the
        project; in that case nothing is sent.
        """
        alarmkeys = []
        for alarm_name in alarm_names:
            k = self.cass.get_metric_alarm_key(project_id, alarm_name)
            if not k:
                raise ResourceNotFound("Alarm %s does not exists." %
                                       alarm_name)
            alarmkeys.append(str(k))

        # alarmkeys holds UUID strings
        body = {
            'project_id': project_id,
            'alarmkeys': alarmkeys,
            'context': context.to_dict()
        }
        self.rpc.send_msg(rpc.DELETE_ALARMS_MSG_ID, body)
        LOG.info("DELETE_ALARMS_MSG sent")

    def describe_alarms(self, project_id, action_prefix=None,
                        alarm_name_prefix=None, alarm_names=None,
                        max_records=None, next_token=None,
                        state_value=None):
        """
        params:
            project_id: string
            action_prefix: TODO: not implemented yet.
            alarm_name_prefix: string
            alarm_names: string list
            max_records: integer
            next_token: string (uuid type)
            state_value: string (OK | ALARM | INSUFFICIENT_DATA)
        """
        alarms = self.cass.describe_alarms(project_id, action_prefix,
                                           alarm_name_prefix, alarm_names,
                                           max_records, next_token,
                                           state_value)
        return alarms

    def describe_alarms_for_metric(self, project_id, namespace, metric_name,
                                   dimensions=None, period=None,
                                   statistic=None, unit=None):
        """
        params:
            project_id: string
            metric_name: string
            namespace: string
            dimensions: dict
            period: integer
            statistic: string (SampleCount | Average | Sum | Minimum |
                               Maximum)
            unit: string
        """
        alarms = self.cass.describe_alarms_for_metric(project_id, namespace,
                                                      metric_name,
                                                      dimensions=dimensions,
                                                      period=period,
                                                      statistic=statistic,
                                                      unit=unit)
        return alarms

    def describe_alarm_history(self, project_id, alarm_name=None,
                               end_date=None, history_item_type=None,
                               max_records=None, next_token=None,
                               start_date=None):
        """Return the alarm history items Cassandra finds for the filters."""
        histories = self.cass.describe_alarm_history(
            alarm_name=alarm_name, end_date=end_date,
            history_item_type=history_item_type,
            max_records=max_records, next_token=next_token,
            start_date=start_date, project_id=project_id)
        return histories

    def set_alarm_actions(self, context, project_id, alarm_names, enabled):
        """Enable or disable the actions of every named alarm.

        All names are resolved before anything is written, so an unknown
        name raises InvalidParameterValue without changing any alarm. For
        each alarm the flag is stored and a 'ConfigurationUpdate' history
        item is inserted. ``context`` is not used here.
        """
        # Resolve every key once; the same keys are reused for the writes
        # instead of being looked up a second time.
        resolved = []
        for alarm_name in alarm_names:
            alarm_key = self.cass.get_metric_alarm_key(project_id,
                                                       alarm_name)
            if not alarm_key:
                raise InvalidParameterValue("Alarm %s does not exist" %
                                            alarm_name)
            resolved.append((alarm_name, alarm_key))

        for alarm_name, alarm_key in resolved:
            history_data = {
                'actions_enabled': enabled,
                'project_id': project_id
            }
            self.cass.put_metric_alarm(alarm_key, history_data)

            if enabled:
                summary = "Alarm actions for %s are enabled" % alarm_name
            else:
                summary = "Alarm actions for %s are disabled" % alarm_name
            history_key = uuid.uuid4()
            history_column = {
                'project_id': project_id,
                'alarm_key': alarm_key,
                'alarm_name': alarm_name,
                'history_data': json.dumps(history_data),
                'history_item_type': 'ConfigurationUpdate',
                'history_summary': summary,
                'timestamp': utils.utcnow()
            }
            self.cass.insert_alarm_history(history_key, history_column)

    def set_alarm_state(self, context, project_id, alarm_name, state_reason,
                        state_value, state_reason_data=None):
        """Send a set-alarm-state message for ``alarm_name``.

        Raises ResourceNotFound when the name has no alarm key in the
        project.
        """
        k = self.cass.get_metric_alarm_key(project_id, alarm_name)
        if not k:
            raise ResourceNotFound("Alarm %s does not exists." % alarm_name)

        body = {
            'project_id': project_id,
            'alarm_name': alarm_name,
            'state_reason': state_reason,
            'state_value': state_value,
            'state_reason_data': state_reason_data,
            'context': context.to_dict()
        }
        self.rpc.send_msg(rpc.SET_ALARM_STATE_MSG_ID, body)
        LOG.info("SET_ALARM_STATE_MSG sent")

    def get_metric_statistics(self, project_id, end_time, metric_name,
                              namespace, period, start_time, statistics,
                              unit=None, dimensions=None):
        """
        Return the list of statistics datapoints of the metric that matches
        the given conditions.

        Returns a ``(datapoints, unit)`` pair where each datapoint is an
        ``(index, values)`` pair with missing values dropped; indexes that
        have no value at all are left out. When ``unit`` is empty or
        "None", the unit stored for the metric is used, or "None" if the
        metric is unknown.
        """
        def to_datapoint(df, idx):
            # Implicitly returns None for rows with no data, which the
            # filter() below removes.
            datapoint = df.ix[idx].dropna()
            if len(datapoint):
                return idx, datapoint

        end_idx = end_time.replace(second=0, microsecond=0)
        start_idx = start_time.replace(second=0, microsecond=0)
        # Data is loaded from one period before the requested start.
        start_ana_idx = start_idx - datetools.Minute() * (period / 60)
        daterange = DateRange(start_idx, end_idx, offset=datetools.Minute())
        daterange_ana = DateRange(start_ana_idx, end_idx,
                                  offset=datetools.Minute())

        # load default unit for metric from database
        if unit == "None" or not unit:
            metric_key = self.cass.get_metric_key(project_id=project_id,
                                                  namespace=namespace,
                                                  metric_name=metric_name,
                                                  dimensions=dimensions)
            if metric_key:
                unit = self.cass.get_metric_unit(metric_key)
            else:
                unit = "None"

        # load statistics data from database
        stats = self.cass.get_metric_statistics(project_id=project_id,
                                                namespace=namespace,
                                                metric_name=metric_name,
                                                start_time=start_ana_idx,
                                                end_time=end_time,
                                                period=period,
                                                statistics=statistics,
                                                dimensions=dimensions)

        period = period / 60  # convert sec to min
        stat = DataFrame(index=daterange)

        for statistic, series in zip(statistics, stats):
            func = self.ROLLING_FUNC_MAP[statistic]
            ts = TimeSeries(series, index=daterange_ana)
            rolled_ts = func(ts, period, min_periods=0)
            stat[statistic] = rolled_ts.ix[::period]
            LOG.debug("stat %s\n%s", statistic, stat[statistic])

        ret = filter(None, (to_datapoint(stat, i) for i in stat.index))
        return ret, unit

    def list_metrics(self, project_id, next_token=None, dimensions=None,
                     metric_name=None, namespace=None):
        """
        List Metrics
        """
        metrics = self.cass.list_metrics(project_id, namespace, metric_name,
                                         dimensions, next_token)
        return metrics

    def put_metric_alarm(self, context, project_id, metricalarm):
        """
        Send put metric alarm message to Storm

        Raises InvalidNotificationGroup when an action refers to a
        notification group Cassandra does not return, InvalidRequest when
        an existing alarm would be moved to another metric, and
        ProjectAlarmQuotaExceeded / MetricAlarmQuotaExceeded when a new
        alarm would reach the configured quota. Returns an empty dict.
        """
        def _validate_actions(alarm):
            for actions in (alarm.ok_actions, alarm.insufficient_data_actions,
                            alarm.alarm_actions):
                for action in actions:
                    if utils.validate_groupnotification_action(action):
                        group = utils.parse_groupnotification_action(action)
                        if not self.cass.get_notification_group(group):
                            raise InvalidNotificationGroup()

        now = utils.utcnow()
        _validate_actions(metricalarm)
        metricalarm = metricalarm.to_columns()
        alarm_name = metricalarm['alarm_name']
        namespace = metricalarm['namespace']
        metric_name = metricalarm['metric_name']
        dimensions = json.loads(metricalarm['dimensions'])

        # check if we have metric in database
        metric_key = self.cass.get_metric_key_or_create(
            project_id=project_id, namespace=namespace,
            metric_name=metric_name, dimensions=dimensions,
            unit=metricalarm['unit'])

        update_data = {
            'project_id': project_id,
            'metric_key': str(metric_key),
            'alarm_arn': "arn:spcs:synaps:%s:alarm:%s" % (project_id,
                                                          alarm_name),
            'alarm_configuration_updated_timestamp': utils.strtime(now)
        }
        metricalarm.update(update_data)

        # check if metric is changed
        alarm_key = self.cass.get_metric_alarm_key(project_id=project_id,
                                                   alarm_name=alarm_name)

        if alarm_key:
            original_alarm = self.cass.get_metric_alarm(alarm_key)
            if (str(original_alarm['metric_key']) !=
                    str(metricalarm['metric_key'])):
                raise InvalidRequest("Metric cannot be changed. "
                                     "Delete alarm and retry.")
        else:
            # If alarm is newly added, check quotas
            # check alarm quota per project
            project_quota = FLAGS.get('alarm_quota_per_project')
            alarms_in_project = self.cass.get_alarm_count(project_id)
            if alarms_in_project >= project_quota:
                LOG.info("Too many alarms(%d) in the project %s",
                         alarms_in_project, project_id)
                raise ProjectAlarmQuotaExceeded()

            # check alarm quota per metric
            metric_quota = FLAGS.get('alarm_quota_per_metric')
            alarms_per_metric = self.cass.get_alarms_per_metric_count(
                project_id, namespace, metric_name, dimensions)
            if alarms_per_metric >= metric_quota:
                LOG.info("Too many alarms(%d) for this metric",
                         alarms_per_metric)
                raise MetricAlarmQuotaExceeded()

        message = {
            'project_id': project_id,
            'metric_key': str(metric_key),
            'metricalarm': metricalarm,
            'context': context.to_dict()
        }
        self.rpc.send_msg(rpc.PUT_METRIC_ALARM_MSG_ID, message)
        LOG.info("PUT_METRIC_ALARM_MSG sent")

        return {}

    def put_metric_data(self, context, project_id, namespace, metric_name,
                        dimensions, value, unit, timestamp=None,
                        is_admin=False):
        """Send a put-metric-data message and return an empty dict.

        Raises AdminRequired when ``namespace`` starts with the configured
        admin namespace and ``is_admin`` is false. A missing ``timestamp``
        defaults to the current UTC time as a string.
        """
        admin_namespace = FLAGS.get('admin_namespace')
        if namespace.startswith(admin_namespace) and not is_admin:
            raise AdminRequired()

        timestamp = timestamp or utils.strtime(utils.utcnow())

        message = {
            'project_id': project_id,
            'namespace': namespace,
            'metric_name': metric_name,
            'dimensions': dimensions,
            'value': value,
            'unit': unit,
            'timestamp': timestamp,
            'context': context.to_dict()
        }
        self.rpc.send_msg(rpc.PUT_METRIC_DATA_MSG_ID, message)
        LOG.info("PUT_METRIC_DATA_MSG sent")

        return {}
class ActionBolt(storm.BasicBolt):
    """Storm bolt that carries out the actions attached to a metric alarm.

    Each incoming tuple holds an alarm key and a JSON encoded state-change
    message.  The bolt loads the alarm, picks the action list matching the
    new state, expands group notifications and then sends SMS, sends email
    and/or invokes instance actions, depending on which of those features
    are enabled by flags.  Every executed action is recorded in the alarm
    history and, when it succeeded, metered through ``API.put_metric_data``.
    """
    BOLT_NAME = "ActionBolt"

    # Alarm column holding the JSON encoded action list for each alarm state.
    _STATE_ACTION_COLUMNS = {
        'OK': 'ok_actions',
        'INSUFFICIENT_DATA': 'insufficient_data_actions',
        'ALARM': 'alarm_actions',
    }

    def initialize(self, stormconf, context):
        """Create the backend clients and cache the flags used by the bolt."""
        self.pid = os.getpid()
        self.cass = Cassandra()

        # Feature switches.
        self.enable_send_mail = FLAGS.get('enable_send_mail')
        self.enable_send_sms = FLAGS.get('enable_send_sms')
        self.enable_instance_action = FLAGS.get('enable_instance_action')

        self.notification_server = FLAGS.get('notification_server_addr')
        self.statistics_ttl = FLAGS.get('statistics_ttl')

        # Mail settings.
        self.smtp_server = FLAGS.get('smtp_server')
        self.mail_sender = FLAGS.get('mail_sender')

        # SMS settings (messages are queued by inserting rows into a DB).
        self.sms_sender = FLAGS.get('sms_sender')
        self.sms_db_host = FLAGS.get('sms_database_host')
        self.sms_db_port = FLAGS.get('sms_database_port')
        self.sms_db = FLAGS.get('sms_database')
        self.sms_db_username = FLAGS.get('sms_db_username')
        self.sms_db_password = FLAGS.get('sms_db_password')

        # Nova settings.
        self.nova_auth_url = FLAGS.get('nova_auth_url')
        self.nova_admin_tenant_name = FLAGS.get('admin_tenant_name')
        self.nova_admin_user = FLAGS.get('admin_user')
        self.nova_admin_password = FLAGS.get('admin_password')

        # Message templates.
        self.region = FLAGS.get('region')
        self.lms_template = FLAGS.get('lms_template')
        self.email_body_template = FLAGS.get('email_body_template')
        self.email_subject_template = FLAGS.get('email_subject_template')

        self.api = API()

    def get_action_type(self, action):
        """Classify an action string.

        Returns "email", "SMS", "InstanceAction" or
        "GroupNotificationAction" according to the first validator that
        accepts ``action``, or None when no validator accepts it.
        """
        if validate_email(action):
            return "email"
        elif validate_international_phonenumber(action):
            return "SMS"
        elif validate_instance_action(action):
            return "InstanceAction"
        elif validate_groupnotification_action(action):
            return "GroupNotificationAction"
        return None

    def _put_action_count(self, ctxt, project_id, metric_name, count):
        """Put one "Count" datapoint into the SPCS/SYNAPS admin namespace."""
        self.api.put_metric_data(ctxt, project_id, namespace="SPCS/SYNAPS",
                                 metric_name=metric_name, dimensions={},
                                 value=count, unit="Count",
                                 timestamp=utils.strtime(utils.utcnow()),
                                 is_admin=True)

    def meter_sms_actions(self, project_id, receivers):
        """Meter sent SMS messages, split by the "+82" number prefix.

        Receivers starting with "+82" are counted as LocalSMSActionCount,
        all others as InternationalSMSActionCount.
        """
        ctxt = get_admin_context()
        local_count = len([r for r in receivers if r.startswith("+82")])
        international_count = len(receivers) - local_count

        self._put_action_count(ctxt, project_id, "LocalSMSActionCount",
                               local_count)
        self._put_action_count(ctxt, project_id,
                               "InternationalSMSActionCount",
                               international_count)
        LOG.audit("Meter SMS: %s %s %s", project_id, len(receivers),
                  receivers)

    def meter_email_actions(self, project_id, receivers):
        """Meter sent emails as EmailActionCount (one per receiver)."""
        ctxt = get_admin_context()
        self._put_action_count(ctxt, project_id, "EmailActionCount",
                               len(receivers))
        LOG.audit("Meter Email: %s %s %s", project_id, len(receivers),
                  receivers)

    def meter_instance_actions(self, project_id, receivers):
        """Meter invoked instance actions as InstanceActionCount."""
        ctxt = get_admin_context()
        self._put_action_count(ctxt, project_id, "InstanceActionCount",
                               len(receivers))
        LOG.audit("Meter InstanceAction: %s %s %s", project_id,
                  len(receivers), receivers)

    def alarm_history_state_update(self, alarmkey, alarm,
                                   notification_message):
        """
        update alarm history based on notification message

        notification_message = {
            'method': "email",
            'receivers': email_receivers,
            'subject': message['subject'],
            'body': message['body'],
            'state': "ok" | "failed"
        }

        A missing 'state' is treated as "ok".  The history row is inserted
        with the configured statistics TTL.
        """
        item_type = 'Action'
        project_id = alarm['project_id']
        method = notification_message.get("method")
        succeeded = notification_message.get('state', 'ok') == 'ok'

        if method in ("email", "SMS"):
            if succeeded:
                history_summary = ("Message '%(subject)s' is sent via"
                                   " %(method)s" % notification_message)
            else:
                history_summary = ("Failed to send a message '%(subject)s'"
                                   " via %(method)s" % notification_message)
        elif method == "InstanceAction":
            # Compared with == on purpose: `in ("InstanceAction")` is a
            # substring test against a plain string, not tuple membership.
            if succeeded:
                history_summary = ("%(method)s %(receivers)s is invoked."
                                   % notification_message)
            else:
                history_summary = ("Failed to invoke %(method)s "
                                   "%(receivers)s." % notification_message)
        else:
            # Unknown method: still record the event instead of failing on
            # an unbound summary.
            if succeeded:
                history_summary = "Action via %s is processed." % method
            else:
                history_summary = "Failed to process action via %s." % method

        timestamp = utils.utcnow()
        history_key = uuid4()
        column = {'project_id': project_id,
                  'alarm_key': UUID(alarmkey),
                  'alarm_name': alarm['alarm_name'],
                  'history_data': json.dumps(notification_message),
                  'history_item_type': item_type,
                  'history_summary': history_summary,
                  'timestamp': timestamp}

        self.cass.insert_alarm_history(history_key, column,
                                       ttl=self.statistics_ttl)
        LOG.info("History updated. %s", history_summary)

    def _expand_group_notifications(self, actions):
        """Replace group notification actions by the group's own actions.

        Other actions are kept as they are, in their original order.
        """
        expanded = []
        for action in actions:
            if validate_groupnotification_action(action):
                groupname = parse_groupnotification_action(action)
                # NOTE(review): a falsy result presumably means the group
                # does not exist (any more); it then contributes nothing.
                members = self.cass.get_notification_group(groupname) or ()
                expanded.extend(members)
            else:
                expanded.append(action)
        return expanded

    def process_action(self, tup):
        """
        Run the actions of the alarm referenced by ``tup``.

        ``tup.values[0]`` is the alarm key and ``tup.values[1]`` the JSON
        encoded message.

        message example

        msg = {
            'state': new_state['stateValue'],
            'subject': "%s state has been changed from %s to %s" %
                (alarm['alarm_name'], old_state['stateValue'],
                 new_state['stateValue']),
            'body': new_state['stateReason']
        }

        Returns False when the alarm cannot be found or the message carries
        an unknown state; otherwise returns None.
        """
        alarm_key = tup.values[0]
        message_buf = tup.values[1]
        message = json.loads(message_buf)
        LOG.info("start processing tup %s", tup)

        alarm = self.cass.get_metric_alarm(UUID(alarm_key))

        try:
            actions_enabled = alarm['actions_enabled']
        except TypeError:
            # Subscripting a missing (None) alarm raises TypeError.
            LOG.debug("Alarm(%s) is not found", alarm_key)
            return False

        state = message['state']
        actions_column = self._STATE_ACTION_COLUMNS.get(state)
        if actions_column is None:
            # Without this guard an unexpected state left the action list
            # unbound and crashed with UnboundLocalError.
            LOG.error("Unknown alarm state %s for alarm(%s)", state,
                      alarm_key)
            return False

        if not actions_enabled:
            # Nothing will be executed, so skip the group lookups.
            return

        actions = self._expand_group_notifications(
            json.loads(alarm[actions_column]))
        if not actions:
            return

        if self.enable_send_sms:
            self.process_sms_action(alarm_key, alarm, message, actions)
        if self.enable_send_mail:
            self.process_email_action(alarm_key, alarm, message, actions)
        if self.enable_instance_action:
            self.process_instance_action(alarm_key, alarm, message, actions)

    def do_instance_action(self, alarm_key, alarm, instance_actions):
        """Invoke each instance action through the nova client.

        "Migrate" migrates the server and "Reboot" hard-reboots it; any
        other action type is ignored.
        """
        nc = utils.get_python_novaclient()
        for action in instance_actions:
            action_type, vm_uuid = parse_instance_action(action)
            server = nc.servers.get(vm_uuid)
            if action_type == "Migrate":
                server.migrate()
                LOG.info("instance action %s invoked for %s", action_type,
                         server)
            elif action_type == "Reboot":
                server.reboot('HARD')
                LOG.info("instance action %s invoked for %s", action_type,
                         server)

    def process_instance_action(self, alarm_key, alarm, message, actions):
        """Invoke the instance actions found in ``actions``.

        When there is at least one, the outcome is written to the alarm
        history and successful invocations are metered.
        """
        instance_actions = [action for action in actions
                            if self.get_action_type(action)
                            == "InstanceAction"]
        instance_action_message = {'method': "InstanceAction",
                                   'receivers': instance_actions,
                                   'subject': message['subject'],
                                   'body': message['body'],
                                   'state': 'ok'}
        if not instance_actions:
            return

        try:
            self.do_instance_action(alarm_key, alarm, instance_actions)
        except Exception as e:
            # Best effort: the failure is recorded in the history instead
            # of failing the whole tuple.
            instance_action_message['state'] = 'failed'
            LOG.exception(e)

        LOG.audit("InstanceAction is invoked. %s", instance_action_message)
        self.alarm_history_state_update(alarm_key, alarm,
                                        instance_action_message)
        if instance_action_message['state'] != 'failed':
            self.meter_instance_actions(alarm['project_id'],
                                        instance_actions)

    def process_email_action(self, alarm_key, alarm, message, actions):
        """Send the alarm message to the distinct email receivers.

        When there is at least one receiver, the outcome is written to the
        alarm history and successful deliveries are metered.
        """
        email_receivers = list(set(action for action in actions
                                   if self.get_action_type(action)
                                   == "email"))
        notification_message = {
            'method': "email",
            'receivers': email_receivers,
            'subject': message['subject'],
            'body': message['body'],
            'state': 'ok',
            'alarm_description': message['alarm_description'],
        }
        if not email_receivers:
            return

        try:
            self.send_email(notification_message)
        except Exception as e:
            # Best effort: the failure is recorded in the history instead
            # of failing the whole tuple.
            notification_message['state'] = 'failed'
            LOG.exception(e)

        LOG.audit("Email sent. %s", notification_message)
        self.alarm_history_state_update(alarm_key, alarm,
                                        notification_message)
        if notification_message['state'] != 'failed':
            self.meter_email_actions(alarm['project_id'], email_receivers)

    def process_sms_action(self, alarm_key, alarm, message, actions):
        """Send the alarm message to the distinct SMS receivers.

        When there is at least one receiver, the outcome is written to the
        alarm history and successful deliveries are metered.
        """
        sms_receivers = list(set(action for action in actions
                                 if self.get_action_type(action) == "SMS"))
        notification_message = {
            'method': "SMS",
            'receivers': sms_receivers,
            'subject': message['subject'],
            'body': message['body'],
            'state': 'ok',
            'alarm_description': message['alarm_description'],
        }
        if not sms_receivers:
            return

        try:
            self.send_sms(notification_message)
        except Exception as e:
            # Best effort: the failure is recorded in the history instead
            # of failing the whole tuple.
            notification_message['state'] = 'failed'
            LOG.exception(e)

        LOG.audit("SMS sent. %s", notification_message)
        self.alarm_history_state_update(alarm_key, alarm,
                                        notification_message)
        if notification_message['state'] != 'failed':
            self.meter_sms_actions(alarm['project_id'], sms_receivers)

    def send_sms(self, message):
        """Queue one SMS per receiver by inserting rows into the SMS DB.

        Numbers with national code 82 go to the MMS_SEND table with a text
        rendered from ``lms_template``; all other numbers go to SMS_SEND
        with the subject (cut to 80 characters) as the text.  All rows are
        committed together; the cursor and the connection are closed even
        when an insert fails.

        message example.

        {'body': u'Threshold Crossed: 3 datapoints were greater than the
                   threshold(50.000000). The most recent datapoints:
                   [110.0, 110.0, 60.0]. at 2012-08-28T10:17:50.494902',
         'receivers': [u'+82 1093145616'],
         'method': 'SMS',
         'subject': u'AlarmActionTest state has been changed from OK to
                      ALARM at 2012-08-28T10:17:50.494902',
         'alarm_description': u''}
        """
        Q_LOCAL = """insert into MMS_SEND(REG_TIME, MSG_SEQ, MSG_KEY,
                     RECEIVER, SENDER, SUBJECT, MESSAGE)
                     values (now()+0, %s, %s, %s, %s, %s, %s)
                  """
        Q_NAT = """insert into SMS_SEND(REG_TIME, MSG_KEY, RECEIVER,
                   SENDER, MESSAGE, NAT_CODE)
                   values (now()+0, %s, %s, %s, %s, %s)
                """

        def parse_number(no):
            """Split "<national code> <number>" into (nat, local_no).

            ``nat`` is None for the Korean national code (82); the local
            number then gets a leading '0'.  Spaces are removed from the
            local number.
            """
            nat, local_no = no.split(' ', 1)
            if nat.startswith("+"):
                nat = int(nat[1:])
            else:
                nat = int(nat)

            local_no = local_no.replace(' ', '')
            if nat == 82:  # Korean national code
                return None, '0' + local_no
            return nat, local_no

        def build_query(receiver, message):
            """Return the insert statement and its parameters."""
            nat, local_no = parse_number(receiver)
            subject = message['subject']
            body = message['body']
            description = message['alarm_description']
            # random integer for msg_key
            msg_key = randint(1, 10 ** 15)

            if nat is None:
                text = self.lms_template % {'region': self.region,
                                            'subject': subject,
                                            'reason': body,
                                            'description': description}
                params = (msg_key, msg_key, local_no, self.sms_sender,
                          subject, text)
                return Q_LOCAL, params

            if len(subject) > 80:
                subject = subject[:77] + "..."
            params = (msg_key, local_no, self.sms_sender, subject, nat)
            return Q_NAT, params

        LOG.debug("Connect to mysql db %s@%s:%s %s", self.sms_db_username,
                  self.sms_db_host, self.sms_db_port, self.sms_db)

        conn = db.connect(host=self.sms_db_host, port=self.sms_db_port,
                          db=self.sms_db, user=self.sms_db_username,
                          passwd=self.sms_db_password, connect_timeout=30,
                          charset='utf8')
        try:
            c = conn.cursor()
            try:
                for receiver in message['receivers']:
                    q, params = build_query(receiver, message)
                    c.execute(q, params)
            finally:
                c.close()
            conn.commit()
        finally:
            # Do not leak the connection when an insert fails.
            conn.close()

    def send_email(self, message):
        """Render the HTML alarm mail and send it to all receivers.

        Region, reason, subject and description are escaped before they
        are put into the subject and body templates.
        """
        msg_dict = {'region': escape(self.region),
                    'reason': escape(message['body']),
                    'subject': escape(message['subject']),
                    'description': escape(message['alarm_description'])}

        body = self.email_body_template % msg_dict
        msg = MIMEText(body, 'html', 'utf8')
        msg['Subject'] = self.email_subject_template % msg_dict
        msg['From'] = self.mail_sender
        msg['To'] = ", ".join(message['receivers'])

        s = smtplib.SMTP(self.smtp_server, timeout=30)
        try:
            s.sendmail(self.mail_sender, message['receivers'],
                       msg.as_string())
            s.quit()
        finally:
            # Releases the socket when sendmail() or quit() raised; it is
            # a no-op after a successful quit().
            s.close()

    def process(self, tup):
        """Storm entry point: handle one incoming tuple."""
        self.process_action(tup)