def _refresh(self, alarm, state, reason, reason_data, always_record=False):
    """Refresh alarm state.

    Persists the new state and notifies listeners when the state
    actually changed (or when always_record is set); otherwise it
    re-notifies only if the alarm has repeat_actions enabled.

    :param alarm: the alarm object being evaluated
    :param state: the newly evaluated state
    :param reason: human-readable reason for the transition
    :param reason_data: structured representation of the reason
    :param always_record: force recording even without a transition
    """
    try:
        previous = alarm.state
        alarm.state = state
        if previous != state or always_record:
            LOG.info(_('alarm %(id)s transitioning to %(state)s because '
                       '%(reason)s') % {'id': alarm.alarm_id,
                                        'state': state,
                                        'reason': reason})
            try:
                self._storage_conn.update_alarm(alarm)
            except storage.AlarmNotFound:
                # The alarm was deleted concurrently; nothing to update.
                # BUG FIX: the original implicit string concatenation
                # produced "thealarm:" — a separating space was missing.
                LOG.warning(_LW("Skip updating this alarm's state, the "
                                "alarm: %s has been deleted"),
                            alarm.alarm_id)
            else:
                self._record_change(alarm)
                self.notifier.notify(alarm, previous, reason, reason_data)
        elif alarm.repeat_actions:
            self.notifier.notify(alarm, previous, reason, reason_data)
    except Exception:
        # retry will occur naturally on the next evaluation
        # cycle (unless alarm state reverts in the meantime)
        LOG.exception(_('alarm state update failed'))
def post(self, data):
    """Create a new alarm.

    :param data: an alarm within the request body.
    """
    rbac.enforce('create_alarm', pecan.request.headers,
                 pecan.request.enforcer)

    conn = pecan.request.alarm_storage_conn
    now = timeutils.utcnow()

    data.alarm_id = str(uuid.uuid4())
    user_limit, project_limit = rbac.get_limited_to(
        pecan.request.headers, pecan.request.enforcer)

    def _set_ownership(aspect, owner_limitation, header):
        # Resolve the effective user/project owner of the alarm: an
        # explicitly requested owner must match the caller when the
        # policy limits ownership; otherwise default to the caller.
        attr = '%s_id' % aspect
        requested_owner = getattr(data, attr)
        explicit_owner = requested_owner != wtypes.Unset
        caller = pecan.request.headers.get(header)
        if (owner_limitation and explicit_owner
                and requested_owner != caller):
            raise base.ProjectNotAuthorized(requested_owner, aspect)

        actual_owner = (owner_limitation or requested_owner
                        if explicit_owner else caller)
        setattr(data, attr, actual_owner)

    _set_ownership('user', user_limit, 'X-User-Id')
    _set_ownership('project', project_limit, 'X-Project-Id')

    # Check if there's room for one more alarm
    if is_over_quota(conn, data.project_id, data.user_id):
        raise OverQuota(data)

    data.timestamp = now
    data.state_timestamp = now

    ALARMS_RULES[data.type].plugin.create_hook(data)

    change = data.as_dict(models.Alarm)
    data.update_actions()

    # make sure alarms are unique by name per project.
    alarms = list(conn.get_alarms(name=data.name,
                                  project=data.project_id))
    if alarms:
        raise base.ClientSideError(
            _("Alarm with name='%s' exists") % data.name,
            status_code=409)

    try:
        alarm_in = models.Alarm(**change)
    except Exception:
        LOG.exception(_("Error while posting alarm: %s") % change)
        raise base.ClientSideError(_("Alarm incorrect"))

    alarm = conn.create_alarm(alarm_in)
    self._record_creation(conn, change, alarm.alarm_id, now)
    v2_utils.set_resp_location_hdr("/v2/alarms/" + alarm.alarm_id)
    return Alarm.from_db_model(alarm)
def put(self, data):
    """Modify this alarm.

    :param data: an alarm within the request body.
    """
    rbac.enforce('change_alarm', pecan.request.headers,
                 pecan.request.enforcer)

    # Ensure alarm exists
    alarm_in = self._alarm()

    now = timeutils.utcnow()

    data.alarm_id = self._id

    user, project = rbac.get_limited_to(pecan.request.headers,
                                        pecan.request.enforcer)
    if user:
        data.user_id = user
    elif data.user_id == wtypes.Unset:
        # Caller omitted the owner: keep the existing one.
        data.user_id = alarm_in.user_id
    if project:
        data.project_id = project
    elif data.project_id == wtypes.Unset:
        data.project_id = alarm_in.project_id
    data.timestamp = now
    # Only bump the state timestamp when the state actually changes.
    if alarm_in.state != data.state:
        data.state_timestamp = now
    else:
        data.state_timestamp = alarm_in.state_timestamp

    # make sure alarms are unique by name per project.
    if alarm_in.name != data.name:
        alarms = list(self.conn.get_alarms(name=data.name,
                                           project=data.project_id))
        if alarms:
            raise base.ClientSideError(_("Alarm with name=%s exists") %
                                       data.name, status_code=409)

    ALARMS_RULES[data.type].plugin.update_hook(data)

    old_data = Alarm.from_db_model(alarm_in)
    old_alarm = old_data.as_dict(models.Alarm)
    data.update_actions(old_data)
    updated_alarm = data.as_dict(models.Alarm)
    try:
        alarm_in = models.Alarm(**updated_alarm)
    except Exception:
        LOG.exception(_("Error while putting alarm: %s") % updated_alarm)
        raise base.ClientSideError(_("Alarm incorrect"))

    alarm = self.conn.update_alarm(alarm_in)

    # Record only the fields that actually changed (timestamps aside).
    change = dict(
        (k, v) for k, v in updated_alarm.items()
        if v != old_alarm[k] and k not in ['timestamp',
                                           'state_timestamp'])
    self._record_change(change, now, on_behalf_of=alarm.project_id)
    return Alarm.from_db_model(alarm)
def valid_composite_rule(rules):
    """Recursively validate one node of a composite alarm rule.

    A combinator node is a single-key dict {'and'|'or': [children]};
    any other dict is a leaf threshold rule, which is normalized in
    place via its type plugin.
    """
    if isinstance(rules, dict) and len(rules) == 1:
        combinator = list(rules)[0]
        if combinator not in ('and', 'or'):
            raise base.ClientSideError(
                _('Threshold rules should be combined with "and" or "or"'))
        children = rules[combinator]
        if not isinstance(children, list):
            raise InvalidCompositeRule(rules)
        for child in children:
            CompositeRule.valid_composite_rule(child)
    elif isinstance(rules, dict):
        rule_type = rules.pop('type', None)
        if not rule_type:
            raise base.ClientSideError(_('type must be set in every rule'))
        if rule_type not in CompositeRule.threshold_plugins:
            plugins = sorted(CompositeRule.threshold_plugins.names())
            raise base.ClientSideError(
                _('Unsupported sub-rule type :%(rule)s in composite '
                  'rule, should be one of: %(plugins)s')
                % {'rule': rule_type, 'plugins': plugins})
        plugin = CompositeRule.threshold_plugins[rule_type].plugin
        wjson.fromjson(plugin, rules)
        # Merge the plugin-parsed fields back and restore the type key.
        rules.update(plugin(**rules).as_dict())
        rules.update(type=rule_type)
    else:
        raise InvalidCompositeRule(rules)
def __init__(self, alarm_id, type, enabled, name, description,
             timestamp, user_id, project_id, state, state_timestamp,
             state_reason, ok_actions, alarm_actions,
             insufficient_data_actions, repeat_actions, rule,
             time_constraints, severity=None):
    """Alarm storage model (variant carrying a state_reason field).

    Both timestamps are validated eagerly so storage never receives
    non-datetime values; all other fields are passed through to the
    base model unchanged.

    :raises TypeError: if timestamp or state_timestamp is not a
        datetime.datetime instance.
    """
    if not isinstance(timestamp, datetime.datetime):
        raise TypeError(_("timestamp should be datetime object"))
    if not isinstance(state_timestamp, datetime.datetime):
        raise TypeError(_("state_timestamp should be datetime object"))
    base.Model.__init__(
        self,
        alarm_id=alarm_id,
        type=type,
        enabled=enabled,
        name=name,
        description=description,
        timestamp=timestamp,
        user_id=user_id,
        project_id=project_id,
        state=state,
        state_timestamp=state_timestamp,
        state_reason=state_reason,
        ok_actions=ok_actions,
        alarm_actions=alarm_actions,
        insufficient_data_actions=insufficient_data_actions,
        repeat_actions=repeat_actions,
        rule=rule,
        time_constraints=time_constraints,
        severity=severity)
def __init__(self, alarm_id, type, enabled, name, description,
             timestamp, user_id, project_id, state, state_timestamp,
             ok_actions, alarm_actions, insufficient_data_actions,
             repeat_actions, rule, time_constraints, severity=None):
    """Alarm storage model.

    Validates that both timestamps are real datetime objects before
    delegating field assignment to the base model.

    :raises TypeError: if timestamp or state_timestamp is not a
        datetime.datetime instance.
    """
    if not isinstance(timestamp, datetime.datetime):
        raise TypeError(_("timestamp should be datetime object"))
    if not isinstance(state_timestamp, datetime.datetime):
        raise TypeError(_("state_timestamp should be datetime object"))
    base.Model.__init__(
        self,
        alarm_id=alarm_id,
        type=type,
        enabled=enabled,
        name=name,
        description=description,
        timestamp=timestamp,
        user_id=user_id,
        project_id=project_id,
        state=state,
        state_timestamp=state_timestamp,
        ok_actions=ok_actions,
        alarm_actions=alarm_actions,
        insufficient_data_actions=insufficient_data_actions,
        repeat_actions=repeat_actions,
        rule=rule,
        time_constraints=time_constraints,
        severity=severity)
def _handle_action(self, action, alarm_id, alarm_name, severity,
                   previous, current, reason, reason_data):
    """Dispatch one notification action URL to its scheme's notifier.

    Every failure is logged and swallowed so a single bad action does
    not prevent the remaining actions from being processed.
    """
    try:
        action = netutils.urlsplit(action)
    except Exception:
        LOG.error(
            _("Unable to parse action %(action)s for alarm %(alarm_id)s"),
            {'action': action, 'alarm_id': alarm_id})
        return

    try:
        # Notifiers are keyed by URL scheme (e.g. http, log, test).
        notifier = self.notifiers[action.scheme].obj
    except KeyError:
        scheme = action.scheme
        LOG.error(
            _("Action %(scheme)s for alarm %(alarm_id)s is unknown, "
              "cannot notify"),
            {'scheme': scheme, 'alarm_id': alarm_id})
        return

    try:
        LOG.debug("Notifying alarm %(id)s with action %(act)s",
                  {'id': alarm_id, 'act': action})
        notifier.notify(action, alarm_id, alarm_name, severity,
                        previous, current, reason, reason_data)
    except Exception:
        LOG.exception(_("Unable to notify alarm %s"), alarm_id)
        return
def put(self, data):
    """Modify this alarm.

    :param data: an alarm within the request body.
    """
    rbac.enforce('change_alarm', pecan.request.headers,
                 pecan.request.enforcer)

    # Ensure alarm exists
    alarm_in = self._alarm()

    now = timeutils.utcnow()

    data.alarm_id = self._id

    user, project = rbac.get_limited_to(pecan.request.headers,
                                        pecan.request.enforcer)
    if user:
        data.user_id = user
    elif data.user_id == wtypes.Unset:
        # No owner supplied: preserve the stored one.
        data.user_id = alarm_in.user_id
    if project:
        data.project_id = project
    elif data.project_id == wtypes.Unset:
        data.project_id = alarm_in.project_id
    data.timestamp = now
    # State timestamp only advances when the state changes.
    if alarm_in.state != data.state:
        data.state_timestamp = now
    else:
        data.state_timestamp = alarm_in.state_timestamp

    # make sure alarms are unique by name per project.
    if alarm_in.name != data.name:
        alarms = list(self.conn.get_alarms(name=data.name,
                                           project=data.project_id))
        if alarms:
            raise base.ClientSideError(
                _("Alarm with name=%s exists") % data.name,
                status_code=409)

    ALARMS_RULES[data.type].plugin.update_hook(data)

    old_data = Alarm.from_db_model(alarm_in)
    old_alarm = old_data.as_dict(models.Alarm)
    data.update_actions(old_data)
    updated_alarm = data.as_dict(models.Alarm)
    try:
        alarm_in = models.Alarm(**updated_alarm)
    except Exception:
        LOG.exception(_("Error while putting alarm: %s") % updated_alarm)
        raise base.ClientSideError(_("Alarm incorrect"))

    alarm = self.conn.update_alarm(alarm_in)

    # Audit record contains only genuinely changed, non-timestamp keys.
    change = dict((k, v) for k, v in updated_alarm.items()
                  if v != old_alarm[k] and k not in ['timestamp',
                                                     'state_timestamp'])
    self._record_change(change, now, on_behalf_of=alarm.project_id)
    return Alarm.from_db_model(alarm)
def _handle_action(self, action, alarm_id, alarm_name, severity,
                   previous, current, reason, reason_data):
    """Run a single alarm action through the notifier for its scheme.

    Parse errors, unknown schemes, and notifier failures are all
    logged and ignored so other actions can still run.
    """
    try:
        action = netutils.urlsplit(action)
    except Exception:
        LOG.error(
            _("Unable to parse action %(action)s for alarm %(alarm_id)s"),
            {'action': action, 'alarm_id': alarm_id})
        return

    try:
        # Look up the notifier plugin by the action URL's scheme.
        notifier = self.notifiers[action.scheme].obj
    except KeyError:
        scheme = action.scheme
        LOG.error(
            _("Action %(scheme)s for alarm %(alarm_id)s is unknown, "
              "cannot notify"),
            {'scheme': scheme, 'alarm_id': alarm_id})
        return

    try:
        LOG.debug("Notifying alarm %(id)s with action %(act)s",
                  {'id': alarm_id, 'act': action})
        notifier.notify(action, alarm_id, alarm_name, severity,
                        previous, current, reason, reason_data)
    except Exception:
        LOG.exception(_("Unable to notify alarm %s"), alarm_id)
        return
def check_rule(alarm):
    """Validate that exactly one rule matching the alarm type is set.

    Rejects combination alarms when disabled by configuration, requires
    the <type>_rule attribute to be present, and forbids setting more
    than one *_rule attribute at once.
    """
    if (not pecan.request.cfg.api.enable_combination_alarms
            and alarm.type == 'combination'):
        raise base.ClientSideError("Unavailable alarm type")

    rule = '%s_rule' % alarm.type
    if getattr(alarm, rule) in (wtypes.Unset, None):
        error = _("%(rule)s must be set for %(type)s"
                  " type alarm") % {"rule": rule,
                                    "type": alarm.type}
        raise base.ClientSideError(error)

    rule_set = None
    for ext in ALARMS_RULES:
        name = "%s_rule" % ext.name
        if getattr(alarm, name):
            if rule_set is None:
                rule_set = name
            else:
                error = _("%(rule1)s and %(rule2)s cannot be set at the "
                          "same time") % {'rule1': rule_set,
                                          'rule2': name}
                raise base.ClientSideError(error)
def valid_composite_rule(rules):
    """Recursively validate a composite alarm rule node.

    A combinator node is a single-key dict {'and'|'or': [sub_rules]};
    any other dict is a leaf threshold rule carrying a 'type' key.
    NOTE: leaf dicts are normalized in place (the 'type' key is popped,
    plugin-parsed fields are merged back, then 'type' is restored).
    """
    if isinstance(rules, dict) and len(rules) == 1:
        and_or_key = list(rules)[0]
        if and_or_key not in ('and', 'or'):
            raise base.ClientSideError(
                _('Threshold rules should be combined with "and" or "or"'))
        if isinstance(rules[and_or_key], list):
            for sub_rule in rules[and_or_key]:
                CompositeRule.valid_composite_rule(sub_rule)
        else:
            raise InvalidCompositeRule(rules)
    elif isinstance(rules, dict):
        rule_type = rules.pop('type', None)
        if not rule_type:
            raise base.ClientSideError(_('type must be set in every rule'))

        if rule_type not in CompositeRule.threshold_plugins:
            plugins = sorted(CompositeRule.threshold_plugins.names())
            err = _('Unsupported sub-rule type :%(rule)s in composite '
                    'rule, should be one of: %(plugins)s') % {
                'rule': rule_type,
                'plugins': plugins}
            raise base.ClientSideError(err)
        plugin = CompositeRule.threshold_plugins[rule_type].plugin
        wjson.fromjson(plugin, rules)
        rule_dict = plugin(**rules).as_dict()
        rules.update(rule_dict)
        rules.update(type=rule_type)
    else:
        raise InvalidCompositeRule(rules)
def _reason(self, alarm, new_state, rule_target_alarm):
    """Build the human-readable reason and structured reason data.

    Collects the sub-rules whose own state matches the new composite
    state ("causative rules") and phrases the message differently for
    a transition versus remaining in the same state.
    """
    transition = alarm.state != new_state
    reason_data = {
        'type': 'composite',
        'composition_form': str(rule_target_alarm)
    }
    root_cause_rules = {}
    for rule in self.rule_targets:
        if rule.state == new_state:
            root_cause_rules.update({rule.rule_name: rule.rule})
    reason_data.update(causative_rules=root_cause_rules)
    params = {
        'state': new_state,
        'expression': str(rule_target_alarm),
        'rules': ', '.join(sorted(root_cause_rules)),
        'description': STATE_CHANGE[new_state]
    }
    if transition:
        reason = (_('Composite rule alarm with composition form: '
                    '%(expression)s transition to %(state)s, due to '
                    'rules: %(rules)s %(description)s') % params)
    else:
        reason = (_('Composite rule alarm with composition form: '
                    '%(expression)s remaining as %(state)s, due to '
                    'rules: %(rules)s %(description)s') % params)
    return reason, reason_data
def _reason(self, alarm, new_state, rule_target_alarm):
    """Produce the transition reason string plus structured data.

    Sub-rules whose own state equals the new composite state are
    reported as the causative rules.
    """
    changed = alarm.state != new_state
    culprits = {}
    for target in self.rule_targets:
        if target.state == new_state:
            culprits[target.rule_name] = target.rule
    reason_data = {'type': 'composite',
                   'composition_form': str(rule_target_alarm),
                   'causative_rules': culprits}
    params = {'state': new_state,
              'expression': str(rule_target_alarm),
              'rules': ', '.join(sorted(culprits)),
              'description': STATE_CHANGE[new_state]}
    if changed:
        template = _('Composite rule alarm with composition form: '
                     '%(expression)s transition to %(state)s, due to '
                     'rules: %(rules)s %(description)s')
    else:
        template = _('Composite rule alarm with composition form: '
                     '%(expression)s remaining as %(state)s, due to '
                     'rules: %(rules)s %(description)s')
    return template % params, reason_data
def connect(self, url, max_retries, retry_interval):
    """Return a pooled, proxied MongoDB client for *url*.

    Database-specific URI pieces are stripped so the pool key reflects
    only the server topology/options; a live cached client is reused
    when available.

    :param url: MongoDB connection URI.
    :param max_retries: retry budget passed to the MongoProxy wrapper.
    :param retry_interval: delay between retries for the proxy.
    :raises pymongo.errors.ConnectionFailure: if the server is
        unreachable (logged, then re-raised).
    """
    connection_options = pymongo.uri_parser.parse_uri(url)
    del connection_options['database']
    del connection_options['username']
    del connection_options['password']
    del connection_options['collection']
    pool_key = tuple(connection_options)

    if pool_key in self._pool:
        # Pool entries are weak references; the client may be gone.
        client = self._pool.get(pool_key)()
        if client:
            return client
    splitted_url = netutils.urlsplit(url)
    log_data = {'db': splitted_url.scheme,
                'nodelist': connection_options['nodelist']}
    LOG.info(_('Connecting to %(db)s on %(nodelist)s') % log_data)
    try:
        client = MongoProxy(
            pymongo.MongoClient(url),
            max_retries,
            retry_interval,
        )
    except pymongo.errors.ConnectionFailure as e:
        # FIX: LOG.warn is a deprecated alias of LOG.warning; use the
        # canonical name (consistent with the other connect() variant
        # in this file).
        LOG.warning(_('Unable to connect to the database server: '
                      '%(errmsg)s.') % {'errmsg': e})
        raise
    self._pool[pool_key] = weakref.ref(client)
    return client
def check_alarm_actions(alarm):
    """Validate every per-state actions list on the alarm.

    De-duplicates actions, enforces the configured per-state maximum,
    verifies each action URL parses and uses a supported scheme, and
    rejects privileged schemes ("log", "test") for limited callers.
    """
    max_actions = pecan.request.cfg.api.alarm_max_actions
    for state in state_kind:
        actions_name = state.replace(" ", "_") + "_actions"
        actions = getattr(alarm, actions_name)
        if not actions:
            continue

        action_set = set(actions)
        if len(actions) != len(action_set):
            LOG.info(_LI("duplicate actions are found: %s, "
                         "remove duplicate ones"), actions)
            actions = list(action_set)
            setattr(alarm, actions_name, actions)

        if 0 < max_actions < len(actions):
            error = _("%(name)s count exceeds maximum value "
                      "%(maximum)d") % {"name": actions_name,
                                        "maximum": max_actions, }
            raise base.ClientSideError(error)

        limited = rbac.get_limited_to_project(pecan.request.headers,
                                              pecan.request.enforcer)

        for action in actions:
            try:
                url = netutils.urlsplit(action)
            except Exception:
                error = _("Unable to parse action %s") % action
                raise base.ClientSideError(error)
            if url.scheme not in ACTIONS_SCHEMA:
                error = _("Unsupported action %s") % action
                raise base.ClientSideError(error)
            if limited and url.scheme in ("log", "test"):
                error = _("You are not authorized to create "
                          "action: %s") % action
                raise base.ClientSideError(error, status_code=401)
def connect(self, url, max_retries, retry_interval):
    """Return a (possibly cached) proxied MongoDB client for *url*."""
    connection_options = pymongo.uri_parser.parse_uri(url)
    # Strip per-database details so the pool key reflects only the
    # server topology/options.
    del connection_options['database']
    del connection_options['username']
    del connection_options['password']
    del connection_options['collection']
    pool_key = tuple(connection_options)

    if pool_key in self._pool:
        # The pool holds weak references; the client may already be
        # garbage-collected.
        client = self._pool.get(pool_key)()
        if client:
            return client
    splitted_url = netutils.urlsplit(url)
    log_data = {
        'db': splitted_url.scheme,
        'nodelist': connection_options['nodelist']
    }
    LOG.info(_('Connecting to %(db)s on %(nodelist)s') % log_data)
    try:
        client = MongoProxy(
            pymongo.MongoClient(url),
            max_retries,
            retry_interval,
        )
    except pymongo.errors.ConnectionFailure as e:
        LOG.warning(
            _('Unable to connect to the database server: '
              '%(errmsg)s.') % {'errmsg': e})
        raise
    self._pool[pool_key] = weakref.ref(client)
    return client
def __init__(self, alarm, auth_project):
    """404 error for a missing alarm, optionally scoped to a project."""
    if auth_project:
        msg = _('Alarm %(alarm_id)s not found in project %'
                '(project)s') % {'alarm_id': alarm,
                                 'project': auth_project}
    else:
        msg = _('Alarm %s not found') % alarm
    super(AlarmNotFound, self).__init__(msg, status_code=404)
def get_value(self, forced_type=None):
    """Convert metadata value to the specified data type.

    This method is called during metadata query to help convert the
    querying metadata to the data type specified by user. If there is no
    data type given, the metadata will be parsed by ast.literal_eval to
    try to do a smart converting.

    NOTE (flwang) Using "_" as prefix to avoid an InvocationError raised
    from wsmeext/sphinxext.py. It's OK to call it outside the Query
    class. Because the "public" side of that class is actually the
    outside of the API, and the "private" side is the API implementation.
    The method is only used in the API implementation, so it's OK.

    :returns: metadata value converted with the specified data type.
    """
    type = forced_type or self.type
    try:
        converted_value = self.value
        if not type:
            try:
                converted_value = ast.literal_eval(self.value)
            except (ValueError, SyntaxError):
                # Unable to convert the metadata value automatically
                # let it default to self.value
                pass
        else:
            if type not in self._supported_types:
                # Types must be explicitly declared so the
                # correct type converter may be used. Subclasses
                # of Query may define _supported_types and
                # _type_converters to define their own types.
                raise TypeError()
            converted_value = self._type_converters[type](self.value)
            if isinstance(converted_value, datetime.datetime):
                converted_value = timeutils.normalize_time(converted_value)
    except ValueError:
        msg = (_('Unable to convert the value %(value)s'
                 ' to the expected data type %(type)s.') % {
            'value': self.value,
            'type': type
        })
        raise ClientSideError(msg)
    except TypeError:
        msg = (_('The data type %(type)s is not supported. The supported'
                 ' data type list is: %(supported)s') % {
            'type': type,
            'supported': self._supported_types
        })
        raise ClientSideError(msg)
    except Exception:
        msg = (_('Unexpected exception converting %(value)s to'
                 ' the expected data type %(type)s.') % {
            'value': self.value,
            'type': type
        })
        raise ClientSideError(msg)
    return converted_value
def _convert_to_datetime(isotime):
    """Parse an ISO 8601 string into a naive (tz-stripped) datetime.

    Raises a 400-style client error when the string cannot be parsed.
    """
    try:
        parsed = timeutils.parse_isotime(isotime)
        return parsed.replace(tzinfo=None)
    except ValueError:
        LOG.exception(_("String %s is not a valid isotime") % isotime)
        raise base.ClientSideError(
            _('Failed to parse the timestamp value %s') % isotime)
def post(self, data):
    """Create a new alarm.

    :param data: an alarm within the request body.
    """
    rbac.enforce('create_alarm', pecan.request)

    conn = pecan.request.alarm_storage_conn
    now = timeutils.utcnow()

    data.alarm_id = str(uuid.uuid4())
    user_limit, project_limit = rbac.get_limited_to(pecan.request.headers)

    def _set_ownership(aspect, owner_limitation, header):
        # Resolve the effective user/project owner: an explicitly
        # requested owner must match the caller when policy limits
        # ownership; otherwise default to the caller's identity.
        attr = '%s_id' % aspect
        requested_owner = getattr(data, attr)
        explicit_owner = requested_owner != wtypes.Unset
        caller = pecan.request.headers.get(header)
        if (owner_limitation and explicit_owner
                and requested_owner != caller):
            raise base.ProjectNotAuthorized(requested_owner, aspect)

        actual_owner = (owner_limitation or requested_owner
                        if explicit_owner else caller)
        setattr(data, attr, actual_owner)

    _set_ownership('user', user_limit, 'X-User-Id')
    _set_ownership('project', project_limit, 'X-Project-Id')

    # Check if there's room for one more alarm
    if is_over_quota(conn, data.project_id, data.user_id):
        raise OverQuota(data)

    data.timestamp = now
    data.state_timestamp = now

    ALARMS_RULES[data.type].plugin.create_hook(data)

    change = data.as_dict(alarm_models.Alarm)

    # make sure alarms are unique by name per project.
    alarms = list(conn.get_alarms(name=data.name,
                                  project=data.project_id))
    if alarms:
        raise base.ClientSideError(
            _("Alarm with name='%s' exists") % data.name,
            status_code=409)

    try:
        alarm_in = alarm_models.Alarm(**change)
    except Exception:
        LOG.exception(_("Error while posting alarm: %s") % change)
        raise base.ClientSideError(_("Alarm incorrect"))

    alarm = conn.create_alarm(alarm_in)
    self._record_creation(conn, change, alarm.alarm_id, now)
    return Alarm.from_db_model(alarm)
def _evaluate_assigned_alarms(self):
    """Run a single evaluation cycle over all alarms assigned to us."""
    try:
        assigned = self._assigned_alarms()
        LOG.info(_('initiating evaluation cycle on %d alarms')
                 % len(assigned))
        for one_alarm in assigned:
            self._evaluate_alarm(one_alarm)
    except Exception:
        # Swallow everything; the periodic task retries next interval.
        LOG.exception(_('alarm evaluation cycle failed'))
def _evaluate_assigned_alarms(self):
    """Evaluate each alarm assigned to this evaluator, once."""
    try:
        alarms = self._assigned_alarms()
        LOG.info(
            _('initiating evaluation cycle on %d alarms') % len(alarms))
        for alarm in alarms:
            self._evaluate_alarm(alarm)
    except Exception:
        # A failure here aborts the whole cycle; the periodic task
        # will retry on the next interval.
        LOG.exception(_('alarm evaluation cycle failed'))
def _statistics(self, alarm, query):
    """Fetch meter statistics for the alarm's evaluation window.

    Returns an empty list when retrieval fails, so the caller treats
    the data as insufficient instead of crashing the cycle.
    """
    LOG.debug(_('stats query %s') % query)
    rule = alarm.rule
    try:
        stats = self._client.statistics.list(meter_name=rule['meter_name'],
                                             q=query,
                                             period=rule['period'])
    except Exception:
        LOG.exception(_('alarm stats retrieval failed'))
        stats = []
    return stats
def __init__(self, alarm, auth_project):
    """404 error: alarm missing, optionally scoped to a project."""
    if not auth_project:
        msg = _('Alarm %s not found') % alarm
    else:
        # NOTE: the literal is split oddly but concatenates to the
        # usual '%(project)s' placeholder.
        msg = _('Alarm %(alarm_id)s not found in project %'
                '(project)s') % {
            'alarm_id': alarm,
            'project': auth_project
        }
    super(AlarmNotFound, self).__init__(msg, status_code=404)
def _evaluate_alarm(self, alarm):
    """Evaluate the alarms assigned to this evaluator."""
    if alarm.type in self.supported_evaluators:
        LOG.debug(_('evaluating alarm %s') % alarm.alarm_id)
        try:
            self.evaluators[alarm.type].obj.evaluate(alarm)
        except Exception:
            # One broken alarm must not abort the rest of the cycle.
            LOG.exception(_('Failed to evaluate alarm %s'), alarm.alarm_id)
    else:
        LOG.debug(_('skipping alarm %s: type unsupported')
                  % alarm.alarm_id)
def _get_value_as_type(self, forced_type=None):
    """Convert metadata value to the specified data type.

    This method is called during metadata query to help convert the
    querying metadata to the data type specified by user. If there is no
    data type given, the metadata will be parsed by ast.literal_eval to
    try to do a smart converting.

    NOTE (flwang) Using "_" as prefix to avoid an InvocationError raised
    from wsmeext/sphinxext.py. It's OK to call it outside the Query
    class. Because the "public" side of that class is actually the
    outside of the API, and the "private" side is the API implementation.
    The method is only used in the API implementation, so it's OK.

    :returns: metadata value converted with the specified data type.
    """
    type = forced_type or self.type
    try:
        converted_value = self.value
        if not type:
            try:
                converted_value = ast.literal_eval(self.value)
            except (ValueError, SyntaxError):
                # Unable to convert the metadata value automatically
                # let it default to self.value
                pass
        else:
            if type not in self._supported_types:
                # Types must be explicitly declared so the
                # correct type converter may be used. Subclasses
                # of Query may define _supported_types and
                # _type_converters to define their own types.
                raise TypeError()
            converted_value = self._type_converters[type](self.value)
            if isinstance(converted_value, datetime.datetime):
                converted_value = timeutils.normalize_time(converted_value)
    except ValueError:
        msg = (_('Unable to convert the value %(value)s'
                 ' to the expected data type %(type)s.') %
               {'value': self.value, 'type': type})
        raise ClientSideError(msg)
    except TypeError:
        msg = (_('The data type %(type)s is not supported. The supported'
                 ' data type list is: %(supported)s') %
               {'type': type, 'supported': self._supported_types})
        raise ClientSideError(msg)
    except Exception:
        msg = (_('Unexpected exception converting %(value)s to'
                 ' the expected data type %(type)s.') %
               {'value': self.value, 'type': type})
        raise ClientSideError(msg)
    return converted_value
def _reason(cls, alarm, statistics, distilled, state):
    """Fabricate reason string.

    Phrases the message as a transition when the alarm's current state
    differs from the evaluated one, otherwise as remaining unchanged.
    """
    count = len(statistics)
    disposition = 'inside' if state == evaluator.OK else 'outside'
    reason_data = cls._reason_data(disposition, count, statistics[-1])
    if alarm.state != state:
        template = _('Transition to %(state)s due to %(count)d samples'
                     ' %(disposition)s threshold, most recent:'
                     ' %(most_recent)s')
    else:
        template = _('Remaining as %(state)s due to %(count)d samples'
                     ' %(disposition)s threshold, most recent: '
                     '%(most_recent)s')
    return template % dict(reason_data, state=state), reason_data
def evaluate(self, alarm):
    """Evaluate a threshold alarm against freshly fetched statistics."""
    if not self.within_time_constraint(alarm):
        LOG.debug(_('Attempted to evaluate alarm %s, but it is not '
                    'within its time constraint.') % alarm.alarm_id)
        return

    query = self._bound_duration(
        alarm,
        alarm.rule['query']
    )

    statistics = self._sanitize(
        alarm,
        self._statistics(alarm, query)
    )

    if self._sufficient(alarm, statistics):
        def _compare(stat):
            # Apply the alarm's comparison operator to one statistic.
            op = COMPARATORS[alarm.rule['comparison_operator']]
            value = getattr(stat, alarm.rule['statistic'])
            limit = alarm.rule['threshold']
            LOG.debug(_('comparing value %(value)s against threshold'
                        ' %(limit)s') %
                      {'value': value, 'limit': limit})
            return op(value, limit)

        self._transition(alarm,
                         statistics,
                         [_compare(statistic)
                          for statistic in statistics])
def _transition_alarm(self, alarm, state, trending_state, statistics,
                      outside_count):
    """Decide whether and how to refresh the alarm for this cycle."""
    unknown = alarm.state == evaluator.UNKNOWN
    continuous = alarm.repeat_actions

    if trending_state:
        # Data is inconclusive: only act if the alarm is currently
        # unknown or repeats its actions.
        if unknown or continuous:
            state = trending_state if unknown else alarm.state
            reason, reason_data = self._reason(alarm, statistics, state,
                                               outside_count)
            self._refresh(alarm, state, reason, reason_data)
        return

    if state == evaluator.UNKNOWN and not unknown:
        LOG.warning(_LW('Expecting %(expected)d datapoints but only get '
                        '%(actual)d') % {
            'expected': alarm.rule['evaluation_periods'],
            'actual': len(statistics)})
        # Reason is not same as log message because we want to keep
        # consistent since thirdparty software may depend on old format.
        reason = _('%d datapoints are unknown') % alarm.rule[
            'evaluation_periods']
        last = None if not statistics else statistics[-1]
        reason_data = self._reason_data('unknown',
                                        alarm.rule['evaluation_periods'],
                                        last)
        self._refresh(alarm, state, reason, reason_data)

    elif state and (alarm.state != state or continuous):
        reason, reason_data = self._reason(alarm, statistics, state,
                                           outside_count)
        self._refresh(alarm, state, reason, reason_data)
def _sufficient_states(self, alarm, states): """Check for the sufficiency of the data for evaluation. Ensure that there is sufficient data for evaluation, transitioning to unknown otherwise. """ # note(sileht): alarm can be evaluated only with # stable state of other alarm alarms_missing_states = [ alarm_id for alarm_id, state in states if not state or state == evaluator.UNKNOWN ] sufficient = len(alarms_missing_states) == 0 if not sufficient and alarm.rule['operator'] == 'or': # if operator is 'or' and there is one alarm, then the combinated # alarm's state should be 'alarm' sufficient = bool([ alarm_id for alarm_id, state in states if state == evaluator.ALARM ]) if not sufficient and alarm.state != evaluator.UNKNOWN: reason = (_('Alarms %(alarm_ids)s' ' are in unknown state') % { 'alarm_ids': ",".join(alarms_missing_states) }) reason_data = self._reason_data(alarms_missing_states) self._refresh(alarm, evaluator.UNKNOWN, reason, reason_data) return sufficient
def validate(tc):
    """Reject time constraints that carry an unknown timezone name."""
    if not tc.timezone:
        return tc
    try:
        pytz.timezone(tc.timezone)
    except Exception:
        raise base.ClientSideError(_("Timezone %s is not valid")
                                   % tc.timezone)
    return tc
def _get_alarm_state(self, alarm_id):
    """Return the current state of *alarm_id*, or None on failure."""
    try:
        fetched = self._client.alarms.get(alarm_id)
    except Exception:
        LOG.exception(_('alarm retrieval failed'))
        return None
    return fetched.state
def notify_alarm(self, context, data):
    """Notify that alarm has been triggered.

    :param context: Request context.
    :param data: (dict):

        - actions, the URL of the action to run; this is mapped to
          extensions automatically
        - alarm_id, the ID of the alarm that has been triggered
        - alarm_name, the name of the alarm that has been triggered
        - severity, the level of the alarm that has been triggered
        - previous, the previous state of the alarm
        - current, the new state the alarm has transitioned to
        - reason, the reason the alarm changed its state
        - reason_data, a dict representation of the reason
    """
    actions = data.get('actions')
    if not actions:
        LOG.error(_("Unable to notify for an alarm with no action"))
        return

    # Fan each action out individually with the shared alarm payload.
    payload_keys = ('alarm_id', 'alarm_name', 'severity', 'previous',
                    'current', 'reason', 'reason_data')
    for action in actions:
        self._handle_action(action,
                            *(data.get(key) for key in payload_keys))
def _compare(value):
    # Compare one sample value against the alarm's threshold using
    # the configured comparison operator.
    # NOTE(review): relies on `alarm` from an enclosing scope not
    # visible here — confirm the closure context when refactoring.
    op = COMPARATORS[alarm.rule['comparison_operator']]
    limit = alarm.rule['threshold']
    LOG.debug(_('comparing value %(value)s against threshold'
                ' %(limit)s') %
              {'value': value, 'limit': limit})
    return op(value, limit)
def update_hook(alarm):
    # Should check whether there is any cycle in the dependency graph,
    # but for efficiency we only verify the alarm does not depend on
    # itself.
    if alarm.alarm_id not in alarm.combination_rule.alarm_ids:
        return
    raise base.ClientSideError(
        _('Cannot specify alarm %s itself in combination rule')
        % alarm.alarm_id)
def __init__(self, entity, id):
    """404 error naming the missing entity kind and identifier."""
    detail = _("%(entity)s %(id)s Not Found") % {'entity': entity,
                                                 'id': id}
    super(EntityNotFound, self).__init__(detail, status_code=404)
def _get_alarm_state(self, alarm_id):
    """Return the state of the given alarm, or None if retrieval fails."""
    try:
        # NOTE(review): other code in this file treats get_alarms()
        # results as a collection (e.g. list(conn.get_alarms(...)));
        # accessing .state directly on its return value here looks
        # suspicious — confirm it yields a single alarm object.
        alarm = self._storage_conn.get_alarms(alarm_id=alarm_id)
    except Exception:
        LOG.exception(_('alarm retrieval failed'))
        return None
    return alarm.state
def validate(rule):
    """De-duplicate alarm ids (order preserved) and require >= 2."""
    rule.alarm_ids = sorted(set(rule.alarm_ids), key=rule.alarm_ids.index)
    if len(rule.alarm_ids) > 1:
        return rule
    raise base.ClientSideError(_('Alarm combination rule should '
                                 'contain at least two different '
                                 'alarm ids.'))
def _transition_alarm(self, alarm, state, trending_state, statistics,
                      outside_count):
    """Apply the evaluated state to the alarm, refreshing as needed."""
    unknown = alarm.state == evaluator.UNKNOWN
    continuous = alarm.repeat_actions

    if trending_state:
        # Inconclusive data: refresh only when the alarm is currently
        # unknown or configured to repeat its actions.
        if unknown or continuous:
            state = trending_state if unknown else alarm.state
            reason, reason_data = self._reason(alarm, statistics, state,
                                               outside_count)
            self._refresh(alarm, state, reason, reason_data)
        return

    if state == evaluator.UNKNOWN and not unknown:
        LOG.warning(
            _LW('Expecting %(expected)d datapoints but only get '
                '%(actual)d') % {
                'expected': alarm.rule['evaluation_periods'],
                'actual': len(statistics)
            })
        # Reason is not same as log message because we want to keep
        # consistent since thirdparty software may depend on old format.
        reason = _(
            '%d datapoints are unknown') % alarm.rule['evaluation_periods']
        last = None if not statistics else statistics[-1]
        reason_data = self._reason_data('unknown',
                                        alarm.rule['evaluation_periods'],
                                        last)
        self._refresh(alarm, state, reason, reason_data)
    elif state and (alarm.state != state or continuous):
        reason, reason_data = self._reason(alarm, statistics, state,
                                           outside_count)
        self._refresh(alarm, state, reason, reason_data)
def _statistics(self, alarm, start, end):
    """Retrieve statistics over the current window.

    Builds the Gnocchi API request matching the alarm type, issues it,
    and returns the decoded measures, or [] on any failure.
    """
    method = 'get'
    req = {
        'url': self.gnocchi_url + "/v1",
        'headers': self._get_headers(),
        'params': {
            'aggregation': alarm.rule['aggregation_method'],
            'start': start,
            'end': end,
        }
    }

    if alarm.type == 'gnocchi_aggregation_by_resources_threshold':
        method = 'post'
        req['url'] += "/aggregation/resource/%s/metric/%s" % (
            alarm.rule['resource_type'], alarm.rule['metric'])
        req['data'] = alarm.rule['query']
        # FIXME(sileht): In case of a heat autoscaling stack decide to
        # delete an instance, the gnocchi metrics associated to this
        # instance will be no more updated and when the alarm will ask
        # for the aggregation, gnocchi will raise a 'No overlap'
        # exception. So temporary set 'needed_overlap' to 0 to disable
        # the gnocchi checks about missing points. For more detail see:
        #   https://bugs.launchpad.net/gnocchi/+bug/1479429
        req['params']['needed_overlap'] = 0
    elif alarm.type == 'gnocchi_aggregation_by_metrics_threshold':
        req['url'] += "/aggregation/metric"
        req['params']['metric[]'] = alarm.rule['metrics']
    elif alarm.type == 'gnocchi_resources_threshold':
        req['url'] += "/resource/%s/%s/metric/%s/measures" % (
            alarm.rule['resource_type'],
            alarm.rule['resource_id'], alarm.rule['metric'])

    LOG.debug('stats query %s', req['url'])
    try:
        r = getattr(requests, method)(**req)
    except Exception:
        LOG.exception(_('alarm stats retrieval failed'))
        return []
    # BUG FIX: LOG.exception() is only valid inside an exception
    # handler; a non-2xx HTTP response is not an exception, so log it
    # with LOG.error() instead.
    if r.status_code // 100 != 2:
        LOG.error(_('alarm stats retrieval failed: %s') % r.text)
        return []
    return jsonutils.loads(r.text)
def _handle_action(notifiers, action, alarm_id, alarm_name, severity,
                   previous, current, reason, reason_data):
    """Process action on alarm

    :param notifiers: list of possible notifiers.
    :param action: The action that is being attended, as a parsed URL.
    :param alarm_id: The triggered alarm.
    :param alarm_name: The name of triggered alarm.
    :param severity: The level of triggered alarm
    :param previous: The previous state of the alarm.
    :param current: The current state of the alarm.
    :param reason: The reason the alarm changed its state.
    :param reason_data: A dict representation of the reason.
    """
    # Parse the raw action URL first; an unparseable action is logged
    # and dropped.
    try:
        parsed = netutils.urlsplit(action)
    except Exception:
        LOG.error(
            _("Unable to parse action %(action)s for alarm %(alarm_id)s"),
            {'action': action, 'alarm_id': alarm_id})
        return

    # Dispatch on the URL scheme; an unregistered scheme is logged and
    # dropped.
    try:
        notifier = notifiers[parsed.scheme].obj
    except KeyError:
        LOG.error(
            _("Action %(scheme)s for alarm %(alarm_id)s is unknown, "
              "cannot notify"),
            {'scheme': parsed.scheme, 'alarm_id': alarm_id})
        return

    try:
        LOG.debug("Notifying alarm %(id)s with action %(act)s",
                  {'id': alarm_id, 'act': parsed})
        notifier.notify(parsed, alarm_id, alarm_name, severity,
                        previous, current, reason, reason_data)
    except Exception:
        LOG.exception(_("Unable to notify alarm %s"), alarm_id)
        return
def _statistics(self, alarm, start, end):
    """Retrieve statistics over the current window.

    Builds a Gnocchi query from the alarm's rule and type, then issues
    it against the Gnocchi API.

    :param alarm: the alarm being evaluated; its rule drives the query.
    :param start: start of the evaluation window.
    :param end: end of the evaluation window.
    :return: the decoded list of measures, or [] on any retrieval error.
    """
    method = 'get'
    req = {
        'url': self.gnocchi_url + "/v1",
        'headers': self._get_headers(),
        'params': {
            'aggregation': alarm.rule['aggregation_method'],
            'start': start,
            'end': end,
        }
    }

    if alarm.type == 'gnocchi_aggregation_by_resources_threshold':
        method = 'post'
        req['url'] += "/aggregation/resource/%s/metric/%s" % (
            alarm.rule['resource_type'], alarm.rule['metric'])
        req['data'] = alarm.rule['query']

        # FIXME(sileht): In case of a heat autoscaling stack decide to
        # delete an instance, the gnocchi metrics associated to this
        # instance will be no more updated and when the alarm will ask
        # for the aggregation, gnocchi will raise a 'No overlap'
        # exception.
        # So temporary set 'percent_of_overlap' to 0 to disable the
        # gnocchi checks about missing points. For more detail see:
        #   https://bugs.launchpad.net/gnocchi/+bug/1479429
        req['params']['percent_of_overlap'] = 0

    elif alarm.type == 'gnocchi_aggregation_by_metrics_threshold':
        req['url'] += "/aggregation/metric"
        req['params']['metric[]'] = alarm.rule['metrics']

    elif alarm.type == 'gnocchi_resources_threshold':
        req['url'] += "/resource/%s/%s/metric/%s/measures" % (
            alarm.rule['resource_type'],
            alarm.rule['resource_id'], alarm.rule['metric'])

    LOG.debug('stats query %s', req['url'])
    try:
        r = getattr(requests, method)(**req)
    except Exception:
        LOG.exception(_('alarm stats retrieval failed'))
        return []
    if int(r.status_code / 100) != 2:
        # No exception is in flight here, so use error() instead of
        # exception(): the latter would append a bogus "NoneType: None"
        # traceback to the log record.
        LOG.error(_('alarm stats retrieval failed: %s'), r.text)
        return []
    else:
        return jsonutils.loads(r.text)
def validate(tc):
    """Check the time constraint's timezone, if one was supplied.

    :param tc: the time constraint to validate.
    :return: the same time constraint, unchanged.
    :raises base.ClientSideError: if the timezone name is not known.
    """
    tz = tc.timezone
    if tz:
        try:
            pytz.timezone(tz)
        except Exception:
            # Any pytz failure means the client supplied a bad name.
            raise base.ClientSideError(_("Timezone %s is not valid") % tz)
    return tc
def check_rule(alarm):
    """Ensure exactly the rule matching the alarm's type is supplied.

    :param alarm: the alarm under validation.
    :raises base.ClientSideError: if the rule for the alarm's type is
        missing, or if more than one rule kind is set.
    """
    required = '%s_rule' % alarm.type
    if getattr(alarm, required) in (wtypes.Unset, None):
        msg = _("%(rule)s must be set for %(type)s"
                " type alarm") % {"rule": required, "type": alarm.type}
        raise base.ClientSideError(msg)

    # At most one *_rule attribute may be populated across all plugins.
    seen = None
    for ext in ALARMS_RULES:
        candidate = "%s_rule" % ext.name
        if not getattr(alarm, candidate):
            continue
        if seen is not None:
            msg = _("%(rule1)s and %(rule2)s cannot be set at the "
                    "same time") % {'rule1': seen, 'rule2': candidate}
            raise base.ClientSideError(msg)
        seen = candidate
def default_description(self):
    """Build the human-readable description from the rule's fields."""
    fields = {
        'comparison_operator': self.comparison_operator,
        'statistic': self.statistic,
        'threshold': self.threshold,
        'meter_name': self.meter_name,
        'period': self.period,
    }
    return _('Alarm when %(meter_name)s is %(comparison_operator)s a '
             '%(statistic)s of %(threshold)s over %(period)s seconds'
             ) % fields
def _reason(cls, alarm, state, underlying_states):
    """Fabricate reason string.

    :param alarm: the composite alarm being evaluated.
    :param state: the target state.
    :param underlying_states: (alarm_id, state) pairs for the underlying
        alarms.
    :return: a (reason, reason_data) tuple.
    """
    matching = [a_id for a_id, a_state in underlying_states
                if a_state == state]
    reason_data = cls._reason_data(matching)
    subst = {'state': state, 'alarm_ids': ",".join(matching)}
    # Wording differs depending on whether this refresh changes state.
    if alarm.state != state:
        template = _('Transition to %(state)s due to alarms'
                     ' %(alarm_ids)s in state %(state)s')
    else:
        template = _('Remaining as %(state)s due to alarms'
                     ' %(alarm_ids)s in state %(state)s')
    return template % subst, reason_data
def __init__(self, data):
    """Build the 403 quota-exceeded error for the given alarm payload."""
    subst = {'u': data.user_id, 'p': data.project_id}
    msg = _("Alarm quota exceeded for user %(u)s on project %(p)s") % subst
    super(OverQuota, self).__init__(msg, status_code=403)
def validate(value):
    """Check *value* is JSON-serializable, then validate it as a rule.

    :param value: the composite rule from the request body.
    :return: the validated value, unchanged.
    :raises base.ClientSideError: if the value cannot be serialized
        to JSON.
    """
    try:
        json.dumps(value)
    except (TypeError, ValueError):
        # TypeError: unserializable object; ValueError: e.g. circular
        # references — both mean the client sent a bad payload and must
        # surface as a client error rather than a 500.
        raise base.ClientSideError(
            _('%s is not JSON serializable') % value)
    else:
        CompositeRule.valid_composite_rule(value)
        return value
def validate_alarm(cls, alarm):
    """Run base validation, then validate each event-rule query term.

    :raises base.ClientSideError: if a query field, op or trait type is
        rejected by the query validator.
    """
    super(AlarmEventRule, cls).validate_alarm(alarm)
    for item in alarm.event_rule.query:
        item._get_value_as_type()
        spec = {"field": item.field,
                "op": item.op,
                "value": item.type}
        try:
            _q_validator(spec)
        except voluptuous.MultipleInvalid as e:
            raise base.ClientSideError(
                _("Query value or traits invalid: %s") % str(e))
def build_server(conf):
    """Load the WSGI application and serve it on the configured address."""
    app = load_app(conf)
    # Create the WSGI server and start it
    host = conf.api.host
    port = conf.api.port
    LOG.info(_('Starting server in PID %s') % os.getpid())
    LOG.info(_("Configuration:"))
    conf.log_opt_values(LOG, logging.INFO)
    # A wildcard bind is reachable locally through the loopback address,
    # so point the operator at 127.0.0.1 in that case.
    if host == '0.0.0.0':
        msg = _('serving on 0.0.0.0:%(sport)s, view at '
                'http://127.0.0.1:%(vport)s') % {'sport': port,
                                                 'vport': port}
    else:
        msg = _("serving on http://%(host)s:%(port)s") % {'host': host,
                                                          'port': port}
    LOG.info(msg)
    serving.run_simple(host, port, app, processes=conf.api.workers)
def _refresh(self, alarm, state, reason, reason_data):
    """Refresh alarm state.

    Records the transition (log, storage, change history) when the
    state actually changed, and hands the alarm to the notifier.

    :param alarm: the alarm being refreshed; mutated in place.
    :param state: the new state to apply.
    :param reason: human-readable explanation of the transition.
    :param reason_data: dict representation of the reason.
    """
    try:
        previous = alarm.state
        alarm.state = state
        if previous != state:
            # Persist and record history only on an actual transition.
            LOG.info(_('alarm %(id)s transitioning to %(state)s because '
                       '%(reason)s') % {'id': alarm.alarm_id,
                                        'state': state,
                                        'reason': reason})
            self._storage_conn.update_alarm(alarm)
            self._record_change(alarm)
        # NOTE(review): as written, the notifier fires on every refresh,
        # even when the state did not change — confirm this matches the
        # intended notification policy (cf. the repeat_actions handling
        # in the newer _refresh variant).
        if self.notifier:
            self.notifier.notify(alarm, previous, reason, reason_data)
    except Exception:
        # retry will occur naturally on the next evaluation
        # cycle (unless alarm state reverts in the meantime)
        LOG.exception(_('alarm state update failed'))