def main():
    """Migrate alarm definitions (and optionally their change history)
    from a NoSQL storage backend to a SQL storage backend.

    Connection URLs and flags come from the CLI parser; alarms already
    present in the SQL target raise DBDuplicateEntry and are skipped
    rather than treated as fatal.
    """
    args = get_parser().parse_args()

    # Set up logging to use the console
    console = logging.StreamHandler(sys.stderr)
    formatter = logging.Formatter(
        '[%(asctime)s] %(levelname)-8s %(message)s')
    console.setFormatter(formatter)
    root_logger.addHandler(console)
    if args.debug:
        root_logger.setLevel(logging.DEBUG)
    else:
        root_logger.setLevel(logging.INFO)

    _validate_conn_options(args)

    # Build two independent oslo.config namespaces so the source and
    # target connections cannot clobber each other's 'database' options.
    nosql_conf = cfg.ConfigOpts()
    db_options.set_defaults(nosql_conf, args.nosql_conn)
    nosql_conf.register_opts(storage.OPTS, 'database')
    nosql_conn = storage.get_connection_from_config(nosql_conf)

    sql_conf = cfg.ConfigOpts()
    db_options.set_defaults(sql_conf, args.sql_conn)
    sql_conf.register_opts(storage.OPTS, 'database')
    sql_conn = storage.get_connection_from_config(sql_conf)

    root_logger.info(
        _LI("Starting to migrate alarms data from NoSQL to SQL..."))

    count = 0
    for alarm in nosql_conn.get_alarms():
        # FIX: pass substitution args lazily (consistent with the other
        # log calls here) instead of eagerly %-formatting the message.
        root_logger.debug("Migrating alarm %s...", alarm.alarm_id)
        try:
            sql_conn.create_alarm(alarm)
            count += 1
        except exception.DBDuplicateEntry:
            root_logger.warning(_LW("Duplicated alarm %s found, skipped."),
                                alarm.alarm_id)
        if not args.migrate_history:
            continue

        history_count = 0
        for history in nosql_conn.get_alarm_changes(alarm.alarm_id, None):
            history_data = history.as_dict()
            root_logger.debug(" Migrating alarm history data with"
                              " event_id %s...", history_data['event_id'])
            try:
                sql_conn.record_alarm_change(history_data)
                history_count += 1
            except exception.DBDuplicateEntry:
                root_logger.warning(
                    _LW(" Duplicated alarm history %s found, skipped."),
                    history_data['event_id'])
        root_logger.info(_LI(" Migrated %(count)s history data of alarm "
                             "%(alarm_id)s"),
                         {'count': history_count,
                          'alarm_id': alarm.alarm_id})

    root_logger.info(_LI("End alarms data migration from NoSQL to SQL, %s"
                         " alarms have been migrated."), count)
def conversion():
    """Interactively convert 'combination' alarms into 'composite' alarms.

    Prompts for confirmation, then clones every combination alarm as a
    composite alarm with an equivalent rule; optionally deletes the
    originals afterwards.
    """
    answer = moves.input("This tool is used for converting the combination "
                         "alarms to composite alarms, please type 'yes' to "
                         "confirm: ")
    if answer != 'yes':
        print("Alarm conversion aborted!")
        return

    args = get_parser().parse_args()
    conf = service.prepare_service()
    conn = storage.get_connection_from_config(conf)
    combination_alarms = list(conn.get_alarms(
        alarm_type='combination', alarm_id=args.alarm_id or None))

    converted = 0
    for alarm in combination_alarms:
        new_name = 'From-combination: %s' % alarm.alarm_id
        already_converted = list(conn.get_alarms(name=new_name,
                                                 alarm_type='composite'))
        if already_converted:
            LOG.warning(_LW('Alarm %(alarm)s has been already converted as '
                            'composite alarm: %(n_alarm_id)s, skipped.'),
                        {'alarm': alarm.alarm_id,
                         'n_alarm_id': already_converted[0].alarm_id})
            continue

        try:
            composite_rule = _generate_composite_rule(conn, alarm)
        except DependentAlarmNotFound as err:
            LOG.warning(_LW('The dependent alarm %(dep_alarm)s of alarm %'
                            '(com_alarm)s not found, skipped.'),
                        {'com_alarm': err.com_alarm_id,
                         'dep_alarm': err.dependent_alarm_id})
            continue
        except UnsupportedSubAlarmType as err:
            LOG.warning(_LW('Alarm conversion from combination to composite '
                            'only support combination alarms depending '
                            'threshold alarms, the type of alarm %(alarm)s '
                            'is: %(type)s, skipped.'),
                        {'alarm': err.sub_alarm_id,
                         'type': err.sub_alarm_type})
            continue

        # Clone the combination alarm, then overwrite the fields that
        # distinguish the composite replacement.
        replacement = models.Alarm(**alarm.as_dict())
        replacement.alarm_id = str(uuid.uuid4())
        replacement.name = new_name
        replacement.type = 'composite'
        replacement.description = ('composite alarm converted from combination '
                                   'alarm: %s' % alarm.alarm_id)
        replacement.rule = composite_rule
        replacement.timestamp = datetime.datetime.now()
        conn.create_alarm(replacement)
        LOG.info(_LI('End Converting combination alarm %(s_alarm)s to '
                     'composite alarm %(d_alarm)s'),
                 {'s_alarm': alarm.alarm_id,
                  'd_alarm': replacement.alarm_id})
        converted += 1

    if args.delete_combination_alarm:
        for alarm in combination_alarms:
            LOG.info(_LI('Deleting the combination alarm %s...'),
                     alarm.alarm_id)
            conn.delete_alarm(alarm.alarm_id)
    LOG.info(_LI('%s combination alarms have been converted to composite '
                 'alarms.'), converted)
def _transition_alarm(self, alarm, state, trending_state, statistics,
                      outside_count):
    """Move *alarm* toward *state*, honouring trending/UNKNOWN rules."""
    was_unknown = alarm.state == evaluator.UNKNOWN
    repeat = alarm.repeat_actions

    if trending_state:
        # A trend is only promoted to a real state change when coming
        # out of UNKNOWN; otherwise we merely re-notify the current
        # state for alarms that request repeated actions.
        if was_unknown or repeat:
            state = trending_state if was_unknown else alarm.state
            reason, reason_data = self._reason(alarm, statistics, state,
                                               outside_count)
            self._refresh(alarm, state, reason, reason_data)
        return

    if state == evaluator.UNKNOWN and not was_unknown:
        LOG.warning(_LW('Expecting %(expected)d datapoints but only get '
                        '%(actual)d') % {
            'expected': alarm.rule['evaluation_periods'],
            'actual': len(statistics)})
        # Reason is not same as log message because we want to keep
        # consistent since thirdparty software may depend on old format.
        reason = _('%d datapoints are unknown') % alarm.rule[
            'evaluation_periods']
        last = statistics[-1] if statistics else None
        reason_data = self._reason_data('unknown',
                                        alarm.rule['evaluation_periods'],
                                        last)
        self._refresh(alarm, state, reason, reason_data)
    elif state and (alarm.state != state or repeat):
        reason, reason_data = self._reason(alarm, statistics, state,
                                           outside_count)
        self._refresh(alarm, state, reason, reason_data)
def extract_my_subset(self, group_id, universal_set):
    """Filters an iterable, returning only objects assigned to this agent.

    We have a list of objects and get a list of active group members from
    `tooz`. We then hash all the objects into buckets and return only
    the ones that hashed into *our* bucket.
    """
    # Partitioning disabled: this agent owns everything.
    if not group_id:
        return universal_set
    if group_id not in self._groups:
        self.join_group(group_id)
    try:
        members = self._get_members(group_id)
        LOG.debug('Members of group: %s, Me: %s', members, self._my_id)
        if self._my_id not in members:
            # Possibly evicted (e.g. missed heartbeats) — rejoin once
            # before giving up.
            LOG.warning(_LW('Cannot extract tasks because agent failed to '
                            'join group properly. Rejoining group.'))
            self.join_group(group_id)
            members = self._get_members(group_id)
            if self._my_id not in members:
                raise MemberNotInGroupError(group_id, members, self._my_id)
            LOG.debug('Members of group: %s, Me: %s', members, self._my_id)
        # Hash every object onto the ring of current members; keep only
        # the ones that land on us.
        ring = HashRing(members)
        LOG.debug('Universal set: %s', universal_set)
        mine = [item for item in universal_set
                if ring.get_node(str(item)) == self._my_id]
        LOG.debug('My subset: %s', mine)
        return mine
    except tooz.coordination.ToozError:
        # Coordination backend unreachable: process nothing this cycle
        # rather than risk double-processing with another agent.
        LOG.exception(_LE('Error getting group membership info from '
                          'coordination backend.'))
        return []
def setup_app(pecan_config=PECAN_CONFIG, conf=None):
    """Build and return the pecan WSGI application for the API."""
    if conf is None:
        # NOTE(jd) That sucks but pecan forces us to use kwargs :(
        raise RuntimeError("Config is actually mandatory")
    # FIXME: Replace DBHook with a hooks.TransactionHook
    app_hooks = [
        hooks.ConfigHook(conf),
        hooks.DBHook(storage.get_connection_from_config(conf)),
        hooks.TranslationHook(),
    ]

    pecan.configuration.set_config(dict(pecan_config), overwrite=True)

    # NOTE(sileht): pecan debug won't work in multi-process environment
    pecan_debug = conf.api.pecan_debug
    if conf.api.workers != 1 and pecan_debug:
        pecan_debug = False
        LOG.warning(_LW('pecan_debug cannot be enabled, if workers is > 1, '
                        'the value is overrided with False'))

    return pecan.make_app(
        pecan_config['app']['root'],
        debug=pecan_debug,
        hooks=app_hooks,
        wrap_app=middleware.ParsableErrorMiddleware,
        guess_content_type_from_ext=False,
    )
def _refresh(self, alarm, state, reason, reason_data, always_record=False):
    """Refresh alarm state.

    Persists the transition, records the change and notifies listeners
    when the state actually changed (or ``always_record`` is set).  If
    the state is unchanged, re-notifies only when the alarm requests
    repeated actions.  Any failure is logged and swallowed so the next
    evaluation cycle can retry naturally.
    """
    try:
        previous = alarm.state
        alarm.state = state
        if previous != state or always_record:
            LOG.info(_('alarm %(id)s transitioning to %(state)s because '
                       '%(reason)s') % {'id': alarm.alarm_id,
                                        'state': state,
                                        'reason': reason})
            try:
                self._storage_conn.update_alarm(alarm)
            except storage.AlarmNotFound:
                # FIX: the adjacent literals used to concatenate to
                # "...state, thealarm: ..." — a space was missing
                # between "the" and "alarm".
                LOG.warning(_LW("Skip updating this alarm's state, the "
                                "alarm: %s has been deleted"),
                            alarm.alarm_id)
            else:
                # Only record history and notify when the alarm still
                # exists in storage.
                self._record_change(alarm)
                self.notifier.notify(alarm, previous, reason, reason_data)
        elif alarm.repeat_actions:
            self.notifier.notify(alarm, previous, reason, reason_data)
    except Exception:
        # retry will occur naturally on the next evaluation
        # cycle (unless alarm state reverts in the meantime)
        LOG.exception(_('alarm state update failed'))
def evaluate_events(self, events):
    """Evaluate the events by referring related alarms."""
    # Accept either a single event or a list of events.
    if not isinstance(events, list):
        events = [events]

    LOG.debug('Starting event alarm evaluation: #events = %d', len(events))
    for raw in events:
        LOG.debug('Evaluating event: event = %s', raw)
        try:
            event = Event(raw)
        except InvalidEvent:
            # Malformed event: skip it and keep evaluating the rest.
            LOG.warning(_LW('Event <%s> is invalid, aborting evaluation '
                            'for it.'), raw)
            continue

        for alarm_id, alarm in six.iteritems(
                self._get_project_alarms(event.project)):
            try:
                self._evaluate_alarm(alarm, event)
            except Exception:
                # One broken alarm must not stop evaluation of others.
                LOG.exception(_LE('Failed to evaluate alarm (id=%(a)s) '
                                  'triggered by event = %(e)s.'),
                              {'a': alarm_id, 'e': raw})

    LOG.debug('Finished event alarm evaluation.')
def extract_my_subset(self, group_id, universal_set):
    """Filters an iterable, returning only objects assigned to this agent.

    We have a list of objects and get a list of active group members from
    `tooz`. We then hash all the objects into buckets and return only
    the ones that hashed into *our* bucket.
    """
    # No group configured: partitioning is disabled, keep everything.
    if not group_id:
        return universal_set
    if group_id not in self._groups:
        self.join_group(group_id)
    try:
        members = self._get_members(group_id)
        LOG.debug('Members of group: %s, Me: %s', members, self._my_id)
        if self._my_id not in members:
            # We may have been evicted (e.g. missed heartbeats); rejoin
            # once and re-check before raising.
            LOG.warning(
                _LW('Cannot extract tasks because agent failed to '
                    'join group properly. Rejoining group.'))
            self.join_group(group_id)
            members = self._get_members(group_id)
            if self._my_id not in members:
                raise MemberNotInGroupError(group_id, members, self._my_id)
            LOG.debug('Members of group: %s, Me: %s', members, self._my_id)
        # Consistent-hash each object over the current membership; each
        # object is owned by the member its string key maps to.
        hr = HashRing(members)
        LOG.debug('Universal set: %s', universal_set)
        my_subset = [
            v for v in universal_set if hr.get_node(str(v)) == self._my_id
        ]
        LOG.debug('My subset: %s', my_subset)
        return my_subset
    except tooz.coordination.ToozError:
        # Coordination backend unreachable: return nothing this cycle
        # rather than risk double-processing with another agent.
        LOG.exception(
            _LE('Error getting group membership info from '
                'coordination backend.'))
        return []
def setup_app(pecan_config=PECAN_CONFIG, conf=None):
    """Create and return the pecan WSGI application.

    :param pecan_config: pecan configuration mapping (must contain
        ``['app']['root']``).
    :param conf: service configuration object; required despite being a
        keyword argument (see NOTE below).
    :raises RuntimeError: if *conf* is not supplied.
    """
    if conf is None:
        # NOTE(jd) That sucks but pecan forces us to use kwargs :(
        raise RuntimeError("Config is actually mandatory")
    # FIXME: Replace DBHook with a hooks.TransactionHook
    app_hooks = [
        hooks.ConfigHook(conf),
        hooks.DBHook(storage.get_connection_from_config(conf)),
        hooks.TranslationHook()
    ]

    pecan.configuration.set_config(dict(pecan_config), overwrite=True)

    # NOTE(sileht): pecan debug won't work in multi-process environment
    pecan_debug = conf.api.pecan_debug
    if conf.api.workers != 1 and pecan_debug:
        pecan_debug = False
        LOG.warning(
            _LW('pecan_debug cannot be enabled, if workers is > 1, '
                'the value is overrided with False'))

    app = pecan.make_app(pecan_config['app']['root'],
                         debug=pecan_debug,
                         hooks=app_hooks,
                         wrap_app=middleware.ParsableErrorMiddleware,
                         guess_content_type_from_ext=False)

    return app
def _transition_alarm(self, alarm, state, trending_state, statistics,
                      outside_count):
    """Transition *alarm* to *state* (or toward *trending_state*)."""
    # Whether the alarm is currently UNKNOWN, and whether it asks for
    # repeated notifications of an unchanged state.
    unknown = alarm.state == evaluator.UNKNOWN
    continuous = alarm.repeat_actions

    if trending_state:
        # A trend is only promoted to a real state change when coming
        # out of UNKNOWN; otherwise just re-notify the current state
        # for repeat_actions alarms.
        if unknown or continuous:
            state = trending_state if unknown else alarm.state
            reason, reason_data = self._reason(alarm, statistics, state,
                                               outside_count)
            self._refresh(alarm, state, reason, reason_data)
        return

    if state == evaluator.UNKNOWN and not unknown:
        # Transitioning into UNKNOWN: not enough datapoints were
        # available for this evaluation window.
        LOG.warning(_LW('Expecting %(expected)d datapoints but only get '
                        '%(actual)d') % {
            'expected': alarm.rule['evaluation_periods'],
            'actual': len(statistics)})
        # Reason is not same as log message because we want to keep
        # consistent since thirdparty software may depend on old format.
        reason = _('%d datapoints are unknown') % alarm.rule[
            'evaluation_periods']
        last = None if not statistics else statistics[-1]
        reason_data = self._reason_data('unknown',
                                        alarm.rule['evaluation_periods'],
                                        last)
        self._refresh(alarm, state, reason, reason_data)
    elif state and (alarm.state != state or continuous):
        reason, reason_data = self._reason(alarm, statistics, state,
                                           outside_count)
        self._refresh(alarm, state, reason, reason_data)
def setup_app(pecan_config=None, extra_hooks=None):
    """Create the pecan WSGI application for the alarm API."""
    # FIXME: Replace DBHook with a hooks.TransactionHook
    app_hooks = [
        hooks.ConfigHook(),
        hooks.DBHook(storage.get_connection_from_config(cfg.CONF, 'alarm'),),
        hooks.TranslationHook(),
    ]
    if extra_hooks:
        app_hooks.extend(extra_hooks)

    if not pecan_config:
        pecan_config = get_pecan_config()
    pecan.configuration.set_config(dict(pecan_config), overwrite=True)

    # NOTE(sileht): pecan debug won't work in multi-process environment
    pecan_debug = CONF.api.pecan_debug
    if service.get_workers('api') != 1 and pecan_debug:
        pecan_debug = False
        LOG.warning(_LW('pecan_debug cannot be enabled, if workers is > 1, '
                        'the value is overrided with False'))

    return pecan.make_app(
        pecan_config.app.root,
        debug=pecan_debug,
        force_canonical=getattr(pecan_config.app, 'force_canonical', True),
        hooks=app_hooks,
        wrap_app=middleware.ParsableErrorMiddleware,
        guess_content_type_from_ext=False,
    )
def evaluate_events(self, events):
    """Evaluate the events by referring related alarms."""
    # Accept a single event or a list of events.
    if not isinstance(events, list):
        events = [events]

    LOG.debug('Starting event alarm evaluation: #events = %d', len(events))
    for e in events:
        LOG.debug('Evaluating event: event = %s', e)
        try:
            event = Event(e)
        except InvalidEvent:
            # Malformed event: skip it and keep evaluating the rest.
            LOG.warning(_LW('Event <%s> is invalid, aborting evaluation '
                            'for it.'), e)
            continue

        for id, alarm in six.iteritems(
                self._get_project_alarms(event.project)):
            try:
                self._evaluate_alarm(alarm, event)
            except Exception:
                # One broken alarm must not stop evaluation of others.
                LOG.exception(_LE('Failed to evaluate alarm (id=%(a)s) '
                                  'triggered by event = %(e)s.'),
                              {'a': id, 'e': e})

    LOG.debug('Finished event alarm evaluation.')
def _statistics(self, rule, start, end):
    """Fetch aggregated measures for the rule's metrics over [start, end].

    Returns an empty list when the retrieval fails for any reason, so
    the caller treats the alarm as having insufficient data.
    """
    try:
        return self._gnocchi_client.metric.aggregation(
            metrics=rule['metrics'],
            start=start,
            stop=end,
            aggregation=rule['aggregation_method'])
    except Exception as exc:
        LOG.warning(_LW('alarm stats retrieval failed: %s'), exc)
        return []
def create_tables(conn, tables, column_families):
    """Create each table in *tables*; a pre-existing table is not fatal."""
    for table in tables:
        try:
            conn.create_table(table, column_families)
        except ttypes.AlreadyExists:
            if conn.table_prefix:
                # Re-qualify the name with the connection's prefix so
                # the warning names the table as it exists on the
                # backend.
                table = ("%(table_prefix)s"
                         "%(separator)s"
                         "%(table_name)s"
                         % dict(table_prefix=conn.table_prefix,
                                separator=conn.table_prefix_separator,
                                table_name=table))

            LOG.warning(_LW("Cannot create table %s because "
                            "it already exists. Ignoring error"), table)
def create_tables(conn, tables, column_families):
    """Create every table in *tables* on *conn*.

    A table that already exists is not an error: the conflict is logged
    and creation continues with the next table.
    """
    for table in tables:
        try:
            conn.create_table(table, column_families)
        except ttypes.AlreadyExists:
            if conn.table_prefix:
                # Prefix the table name so the warning matches the real
                # (namespaced) table name on the backend.
                table = ("%(table_prefix)s"
                         "%(separator)s"
                         "%(table_name)s"
                         % dict(table_prefix=conn.table_prefix,
                                separator=conn.table_prefix_separator,
                                table_name=table))

            LOG.warning(
                _LW("Cannot create table %s because "
                    "it already exists. Ignoring error"), table)
def _statistics(self, rule, start, end):
    """Return aggregated measures for the rule's metrics in [start, end].

    On any error an empty list is returned so the caller treats the
    alarm as having insufficient data.
    """
    try:
        # FIXME(sileht): In case of a heat autoscaling stack decide to
        # delete an instance, the gnocchi metrics associated to this
        # instance will be no more updated and when the alarm will ask
        # for the aggregation, gnocchi will raise a 'No overlap'
        # exception.
        # So temporary set 'needed_overlap' to 0 to disable the
        # gnocchi checks about missing points. For more detail see:
        #   https://bugs.launchpad.net/gnocchi/+bug/1479429
        return self._gnocchi_client.metric.aggregation(
            metrics=rule['metrics'],
            start=start, stop=end,
            aggregation=rule['aggregation_method'],
            needed_overlap=0)
    except Exception as e:
        LOG.warning(_LW('alarm stats retrieval failed: %s'), e)
        return []
def _sufficient(self, alarm, statistics):
    """Check for the sufficiency of the data for evaluation.

    Ensure there is sufficient data for evaluation, transitioning to
    unknown otherwise.
    """
    sufficient = len(statistics) >= alarm.rule['evaluation_periods']
    if not sufficient and alarm.state != evaluator.UNKNOWN:
        # FIX: LOG.warn is a deprecated alias of LOG.warning; also pass
        # the substitution mapping lazily instead of %-formatting it
        # eagerly.
        LOG.warning(_LW('Expecting %(expected)d datapoints but only get '
                        '%(actual)d'),
                    {'expected': alarm.rule['evaluation_periods'],
                     'actual': len(statistics)})
        # Reason is not same as log message because we want to keep
        # consistent since thirdparty software may depend on old format.
        reason = _('%d datapoints are unknown') % alarm.rule[
            'evaluation_periods']
        last = None if not statistics else statistics[-1]
        reason_data = self._reason_data('unknown',
                                        alarm.rule['evaluation_periods'],
                                        last)
        self._refresh(alarm, evaluator.UNKNOWN, reason, reason_data)
    return sufficient
def _sufficient(self, alarm, statistics):
    """Check for the sufficiency of the data for evaluation.

    Ensure there is sufficient data for evaluation, transitioning to
    unknown otherwise.
    """
    sufficient = len(statistics) >= alarm.rule['evaluation_periods']
    if not sufficient and alarm.state != evaluator.UNKNOWN:
        # FIX: LOG.warn is a deprecated alias of LOG.warning; also pass
        # the substitution mapping lazily instead of %-formatting it
        # eagerly.
        LOG.warning(
            _LW('Expecting %(expected)d datapoints but only get '
                '%(actual)d'),
            {
                'expected': alarm.rule['evaluation_periods'],
                'actual': len(statistics)
            })
        # Reason is not same as log message because we want to keep
        # consistent since thirdparty software may depend on old format.
        reason = _(
            '%d datapoints are unknown') % alarm.rule['evaluation_periods']
        last = None if not statistics else statistics[-1]
        reason_data = self._reason_data('unknown',
                                        alarm.rule['evaluation_periods'],
                                        last)
        self._refresh(alarm, evaluator.UNKNOWN, reason, reason_data)
    return sufficient
def conversion():
    """Convert 'combination' alarms into equivalent 'composite' alarms.

    Non-interactive variant: reads its options from the CLI parser,
    clones each combination alarm as a composite alarm, and optionally
    deletes the originals afterwards.
    """
    args = get_parser().parse_args()
    conf = service.prepare_service([])
    conn = storage.get_connection_from_config(conf)
    combination_alarms = list(conn.get_alarms(
        alarm_type='combination', alarm_id=args.alarm_id or None))

    converted = 0
    for alarm in combination_alarms:
        new_name = 'From-combination: %s' % alarm.alarm_id
        existing = list(conn.get_alarms(name=new_name,
                                        alarm_type='composite'))
        if existing:
            LOG.warning(_LW('Alarm %(alarm)s has been already converted as '
                            'composite alarm: %(n_alarm_id)s, skipped.'),
                        {'alarm': alarm.alarm_id,
                         'n_alarm_id': existing[0].alarm_id})
            continue

        try:
            composite_rule = _generate_composite_rule(conn, alarm)
        except DependentAlarmNotFound as err:
            LOG.warning(_LW('The dependent alarm %(dep_alarm)s of alarm %'
                            '(com_alarm)s not found, skipped.'),
                        {'com_alarm': err.com_alarm_id,
                         'dep_alarm': err.dependent_alarm_id})
            continue
        except UnsupportedSubAlarmType as err:
            LOG.warning(_LW('Alarm conversion from combination to composite '
                            'only support combination alarms depending '
                            'threshold alarms, the type of alarm %(alarm)s '
                            'is: %(type)s, skipped.'),
                        {'alarm': err.sub_alarm_id,
                         'type': err.sub_alarm_type})
            continue

        # Copy the source alarm, then rewrite the identity/type fields
        # that distinguish the composite replacement.
        replacement = models.Alarm(**alarm.as_dict())
        replacement.alarm_id = uuidutils.generate_uuid()
        replacement.name = new_name
        replacement.type = 'composite'
        replacement.description = ('composite alarm converted from combination '
                                   'alarm: %s' % alarm.alarm_id)
        replacement.rule = composite_rule
        replacement.timestamp = datetime.datetime.now()
        conn.create_alarm(replacement)
        LOG.info(_LI('End Converting combination alarm %(s_alarm)s to '
                     'composite alarm %(d_alarm)s'),
                 {'s_alarm': alarm.alarm_id,
                  'd_alarm': replacement.alarm_id})
        converted += 1

    if args.delete_combination_alarm:
        for alarm in combination_alarms:
            LOG.info(_LI('Deleting the combination alarm %s...'),
                     alarm.alarm_id)
            conn.delete_alarm(alarm.alarm_id)
    LOG.info(_LI('%s combination alarms have been converted to composite '
                 'alarms.'), converted)