class Secondary(object):
    """Tracks the HA secondary node's state and persists it to disk."""

    # Possible values of self.state.
    (INIT, OFFLINE, ONLINE) = range(3)
    # Name of the persisted state file, relative to the core var directory.
    CONFIG_NAME = 'ha.xml'

    def __init__(self, core):
        self.core = core
        self.state = self.OFFLINE
        self.last_seen = 0
        self.error = ''
        self.vars = VariablesStore()

    def setPort(self, port):
        self.port = port

    def settings(self, interface):
        return {'interface': interface}

    def _configPath(self):
        # Full path of the persisted state file under the core var directory.
        var_dir = self.core.config.get('CORE', 'vardir')
        return os.path.join(var_dir, self.CONFIG_NAME)

    def loadConfig(self):
        """Restore last_seen and state from the on-disk store."""
        self.vars.load(self._configPath())
        self.last_seen = self.vars.get('last_seen')
        self.state = self.vars.get('state')

    def saveConfig(self):
        """Persist last_seen and state to the on-disk store."""
        self.vars.set('last_seen', self.last_seen)
        self.vars.set('state', self.state)
        self.vars.save(self._configPath())

    def setState(self, state):
        # Only touch the disk when the state actually changes.
        if self.state == state:
            return
        self.state = state
        self.saveConfig()

    def updateLastSeen(self):
        self.last_seen = int(time.time())
        self.saveConfig()
class Primary(LoggerChild):
    """HA primary-node controller.

    NOTE(review): view may be partial -- resume(), setFirewallRules(),
    startTransport() and the hello task are defined elsewhere. Written for
    Python 2 (`except ..., err` syntax). Indentation reconstructed from a
    flattened source.
    """

    # Interval between hello messages, in seconds.
    HELLO_INTERVAL = 5*60
    # Transport task interval; presumably seconds -- TODO confirm.
    TRANSPORT_INTERVAL = 1
    # Possible values of self.state.
    (INIT, OFFLINE, ONLINE) = range(3)
    # Name of the persisted state file, relative to the core var directory.
    CONFIG_NAME = "ha_secondary.xml"

    def __init__(self, secondary):
        # `secondary` supplies the component and core this controller binds to.
        self.hello_task_id = None
        self.component = secondary.component
        self.core = secondary.core
        self.ctx = Context.fromComponent(self.component)
        self.state = self.INIT
        self.vars = VariablesStore()
        LoggerChild.__init__(self, self.component)

    @inlineCallbacks
    def loadConfig(self, interface_name):
        """Set up firewall rules and transport, then restore persisted state
        and resume operation (Twisted coroutine via inlineCallbacks)."""
        yield self.setFirewallRules(interface_name)
        yield self.startTransport()
        try:
            vars_path = os.path.join(self.core.config.get('CORE', 'vardir'),
                                     self.CONFIG_NAME)
            self.vars.load(vars_path)
            self.state = self.vars.get('state')
            self.resume()
        except ConfigError, err:
            # A missing state file just means HA isn't configured yet, so it
            # is silently ignored; any other ConfigError is reported.
            if err.error_code != CONFIG_NO_SUCH_FILE:
                self.error("HA configuration isn't valid")
                self.writeError(err)
class SupervisorComponent(AbstractNuConfComponent):
    """
    This component periodically checks system data like space available in
    the log partition, compares them to thresholds and take appropriate
    action (alert e-mail and/or selective purges).

    NOTE(review): indentation reconstructed from a flattened source; nesting
    of a few statements (flagged below) should be confirmed.
    """
    NAME = "supervisor"
    VERSION = "1.0"
    # ACL: this component may call contact.sendMailToAdmin.
    ACLS = {
        "contact": set(("sendMailToAdmin",)),
    }
    ROLES = {"conf_write": set(("purge",)), "conf_read": set(("getStates",))}
    # Re-read configuration whenever the "contact" component's config changes.
    CONFIG_DEPENDS = ("contact",)

    def __init__(self):
        AbstractNuConfComponent.__init__(self)
        # Last known criticity per reaction function, persisted to disk.
        self.states = VariablesStore()
        self.registered_for_mail = SupervisorMail()
        self.purging = False

    def init_done(self):
        """Load persisted per-check states (or initialize them to 0) and
        schedule the first periodic check."""
        self.states_path = os.path.join(
            self.core.config.get("CORE", "vardir"), "supervisor.xml")
        try:
            self.states.load(self.states_path)
        except ConfigError:
            # No saved state yet: start every reaction function at criticity 0.
            for reaction_function in reaction_functions:
                self.states.set(reaction_function.__name__, 0)
        # Launch the first check in 10 seconds. The callback function will
        # schedule itself with a delay of delay_seconds seconds at the end of
        # its body.
        scheduleOnce(10, self.check_and_react)

    # (Method copied from contact.py and extended.)
    def read_config(self, *args, **kwargs):
        """Load the "contact" component configuration, falling back to
        defaults when it is absent or invalid."""
        self.config = ContactConf.defaultConf()
        try:
            serialized = self.core.config_manager.get("contact")
            valid, message = self._setconfig(serialized)
            if not valid:
                self.error(
                    "This means that the configuration is incorrect or that there is a bug"
                )
        except ConfigError:
            self.debug("Not configured, defaults loaded.")
        self.registered_for_mail.set_config(self.config)

    # (Method copied from contact.py.)
    def _setconfig(self, serialized):
        """Deserialize and validate a contact configuration; install it on
        success. Returns (valid, error_message)."""
        # TODO: factorize with exim component _setconfig (and maybe other modules)
        config = ContactConf.deserialize(serialized)
        valid, error = config.isValidWithMsg()
        if valid:
            self.config = config
        else:
            self.error(
                "Component %s read incorrect values. \n"
                "Message was: %s" % (self.NAME, error)
            )
        return valid, error

    def apply_config(self, *unused):
        # Nothing to apply: this component only reads the contact config.
        pass

    def enhance_message(self, name, check_result):
        """
        Add information to base message and return whether the alert is new.
        """
        threshold = Thresholds.in_threshold(check_result.criticity)
        previous_threshold = Thresholds.in_threshold(
            self.states.get(name))
        if threshold == Thresholds.last_alert:
            # Last warning before an automatic purge: say so in the message.
            check_result.message += " " + purge_next_messages.get(
                self.config.language, purge_next_messages["en"])
        if threshold >= Thresholds.alert1:
            # Prefix the message with a localized warning header.
            check_result.message = warning_messages.get(
                self.config.language, warning_messages["en"]) + \
                check_result.message
        # "New" means the criticity crossed into a higher threshold band
        # than the last recorded one.
        return Thresholds.threshold_higher(previous_threshold, threshold)

    def mail_and_log(self, logger_function, name, *args):
        """
        Log if new and register for mail (as new or old).
        """
        new = False
        if isinstance(args[0], CheckResult):
            new = self.enhance_message(name, args[0])
            logger_args = (args[0].message,) + args[1:]
        else:
            logger_args = args
        # Critical messages are always treated as new.
        if logger_function == self.critical:
            new = True
        if new:
            logger_function(*logger_args)
        if logger_args:
            self.register_for_mail(name, new, logger_args[0])

    def mail_critical(self, name, *args):
        """
        A critical message is always new.
        """
        self.mail_and_log(self.critical, name, *args)

    def mail_warning(self, name, *args):
        self.mail_and_log(self.warning, name, *args)

    def register_for_mail(self, name, new, message):
        self.registered_for_mail.add_alert(name, new, message)

    def handle_last_result(self, name, check_result, reached_insane):
        """
        Register messages to include in an e-mail.

        If the first and/or intermediate results reached insane threshold,
        an e-mail was already sent.
        """
        if check_result.criticity >= Thresholds.insane:
            failure_message = failure_messages.get(
                self.config.language, failure_messages["en"]) % name
            self.mail_critical(name, failure_message)
            return
        if check_result.criticity >= Thresholds.alert1:
            # mail_warning will decide whether this is a new alert.
            self.mail_warning(name, check_result)
        else:
            if reached_insane:
                # Add it to new alerts and to critical logs, to show that
                # after being insane the situation is back to normal.
                # NOTE(review): nesting of add_other under reached_insane is
                # reconstructed -- confirm against original indentation.
                self.mail_critical(name, check_result.message)
                self.registered_for_mail.add_other(name, check_result.message)

    def _execute_reactions(self, system_data, manual_purge):
        """Run every reaction function against system_data, mail/log
        according to criticity, and persist each function's last criticity."""
        # Each reaction function addresses a problem and executes corrections
        # if necessary, until the problem is solved. For instance,
        # purge_system_log checks /var/log partition remaining space and
        # deletes logs if there is not enough space left.
        for reaction_function in reaction_functions:
            try:
                check_results = reaction_function(
                    system_data, self, self.config.language, manual_purge)
                if not check_results:
                    # Should not happen in production.
                    self.critical("Error: could not check criticity for "
                                  "function %s." % reaction_function.__name__)
                    break
                reached_insane = False
                for check_result in check_results:
                    if check_result.criticity >= Thresholds.insane:
                        self.mail_critical(reaction_function.__name__,
                                           check_result.message)
                        reached_insane = True
                # Warn if the problem is still present (testing last
                # check_result).
                if check_results and check_results[-1]:
                    self.handle_last_result(reaction_function.__name__,
                                            check_results[-1],
                                            reached_insane)
                    self.states.set(reaction_function.__name__,
                                    check_results[-1].criticity)
                    self.states.save(self.states_path)
            except Exception, err:
                # The except sits inside the loop, so one failing reaction
                # function does not stop the others.
                self.writeError(
                    err,
                    "Error while checking system with function %s"
                    % reaction_function.__name__)
class Exporter(Logger):
    """Periodically exports log-database entries to a remote multisite
    transport service.

    NOTE(review): view may be partial -- publish_err(), send_url() and
    next_rotation() are referenced but defined elsewhere. Written for
    Python 2 (tuple parameter in publish_data). Indentation reconstructed
    from a flattened source.
    """

    # This is the default value of lastsync, at first launch.
    # TODO It is important to determine what value to use, because
    # when value is 0, Exporter sends all entries in database,
    # instead of nothing when value is now.
    #LASTSYNC_DEFAULT = 0
    # NOTE(review): evaluated once at module import time, not per instance.
    LASTSYNC_DEFAULT = time.time()
    # Protocol version announced to the server.
    PROTO_VERSION = 1
    # Persisted state file, relative to the core var directory.
    CONFIG_FILE = 'ufwi_log_export.xml'

    def __init__(self, context, core):
        Logger.__init__(self, "Exporter")
        self.core = core
        self.context = context
        self.cron = None        # repeating task handle; None when stopped
        self.locked = False     # guards against overlapping exports
        self.period = 0         # export period in seconds; 0 disables the job
        self.rotation_period = 3600*24*30   # 30 days
        self.sync_start = 0
        self.server_proto = self.PROTO_VERSION
        self.config = VariablesStore()
        self.config_path = os.path.join(
            self.core.config.get('CORE', 'vardir'), self.CONFIG_FILE)
        try:
            self.config.load(self.config_path)
            self.lastsync = int(self.config.get('lastsync'))
        except (ValueError, ConfigError):
            # Missing or unparsable state file: fall back to the default.
            self.lastsync = self.LASTSYNC_DEFAULT

    @staticmethod
    def getMeta(data):
        """Return (md5, sha1, sha256, size) digests/length of a byte string."""
        md5 = hashlib.md5(data).hexdigest()
        sha1 = hashlib.sha1(data).hexdigest()
        sha256 = hashlib.sha256(data).hexdigest()
        size = len(data)
        return md5, sha1, sha256, size

    def rehash(self, conf):
        """
        Rehash configuration, and (re)start cron job.
        If period == 0, job is disabled.
        """
        period = int(conf.get('export_period'))
        if period == self.period:
            # Unchanged: keep the current job running as-is.
            return
        self.period = period
        if self.period:
            self.start()
        else:
            self.stop()

    def stop(self):
        # Cancel the repeating export task, if any.
        # NOTE(review): nesting reconstructed -- confirm whether locked is
        # reset only when a cron job existed.
        if self.cron:
            self.cron.stop()
            self.cron = None
            self.locked = False

    def start(self):
        # Restart the repeating export task with the current period.
        self.stop()
        self.cron = scheduleRepeat(self.period, self.export_table)

    def export_table(self):
        """Periodic callback: query exportable entries since the last sync
        and publish them. Returns the Deferred, or None when skipped."""
        if not self.database:
            # NOTE(review): self.database is not set in __init__; presumably
            # assigned elsewhere -- confirm.
            self.warning('Not connected anywhere.')
            return
        now = time.time()
        # As last synchronization is older than rotation period,
        # it is possible that some data are lost.
        if self.lastsync < now - self.rotation_period:
            self.warning("Some data are probably lost.")
        if self.lastsync > now - self.period:
            # Not yet due for the next export window.
            return
        if self.locked:
            # A previous export is still in flight.
            return
        self.locked = True
        self.sync_start = time.time()
        begin, end = self.lastsync, self.lastsync+self.period
        request = self.database.createRequest()
        d = self.database.query(
            request.select_exportable_data(self.server_proto, begin, end))
        d.addCallback(self.publish_data)
        d.addErrback(self.publish_err)
        return d

    def publish_data(self, (result, size)):
        # No exportable data: just advance the rotation.
        if not result:
            return self.next_rotation()
        data = pickle.dumps(result)
        meta = Exporter.getMeta(data)
        # Prepend the protocol version to the (md5, sha1, sha256, size) tuple.
        meta = (self.server_proto,) + meta
        return self.core.callService(
            self.context, 'multisite_transport', 'hostFile', data
        ).addCallback(self.send_url, meta).addErrback(self.publish_err)