Beispiel #1
0
    def recover_grastate(self):
        """Recover UUID and sequence number

        There is no clean way to recover the database state (i.e. UUID
        and sequence number).  The only viable way to retrieve this
        information is by parsing the log created from running with
        wsrep_recover=on.

        The service is reconfigured to write a uniquely named recovery
        log under ``self.datadir``, started, and then stopped again.
        The first log line containing "Recovered position:" supplies
        the state.

        Returns the recovered ``GaleraState``.  Raises
        ``ocf.GenericError`` if no recovered position is found in the
        log or if ``GaleraState`` rejects it with ``ValueError``; in
        both cases the log file is left in place for inspection.  On
        success the log file is removed.
        """
        logfile = os.path.join(self.datadir, 'wsrep-recovery-%s.log' % uuid4())
        self.logger.info("Attempting recovery to %s", logfile)
        self.reconfigure(wsrep_recovery_log=logfile)
        self.systemctl_start(self.service)
        # Service should have stopped immediately after performing
        # recovery, but force a stop just in case.
        self.systemctl_stop(self.service)
        pattern = re.compile(r'^.*Recovered position:\s*(?P<state>\S+)$')
        # Open in text mode: the pattern above is a text pattern, and
        # matching it against the byte lines of a binary-mode file
        # raises TypeError on Python 3.
        with open(logfile, 'r') as f:
            candidates = (pattern.match(line) for line in f)
            m = next((found for found in candidates if found), None)
        if m is None:
            raise ocf.GenericError("Recovery failed: see %s" % logfile)
        try:
            state = GaleraState(m.group('state'))
        except ValueError as e:
            raise ocf.GenericError("%s: see %s" % (str(e), logfile))
        self.logger.info("Recovered %s from %s", state, logfile)
        # Only a successfully parsed log is discarded
        os.remove(logfile)
        return state
Beispiel #2
0
 def read_grastate(self):
     """Read state from Galera state file

     Blank lines and lines containing only a ``#`` comment are
     ignored; every other line must have the form ``key: value``.

     Returns the ``GaleraState`` built from the ``uuid`` and ``seqno``
     entries, or ``None`` (after logging an error) if the state file
     cannot be opened.  Raises ``ocf.GenericError`` if a line cannot
     be parsed, if either entry is absent, or if ``GaleraState``
     rejects the values with ``ValueError``.
     """
     # Compile once rather than re-resolving the patterns per line
     ignorable = re.compile(r'^\s*(#.*)?$')
     entry = re.compile(r'^\s*(?P<key>\w+):\s*(?P<value>.*?)\s*$')
     raw = {}
     try:
         # Open in text mode: the patterns are text patterns, and
         # matching them against the byte lines of a binary-mode file
         # raises TypeError on Python 3.
         f = open(self.grastate_file, 'r')
     except IOError:
         self.logger.error("Missing state file %s", self.grastate_file)
         return None
     with f:
         for lineno, line in enumerate(f, start=1):
             if ignorable.match(line):
                 continue
             m = entry.match(line)
             if not m:
                 raise ocf.GenericError("Corrupt %s on line %d" %
                                        (self.grastate_file, lineno))
             # A repeated key overrides its earlier value
             raw[m.group('key')] = m.group('value')
     uuid = raw.get('uuid')
     seqno = raw.get('seqno')
     if uuid is None:
         raise ocf.GenericError("Missing UUID in %s" % self.grastate_file)
     if seqno is None:
         raise ocf.GenericError("Missing sequence number in %s" %
                                self.grastate_file)
     try:
         state = GaleraState(uuid=uuid, seqno=seqno)
     except ValueError as e:
         raise ocf.GenericError("%s in %s" % (str(e), self.grastate_file))
     self.logger.info("Found %s in %s", state, self.grastate_file)
     return state
Beispiel #3
0
 def service_start(self):
     """Start slave service

     Records the node state, taking it from the state file when that
     yields a usable value and falling back to recovery otherwise,
     then checks the node UUID against the cluster UUID.

     Raises ``ocf.GenericError`` if a cluster UUID is set and the node
     UUID is neither the zero UUID nor the cluster UUID.
     """
     # Prefer the state file; only perform recovery when it gives nothing
     recorded = self.read_grastate()
     if not recorded:
         recorded = self.recover_grastate()
     self.state = recorded
     # Nothing to compare against until a cluster UUID has been set
     if self.cluster_uuid is None:
         return
     if self.uuid in (ZERO_UUID_STRING, self.cluster_uuid):
         return
     raise ocf.GenericError("UUID does not match cluster UUID")
Beispiel #4
0
 def rabbitmqctl(*args):
     """Perform an action via rabbitmqctl

     Runs ``rabbitmqctl`` with the given arguments, with standard
     error merged into the captured output.

     Returns the captured output as text with trailing newlines
     removed.  Raises ``ocf.GenericError`` carrying the captured
     output (or the exit status, if there was no output) when the
     command exits with a non-zero status.
     """
     command = ('rabbitmqctl', ) + args
     try:
         # universal_newlines=True makes check_output() return text
         # rather than bytes on Python 3; stripping bytes with a text
         # argument would otherwise raise TypeError.
         output = subprocess.check_output(command, stderr=subprocess.STDOUT,
                                          universal_newlines=True)
     except subprocess.CalledProcessError as e:
         raise ocf.GenericError(e.output or e.returncode)
     return output.rstrip('\n')
Beispiel #5
0
 def action_promote(self):
     """Promote resource

     When ``meta_notify_master_unames`` is empty, the promotion is
     only allowed to proceed if ``choose_bootstrap()`` selects this
     agent.  After the parent class has performed the promotion, a
     bootstrap node triggers promotion of all remaining nodes.

     Returns ``ocf.SUCCESS``.  Raises ``ocf.GenericError`` if no
     bootstrap can be chosen or if a different node is chosen.
     """
     # Several nodes must never bootstrap at the same time.  That
     # situation can occur when, for example, a brief network
     # interruption has demoted (but not stopped) every node.
     if not self.meta_notify_master_unames:
         chosen = self.choose_bootstrap()
         if chosen is None:
             raise ocf.GenericError("Refusing to promote without bootstrap")
         elif chosen != self:
             raise ocf.GenericError("Refusing concurrent promotion with %s" %
                                    chosen.node)
     # Hand over to the parent class to start the master service
     super(BootstrappingAgent, self).action_promote()
     # A bootstrap node brings the remaining nodes up behind it
     if self.is_bootstrap:
         self.trigger_promote_all()
     return ocf.SUCCESS
Beispiel #6
0
 def systemctl(self, action, unit=None):
     """Perform an action via systemctl

     Runs ``systemctl <action> <unit>`` with standard error merged
     into the captured output.  ``unit`` defaults to ``self.service``
     when not given.

     Returns the captured output as text with trailing newlines
     removed.  Raises ``ocf.GenericError`` carrying the captured
     output (or the exit status, if there was no output) when the
     command exits with a non-zero status.
     """
     if unit is None:
         unit = self.service
     command = ('systemctl', action, unit)
     try:
         # universal_newlines=True makes check_output() return text
         # rather than bytes on Python 3; stripping bytes with a text
         # argument would otherwise raise TypeError.
         output = subprocess.check_output(command, stderr=subprocess.STDOUT,
                                          universal_newlines=True)
     except subprocess.CalledProcessError as e:
         raise ocf.GenericError(e.output or e.returncode)
     return output.rstrip('\n')
Beispiel #7
0
 def master_is_running(self):
     """Check if master service is running

     Returns ``True`` only when the service unit is active and the
     ``wsrep_local_state`` status value equals ``WSREP_STATE_SYNCED``;
     any other local state is logged as an error and yields ``False``.
     Raises ``ocf.GenericError`` if the status output cannot be
     parsed.
     """
     if self.systemctl_is_active(self.service):
         status = self.mysql_exec("SHOW STATUS LIKE 'wsrep_local_state'")
         parsed = re.match(r'^\s*wsrep_local_state\s+(?P<state>\d+)\s*$',
                           status)
         if parsed is None:
             raise ocf.GenericError("Unable to determine state:\n%s" % status)
         local_state = int(parsed.group('state'))
         if local_state == WSREP_STATE_SYNCED:
             return True
         self.logger.error("Unexpected local state %d", local_state)
     # Either the unit is not active or the node is not synced
     return False
Beispiel #8
0
 def master_start(self):
     """Start master service

     Checks the node UUID against the cluster UUID (when one is set),
     removes any empty primary component state file, starts the
     service, and then re-reads the state file to refresh the
     recorded state.  If no cluster UUID was set, the node UUID is
     recorded as the cluster UUID.

     Raises ``ocf.GenericError`` if the UUID does not match the
     cluster UUID, if the state cannot be read after starting, or if
     the UUID differs from the one read back after starting.
     """
     # A cluster UUID, once set, must agree with a non-zero node UUID
     if (self.cluster_uuid is not None and
             self.uuid not in (ZERO_UUID_STRING, self.cluster_uuid)):
         raise ocf.GenericError("UUID does not match cluster UUID")
     # Remove the primary component state file if it is empty
     self.delete_empty_gvwstate()
     # Bring the service up in normal mode
     self.logger.info("Beginning at %s", self.state)
     self.systemctl_start(self.service)
     # Re-read the state file and check it before recording it
     current = self.read_grastate()
     if current is None:
         raise ocf.GenericError("Unable to determine state after promotion")
     if self.uuid not in (ZERO_UUID_STRING, current.uuid):
         raise ocf.GenericError("UUID changed unexpectedly after promotion")
     self.state = current
     # An already recorded cluster UUID is left alone
     if self.cluster_uuid is not None:
         return
     self.logger.info("Set new cluster UUID %s", self.uuid)
     self.cluster_uuid = self.uuid
Beispiel #9
0
    def mysql_exec(self, sql):
        """Execute SQL statement

        Runs the ``mysql`` client with the statement passed via
        ``-e``.  The child process switches to the group and user IDs
        of the ``self.user`` account before executing, and the same
        name is passed to the client via ``-u``.  Standard error is
        merged into the captured output.

        Returns the captured output as text with trailing newlines
        removed.  Raises ``ocf.GenericError`` carrying the captured
        output (or the exit status, if there was no output) when the
        client exits with a non-zero status.
        """
        user = pwd.getpwnam(self.user)

        def preexec():
            """Run as specified user"""
            # Group before user: after the user ID has changed, the
            # process may no longer be permitted to change its group.
            # NOTE(review): supplementary groups are not reset here -
            # confirm whether that is intended.
            os.setgid(user.pw_gid)
            os.setuid(user.pw_uid)

        command = ('mysql', '-s', '-u', self.user, '-e', sql)
        try:
            # universal_newlines=True makes check_output() return text
            # rather than bytes on Python 3; stripping bytes with a
            # text argument would otherwise raise TypeError.
            output = subprocess.check_output(command,
                                             preexec_fn=preexec,
                                             stderr=subprocess.STDOUT,
                                             universal_newlines=True)
        except subprocess.CalledProcessError as e:
            raise ocf.GenericError(e.output or e.returncode)
        return output.rstrip('\n')