def __init__(self):
    """Initialise cluster bookkeeping, heartbeat helpers, watchdogs and election fields.

    Nothing is started here: the looping calls are only created, and the
    helper services are only instantiated with a reference to this object.
    """
    # --- Role / state tracking ------------------------------------------
    self.role = MasterService.RL_ALONE      # role of this node
    self.state = MasterService.ST_NORMAL    # cluster error status
    self.master = None                      # name of the active master
    self.masterLastSeen = 0                 # timestamp used for master failover
    self.status = {}                        # whole cluster status

    # --- Local resources and helper services ------------------------------
    # Order is kept as-is: the helpers receive `self` and may read the
    # attributes set above (their constructors are not visible here).
    self.localNode = Node(DNSCache.getInstance().name)
    self.disk = DiskHeartbeat()
    self.s_slaveHb = SlaveHearbeatService(self)
    self.s_masterHb = MasterHeartbeatService(self)
    self.s_rpc = RPCService(self)

    # --- Failover watchdogs ----------------------------------------------
    # Note the crossed naming: the "slave" dog checks the master heartbeat,
    # the "master" dog checks the slaves' heartbeats.
    self.l_slaveDog = task.LoopingCall(self.checkMasterHeartbeat)
    self.l_masterDog = task.LoopingCall(self.checkSlaveHeartbeats)

    # --- Election bookkeeping --------------------------------------------
    self.ballotBox = None           # all received votes
    self.currentElection = None     # election name, None if no pending election
    self.f_tally = None             # IDelayedCall used to trigger countVotes()
    self.lastTallyDate = 0          # timestamp for debugging elections
    self.panicRequested = False     # True if panic is requested during election
#!/usr/bin/env python
# App Engine entry script: builds the RPC service and the WSGI application,
# then serves the application when run as a script.
import logging

from google.appengine.ext import webapp
from google.appengine.ext.webapp import util

import handlers
from rpc import RPCService
from namespaces import *

# (name, namespace class) pairs handed to RPCService.
_NAMESPACES = [
    ("info", info.InfoNamespace),
    ("person", person.PersonNamespace),
    ("session", session.SessionNamespace),
]

# The instance is not kept here, so any registration has to happen inside
# RPCService itself -- TODO confirm against rpc.py.
RPCService(_NAMESPACES)

# URL pattern -> request handler. The two groups of the /rpc/ pattern only
# match ASCII letters.
_ROUTES = [
    ('/', handlers.MainHandler),
    ('/test', handlers.RPCFormHandler),
    (r'/rpc/([a-zA-Z]+)/([a-zA-Z]+)', handlers.RPCHandler),
]

application = webapp.WSGIApplication(_ROUTES, debug=True)


def main():
    """Run the module-level WSGI application."""
    util.run_wsgi_app(application)


if __name__ == '__main__':
    main()
class MasterService(Service):
    """Service tracking this node's cluster role and error state.

    Holds the role (``RL_*``) and state (``ST_*``) of the local node, owns the
    heartbeat/RPC helper services and the failover watchdogs, and dispatches
    incoming cluster messages to their handlers.
    """

    # Possible master roles (for self.role)
    RL_ACTIVE = "active"        # Active master, aka master
    RL_PASSIVE = "passive"      # Passive master, aka slave
    RL_JOINING = "joining"      # When trying to connect to the cluster
    RL_LEAVING = "leaving"      # When trying to leave the cluster
    RL_VOTING = "voting"        # During election stage
    RL_ALONE = "alone"          # Before joining

    # Possible states, aka error mode (for self.state)
    ST_NORMAL = "normal"        # Normal operations
    ST_RECOVERY = "recovery"    # When a failed node is being recovered
    ST_PANIC = "panic"          # "I don't do anything" mode

    # Elections and failover timeouts. The figures in the comments below
    # assume core.cfg['TIMER'] is 3 seconds; the real value comes from config.
    TM_TALLY = 1                        # Records vote for 1 sec
    TM_WATCHDOG = core.cfg['TIMER']     # Check for failure every 3 sec
    TM_MASTER = TM_WATCHDOG*2           # Re-elect master if no response within 6 sec
    TM_SLAVE = TM_WATCHDOG*3            # Trigger failover if no response within 9 sec (master + tally + rounding)

    def __init__(self):
        """Initialise state, helper services, watchdogs and election fields."""
        self.role = MasterService.RL_ALONE      # Current role of this node
        self.state = MasterService.ST_NORMAL    # Current cluster error status
        self.master = None                      # Name of the active master
        self.masterLastSeen = 0                 # Timestamp for master failover
        self.status = dict()                    # Whole cluster status
        self.localNode = Node(DNSCache.getInstance().name)
        self.disk = DiskHeartbeat()
        self.s_slaveHb = SlaveHearbeatService(self)
        self.s_masterHb = MasterHeartbeatService(self)
        self.s_rpc = RPCService(self)

        # Watchdogs for failover
        self.l_slaveDog = task.LoopingCall(self.checkMasterHeartbeat)
        self.l_masterDog = task.LoopingCall(self.checkSlaveHeartbeats)

        # Election Stuff
        self.ballotBox = None           # All received votes
        self.currentElection = None     # Election name, None if no pending election
        self.f_tally = None             # IDelayedCall used to trigger countVotes()
        self.lastTallyDate = 0          # Timestamp for debugging elections
        self.panicRequested = False     # True if panic is requested during election

    def startService(self):
        """Start the service, open the UDP message port and schedule joinCluster."""
        Service.startService(self)

        # Print welcome message
        log.info("Starting cxmd version", meta.version)

        self._messagePort = reactor.listenUDP(core.cfg['UDP_PORT'], UDPListener(self.dispatchMessage))
        # joinCluster is deferred by 2 seconds rather than called inline.
        reactor.callLater(2, self.joinCluster)

    def stopService(self):
        """Stop listening, leave the cluster and stop the reactor.

        Returns the deferred from leaveCluster() (with the reactor shutdown
        chained on it), or an already-fired deferred if the service is not
        running.
        """
        # Named _shutdown rather than exit, to avoid shadowing the builtin.
        def _shutdown(result):
            log.info("Stopping daemon...")
            # NOTE(review): relies on the reactor's private _stopped flag.
            if not reactor._stopped:
                reactor.stop()

        if self.running:
            Service.stopService(self)

            # Stop receiving cluster messages
            self._messagePort.stopListening()
            self.s_rpc.stopService().addErrback(log.err)

            # Cleanly leave cluster
            d = self.leaveCluster()
            d.addErrback(log.err)
            d.addBoth(_shutdown)  # Even if there are errors
            return d
        else:
            return defer.succeed(None)

    def panic(self, noCheck=False):
        """
        Engage panic mode.
        Use noCheck=True if you want to panic whatever the cluster role.

        Depending on the current role this either switches the state to
        ST_PANIC (active master, or noCheck), postpones the request (election
        in progress), forwards it through an Agent (slave), or raises
        RPCRefusedError (any other role).
        """

        def panicFailed(reason):
            # The forwarded request failed: panic locally, whatever the role.
            log.emerg("Panic query failed: %s." % (reason.getErrorMessage()))
            self.panic(True)

        if self.state == MasterService.ST_PANIC:
            log.emerg("Panic mode already engaged.")

        elif self.role == MasterService.RL_ACTIVE or noCheck:
            log.emerg("SYSTEM FAILURE: Panic mode engaged.")
            log.emerg("This is a critical error. You should bring your ass over here, right now.")
            log.emerg("Please check logs and be sure of what you're doing before re-engaging normal mode.")
            self.state = MasterService.ST_PANIC

            # TODO + stop LB
            if self.l_masterDog.running:
                self.l_masterDog.stop()

        elif self.role == MasterService.RL_VOTING:
            # No master during election stage: waiting next master
            log.warn("Panic mode requested during election stage: delaying.")
            self.panicRequested = True

        elif self.role == MasterService.RL_PASSIVE:
            log.warn("I'm slave: asking master to engage panic mode...")
            agent = Agent()
            d = agent.panic()
            d.addErrback(panicFailed)
            d.addErrback(log.err)

        else:  # RL_ALONE or RL_JOINING or RL_LEAVING
            log.warn("I'm not in a running state (master or slave). Cannot engage panic mode.")
            raise RPCRefusedError("Not in running state")

    # Properties accessors
    ###########################################################################

    def getStatus(self):
        """Return the cluster status dictionary."""
        return self.status

    def getState(self):
        """Return the current state (one of the ST_* constants)."""
        return self.state

    def getLocalNode(self):
        """Return the Node object created for this host."""
        return self.localNode

    def getActiveMaster(self):
        """Return the name of the active master, or None if not set."""
        return self.master

    def getNodesList(self):
        """Return the keys of the status dictionary as a list."""
        # list() keeps the result a real list even where keys() is a view.
        return list(self.status.keys())

    def isActive(self):
        """Return True if this node's role is RL_ACTIVE."""
        return self.role == MasterService.RL_ACTIVE

    def isInPanic(self):
        """Return True if the state is ST_PANIC."""
        return self.state == MasterService.ST_PANIC

    # Messages handlers
    ###########################################################################

    def dispatchMessage(self, data, host):
        """Decode an incoming datagram and pass it to the handler for its type.

        Decoding errors and unknown message types are logged; an
        IDontCareException is silently dropped.
        """
        dispatcher = {
            "slavehb": self.updateNodeStatus,
            "masterhb": self.updateMasterStatus,
            "voterequest": self.voteForNewMaster,
            "voteresponse": self.recordVote,
        }

        try:
            msg = MessageHelper.get(data, host)
            log.debugd("Received", msg)
            # NOTE(review): a KeyError raised inside a handler is also caught
            # below and reported as a bad message.
            dispatcher[msg.type()](msg)
        except (MessageError, KeyError) as e:
            log.err("Bad message from %s : %s , %s" % (host, data, e))
        except IDontCareException:
            pass  # Discard useless messages
class MasterService(Service):
    """Service tracking this node's cluster role and error state.

    Holds the role (``RL_*``) and state (``ST_*``) of the local node, owns the
    heartbeat/RPC helper services and the failover watchdogs, and dispatches
    incoming cluster messages to their handlers.
    """

    # Possible master roles (for self.role)
    RL_ACTIVE = "active"        # Active master, aka master
    RL_PASSIVE = "passive"      # Passive master, aka slave
    RL_JOINING = "joining"      # When trying to connect to the cluster
    RL_LEAVING = "leaving"      # When trying to leave the cluster
    RL_VOTING = "voting"        # During election stage
    RL_ALONE = "alone"          # Before joining

    # Possible states, aka error mode (for self.state)
    ST_NORMAL = "normal"        # Normal operations
    ST_RECOVERY = "recovery"    # When a failed node is being recovered
    ST_PANIC = "panic"          # "I don't do anything" mode

    # Elections and failover timeouts. The figures in the comments below
    # assume core.cfg['TIMER'] is 3 seconds; the real value comes from config.
    TM_TALLY = 1                        # Records vote for 1 sec
    TM_WATCHDOG = core.cfg['TIMER']     # Check for failure every 3 sec
    TM_MASTER = TM_WATCHDOG * 2         # Re-elect master if no response within 6 sec
    TM_SLAVE = TM_WATCHDOG * 3          # Trigger failover if no response within 9 sec (master + tally + rounding)

    def __init__(self):
        """Initialise state, helper services, watchdogs and election fields."""
        self.role = MasterService.RL_ALONE      # Current role of this node
        self.state = MasterService.ST_NORMAL    # Current cluster error status
        self.master = None                      # Name of the active master
        self.masterLastSeen = 0                 # Timestamp for master failover
        self.status = dict()                    # Whole cluster status
        self.localNode = Node(DNSCache.getInstance().name)
        self.disk = DiskHeartbeat()
        self.s_slaveHb = SlaveHearbeatService(self)
        self.s_masterHb = MasterHeartbeatService(self)
        self.s_rpc = RPCService(self)

        # Watchdogs for failover
        self.l_slaveDog = task.LoopingCall(self.checkMasterHeartbeat)
        self.l_masterDog = task.LoopingCall(self.checkSlaveHeartbeats)

        # Election Stuff
        self.ballotBox = None           # All received votes
        self.currentElection = None     # Election name, None if no pending election
        self.f_tally = None             # IDelayedCall used to trigger countVotes()
        self.lastTallyDate = 0          # Timestamp for debugging elections
        self.panicRequested = False     # True if panic is requested during election

    def startService(self):
        """Start the service, open the UDP message port and schedule joinCluster."""
        Service.startService(self)

        # Print welcome message
        log.info("Starting cxmd version", meta.version)

        self._messagePort = reactor.listenUDP(
            core.cfg['UDP_PORT'], UDPListener(self.dispatchMessage))
        # joinCluster is deferred by 2 seconds rather than called inline.
        reactor.callLater(2, self.joinCluster)

    def stopService(self):
        """Stop listening, leave the cluster and stop the reactor.

        Returns the deferred from leaveCluster() (with the reactor shutdown
        chained on it), or an already-fired deferred if the service is not
        running.
        """
        # Named _shutdown rather than exit, to avoid shadowing the builtin.
        def _shutdown(result):
            log.info("Stopping daemon...")
            # NOTE(review): relies on the reactor's private _stopped flag.
            if not reactor._stopped:
                reactor.stop()

        if self.running:
            Service.stopService(self)

            # Stop receiving cluster messages
            self._messagePort.stopListening()
            self.s_rpc.stopService().addErrback(log.err)

            # Cleanly leave cluster
            d = self.leaveCluster()
            d.addErrback(log.err)
            d.addBoth(_shutdown)  # Even if there are errors
            return d
        else:
            return defer.succeed(None)

    def panic(self, noCheck=False):
        """
        Engage panic mode.
        Use noCheck=True if you want to panic whatever the cluster role.

        Depending on the current role this either switches the state to
        ST_PANIC (active master, or noCheck), postpones the request (election
        in progress), forwards it through an Agent (slave), or raises
        RPCRefusedError (any other role).
        """

        def panicFailed(reason):
            # The forwarded request failed: panic locally, whatever the role.
            log.emerg("Panic query failed: %s." % (reason.getErrorMessage()))
            self.panic(True)

        if self.state == MasterService.ST_PANIC:
            log.emerg("Panic mode already engaged.")

        elif self.role == MasterService.RL_ACTIVE or noCheck:
            log.emerg("SYSTEM FAILURE: Panic mode engaged.")
            log.emerg(
                "This is a critical error. You should bring your ass over here, right now."
            )
            log.emerg(
                "Please check logs and be sure of what you're doing before re-engaging normal mode."
            )
            self.state = MasterService.ST_PANIC

            # TODO + stop LB
            if self.l_masterDog.running:
                self.l_masterDog.stop()

        elif self.role == MasterService.RL_VOTING:
            # No master during election stage: waiting next master
            log.warn("Panic mode requested during election stage: delaying.")
            self.panicRequested = True

        elif self.role == MasterService.RL_PASSIVE:
            log.warn("I'm slave: asking master to engage panic mode...")
            agent = Agent()
            d = agent.panic()
            d.addErrback(panicFailed)
            d.addErrback(log.err)

        else:  # RL_ALONE or RL_JOINING or RL_LEAVING
            log.warn(
                "I'm not in a running state (master or slave). Cannot engage panic mode."
            )
            raise RPCRefusedError("Not in running state")

    # Properties accessors
    ###########################################################################

    def getStatus(self):
        """Return the cluster status dictionary."""
        return self.status

    def getState(self):
        """Return the current state (one of the ST_* constants)."""
        return self.state

    def getLocalNode(self):
        """Return the Node object created for this host."""
        return self.localNode

    def getActiveMaster(self):
        """Return the name of the active master, or None if not set."""
        return self.master

    def getNodesList(self):
        """Return the keys of the status dictionary as a list."""
        # list() keeps the result a real list even where keys() is a view.
        return list(self.status.keys())

    def isActive(self):
        """Return True if this node's role is RL_ACTIVE."""
        return self.role == MasterService.RL_ACTIVE

    def isInPanic(self):
        """Return True if the state is ST_PANIC."""
        return self.state == MasterService.ST_PANIC

    # Messages handlers
    ###########################################################################

    def dispatchMessage(self, data, host):
        """Decode an incoming datagram and pass it to the handler for its type.

        Decoding errors and unknown message types are logged; an
        IDontCareException is silently dropped.
        """
        dispatcher = {
            "slavehb": self.updateNodeStatus,
            "masterhb": self.updateMasterStatus,
            "voterequest": self.voteForNewMaster,
            "voteresponse": self.recordVote,
        }

        try:
            msg = MessageHelper.get(data, host)
            log.debugd("Received", msg)
            # NOTE(review): a KeyError raised inside a handler is also caught
            # below and reported as a bad message.
            dispatcher[msg.type()](msg)
        except (MessageError, KeyError) as e:
            log.err("Bad message from %s : %s , %s" % (host, data, e))
        except IDontCareException:
            pass  # Discard useless messages