def format(): if DiskHeartbeat.is_in_use(): nodes=DiskHeartbeat().get_all_ts().keys() print "Warning: Heartbeat disk is in use by:", ", ".join(nodes) if(raw_input("Proceed ? [y/N]:").upper() != "Y"): print "Aborded by user." raise SystemExit(0) DiskHeartbeat.format() print "Device", core.cfg['HB_DISK'], "formatted."
def joinCluster(self): def startHeartbeats(): self._startSlave() self.s_rpc.startService() if self.role == MasterService.RL_ACTIVE: self._startMaster() def joinRefused(reason): reason.trap(NodeRefusedError, RPCRefusedError) log.err("Join to cluster %s failed: Master %s has refused me: %s" % (core.cfg['CLUSTER_NAME'], self.master, reason.getErrorMessage())) self.stopService() def joinAccepted(result): self.role=MasterService.RL_PASSIVE log.info("Join successfull, I'm now part of cluster %s." % (core.cfg['CLUSTER_NAME'])) startHeartbeats() def masterConnected(obj): d = obj.callRemote("register",DNSCache.getInstance().name) d.addCallbacks(joinAccepted,joinRefused) d.addErrback(log.err) d.addBoth(lambda _: rpcConnector.disconnect()) return d try: if self.master is None: # New active master if DNSCache.getInstance().name not in core.cfg['ALLOWED_NODES']: log.warn("I'm not allowed to create a new cluster. Exiting.") raise Exception("Cluster creation not allowed") if DiskHeartbeat.is_in_use(): log.err("Heartbeat disk is in use but we are alone !") raise Exception("Heartbeat disk already in use") log.info("No master found. I'm now the new master of %s." % (core.cfg['CLUSTER_NAME'])) self.role=MasterService.RL_ACTIVE self.master=DNSCache.getInstance().name self.status[self.master]={'timestamp': 0, 'offset': 0, 'vms': []} self.disk.make_slot(DNSCache.getInstance().name) startHeartbeats() else: # Passive master self.role=MasterService.RL_JOINING log.info("Trying to join cluster %s..." % (core.cfg['CLUSTER_NAME'])) factory = pb.PBClientFactory() rpcConnector = reactor.connectTCP(self.master, core.cfg['TCP_PORT'], factory) d = factory.getRootObject() d.addCallback(masterConnected) d.addErrback(log.err) except Exception, e: log.err("Startup failed: %s. Shutting down." % (e)) self.stopService()
def __init__(self):
    """Initialise the master service state.

    The node starts alone (RL_ALONE) in the normal state (ST_NORMAL), with
    no known master, an empty cluster status and no pending election.
    """
    # Current role of this node and current cluster error status.
    self.role = MasterService.RL_ALONE
    self.state = MasterService.ST_NORMAL

    # Name of the active master and the timestamp used for master failover.
    self.master = None
    self.masterLastSeen = 0

    # Whole cluster status.
    self.status = {}

    # Local node, heartbeat disk and the services driven by this object.
    self.localNode = Node(DNSCache.getInstance().name)
    self.disk = DiskHeartbeat()
    self.s_slaveHb = SlaveHearbeatService(self)
    self.s_masterHb = MasterHeartbeatService(self)
    self.s_rpc = RPCService(self)

    # Watchdogs for failover.
    self.l_slaveDog = task.LoopingCall(self.checkMasterHeartbeat)
    self.l_masterDog = task.LoopingCall(self.checkSlaveHeartbeats)

    # Election state:
    #   ballotBox       - all received votes
    #   currentElection - election name, None if no election is pending
    #   f_tally         - IDelayedCall used to trigger countVotes()
    #   lastTallyDate   - timestamp kept for debugging elections
    #   panicRequested  - True if panic is requested during an election
    self.ballotBox = None
    self.currentElection = None
    self.f_tally = None
    self.lastTallyDate = 0
    self.panicRequested = False
def joinCluster(self): def startHeartbeats(): self._startSlave() self.s_rpc.startService() if self.role == MasterService.RL_ACTIVE: self._startMaster() def joinRefused(reason): reason.trap(NodeRefusedError, RPCRefusedError) log.err("Join to cluster %s failed: Master %s has refused me: %s" % (core.cfg['CLUSTER_NAME'], self.master, reason.getErrorMessage())) self.stopService() def joinAccepted(result): self.role = MasterService.RL_PASSIVE log.info("Join successfull, I'm now part of cluster %s." % (core.cfg['CLUSTER_NAME'])) startHeartbeats() def masterConnected(obj): d = obj.callRemote("register", DNSCache.getInstance().name) d.addCallbacks(joinAccepted, joinRefused) d.addErrback(log.err) d.addBoth(lambda _: rpcConnector.disconnect()) return d try: if self.master is None: # New active master if DNSCache.getInstance( ).name not in core.cfg['ALLOWED_NODES']: log.warn( "I'm not allowed to create a new cluster. Exiting.") raise Exception("Cluster creation not allowed") if DiskHeartbeat.is_in_use(): log.err("Heartbeat disk is in use but we are alone !") raise Exception("Heartbeat disk already in use") log.info("No master found. I'm now the new master of %s." % (core.cfg['CLUSTER_NAME'])) self.role = MasterService.RL_ACTIVE self.master = DNSCache.getInstance().name self.status[self.master] = { 'timestamp': 0, 'offset': 0, 'vms': [] } self.disk.make_slot(DNSCache.getInstance().name) startHeartbeats() else: # Passive master self.role = MasterService.RL_JOINING log.info("Trying to join cluster %s..." % (core.cfg['CLUSTER_NAME'])) factory = pb.PBClientFactory() rpcConnector = reactor.connectTCP(self.master, core.cfg['TCP_PORT'], factory) d = factory.getRootObject() d.addCallback(masterConnected) d.addErrback(log.err) except Exception, e: log.err("Startup failed: %s. Shutting down." % (e)) self.stopService()