Example #1
0
	def format():
		if DiskHeartbeat.is_in_use():
			nodes=DiskHeartbeat().get_all_ts().keys()
			print "Warning: Heartbeat disk is in use by:", ", ".join(nodes)

			if(raw_input("Proceed ? [y/N]:").upper() != "Y"):
				print "Aborded by user."
				raise SystemExit(0)

		DiskHeartbeat.format()
		print "Device", core.cfg['HB_DISK'], "formatted."
Example #2
0
File: master.py Project: nagius/cxm
	def joinCluster(self):

		def startHeartbeats():
			self._startSlave()
			self.s_rpc.startService()

			if self.role == MasterService.RL_ACTIVE:
				self._startMaster() 

		def joinRefused(reason):
			reason.trap(NodeRefusedError, RPCRefusedError)
			log.err("Join to cluster %s failed: Master %s has refused me: %s" % 
				(core.cfg['CLUSTER_NAME'], self.master, reason.getErrorMessage()))
			self.stopService()

		def joinAccepted(result):
			self.role=MasterService.RL_PASSIVE
			log.info("Join successfull, I'm now part of cluster %s." % (core.cfg['CLUSTER_NAME']))
			startHeartbeats()
			
		def masterConnected(obj):
			d = obj.callRemote("register",DNSCache.getInstance().name)
			d.addCallbacks(joinAccepted,joinRefused)
			d.addErrback(log.err)
			d.addBoth(lambda _: rpcConnector.disconnect())
			return d

		try:
			if self.master is None:
				# New active master
				if DNSCache.getInstance().name not in core.cfg['ALLOWED_NODES']:
					log.warn("I'm not allowed to create a new cluster. Exiting.")
					raise Exception("Cluster creation not allowed")

				if DiskHeartbeat.is_in_use():
					log.err("Heartbeat disk is in use but we are alone !")
					raise Exception("Heartbeat disk already in use")

				log.info("No master found. I'm now the new master of %s." % (core.cfg['CLUSTER_NAME']))
				self.role=MasterService.RL_ACTIVE
				self.master=DNSCache.getInstance().name
				self.status[self.master]={'timestamp': 0, 'offset': 0, 'vms': []}
				self.disk.make_slot(DNSCache.getInstance().name)
				startHeartbeats()

			else:
				# Passive master
				self.role=MasterService.RL_JOINING
				log.info("Trying to join cluster %s..." % (core.cfg['CLUSTER_NAME']))

				factory = pb.PBClientFactory()
				rpcConnector = reactor.connectTCP(self.master, core.cfg['TCP_PORT'], factory)
				d = factory.getRootObject()
				d.addCallback(masterConnected)
				d.addErrback(log.err)
		except Exception, e:
			log.err("Startup failed: %s. Shutting down." % (e))
			self.stopService()
Example #3
0
    def __init__(self):
        """Set up node state, heartbeat/RPC services, watchdogs and election data.

        Nothing is started here: the services and LoopingCall watchdogs are
        only constructed.
        """
        # Current role of this node and current cluster error status.
        self.role = MasterService.RL_ALONE
        self.state = MasterService.ST_NORMAL

        # Name of the active master, and the timestamp used for master
        # failover.
        self.master = None
        self.masterLastSeen = 0

        # Status of the whole cluster.
        self.status = {}

        self.localNode = Node(DNSCache.getInstance().name)
        self.disk = DiskHeartbeat()

        # Sub-services; each one receives this instance.
        self.s_slaveHb = SlaveHearbeatService(self)
        self.s_masterHb = MasterHeartbeatService(self)
        self.s_rpc = RPCService(self)

        # Failover watchdogs.
        self.l_slaveDog = task.LoopingCall(self.checkMasterHeartbeat)
        self.l_masterDog = task.LoopingCall(self.checkSlaveHeartbeats)

        # Election bookkeeping:
        #   ballotBox       - all received votes
        #   currentElection - election name, None if no election is pending
        #   f_tally         - IDelayedCall used to trigger countVotes()
        #   lastTallyDate   - timestamp for debugging elections
        #   panicRequested  - True if panic is requested during an election
        self.ballotBox = None
        self.currentElection = None
        self.f_tally = None
        self.lastTallyDate = 0
        self.panicRequested = False
Example #4
0
    def joinCluster(self):
        def startHeartbeats():
            self._startSlave()
            self.s_rpc.startService()

            if self.role == MasterService.RL_ACTIVE:
                self._startMaster()

        def joinRefused(reason):
            reason.trap(NodeRefusedError, RPCRefusedError)
            log.err("Join to cluster %s failed: Master %s has refused me: %s" %
                    (core.cfg['CLUSTER_NAME'], self.master,
                     reason.getErrorMessage()))
            self.stopService()

        def joinAccepted(result):
            self.role = MasterService.RL_PASSIVE
            log.info("Join successfull, I'm now part of cluster %s." %
                     (core.cfg['CLUSTER_NAME']))
            startHeartbeats()

        def masterConnected(obj):
            d = obj.callRemote("register", DNSCache.getInstance().name)
            d.addCallbacks(joinAccepted, joinRefused)
            d.addErrback(log.err)
            d.addBoth(lambda _: rpcConnector.disconnect())
            return d

        try:
            if self.master is None:
                # New active master
                if DNSCache.getInstance(
                ).name not in core.cfg['ALLOWED_NODES']:
                    log.warn(
                        "I'm not allowed to create a new cluster. Exiting.")
                    raise Exception("Cluster creation not allowed")

                if DiskHeartbeat.is_in_use():
                    log.err("Heartbeat disk is in use but we are alone !")
                    raise Exception("Heartbeat disk already in use")

                log.info("No master found. I'm now the new master of %s." %
                         (core.cfg['CLUSTER_NAME']))
                self.role = MasterService.RL_ACTIVE
                self.master = DNSCache.getInstance().name
                self.status[self.master] = {
                    'timestamp': 0,
                    'offset': 0,
                    'vms': []
                }
                self.disk.make_slot(DNSCache.getInstance().name)
                startHeartbeats()

            else:
                # Passive master
                self.role = MasterService.RL_JOINING
                log.info("Trying to join cluster %s..." %
                         (core.cfg['CLUSTER_NAME']))

                factory = pb.PBClientFactory()
                rpcConnector = reactor.connectTCP(self.master,
                                                  core.cfg['TCP_PORT'],
                                                  factory)
                d = factory.getRootObject()
                d.addCallback(masterConnected)
                d.addErrback(log.err)
        except Exception, e:
            log.err("Startup failed: %s. Shutting down." % (e))
            self.stopService()