def testMasterKilledWithOneStandalone(self):
    # Scenario: bring our agency up, wait for a slave to show up, terminate
    # the master process and verify that a master comes back with a pid
    # file in place.
    # Generator-style test: every `yield` hands a Deferred back to the
    # driver (presumably an inlineCallbacks-style decorator that is outside
    # this view -- TODO confirm).
    yield self.agency.initiate()
    yield self.wait_for(self.agency.is_idle, 20)
    yield self.wait_for_slave()

    self.info('terminating master')
    # The pid is whatever run.get_pid reports for the current directory.
    master_pid = run.get_pid(os.path.curdir)
    run.term_pid(master_pid)

    # First the old master has to disappear, then one has to return.
    yield self.wait_for_master_gone()
    yield self.wait_for_master_back()

    # we should have a pid now
    yield self.wait_for_pid(self.pid_path)
def testBackupAgency(self):
    # Scenario: spawn a separate agency, let it register its host
    # descriptor, start our own agency next to it, terminate the process
    # recorded in the pid file, clean up stale descriptors, and check that
    # the host instance id stays at 1 and that a killed slave is replaced
    # by a different one.
    # Generator-style test: every `yield` hands a Deferred back to the
    # driver (presumably an inlineCallbacks-style decorator that is outside
    # this view -- TODO confirm).
    # NOTE(review): another definition named testBackupAgency is visible in
    # this source; if both live in the same class the later one shadows the
    # earlier one -- confirm.
    pid_path = os.path.join(os.path.curdir, "feat.pid")
    hostname = self.agency.get_hostname()
    yield self.spawn_agency()
    yield self.wait_for_pid(pid_path)

    def host_descriptor():
        # Polled by wait_for: fires True once the host document exists and
        # carries instance_id == 1, False otherwise.

        def check(host_desc):
            return host_desc.instance_id == 1

        def not_found(fail):
            # Failure.trap() returns the matched exception class, which is
            # truthy. Using it directly as the errback turned "document
            # not found" into a truthy poll result, so the wait could end
            # before the descriptor existed. Trap the expected error and
            # report "not yet" explicitly; other errors still propagate.
            fail.trap(NotFoundError)
            return False

        d = self.db.get_document(hostname)
        d.addCallbacks(check, not_found)
        return d

    yield self.wait_for(host_descriptor, 5)
    yield common.delay(None, 5)
    yield self.agency.initiate()
    yield self.wait_for_slave()

    pid = run.get_pid(os.path.curdir)
    run.term_pid(pid)

    # now cleanup the stale descriptors the way the monitor agent would
    yield self.wait_for_master()
    yield host_restart.do_cleanup(self.db, hostname)

    def has_host():
        m = self.agency._get_host_medium()
        return m is not None and m.is_ready()

    yield self.wait_for(has_host, 15)

    host_desc = yield self.db.get_document(hostname)
    # for host agent the instance id should not increase
    # (this is only the case for agents run by host agent)
    self.assertEqual(1, host_desc.instance_id)

    yield self.wait_for_backup()
    slave = self.agency._broker.slaves.values()[0]
    self.info("killing slave %s", slave.slave_id)
    d = slave.callRemote("shutdown", stop_process=True)
    # The remote end stops its process, so the call is expected to end
    # with a lost connection rather than a result.
    self.assertFailure(d, pb.PBConnectionLost)
    yield d

    yield common.delay(None, 0.5)
    yield self.wait_for_backup()
    # A different slave id shows the backup was replaced, not reused.
    slave2 = self.agency._broker.slaves.values()[0]
    self.assertNotEqual(slave.slave_id, slave2.slave_id)
def testBackupAgency(self):
    # Scenario: spawn a separate agency, let it register its host
    # descriptor, start our own agency next to it, terminate the process
    # recorded in the pid file, and check that the host descriptor ends up
    # with instance_id 2 and that a killed slave is replaced by a
    # different one.
    # Generator-style test: every `yield` hands a Deferred back to the
    # driver (presumably an inlineCallbacks-style decorator that is outside
    # this view -- TODO confirm).
    # NOTE(review): another definition named testBackupAgency is visible in
    # this source; if both live in the same class the later one shadows the
    # earlier one -- confirm.
    pid_path = os.path.join(os.path.curdir, 'feat.pid')
    hostname = unicode(socket.gethostbyaddr(socket.gethostname())[0])
    yield self.spawn_agency()
    yield self.wait_for_pid(pid_path)

    def host_descriptor():
        # Polled by wait_for: fires True once the host document exists and
        # carries instance_id == 1, False otherwise.

        def check(host_desc):
            return host_desc.instance_id == 1

        def not_found(fail):
            # Failure.trap() returns the matched exception class, which is
            # truthy. Using it directly as the errback turned "document
            # not found" into a truthy poll result, so the wait could end
            # before the descriptor existed. Trap the expected error and
            # report "not yet" explicitly; other errors still propagate.
            fail.trap(NotFoundError)
            return False

        d = self.db.get_document(hostname)
        d.addCallbacks(check, not_found)
        return d

    yield self.wait_for(host_descriptor, 5)
    yield common.delay(None, 5)
    yield self.agency.initiate()
    yield self.wait_for_slave()

    pid = run.get_pid(os.path.curdir)
    run.term_pid(pid)

    yield self.wait_for_master()

    def has_host():
        m = self.agency._get_host_medium()
        return m is not None and m.is_ready()

    yield self.wait_for(has_host, 10)

    host_desc = yield self.db.get_document(hostname)
    # The descriptor was at 1 before the restart; this variant expects the
    # restart to have bumped it.
    self.assertEqual(2, host_desc.instance_id)

    yield self.wait_for_backup()
    slave = self.agency._broker.slaves.values()[0]
    self.info('killing slave')
    d = slave.callRemote('shutdown', stop_process=True)
    # The remote end stops its process, so the call is expected to end
    # with a lost connection rather than a result.
    self.assertFailure(d, pb.PBConnectionLost)
    yield d

    yield common.delay(None, 0.5)
    yield self.wait_for_backup()
    # A different slave id shows the backup was replaced, not reused.
    slave2 = self.agency._broker.slaves.values()[0]
    self.assertNotEqual(slave.slave_id, slave2.slave_id)
def testLockAlreadyTaken(self):
    # Scenario: hold the lock file ourselves, start the agency, terminate
    # the process recorded in the pid file and verify that, while the lock
    # stays taken, no pid is recorded again after a 10 second grace period.
    # Generator-style test: every `yield` hands a Deferred back to the
    # driver (presumably an inlineCallbacks-style decorator that is outside
    # this view -- TODO confirm).
    self.lock_fd = open(self.lock_path, 'rb+')
    if not fcntl.lock(self.lock_fd):
        self.fail("Could not take the lock")

    try:
        yield self.agency.initiate()
        yield self.wait_for_slave()

        pid = run.get_pid(os.path.curdir)
        run.term_pid(pid)
        yield self.wait_for_master_gone()

        # Leave time for a new master to (wrongly) appear before checking.
        yield common.delay(None, 10)
        pid = run.get_pid(os.path.curdir)
        self.assertTrue(pid is None)
    finally:
        # remove the lock so that the broker in our
        # agency can connect and stop retrying, otherwise the test
        # will finish in undefined way (this is part of the teardown).
        # Done in `finally` so a failed assertion or a timeout above
        # cannot leave the lock held.
        fcntl.unlock(self.lock_fd)