class TestAgent(unittest.TestCase, AgentCommonTests):
    """Agent tests that run against an agent started through RuntimeUtils.

    ``setUp`` starts one agent with the default configuration plus two
    datastores and configures it as a leaf scheduler; individual tests stop
    and restart that agent with modified command line options.
    """

    def setUp(self):
        """Start an agent, configure it and reset host mode and tags."""
        self.runtime = RuntimeUtils(self.id())
        self.config = self.runtime.get_default_agent_config()
        self._datastores = ["datastore1", "datastore2"]
        self.config["--datastores"] = ",".join(self._datastores)

        (self.proc,
         self.host_client,
         self.control_client) = self.runtime.start_agent(self.config)

        self.configure_host()
        self.set_host_mode(HostMode.NORMAL)
        self.clear_datastore_tags()

    def tearDown(self):
        """Delegate all cleanup to the runtime helper."""
        self.runtime.cleanup()

    def shortDescription(self):
        """Return None so docstrings are not used as test descriptions."""
        return None

    @property
    def agent_in_uwsim(self):
        """Always False for this test class."""
        return False

    def configure_host(self):
        """Configure the running agent as a leaf scheduler owning itself."""
        host_cfg = self.host_client.get_host_config(
            Host.GetConfigRequest()).hostConfig

        role = SchedulerRole(stable_uuid("leaf scheduler"))
        role.parent_id = stable_uuid("parent scheduler")
        role.hosts = [host_cfg.agent_id]

        self.host_client.configure(
            ConfigureRequest(stable_uuid("leaf scheduler"), Roles([role])))

    def _agent_running(self):
        """Return True if the agent process can still be signalled.

        Signal 0 performs only the existence/permission check: ESRCH means
        the process is gone, EPERM means it exists but belongs to someone
        else. Any other OSError is re-raised.
        """
        if not self.proc:
            return False
        try:
            os.kill(self.proc.pid, 0)
        except OSError as err:
            if err.errno == errno.ESRCH:
                return False
            if err.errno == errno.EPERM:
                return True
            raise err
        return True

    def test_bootstrap(self):
        """Update the agent config, restart, and validate what it reports."""
        # The negative case goes first because it has no side effect.
        self._update_agent_invalid_config()

        self.runtime.stop_agent(self.proc)
        bootstrap_config = self.config.copy()
        # The availability zone is left out on purpose:
        # self._update_agent_config() sets it.
        bootstrap_config.pop("--availability-zone")
        (self.proc,
         self.host_client,
         self.control_client) = self.runtime.start_agent(bootstrap_config)
        requested = self._update_agent_config()

        # Start the agent back up and verify that the config it sees is the
        # one that was requested.
        (self.proc,
         self.host_client,
         self.control_client) = self.runtime.start_agent(bootstrap_config)
        self._validate_post_boostrap_config(requested)

    def test_agent_with_invalid_vsi(self):
        """An agent started with the esx hypervisor option must exit."""
        self.runtime.stop_agent(self.proc)
        # The config file takes precedence over command line options, so the
        # directory created by start_agent in setUp has to go first.
        shutil.rmtree(self.config["--config-path"])

        esx_config = self.config.copy()
        esx_config["--hypervisor"] = "esx"
        (self.proc,
         self.host_client,
         self.control_client) = self.runtime.start_agent(esx_config)

        # Poll until the process disappears or roughly five seconds pass.
        waited = 0
        while True:
            if not self._agent_running():
                break
            if waited >= 5:
                break
            time.sleep(0.2)
            waited += 0.2
        self.assertFalse(self._agent_running())

    def test_datastore_parse(self):
        """The agent parses a padded, comma separated --datastores value."""
        self.runtime.stop_agent(self.proc)
        padded_config = self.config.copy()
        padded_config["--datastores"] = " ds1,ds2, ds3, ds4 "
        (self.proc,
         self.host_client,
         self.control_client) = self.runtime.start_agent(padded_config)

        reply = self.host_client.get_host_config(Host.GetConfigRequest())
        reported_ids = [store.id for store in reply.hostConfig.datastores]

        expected_ids = []
        for ds_name in ["ds1", "ds2", "ds3", "ds4"]:
            expected_ids.append(str(uuid.uuid5(uuid.NAMESPACE_DNS, ds_name)))
        assert_that(reported_ids, equal_to(expected_ids))

    def test_management_only_parse(self):
        """management_only is False by default and True with the flag."""
        # Default option for management_only is False.
        reply = self.host_client.get_host_config(Host.GetConfigRequest())
        assert_that(reply.result, equal_to(Host.GetConfigResultCode.OK))
        assert_that(reply.hostConfig.management_only, equal_to(False))

        # Restart the agent with --management-only and expect the flag to be
        # reported as True.
        self.runtime.stop_agent(self.proc)
        flagged_config = self.config.copy()
        flagged_config["--management-only"] = None
        (self.proc,
         self.host_client,
         self.control_client) = self.runtime.start_agent(flagged_config)

        reply = self.host_client.get_host_config(Host.GetConfigRequest())
        assert_that(reply.result, equal_to(Host.GetConfigResultCode.OK))
        assert_that(reply.hostConfig.management_only, equal_to(True))

    def test_create_vm_with_ephemeral_disks_ttylinux(self):
        """Run the shared ephemeral disk test with the ttylinux image id."""
        self._test_create_vm_with_ephemeral_disks("ttylinux")

    def test_create_vm_with_ephemeral_disks(self):
        """Run the shared ephemeral disk test with a temporary image file."""
        datastore_dir = FakeHypervisor.datastore_id(
            self.get_image_datastore())
        image_dir = os.path.join("/tmp/images", datastore_dir)
        try:
            mkdir_p(image_dir)
            with tempfile.NamedTemporaryFile(dir=image_dir,
                                             suffix=".vmdk") as image_file:
                # The temp file is "/tmp/images/<ds>/<uniquepart>.vmdk",
                # which simulates an image being present on the agent. It is
                # deleted when the context exits.
                file_name = image_file.name.rsplit("/", 1)[-1]
                # Strip the five character ".vmdk" suffix.
                image_id = file_name[:-5]
                self._test_create_vm_with_ephemeral_disks(image_id)
        finally:
            rm_rf(image_dir)
class TestTreeIntrospection(BaseKazooTestCase):
    """Tests for the scheduler tree introspection helpers.

    ``setUp`` starts a root scheduler (localhost:15000) and a chairman
    (localhost:13000, leaf fanout 2) through RuntimeUtils and waits until
    both answer ``get_schedulers`` with OK. Tests then start agents and
    compare what the introspection helpers return with what was started.
    """

    def setUp(self):
        self.set_up_kazoo_base()
        self.zk_client = self._get_nonchroot_client()
        self.zk_client.start()

        self.runtime = RuntimeUtils(self.id())

        # Create zk paths
        self.zk_client.create(MISSING_PREFIX)
        self.zk_client.create(HOSTS_PREFIX)
        self.zk_client.create(ROLES_PREFIX)

        # start root scheduler
        self.root_host = "localhost"
        self.root_port = 15000
        self.root_conf = {
            "healthcheck": {
                "timeout_ms": ROOT_SCHEDULER_TIME_OUT,
                "period_ms": ROOT_SCHEDULER_PERIOD,
            },
            "zookeeper": {
                "quorum": "localhost:%i" % (DEFAULT_ZK_PORT,),
            },
            "bind": self.root_host,
            "port": self.root_port,
        }
        self.runtime.start_root_scheduler(self.root_conf)

        # Argument order is (port, host) here, unlike the chairman client
        # below, exactly as in the original call.
        (self.root_transport,
         self.root_sch_client) = create_root_client(self.root_port,
                                                    self.root_host)

        # start chairman
        self.chairman_host = "localhost"
        self.chairman_port = 13000
        self.leaf_fanout = 2
        self.runtime.start_chairman(self.chairman_host,
                                    self.chairman_port,
                                    self.leaf_fanout)
        (self.chairman_transport,
         self.chairman_client) = create_chairman_client(self.chairman_host,
                                                        self.chairman_port)

        # Wait for chairman and root scheduler to finish their elections
        _wait_on_code(self.root_sch_client.get_schedulers,
                      GetSchedulersResultCode.OK)
        _wait_on_code(self.chairman_client.get_schedulers,
                      GetSchedulersResultCode.OK,
                      GetSchedulersRequest)

    def tearDown(self):
        self.runtime.cleanup()
        self.zk_client.stop()
        self.zk_client.close()
        self.tear_down_kazoo_base()

    def test_get_service_leader(self):
        """Test get service leader"""
        # Check the chairman leader
        (address, port) = get_service_leader(self.zk_client,
                                             CHAIRMAN_SERVICE)
        assert_that(address, is_(self.chairman_host))
        assert_that(port, is_(self.chairman_port))

        deleted = threading.Event()

        def _deleted(children):
            # Fires once the service node has no children left.
            if not children:
                deleted.set()

        self.zk_client.ChildrenWatch(CHAIRMAN_SERVICE, _deleted)

        # Stop chairman
        stop_service(self.runtime.chairman_procs[0])

        # Wait for the leader to leave
        deleted.wait(30)
        res = get_service_leader(self.zk_client, CHAIRMAN_SERVICE)
        assert_that(res, is_(None))

    def test_get_root_scheduler(self):
        """Test root scheduler introspection"""
        (root_host, root_port) = get_service_leader(self.zk_client,
                                                    ROOT_SCHEDULER_SERVICE)

        # Verify that an empty root scheduler is constructed correctly
        root_sch = get_root_scheduler(root_host, root_port)

        assert_that(root_sch.id, is_(ROOT_SCHEDULER_ID))
        assert_that(root_sch.type, is_(ROOT_SCHEDULER_TYPE))
        assert_that(len(root_sch.children), is_(0))
        assert_that(root_sch.owner, not_none())
        root_owner = root_sch.owner
        assert_that(root_owner.id, is_(ROOT_SCHEDULER_ID))
        assert_that(root_owner.address, is_(root_host))
        assert_that(root_owner.port, is_(root_port))
        assert_that(root_owner.parent, is_(None))

        # Start an agent
        agent_host = "localhost"
        agent_port = 20000
        config = self.runtime.get_agent_config(agent_host, agent_port,
                                               self.chairman_host,
                                               self.chairman_port)
        res = self.runtime.start_agent(config)
        agent_client = res[1]

        # Wait for the root scheduler to be configured
        _wait_for_configuration(self.root_sch_client, 1)

        new_root_sch = get_root_scheduler(root_host, root_port)
        assert_that(len(new_root_sch.children), is_(1))

        req = THost.GetConfigRequest()
        agent_id = agent_client.get_host_config(req).hostConfig.agent_id

        # list() keeps this working whether values() is a list or a view.
        leaf = list(new_root_sch.children.values())[0]
        assert_that(leaf.type, is_(LEAF_SCHEDULER_TYPE))
        assert_that(leaf.parent, is_(new_root_sch))
        assert_that(len(leaf.children), is_(0))
        assert_that(leaf.owner.id, is_(agent_id))
        assert_that(leaf.owner.address, is_(agent_host))
        assert_that(leaf.owner.port, is_(agent_port))
        assert_that(leaf.owner.parent, is_(leaf))

        deleted = threading.Event()

        def _deleted(children):
            # Fires once the service node has no children left.
            if not children:
                deleted.set()

        self.zk_client.ChildrenWatch(ROOT_SCHEDULER_SERVICE, _deleted)
        stop_service(self.runtime.root_procs[0])

        # Wait for the leader to leave
        deleted.wait(30)
        empty_root = get_root_scheduler(root_host, root_port)
        # NOTE(review): this assertion compares the value with itself, so it
        # only proves the call above did not raise. The expected value once
        # the root scheduler is stopped is not visible from this file
        # (possibly None, as get_leaf_scheduler returns for an offline
        # agent) -- confirm and tighten.
        assert_that(empty_root, is_(empty_root))

    def test_get_leaf_scheduler(self):
        """Test agent introspection"""
        agent_host = "localhost"
        agent_port = 20000

        # Agent not online
        leaf = get_leaf_scheduler(agent_host, agent_port)
        assert_that(leaf, is_(None))

        # Start an agent with an invalid chairman, so that it doesn't
        # get configured, because we want to configure it manually
        config = self.runtime.get_agent_config(agent_host, agent_port,
                                               "localhost", 24234)
        res = self.runtime.start_agent(config)
        agent_client = res[1]

        # Agent is online but not a leaf scheduler
        leaf = get_leaf_scheduler(agent_host, agent_port)
        assert_that(leaf, is_(None))

        leaf_id = stable_uuid("leaf scheduler")
        config_req = THost.GetConfigRequest()
        host_config = agent_client.get_host_config(config_req).hostConfig

        leaf_scheduler = SchedulerRole(leaf_id)
        leaf_scheduler.parent_id = stable_uuid("parent scheduler")
        leaf_scheduler.hosts = [host_config.agent_id]
        leaf_scheduler.host_children = [
            ChildInfo(id=host_config.agent_id,
                      address=agent_host,
                      port=agent_port),
        ]
        config_request = ConfigureRequest(leaf_id, Roles([leaf_scheduler]))

        resp = agent_client.configure(config_request)
        assert_that(resp.result, is_(ConfigureResultCode.OK))

        leaf = get_leaf_scheduler(agent_host, agent_port)

        assert_that(leaf.id, not_none())
        assert_that(leaf.type, is_(LEAF_SCHEDULER_TYPE))
        assert_that(len(leaf.children), is_(1))
        # Verify the owner host
        owner_host = leaf.owner
        assert_that(owner_host, not_none())
        assert_that(owner_host.id, is_(host_config.agent_id))
        assert_that(owner_host.address, is_(agent_host))
        assert_that(owner_host.port, is_(agent_port))
        assert_that(owner_host.parent, is_(leaf))

    def _check_tree(self, root_sch, root_address, root_port,
                    _fanout, agents_list):
        """Assert that a scheduler hierarchy matches the started agents.

        Assumes the agents were added to the hierarchy sequentially, so
        every run of ``_fanout`` consecutive agents forms one leaf whose
        first agent owns the leaf scheduler. Fails through assertions.

        root_sch: the root scheduler object to verify
        root_address: a string, root scheduler's address
        root_port: an int, root scheduler's port
        _fanout: an integer that specifies the max fanout
        agents_list: a list of tuples (id, address, port), where every
                     tuple represents an agent
        """
        # Split the flat agent list into per-leaf lists, i.e.
        # [[leaf1_owner, leaf1_child2 ...], [leaf2_owner, leaf2_child2 ...]]
        # where leafX_owner is a tuple of (id, address, port).
        leaves = [agents_list[start:start + _fanout]
                  for start in range(0, len(agents_list), _fanout)]

        # check root
        assert_that(root_sch.id, is_(ROOT_SCHEDULER_ID))
        assert_that(root_sch.type, is_(ROOT_SCHEDULER_TYPE))
        assert_that(root_sch.owner, not_none())
        assert_that(root_sch.owner.address, is_(root_address))
        assert_that(root_sch.owner.port, is_(root_port))
        assert_that(len(root_sch.children), is_(len(leaves)))

        # Map scheduler hosts, the map will look like this:
        # {leaf_owner_host_id: [(leaf_owner_host_id, address, port), ...]}
        sch_hosts = dict((leaf[0][0], leaf) for leaf in leaves)

        for child in root_sch.children.values():
            leaf_owner_id = child.owner.id
            # A KeyError here means the leaf is owned by an unexpected host.
            expected_owner = sch_hosts[leaf_owner_id][0]
            assert_that(leaf_owner_id, is_(expected_owner[0]))
            assert_that(child.parent.owner.id, is_(ROOT_SCHEDULER_ID))
            assert_that(child.owner.address, is_(expected_owner[1]))
            assert_that(child.owner.port, is_(expected_owner[2]))
            assert_that(child.owner.parent, is_(child))
            assert_that(child.type, is_(LEAF_SCHEDULER_TYPE))

            # Verify the leaf's child hosts against the expected agents,
            # indexed by host id.
            children_map = dict((agent[0], agent)
                                for agent in sch_hosts[leaf_owner_id])

            for child_host in child.children.values():
                assert_that(children_map.get(child_host.id, None),
                            not_none())
                expected_host = children_map[child_host.id]
                assert_that(expected_host[0], is_(child_host.id))
                assert_that(expected_host[1], is_(child_host.address))
                assert_that(expected_host[2], is_(child_host.port))
                assert_that(child_host.parent, is_(child))

    def wait_for_registration(self, agent_id, timeout=10):
        """Wait for the node ROLES_PREFIX/<agent_id> to exist in zk.

        agent_id: id of the agent whose node is watched
        timeout: seconds to wait before the assertion fails
        """
        completed = threading.Event()

        def wait_created(data, stat, event):
            """Set the event once the node exists."""
            if stat:
                completed.set()

        self.zk_client.DataWatch(ROLES_PREFIX + "/" + agent_id, wait_created)
        completed.wait(timeout)
        assert_that(completed.is_set(), is_(True),
                    "agent %s did not show up under %s within %s seconds"
                    % (agent_id, ROLES_PREFIX, timeout))

    def _start_agents(self, agent_host, agent_ports):
        """Start agents on different ports and return their agent ids.

        When agents register sequentially, the order of events of
        onHostAdded is not guaranteed. For example, if agent A, B then C
        register, they won't be inserted into the hierarchy in that order;
        a possible order is B, A then C. Thus, we wait on hosts that we
        think will own a leaf scheduler before registering more agents
        that will go under the same leaf.
        """
        agent_ids = []
        for index, agent_port in enumerate(agent_ports):
            agent_config = self.runtime.get_agent_config(agent_host,
                                                         agent_port,
                                                         self.chairman_host,
                                                         self.chairman_port)
            res = self.runtime.start_agent(agent_config)
            agent_client = res[1]
            config_req = THost.GetConfigRequest()
            host_config = agent_client.get_host_config(config_req).hostConfig
            agent_id = host_config.agent_id
            # Every leaf_fanout-th agent is expected to own a new leaf.
            if index % self.leaf_fanout == 0:
                self.wait_for_registration(agent_id)
            agent_ids.append(agent_id)
        return agent_ids

    def test_get_hierarchy_from_zk(self):
        # Three agents with a fanout of two are expected to form two leaves.
        agent_host = "localhost"
        agent_ports = [20000, 20001, 20002]

        agent_ids = self._start_agents(agent_host, agent_ports)

        # The chairman will persist the schedulers then push the
        # configurations, thus after we detect that the root scheduler
        # has been configured we know that the leaf schedulers have already
        # been persisted to zk
        _wait_for_configuration(self.root_sch_client, 2)

        root = get_hierarchy_from_zk(self.zk_client)

        agent_list = [(agent_id, agent_host, agent_port)
                      for agent_id, agent_port in zip(agent_ids, agent_ports)]
        # verify the hierarchy structure
        self._check_tree(root, self.root_host, self.root_port,
                         self.leaf_fanout, agent_list)

    def test_get_hierarchy_from_chairman(self):
        # Same scenario as the zk variant, read through the chairman.
        agent_host = "localhost"
        agent_ports = [20000, 20001, 20002]

        agent_ids = self._start_agents(agent_host, agent_ports)
        _wait_for_configuration(self.root_sch_client, 2)

        root = get_hierarchy_from_chairman(self.chairman_host,
                                           self.chairman_port,
                                           self.root_host,
                                           self.root_port)

        agent_list = [(agent_id, agent_host, agent_port)
                      for agent_id, agent_port in zip(agent_ids, agent_ports)]
        # verify the hierarchy structure
        self._check_tree(root, self.root_host, self.root_port,
                         self.leaf_fanout, agent_list)

    def test_update_status(self):
        # update_status() must follow the root scheduler and the leaf host
        # through online -> offline -> online.
        agent_host = "localhost"
        agent_port1 = 20000
        config = self.runtime.get_agent_config(agent_host, agent_port1,
                                               self.chairman_host,
                                               self.chairman_port)
        res = self.runtime.start_agent(config)
        _wait_for_configuration(self.root_sch_client, 1)

        root = get_hierarchy_from_zk(self.zk_client)
        # Update the hierarchy status
        root.update_status()

        # verify that the root scheduler and leaf are online
        assert_that(root.owner.status, is_(STATUS_ONLINE))
        assert_that(len(root.children), is_(1))
        assert_that(list(root.children.values())[0].owner.status,
                    is_(STATUS_ONLINE))

        # Kill both root scheduler and leaf host
        stop_service(self.runtime.root_procs[0])
        self.runtime.stop_agent(res[0])
        # Update the hierarchy status
        root.update_status()
        assert_that(root.owner.status, is_(STATUS_OFFLINE))
        assert_that(list(root.children.values())[0].owner.status,
                    is_(STATUS_OFFLINE))

        # Start the root scheduler and leaf scheduler
        self.runtime.start_root_scheduler(self.root_conf)
        config = self.runtime.get_agent_config(agent_host, agent_port1,
                                               self.chairman_host,
                                               self.chairman_port)
        self.runtime.start_agent(config)
        (self.root_transport,
         self.root_sch_client) = create_root_client(self.root_port,
                                                    self.root_host)
        # Wait for the root scheduler's leader election
        _wait_on_code(self.root_sch_client.get_schedulers,
                      GetSchedulersResultCode.OK)

        # Check the status again
        root.update_status()
        # verify that the root scheduler and leaf are online
        assert_that(root.owner.status, is_(STATUS_ONLINE))
        assert_that(list(root.children.values())[0].owner.status,
                    is_(STATUS_ONLINE))

    def test_get_hosts_from_zk(self):
        # Hosts registered with the chairman must be returned by
        # get_hosts_from_zk with matching id, address and port.
        hosts = get_hosts_from_zk(self.zk_client)
        assert_that(len(hosts), is_(0))

        networks = [Network("nw1", [NetworkType.VM])]
        dsid = str(uuid.uuid4())
        datastores = [Datastore(dsid, "ds1", DatastoreType.SHARED_VMFS)]

        # Build the registration requests for two hosts
        agent_host = "localhost"
        agent1_port = 12345
        req1 = get_register_host_request(agent_host, agent1_port,
                                         agent_id="host1",
                                         networks=networks,
                                         datastores=datastores,
                                         image_datastore=dsid,
                                         availability_zone="az1")
        agent2_port = 12346
        req2 = get_register_host_request(agent_host, agent2_port,
                                         agent_id="host2",
                                         networks=networks,
                                         datastores=datastores,
                                         image_datastore=dsid,
                                         availability_zone="az1")
        # Register two hosts
        resp = self.chairman_client.register_host(req1)
        assert_that(resp.result, is_(RegisterHostResultCode.OK))
        resp = self.chairman_client.register_host(req2)
        assert_that(resp.result, is_(RegisterHostResultCode.OK))

        hosts = get_hosts_from_zk(self.zk_client)
        # map list to dict indexed by host id
        hosts_by_id = dict((host.id, host) for host in hosts)
        assert_that(len(hosts_by_id), is_(2))

        # Verify that the requests match the hosts that were constructed by
        # get_hosts_from_zk. The expected value must be wrapped in a
        # matcher: assert_that(a, b) with a plain second argument only
        # checks that ``a`` is truthy and uses ``b`` as the failure reason.
        for req in (req1, req2):
            host = hosts_by_id[req.config.agent_id]
            assert_that(host.id, is_(req.config.agent_id))
            assert_that(host.address, is_(req.config.address.host))
            assert_that(host.port, is_(req.config.address.port))

    def test_get_missing_hosts_from_zk(self):
        # Hosts reported missing to the chairman must be returned by
        # get_missing_hosts_from_zk.
        missing = get_missing_hosts_from_zk(self.zk_client)
        assert_that(len(missing), is_(0))
        missing_hosts = ["h2", "h3"]
        req = ReportMissingRequest("host1", None, missing_hosts)
        resp = self.chairman_client.report_missing(req)
        assert_that(resp.result, is_(ReportMissingResultCode.OK))

        missing = get_missing_hosts_from_zk(self.zk_client)
        # Check both reported entries; the second check used to repeat
        # index 0.
        assert_that(missing[0] in missing_hosts, is_(True))
        assert_that(missing[1] in missing_hosts, is_(True))
class TestAgent(unittest.TestCase, AgentCommonTests):
    """Agent tests that run against an agent started through RuntimeUtils.

    ``setUp`` starts one agent with the default configuration plus two
    datastores and configures it as a leaf scheduler; individual tests stop
    and restart that agent with modified command line options.
    """

    def setUp(self):
        """Start an agent, configure it and reset host mode and tags."""
        self.runtime = RuntimeUtils(self.id())
        self.config = self.runtime.get_default_agent_config()
        self._datastores = ["datastore1", "datastore2"]
        self.config["--datastores"] = ",".join(self._datastores)

        (self.proc,
         self.host_client,
         self.control_client) = self.runtime.start_agent(self.config)

        self.configure_host()
        self.set_host_mode(HostMode.NORMAL)
        self.clear_datastore_tags()

    def tearDown(self):
        """Delegate all cleanup to the runtime helper."""
        self.runtime.cleanup()

    def shortDescription(self):
        """Return None so docstrings are not used as test descriptions."""
        return None

    @property
    def agent_in_uwsim(self):
        """Always False for this test class."""
        return False

    def configure_host(self):
        """Configure the running agent as a leaf scheduler owning itself."""
        host_cfg = self.host_client.get_host_config(
            Host.GetConfigRequest()).hostConfig

        role = SchedulerRole(stable_uuid("leaf scheduler"))
        role.parent_id = stable_uuid("parent scheduler")
        role.hosts = [host_cfg.agent_id]

        self.host_client.configure(
            ConfigureRequest(stable_uuid("leaf scheduler"), Roles([role])))

    def _agent_running(self):
        """Return True if the agent process can still be signalled.

        Signal 0 performs only the existence/permission check: ESRCH means
        the process is gone, EPERM means it exists but belongs to someone
        else. Any other OSError is re-raised.
        """
        if not self.proc:
            return False
        try:
            os.kill(self.proc.pid, 0)
        except OSError as err:
            if err.errno == errno.ESRCH:
                return False
            if err.errno == errno.EPERM:
                return True
            raise err
        return True

    def test_bootstrap(self):
        """Update the agent config, restart, and validate what it reports."""
        # The negative case goes first because it has no side effect.
        self._update_agent_invalid_config()

        self.runtime.stop_agent(self.proc)
        bootstrap_config = self.config.copy()
        # The availability zone is left out on purpose:
        # self._update_agent_config() sets it.
        bootstrap_config.pop("--availability-zone")
        (self.proc,
         self.host_client,
         self.control_client) = self.runtime.start_agent(bootstrap_config)
        requested = self._update_agent_config()

        # Start the agent back up and verify that the config it sees is the
        # one that was requested.
        (self.proc,
         self.host_client,
         self.control_client) = self.runtime.start_agent(bootstrap_config)
        self._validate_post_boostrap_config(requested)

    def test_agent_with_invalid_vsi(self):
        """An agent started with the esx hypervisor option must exit."""
        self.runtime.stop_agent(self.proc)
        # The config file takes precedence over command line options, so the
        # directory created by start_agent in setUp has to go first.
        shutil.rmtree(self.config["--config-path"])

        esx_config = self.config.copy()
        esx_config["--hypervisor"] = "esx"
        (self.proc,
         self.host_client,
         self.control_client) = self.runtime.start_agent(esx_config)

        # Poll until the process disappears or roughly five seconds pass.
        waited = 0
        while True:
            if not self._agent_running():
                break
            if waited >= 5:
                break
            time.sleep(0.2)
            waited += 0.2
        self.assertFalse(self._agent_running())

    def test_datastore_parse(self):
        """The agent parses a padded, comma separated --datastores value."""
        self.runtime.stop_agent(self.proc)
        padded_config = self.config.copy()
        padded_config["--datastores"] = " ds1,ds2, ds3, ds4 "
        (self.proc,
         self.host_client,
         self.control_client) = self.runtime.start_agent(padded_config)

        reply = self.host_client.get_host_config(Host.GetConfigRequest())
        reported_ids = [store.id for store in reply.hostConfig.datastores]

        expected_ids = []
        for ds_name in ["ds1", "ds2", "ds3", "ds4"]:
            expected_ids.append(str(uuid.uuid5(uuid.NAMESPACE_DNS, ds_name)))
        assert_that(reported_ids, equal_to(expected_ids))

    def test_management_only_parse(self):
        """management_only is False by default and True with the flag."""
        # Default option for management_only is False.
        reply = self.host_client.get_host_config(Host.GetConfigRequest())
        assert_that(reply.result, equal_to(Host.GetConfigResultCode.OK))
        assert_that(reply.hostConfig.management_only, equal_to(False))

        # Restart the agent with --management-only and expect the flag to be
        # reported as True.
        self.runtime.stop_agent(self.proc)
        flagged_config = self.config.copy()
        flagged_config["--management-only"] = None
        (self.proc,
         self.host_client,
         self.control_client) = self.runtime.start_agent(flagged_config)

        reply = self.host_client.get_host_config(Host.GetConfigRequest())
        assert_that(reply.result, equal_to(Host.GetConfigResultCode.OK))
        assert_that(reply.hostConfig.management_only, equal_to(True))

    def test_create_vm_with_ephemeral_disks_ttylinux(self):
        """Run the shared ephemeral disk test with the ttylinux image id."""
        self._test_create_vm_with_ephemeral_disks("ttylinux")

    def test_create_vm_with_ephemeral_disks(self):
        """Run the shared ephemeral disk test with a temporary image file."""
        datastore_dir = FakeHypervisor.datastore_id(
            self.get_image_datastore())
        image_dir = os.path.join("/tmp/images", datastore_dir)
        try:
            mkdir_p(image_dir)
            with tempfile.NamedTemporaryFile(dir=image_dir,
                                             suffix=".vmdk") as image_file:
                # The temp file is "/tmp/images/<ds>/<uniquepart>.vmdk",
                # which simulates an image being present on the agent. It is
                # deleted when the context exits.
                file_name = image_file.name.rsplit("/", 1)[-1]
                # Strip the five character ".vmdk" suffix.
                image_id = file_name[:-5]
                self._test_create_vm_with_ephemeral_disks(image_id)
        finally:
            rm_rf(image_dir)
class TestTreeIntrospection(BaseKazooTestCase): def setUp(self): self.set_up_kazoo_base() self.zk_client = self._get_nonchroot_client() self.zk_client.start() self.runtime = RuntimeUtils(self.id()) # Create zk paths self.zk_client.create(MISSING_PREFIX) self.zk_client.create(HOSTS_PREFIX) self.zk_client.create(ROLES_PREFIX) self.root_conf = {} self.root_conf['healthcheck'] = {} self.root_conf['zookeeper'] = {} self.root_conf['zookeeper']['quorum'] = ("localhost:%i" % (DEFAULT_ZK_PORT, )) self.root_conf['healthcheck']['timeout_ms'] = ROOT_SCHEDULER_TIME_OUT self.root_conf['healthcheck']['period_ms'] = ROOT_SCHEDULER_PERIOD # start root scheduler self.root_host = "localhost" self.root_port = 15000 self.root_conf['bind'] = self.root_host self.root_conf['port'] = self.root_port self.runtime.start_root_scheduler(self.root_conf) (self.root_transport, self.root_sch_client) = create_root_client(self.root_port, self.root_host) # start chairman self.chairman_host = 'localhost' self.chairman_port = 13000 self.leaf_fanout = 2 self.runtime.start_chairman(self.chairman_host, self.chairman_port, self.leaf_fanout) (self.chairman_transport, self.chairman_client) = \ create_chairman_client(self.chairman_host, self.chairman_port) # Wait for chairman and root scheduler to finish their elections _wait_on_code(self.root_sch_client.get_schedulers, GetSchedulersResultCode.OK) _wait_on_code(self.chairman_client.get_schedulers, GetSchedulersResultCode.OK, GetSchedulersRequest) def tearDown(self): self.runtime.cleanup() self.zk_client.stop() self.zk_client.close() self.tear_down_kazoo_base() def test_get_service_leader(self): """Test get service leader""" # Check the chairman leader (address, port) = get_service_leader(self.zk_client, CHAIRMAN_SERVICE) assert_that(address, is_(self.chairman_host)) assert_that(port, is_(self.chairman_port)) deleted = threading.Event() def _deleted(children): if not children: deleted.set() self.zk_client.ChildrenWatch(CHAIRMAN_SERVICE, _deleted) # Stop 
chairman stop_service(self.runtime.chairman_procs[0]) # Wait for the leader to leave deleted.wait(30) res = get_service_leader(self.zk_client, CHAIRMAN_SERVICE) assert_that(res, is_(None)) def test_get_root_scheduler(self): """Test root scheduler introspection""" (root_host, root_port) = get_service_leader(self.zk_client, ROOT_SCHEDULER_SERVICE) # Verify that an empty root scheduler is constructed # correctly root_sch = get_root_scheduler(root_host, root_port) assert_that(root_sch.id, is_(ROOT_SCHEDULER_ID)) assert_that(root_sch.type, is_(ROOT_SCHEDULER_TYPE)) assert_that(len(root_sch.children), is_(0)) assert_that(root_sch.owner, not_none()) root_owner = root_sch.owner assert_that(root_owner.id, is_(ROOT_SCHEDULER_ID)) assert_that(root_owner.address, is_(root_host)) assert_that(root_owner.port, is_(root_port)) assert_that(root_owner.parent, is_(None)) # Start an agent agent_host = 'localhost' agent_port = 20000 config = self.runtime.get_agent_config(agent_host, agent_port, self.chairman_host, self.chairman_port) res = self.runtime.start_agent(config) agent_client = res[1] # Wait for the root scheduler to be configured _wait_for_configuration(self.root_sch_client, 1) new_root_sch = get_root_scheduler(root_host, root_port) assert_that(len(new_root_sch.children), is_(1)) req = THost.GetConfigRequest() agent_id = agent_client.get_host_config(req).hostConfig.agent_id leaf = new_root_sch.children.values()[0] assert_that(leaf.type, is_(LEAF_SCHEDULER_TYPE)) assert_that(leaf.parent, is_(new_root_sch)) assert_that(len(leaf.children), is_(0)) assert_that(leaf.owner.id, is_(agent_id)) assert_that(leaf.owner.address, is_(agent_host)) assert_that(leaf.owner.port, is_(agent_port)) assert_that(leaf.owner.parent, is_(leaf)) deleted = threading.Event() def _deleted(children): if not children: deleted.set() self.zk_client.ChildrenWatch(ROOT_SCHEDULER_SERVICE, _deleted) stop_service(self.runtime.root_procs[0]) # Wait for the leader to leave deleted.wait(30) emoty_root = 
get_root_scheduler(root_host, root_port) assert_that(emoty_root, is_(emoty_root)) def test_get_leaf_scheduler(self): """Test agent introspection""" agent_host = 'localhost' agent_port = 20000 # Agent not online leaf = get_leaf_scheduler(agent_host, agent_port) assert_that(leaf, is_(None)) # Start an agent with an invalid chairman, so that it doesn't # get configured, because we want to configure it manually config = self.runtime.get_agent_config(agent_host, agent_port, "localhost", 24234) res = self.runtime.start_agent(config) agent_client = res[1] # Agent is online but not a leaf scheduler leaf = get_leaf_scheduler(agent_host, agent_port) assert_that(leaf, is_(None)) leafId1 = stable_uuid("leaf scheduler") config_req = THost.GetConfigRequest() host_config = agent_client.get_host_config(config_req).hostConfig leaf_scheduler = SchedulerRole(leafId1) leaf_scheduler.parent_id = stable_uuid("parent scheduler") leaf_scheduler.hosts = [host_config.agent_id] leaf_scheduler.host_children = [ ChildInfo(id=host_config.agent_id, address=agent_host, port=agent_port) ] config_request = ConfigureRequest(leafId1, Roles([leaf_scheduler])) resp = agent_client.configure(config_request) assert_that(resp.result, is_(ConfigureResultCode.OK)) leaf = get_leaf_scheduler(agent_host, agent_port) assert_that(leaf.id, not_none()) assert_that(leaf.type, is_(LEAF_SCHEDULER_TYPE)) assert_that(len(leaf.children), is_(1)) # Verify the owner host owner_host = leaf.owner assert_that(owner_host, not_none()) assert_that(owner_host.id, is_(host_config.agent_id)) assert_that(owner_host.address, is_(agent_host)) assert_that(owner_host.port, is_(agent_port)) assert_that(owner_host.parent, is_(leaf)) def _check_tree(self, root_sch, root_address, root_port, _fanout, agents_list): """ This method checks if a hierarchy is correctly constructed, assuming the agents were sequently added to the hierarchy. The check will fail on a condition by failing an assertion. 
root_address: a string, root scheduler's address root_port: an int, root scheduler's port fanout: an integer that specifies the max fanout agent_list: a list of tubles (id, address, port), where every tuple represents an agent """ # This method will split a list into multiple lists, where the # inner lists represent leaf schedulers i.e. # [[leaf1_owner, leaf1_child2 ... ],[leaf2_owner, leaf2_child2 ... ]] # leafX_owner is a tuple of (id, address, port) def split_list_by_fanout(_list, fanout): for i in xrange(0, len(_list), fanout): yield _list[i:i + fanout] leaves = list(split_list_by_fanout(agents_list, _fanout)) # check root assert_that(root_sch.id, is_(ROOT_SCHEDULER_ID)) assert_that(root_sch.type, is_(ROOT_SCHEDULER_TYPE)) assert_that(root_sch.owner, not_none()) assert_that(root_sch.owner.address, is_(root_address)) assert_that(root_sch.owner.port, is_(root_port)) assert_that(len(root_sch.children), is_(len(leaves))) # Map scheduler hosts, the map will look like this: # {leaf_owner_host_id:[(leaf_owner_host_id, address, port)]...}\ sch_hosts = {} for leaf in leaves: sch_hosts[leaf[0][0]] = leaf for child in root_sch.children.values(): leaf_owner_id = child.owner.id assert_that(leaf_owner_id, is_(sch_hosts[leaf_owner_id][0][0])) assert_that(child.parent.owner.id, is_(ROOT_SCHEDULER_ID)) assert_that(child.owner.address, is_(sch_hosts[leaf_owner_id][0][1])) assert_that(child.owner.port, is_(sch_hosts[leaf_owner_id][0][2])) assert_that(child.owner.parent, is_(child)) assert_that(child.type, is_(LEAF_SCHEDULER_TYPE)) # Veirfy the leaf's child hosts children = sch_hosts[leaf_owner_id] # map child hosts children_map = {} for c in children: children_map[c[0]] = c for child_host in child.children.values(): assert_that(children_map.get(child_host.id, None), not_none()) assert_that(children_map[child_host.id][0], is_(child_host.id)) assert_that(children_map[child_host.id][1], is_(child_host.address)) assert_that(children_map[child_host.id][2], is_(child_host.port)) 
# Closing statement of a method whose start lies before this span; unchanged.
assert_that(child_host.parent, is_(child))


def wait_for_registration(self, agent_id, timeout=10):
    """Block until the node ROLES_PREFIX/<agent_id> is seen by the watch.

    A data watch is installed through self.zk_client and the call returns
    once the watch reports a stat for the node.

    :param agent_id: id of the agent whose node is awaited.
    :param timeout: maximum number of seconds to wait.
    :raises AssertionError: if the node was not seen within the timeout.
    """
    completed = threading.Event()

    def wait_created(data, stat, event):
        """Set the event once the node exists."""
        if stat:
            completed.set()

    self.zk_client.DataWatch(ROLES_PREFIX + "/" + agent_id, wait_created)
    completed.wait(timeout)
    assert_that(completed.is_set(), is_(True))


def _start_agents(self, agent_host, agent_ports):
    """Start agents on different ports.

    When agents register sequentially, the order of events of
    onHostAdded is not guaranteed. For example, if agent A, B then C
    register, they won't be inserted into the hierarchy in that order;
    a possible order can be B, A then C. Thus, we wait on hosts that
    we think will own a leaf scheduler before registering more agents
    that will go under the same leaf.

    :param agent_host: host passed to each agent's configuration.
    :param agent_ports: one port per agent to start.
    :return: the agent ids, in the same order as agent_ports.
    """
    agent_ids = []
    for ind, agent_port in enumerate(agent_ports):
        agent_config = self.runtime.get_agent_config(agent_host,
                                                     agent_port,
                                                     self.chairman_host,
                                                     self.chairman_port)
        res = self.runtime.start_agent(agent_config)
        agent_client = res[1]
        config_req = THost.GetConfigRequest()
        host_config = agent_client.get_host_config(config_req).hostConfig
        agent_id = host_config.agent_id
        # Every leaf_fanout-th agent is the one expected to own a leaf
        # scheduler, so wait for it before starting its siblings.
        if ind % self.leaf_fanout == 0:
            self.wait_for_registration(agent_id)
        agent_ids.append(agent_id)
    return agent_ids


def test_get_hierarchy_from_zk(self):
    # Builds the hierarchy from zookeeper after three agents registered
    # and checks its structure.
    agent_host = 'localhost'
    agent_ports = [20000, 20001, 20002]
    agent_ids = self._start_agents(agent_host, agent_ports)

    # The chairman will persist the schedulers then push the
    # configurations, thus after we detect that the root scheduler
    # has been configured we know that the leaf schedulers have already
    # been persisted to zk
    _wait_for_configuration(self.root_sch_client, 2)

    root = get_hierarchy_from_zk(self.zk_client)

    agent_list = [(agent_id, agent_host, agent_port)
                  for agent_id, agent_port in zip(agent_ids, agent_ports)]
    # verify the hierarchy structure
    self._check_tree(root, self.root_host, self.root_port,
                     self.leaf_fanout, agent_list)


def test_get_hierarchy_from_chairman(self):
    # Builds the hierarchy through the chairman after three agents
    # registered and checks its structure.
    agent_host = 'localhost'
    agent_ports = [20000, 20001, 20002]
    agent_ids = self._start_agents(agent_host, agent_ports)
    _wait_for_configuration(self.root_sch_client, 2)

    root = get_hierarchy_from_chairman(self.chairman_host,
                                       self.chairman_port,
                                       self.root_host,
                                       self.root_port)

    agent_list = [(agent_id, agent_host, agent_port)
                  for agent_id, agent_port in zip(agent_ids, agent_ports)]
    # verify the hierarchy structure
    self._check_tree(root, self.root_host, self.root_port,
                     self.leaf_fanout, agent_list)


def test_update_status(self):
    # Checks that update_status() reports ONLINE/OFFLINE for the root
    # scheduler and its single leaf as they are stopped and restarted.
    agent_host = 'localhost'
    agent_port1 = 20000
    config = self.runtime.get_agent_config(agent_host, agent_port1,
                                           self.chairman_host,
                                           self.chairman_port)
    res = self.runtime.start_agent(config)
    _wait_for_configuration(self.root_sch_client, 1)

    root = get_hierarchy_from_zk(self.zk_client)
    # Update the hierarchy status
    root.update_status()

    # verify that the root scheduler and leaf are online
    assert_that(root.owner.status, is_(STATUS_ONLINE))
    assert_that(len(root.children), is_(1))
    # list(...) keeps the indexing valid whether values() returns a list
    # or a view.
    assert_that(list(root.children.values())[0].owner.status,
                is_(STATUS_ONLINE))

    # Kill both root scheduler and leaf host
    stop_service(self.runtime.root_procs[0])
    self.runtime.stop_agent(res[0])
    # Update the hierarchy status
    root.update_status()
    assert_that(root.owner.status, is_(STATUS_OFFLINE))
    assert_that(list(root.children.values())[0].owner.status,
                is_(STATUS_OFFLINE))

    # Start the root scheduler and leaf scheduler
    self.runtime.start_root_scheduler(self.root_conf)
    config = self.runtime.get_agent_config(agent_host, agent_port1,
                                           self.chairman_host,
                                           self.chairman_port)
    res = self.runtime.start_agent(config)
    (self.root_transport, self.root_sch_client) = create_root_client(
        self.root_port, self.root_host)
    # Wait for the root scheduler's leader election
    _wait_on_code(self.root_sch_client.get_schedulers,
                  GetSchedulersResultCode.OK)

    # Check the status again
    root.update_status()
    # verify that the root scheduler and leaf are online
    assert_that(root.owner.status, is_(STATUS_ONLINE))
    assert_that(list(root.children.values())[0].owner.status,
                is_(STATUS_ONLINE))


def test_get_hosts_from_zk(self):
    # Registers two hosts with the chairman and checks that
    # get_hosts_from_zk returns matching entries.
    hosts = get_hosts_from_zk(self.zk_client)
    assert_that(len(hosts), is_(0))

    networks = [Network("nw1", [NetworkType.VM])]
    dsid = str(uuid.uuid4())
    datastores = [Datastore(dsid, "ds1", DatastoreType.SHARED_VMFS)]

    # Build the registration requests for two hosts
    agent_host = "localhost"
    agent1_port = 12345
    req1 = get_register_host_request(agent_host, agent1_port,
                                     agent_id="host1",
                                     networks=networks,
                                     datastores=datastores,
                                     image_datastore=dsid,
                                     availability_zone="az1")
    agent2_port = 12346
    req2 = get_register_host_request(agent_host, agent2_port,
                                     agent_id="host2",
                                     networks=networks,
                                     datastores=datastores,
                                     image_datastore=dsid,
                                     availability_zone="az1")
    # Register two hosts
    resp = self.chairman_client.register_host(req1)
    assert_that(resp.result, is_(RegisterHostResultCode.OK))
    resp = self.chairman_client.register_host(req2)
    assert_that(resp.result, is_(RegisterHostResultCode.OK))

    hosts = get_hosts_from_zk(self.zk_client)
    # map list to dict indexed by host id
    hosts = dict((h.id, h) for h in hosts)
    assert_that(len(hosts), is_(2))
    _h1 = hosts[req1.config.agent_id]
    _h2 = hosts[req2.config.agent_id]
    # Verify that the requests match the hosts that were
    # constructed by get_hosts_from_zk. The expected value must be
    # wrapped in a matcher: a plain second argument is taken as the
    # failure reason and nothing would be compared.
    assert_that(_h1.id, is_(req1.config.agent_id))
    assert_that(_h2.id, is_(req2.config.agent_id))
    assert_that(_h1.address, is_(req1.config.address.host))
    assert_that(_h2.port, is_(req2.config.address.port))


def test_get_missing_hosts_from_zk(self):
    # Reports two hosts as missing and checks that both come back from
    # get_missing_hosts_from_zk.
    missing = get_missing_hosts_from_zk(self.zk_client)
    assert_that(len(missing), is_(0))
    missing_hosts = ["h2", "h3"]
    req = ReportMissingRequest("host1", None, missing_hosts)
    resp = self.chairman_client.report_missing(req)
    assert_that(resp.result, is_(ReportMissingResultCode.OK))

    missing = get_missing_hosts_from_zk(self.zk_client)
    # Check both returned entries, not the first one twice.
    assert_that(missing[0] in missing_hosts, is_(True))
    assert_that(missing[1] in missing_hosts, is_(True))