def test_get_local_nodelist_distrib_mode():
    """Check worker node lists in distributed mode with this host in the node list.

    A ``node_list`` file is written to the current directory containing seven
    ``knl-*`` names, with this machine's hostname inserted after the fourth
    entry. ``Resources(central_mode=False)`` is then built (presumably it picks
    up that file - confirm against ``Resources``) and
    ``WorkerResources.get_local_nodelist`` is checked for several
    ``(num_workers, workerID)`` combinations.

    The ``node_list`` file is removed even when an assertion fails, so a
    failing run does not leave the file behind for subsequent tests.
    """
    mynode = socket.gethostname()
    exp_node = mynode
    nodelist_in = ['knl-0020', 'knl-0021', 'knl-0022', 'knl-0036',
                   'knl-0137', 'knl-0138', 'knl-0139']

    # Each case spoofs the current process as one worker:
    # (num_workers, workerID, expected local nodelist).
    # NOTE: an earlier check that a workerID outside the local nodelist
    # raises was removed, because that situation should now work.
    cases = [
        (8, 5, [exp_node]),
        (1, 1, ['knl-0020', 'knl-0021', 'knl-0022', 'knl-0036', exp_node,
                'knl-0137', 'knl-0138', 'knl-0139']),
        (4, 3, [exp_node, 'knl-0137']),
        # Sub-node workers: more workers than nodes.
        (16, 9, [exp_node]),
        (16, 10, [exp_node]),
    ]

    with open('node_list', 'w') as f:
        for i, node in enumerate(nodelist_in):
            f.write(node + '\n')
            if i == 3:
                # Place this host as the fifth entry of the list.
                f.write(mynode + '\n')

    try:
        resources = Resources(central_mode=False)
        for num_workers, workerID, exp_out in cases:
            local_nodelist = WorkerResources.get_local_nodelist(num_workers, workerID, resources)
            assert local_nodelist == exp_out, "local_nodelist returned does not match expected"
    finally:
        # Always clean up the fixture file, including on assertion failure.
        os.remove('node_list')
def test_get_local_nodelist_central_mode():
    """Check how nodes are shared out among workers in central mode.

    The node list comes from the ``LIBE_RESOURCES_TEST_NODE_LIST`` environment
    variable, which this test sets to a bracketed range expression. For each
    worker count tried, every worker (IDs start at 1) must receive the
    corresponding entry of the expected partition.
    """
    env_name = "LIBE_RESOURCES_TEST_NODE_LIST"
    os.environ[env_name] = "knl-[0020-0022,0036,0137-0139,1234]"
    resources = Resources(nodelist_env_slurm="LIBE_RESOURCES_TEST_NODE_LIST", central_mode=True)

    # One expected partition per scenario; the number of workers in a
    # scenario is the number of sub-lists in its partition.
    partitions = [
        # 8 workers: one node each.
        [['knl-0020'], ['knl-0021'], ['knl-0022'], ['knl-0036'],
         ['knl-0137'], ['knl-0138'], ['knl-0139'], ['knl-1234']],
        # 4 workers: two nodes each.
        [['knl-0020', 'knl-0021'], ['knl-0022', 'knl-0036'],
         ['knl-0137', 'knl-0138'], ['knl-0139', 'knl-1234']],
        # 1 worker: all nodes.
        [['knl-0020', 'knl-0021', 'knl-0022', 'knl-0036',
          'knl-0137', 'knl-0138', 'knl-0139', 'knl-1234']],
        # 3 workers: exercises the best_split algorithm (uneven 3/3/2 split).
        [['knl-0020', 'knl-0021', 'knl-0022'],
         ['knl-0036', 'knl-0137', 'knl-0138'],
         ['knl-0139', 'knl-1234']],
    ]

    for expected in partitions:
        worker_count = len(expected)
        # Spoof the current process as each worker in turn.
        for worker_id, expected_nodes in enumerate(expected, start=1):
            got = WorkerResources.get_local_nodelist(worker_count, worker_id, resources)
            assert got == expected_nodes, "local_nodelist returned does not match expected"
def test_get_local_nodelist_central_mode_remove_libE_proc():
    """Check central-mode node lists when this host is registered as a libE node.

    A ``node_list`` file is written with eight ``knl-*`` names plus this
    machine's hostname inserted after the fourth entry. After
    ``resources.add_comm_info(libE_nodes=[mynode])`` the expected worker node
    lists contain only the ``knl-*`` names, i.e. this host is not handed to
    any worker.

    The ``node_list`` file is removed even when an assertion fails, so a
    failing run does not leave the file behind for subsequent tests.
    """
    mynode = socket.gethostname()
    nodelist_in = ['knl-0020', 'knl-0021', 'knl-0022', 'knl-0036',
                   'knl-0137', 'knl-0138', 'knl-0139', 'knl-1234']

    # One expected partition per scenario; the number of workers in a
    # scenario is the number of sub-lists in its partition.
    partitions = [
        # 8 workers: one node each.
        [['knl-0020'], ['knl-0021'], ['knl-0022'], ['knl-0036'],
         ['knl-0137'], ['knl-0138'], ['knl-0139'], ['knl-1234']],
        # 4 workers: two nodes each.
        [['knl-0020', 'knl-0021'], ['knl-0022', 'knl-0036'],
         ['knl-0137', 'knl-0138'], ['knl-0139', 'knl-1234']],
        # 1 worker: all nodes.
        [['knl-0020', 'knl-0021', 'knl-0022', 'knl-0036',
          'knl-0137', 'knl-0138', 'knl-0139', 'knl-1234']],
        # 3 workers: exercises the best_split algorithm (uneven 3/3/2 split).
        [['knl-0020', 'knl-0021', 'knl-0022'],
         ['knl-0036', 'knl-0137', 'knl-0138'],
         ['knl-0139', 'knl-1234']],
    ]

    with open('node_list', 'w') as f:
        for i, node in enumerate(nodelist_in):
            f.write(node + '\n')
            if i == 3:
                # Place this host in the middle of the list.
                f.write(mynode + '\n')

    try:
        resources = Resources(central_mode=True)
        resources.add_comm_info(libE_nodes=[mynode])

        for exp_out in partitions:
            num_workers = len(exp_out)
            # Spoof the current process as each worker (IDs start at 1).
            for workerID, exp_nodes in enumerate(exp_out, start=1):
                local_nodelist = WorkerResources.get_local_nodelist(num_workers, workerID, resources)
                assert local_nodelist == exp_nodes, "local_nodelist returned does not match expected"
    finally:
        # Always clean up the fixture file, including on assertion failure.
        os.remove('node_list')
def test_get_local_nodelist_distrib_mode_host_not_in_list():
    """Distributed mode must still work when the current host is not in the node list.

    The node list is supplied through the ``LIBE_RESOURCES_TEST_NODE_LIST``
    environment variable and contains only ``knl-*`` names. With four workers,
    worker 2 is expected to be given the third and fourth nodes.
    """
    os.environ["LIBE_RESOURCES_TEST_NODE_LIST"] = "knl-[0020-0022,0036,0137-0139,1234]"
    resources = Resources(
        nodelist_env_slurm="LIBE_RESOURCES_TEST_NODE_LIST",
        central_mode=False,
    )

    # Spoof the current process as worker 2 of 4.
    worker_count, worker_id = 4, 2
    expected_nodes = ['knl-0022', 'knl-0036']

    # Running without the current host in the list should now succeed.
    got = WorkerResources.get_local_nodelist(worker_count, worker_id, resources)
    assert got == expected_nodes, "local_nodelist returned does not match expected"
def test_get_local_nodelist_distrib_mode_uneven_split():
    """Check a distributed-mode node list when this host is not first in its share.

    A ``node_list`` file is written with eight ``knl-*`` names plus this
    machine's hostname inserted after the fifth entry (``knl-0137``). With two
    workers, worker 2 is expected to receive
    ``['knl-0137', <this host>, 'knl-0138', 'knl-0139']``.

    The ``node_list`` file is removed even when an assertion fails, so a
    failing run does not leave the file behind for subsequent tests.
    """
    mynode = socket.gethostname()
    exp_node = mynode
    nodelist_in = ['knl-0020', 'knl-0021', 'knl-0022', 'knl-0036',
                   'knl-0137', 'knl-0138', 'knl-0139', 'knl-1234']

    with open('node_list', 'w') as f:
        for i, node in enumerate(nodelist_in):
            f.write(node + '\n')
            if i == 4:
                # Place this host directly after 'knl-0137'.
                f.write(mynode + '\n')

    try:
        resources = Resources(central_mode=False)
        num_workers = 2

        # May not be at head of list - should perhaps be warning or enforced
        workerID = 2
        exp_out = ['knl-0137', exp_node, 'knl-0138', 'knl-0139']
        local_nodelist = WorkerResources.get_local_nodelist(num_workers, workerID, resources)
        assert local_nodelist == exp_out, "local_nodelist returned does not match expected"
    finally:
        # Always clean up the fixture file, including on assertion failure.
        os.remove('node_list')