def test_get_available_nodes_central_mode_remove_libE_proc():
    """Check per-worker node lists in central mode with the local host listed.

    A ``worker_list`` file is written containing eight fake node names with
    the current hostname inserted after the fourth entry. ``Resources`` is
    created with ``central_mode=True`` and, for 8, 4, 1 and 3 workers, each
    spoofed worker's ``get_available_nodes()`` result is compared with an
    expected list made up of the fake node names only.
    """
    mynode = socket.gethostname()
    nodelist_in = [
        'knl-0020', 'knl-0021', 'knl-0022', 'knl-0036',
        'knl-0137', 'knl-0138', 'knl-0139', 'knl-1234',
    ]
    with open('worker_list', 'w') as f:
        for i, node in enumerate(nodelist_in):
            f.write(node + '\n')
            if i == 3:
                f.write(mynode + '\n')

    # (number of workers, expected node list for workerID 1..N), kept in the
    # original order: even splits first, then the uneven "best split" case.
    cases = [
        (8, [['knl-0020'], ['knl-0021'], ['knl-0022'], ['knl-0036'],
             ['knl-0137'], ['knl-0138'], ['knl-0139'], ['knl-1234']]),
        (4, [['knl-0020', 'knl-0021'], ['knl-0022', 'knl-0036'],
             ['knl-0137', 'knl-0138'], ['knl-0139', 'knl-1234']]),
        (1, [['knl-0020', 'knl-0021', 'knl-0022', 'knl-0036',
              'knl-0137', 'knl-0138', 'knl-0139', 'knl-1234']]),
        # Test the best_split algorithm
        (3, [['knl-0020', 'knl-0021', 'knl-0022'],
             ['knl-0036', 'knl-0137', 'knl-0138'],
             ['knl-0139', 'knl-1234']]),
    ]

    # Always remove the node file, even when an assertion fails, so a stale
    # 'worker_list' cannot leak into tests that run afterwards.
    try:
        resources = Resources(central_mode=True)
        for num_workers, exp_out in cases:
            # Spoof current process as each worker and check nodelist.
            resources.num_workers = num_workers
            for wrk in range(num_workers):
                resources.workerID = wrk + 1
                local_nodelist = resources.get_available_nodes()
                assert local_nodelist == exp_out[wrk], \
                    "local_nodelist returned does not match expected"
    finally:
        os.remove('worker_list')
def test_get_local_nodelist_central_mode():
    """Check ``WorkerResources.get_local_nodelist`` in central mode.

    The node list is supplied through the ``LIBE_RESOURCES_TEST_NODE_LIST``
    environment variable. For 8, 4, 1 and 3 workers, every worker ID from 1
    to the worker count is queried and the returned node list is compared
    with the expected slice of the eight nodes.
    """
    os.environ["LIBE_RESOURCES_TEST_NODE_LIST"] = "knl-[0020-0022,0036,0137-0139,1234]"
    resources = Resources(
        nodelist_env_slurm="LIBE_RESOURCES_TEST_NODE_LIST",
        central_mode=True,
    )

    # Each scenario: (worker count, expected node list per worker, in ID order).
    scenarios = (
        (8, [['knl-0020'], ['knl-0021'], ['knl-0022'], ['knl-0036'],
             ['knl-0137'], ['knl-0138'], ['knl-0139'], ['knl-1234']]),
        (4, [['knl-0020', 'knl-0021'], ['knl-0022', 'knl-0036'],
             ['knl-0137', 'knl-0138'], ['knl-0139', 'knl-1234']]),
        (1, [['knl-0020', 'knl-0021', 'knl-0022', 'knl-0036',
              'knl-0137', 'knl-0138', 'knl-0139', 'knl-1234']]),
        # Uneven division of nodes - exercises the best_split algorithm
        (3, [['knl-0020', 'knl-0021', 'knl-0022'],
             ['knl-0036', 'knl-0137', 'knl-0138'],
             ['knl-0139', 'knl-1234']]),
    )

    for worker_count, expected_lists in scenarios:
        # Pretend to be each worker in turn (IDs are 1-based).
        for worker_id, expected in enumerate(expected_lists, start=1):
            got = WorkerResources.get_local_nodelist(
                worker_count, worker_id, resources)
            assert got == expected, "local_nodelist returned does not match expected"
def test_worker_resources():
    """Check the attributes of ``WorkerResources`` for three worker layouts.

    Using a node list taken from ``LIBE_RESOURCES_TEST_NODE_LIST`` in central
    mode, a ``WorkerResources`` object is built for every worker ID with
    8 workers (one per node), 2 workers (four nodes each) and 16 workers
    (two per node). Its ``num_workers``, ``workerID``, ``local_nodelist``,
    ``local_node_count`` and ``workers_per_node`` are then verified.
    """
    os.environ["LIBE_RESOURCES_TEST_NODE_LIST"] = "knl-[0020-0022,0036,0137-0139,1234]"
    resources = Resources(
        nodelist_env_slurm="LIBE_RESOURCES_TEST_NODE_LIST",
        central_mode=True,
    )

    one_node_each = [['knl-0020'], ['knl-0021'], ['knl-0022'], ['knl-0036'],
                     ['knl-0137'], ['knl-0138'], ['knl-0139'], ['knl-1234']]
    four_nodes_each = [['knl-0020', 'knl-0021', 'knl-0022', 'knl-0036'],
                       ['knl-0137', 'knl-0138', 'knl-0139', 'knl-1234']]

    # (worker count, expected node lists, workers per node, nodes per worker)
    layouts = [
        (8, one_node_each, 1, 1),     # One worker per node
        (2, four_nodes_each, 1, 4),   # Multiple nodes per worker
        (16, one_node_each, 2, 1),    # Multiple workers per node
    ]

    for total_workers, node_lists, per_node, nodes_each in layouts:
        comm = Fake_comm(total_workers)
        for idx in range(total_workers):
            worker_id = idx + 1
            worker = WorkerResources(worker_id, comm, resources)
            assert worker.num_workers == total_workers, 'worker.num_workers does not match'
            assert worker.workerID == worker_id, 'worker.workerID does not match'
            # Workers sharing a node map onto the same entry of node_lists.
            assert worker.local_nodelist == node_lists[idx // per_node], \
                'worker.local_nodelist does not match'
            assert worker.local_node_count == nodes_each, 'worker.local_node_count does not match'
            assert worker.workers_per_node == per_node, 'worker.workers_per_node does not match'
def test_get_local_nodelist_distrib_mode_host_not_in_list():
    """Check ``get_local_nodelist`` in distributed mode with an env-supplied list.

    The node list comes from ``LIBE_RESOURCES_TEST_NODE_LIST`` (fake ``knl``
    names, so the current host is presumably absent) and ``central_mode`` is
    False. Worker 2 of 4 is expected to receive the third and fourth nodes.
    """
    os.environ["LIBE_RESOURCES_TEST_NODE_LIST"] = "knl-[0020-0022,0036,0137-0139,1234]"
    resources = Resources(
        nodelist_env_slurm="LIBE_RESOURCES_TEST_NODE_LIST",
        central_mode=False,
    )

    # Running distributed mode without the current host in the list is
    # expected to succeed and return this worker's share of the nodes.
    expected = ['knl-0022', 'knl-0036']
    got = WorkerResources.get_local_nodelist(4, 2, resources)
    assert got == expected, "local_nodelist returned does not match expected"
def test_get_available_nodes_distrib_mode_host_not_in_list():
    """Check that ``get_available_nodes`` fails in distributed mode here.

    The node list comes from ``LIBE_RESOURCES_TEST_NODE_LIST`` (fake ``knl``
    names) and ``central_mode`` is False. With the process spoofed as
    worker 2 of 8, ``resources.get_available_nodes()`` must raise; the test
    fails if the call returns normally.
    """
    os.environ["LIBE_RESOURCES_TEST_NODE_LIST"] = "knl-[0020-0022,0036,0137-0139,1234]"
    resources = Resources(nodelist_env_slurm="LIBE_RESOURCES_TEST_NODE_LIST",
                          central_mode=False)

    # Test running distributed mode without current host in list.
    resources.num_workers = 8
    resources.workerID = 2
    try:
        resources.get_available_nodes()
    except Exception:
        # Any library error is the expected outcome. Catching Exception
        # rather than using a bare except lets KeyboardInterrupt/SystemExit
        # propagate instead of being counted as a pass.
        pass
    else:
        raise AssertionError(
            "get_available_nodes did not raise although the current host "
            "is not in the node list")
def test_get_available_nodes_distrib_mode_uneven_split():
    """Check ``get_available_nodes`` in distributed mode with nine nodes.

    A ``worker_list`` file is written containing eight fake node names with
    the current hostname inserted after the fifth entry, giving nine nodes
    for two workers. Worker 2 is expected to receive the four nodes starting
    at ``knl-0137``, which include the current host.
    """
    mynode = socket.gethostname()
    nodelist_in = [
        'knl-0020', 'knl-0021', 'knl-0022', 'knl-0036',
        'knl-0137', 'knl-0138', 'knl-0139', 'knl-1234',
    ]
    with open('worker_list', 'w') as f:
        for i, node in enumerate(nodelist_in):
            f.write(node + '\n')
            if i == 4:
                f.write(mynode + '\n')

    # Remove the node file even if the assertion fails, so a stale
    # 'worker_list' cannot affect tests that run afterwards.
    try:
        resources = Resources(central_mode=False)
        resources.num_workers = 2

        # May not be at head of list - should perhaps be warning or enforced
        resources.workerID = 2
        exp_out = ['knl-0137', mynode, 'knl-0138', 'knl-0139']

        local_nodelist = resources.get_available_nodes()
        assert local_nodelist == exp_out, "local_nodelist returned does not match expected"
    finally:
        os.remove('worker_list')
def test_get_local_nodelist_distrib_mode():
    """Check ``WorkerResources.get_local_nodelist`` in distributed mode.

    A ``worker_list`` file is written containing seven fake node names with
    the current hostname inserted after the fourth entry (eight nodes in
    total). ``Resources`` is created with ``central_mode=False`` and several
    (worker count, worker ID) pairs are checked against the node list each
    is expected to receive.
    """
    mynode = socket.gethostname()
    nodelist_in = [
        'knl-0020', 'knl-0021', 'knl-0022', 'knl-0036',
        'knl-0137', 'knl-0138', 'knl-0139',
    ]
    with open('worker_list', 'w') as f:
        for i, node in enumerate(nodelist_in):
            f.write(node + '\n')
            if i == 3:
                f.write(mynode + '\n')

    # (num_workers, workerID, expected local node list)
    # NOTE: an earlier check that a worker whose nodes exclude the current
    # host (workerID 4 of 8) raises was removed; per the original note that
    # case "should now work", so it is not asserted here.
    cases = [
        (8, 5, [mynode]),
        (1, 1, ['knl-0020', 'knl-0021', 'knl-0022', 'knl-0036', mynode,
                'knl-0137', 'knl-0138', 'knl-0139']),
        (4, 3, [mynode, 'knl-0137']),
        # Sub-node workers: two workers share each node
        (16, 9, [mynode]),
        (16, 10, [mynode]),
    ]

    # Remove the node file even if an assertion fails, so a stale
    # 'worker_list' cannot affect tests that run afterwards.
    try:
        resources = Resources(central_mode=False)
        for num_workers, workerID, exp_out in cases:
            local_nodelist = WorkerResources.get_local_nodelist(
                num_workers, workerID, resources)
            assert local_nodelist == exp_out, \
                "local_nodelist returned does not match expected"
    finally:
        os.remove('worker_list')
def test_get_available_nodes_distrib_mode():
    """Check ``Resources.get_available_nodes`` in distributed mode.

    A ``worker_list`` file is written containing seven fake node names with
    the current hostname inserted after the fourth entry (eight nodes in
    total). With ``central_mode=False`` the test first checks that spoofing
    worker 4 of 8 makes ``get_available_nodes()`` raise, then checks the
    node list returned for several other (worker count, worker ID) pairs.
    """
    mynode = socket.gethostname()
    nodelist_in = [
        'knl-0020', 'knl-0021', 'knl-0022', 'knl-0036',
        'knl-0137', 'knl-0138', 'knl-0139',
    ]
    with open('worker_list', 'w') as f:
        for i, node in enumerate(nodelist_in):
            f.write(node + '\n')
            if i == 3:
                f.write(mynode + '\n')

    # (num_workers, workerID, expected node list)
    cases = [
        (8, 5, [mynode]),
        (1, 1, ['knl-0020', 'knl-0021', 'knl-0022', 'knl-0036', mynode,
                'knl-0137', 'knl-0138', 'knl-0139']),
        (4, 3, [mynode, 'knl-0137']),
        # Sub-node workers: two workers share each node
        (16, 9, [mynode]),
        (16, 10, [mynode]),
    ]

    # Remove the node file even if an assertion fails, so a stale
    # 'worker_list' cannot affect tests that run afterwards.
    try:
        resources = Resources(central_mode=False)

        # Test workerID not in local_nodelist: worker 4 of 8 is expected to
        # be rejected by get_available_nodes().
        resources.num_workers = 8
        resources.workerID = 4
        try:
            resources.get_available_nodes()
        except Exception:
            # Expected. Catching Exception rather than using a bare except
            # lets KeyboardInterrupt/SystemExit propagate.
            pass
        else:
            raise AssertionError(
                "get_available_nodes did not raise for a worker whose "
                "nodes do not include the current host")

        # Spoof current process as each worker and check nodelist.
        for num_workers, workerID, exp_out in cases:
            resources.num_workers = num_workers
            resources.workerID = workerID
            local_nodelist = resources.get_available_nodes()
            assert local_nodelist == exp_out, \
                "local_nodelist returned does not match expected"
    finally:
        os.remove('worker_list')