def test_nonmpi_unit_with_continuous_scheduler(mocked_init, mocked_method,
                                               mocked_profiler,
                                               mocked_raise_on):
    """
    Exercise `Continuous._allocate_slot` / `_release_slot` for non-MPI units.

    The mocked LRMS (from `setUp()`) provides five nodes 'a'..'e' with two
    cores, one GPU and 5120 units of local filesystem (lfs) space each
    (path 'abc').  The test allocates a sequence of CUDs, checking both the
    returned slot structure and the scheduler's resulting free/busy node
    view, then releases slots and verifies resources are returned.
    """
    cfg, session = setUp()

    component = Continuous(cfg=dict(), session=session)
    component._lrms_info           = cfg['lrms_info']
    component._lrms_lm_info        = cfg['lrms_info']['lm_info']
    component._lrms_node_list      = cfg['lrms_info']['node_list']
    component._lrms_cores_per_node = cfg['lrms_info']['cores_per_node']
    component._lrms_gpus_per_node  = cfg['lrms_info']['gpus_per_node']
    component._lrms_lfs_per_node   = cfg['lrms_info']['lfs_per_node']
    component._tag_history         = dict()

    # scheduler's resource view: all cores and gpus free, full lfs available
    component.nodes = []
    for node, node_uid in component._lrms_node_list:
        component.nodes.append(copy.deepcopy({
            'name' : node,
            'uid'  : node_uid,
            'cores': [rpc.FREE] * component._lrms_cores_per_node,
            'gpus' : [rpc.FREE] * component._lrms_gpus_per_node,
            'lfs'  : component._lrms_lfs_per_node}))

    def slot_node(uid, name, core_map, lfs_size):
        # expected per-node entry inside an allocated slot
        return {'uid'     : uid,
                'name'    : name,
                'core_map': core_map,
                'gpu_map' : [],
                'lfs'     : {'size': lfs_size, 'path': 'abc'}}

    def expected_slot(*nodes):
        # expected slot structure returned by _allocate_slot for this LRMS
        return {'lm_info'       : 'INFO',
                'cores_per_node': 2,
                'gpus_per_node' : 1,
                'lfs_per_node'  : component._lrms_lfs_per_node,
                'nodes'         : list(nodes)}

    def expected_nodes(*states):
        # expected node list: one (cores, lfs_size) tuple per node 'a'..'e'
        return [{'uid'  : uid,
                 'name' : name,
                 'cores': cores,
                 'gpus' : [0],
                 'lfs'  : {'size': size, 'path': 'abc'}}
                for (uid, name), (cores, size)
                 in zip(enumerate('abcde', start=1), states)]

    # Allocate first CUD -- should land on first node
    cud  = nompi()
    slot = component._allocate_slot(cud)
    assert slot == expected_slot(slot_node(1, 'a', [[0]], 1024))

    # Assert resulting node list values after first CUD
    assert component.nodes == expected_nodes(([1, 0], 4096),
                                             ([0, 0], 5120),
                                             ([0, 0], 5120),
                                             ([0, 0], 5120),
                                             ([0, 0], 5120))

    # Allocate second CUD -- should land on first node
    cud  = nompi()
    slot = component._allocate_slot(cud)
    assert slot == expected_slot(slot_node(1, 'a', [[1]], 1024))

    # Allocate third CUD -- should land on second node
    cud  = nompi()
    slot = component._allocate_slot(cud)
    assert slot == expected_slot(slot_node(2, 'b', [[0]], 1024))

    # Allocate fourth CUD -- should land on third node
    cud = nompi()
    cud['lfs_per_process'] = 5120
    slot = component._allocate_slot(cud)
    assert slot == expected_slot(slot_node(3, 'c', [[0]], 5120))

    # Fail with ValueError if lfs required by cud is more than available
    with pytest.raises(ValueError):
        cud = nompi()
        cud['lfs_per_process'] = 6000
        slot = component._allocate_slot(cud)

    # Max out available resources:
    # allocate two CUDs -- should land on fourth and fifth node
    cud = nompi()
    cud['lfs_per_process'] = 5120
    slot = component._allocate_slot(cud)
    assert slot == expected_slot(slot_node(4, 'd', [[0]], 5120))

    slot = component._allocate_slot(cud)
    assert slot == expected_slot(slot_node(5, 'e', [[0]], 5120))

    # Allocate CUD to land on second node (fits its remaining 4096 lfs)
    cud = nompi()
    cud['lfs_per_process'] = 4096
    slot = component._allocate_slot(cud)
    assert slot == expected_slot(slot_node(2, 'b', [[1]], 4096))

    # Allocate CUD with no lfs requirement
    cud = nompi()
    cud['lfs_per_process'] = 0
    slot = component._allocate_slot(cud)
    assert slot == expected_slot(slot_node(3, 'c', [[1]], 0))

    # Deallocate slot
    component._release_slot(slot)
    assert component.nodes == expected_nodes(([1, 1], 3072),
                                             ([1, 1], 0),
                                             ([1, 0], 0),
                                             ([1, 0], 0),
                                             ([1, 0], 0))

    # Allocate CUD which cannot fit on available resources
    cud = nompi()
    cud['lfs_per_process'] = 5120
    slot = component._allocate_slot(cud)
    # fixed: was `slot == None`; identity comparison is the correct idiom
    assert slot is None

    # Deallocate third node
    slot = expected_slot(slot_node(3, 'c', [[0]], 5120))
    component._release_slot(slot)
    assert component.nodes == expected_nodes(([1, 1], 3072),
                                             ([1, 1], 0),
                                             ([0, 0], 5120),
                                             ([1, 0], 0),
                                             ([1, 0], 0))

    # Allocate CUD to run multi threaded application
    cud = nompi()
    cud['cpu_processes'] = 1
    cud['cpu_threads']   = 2
    slot = component._allocate_slot(cud)
    assert slot == expected_slot(slot_node(3, 'c', [[0, 1]], 1024))
    assert component.nodes == expected_nodes(([1, 1], 3072),
                                             ([1, 1], 0),
                                             ([1, 1], 4096),
                                             ([1, 0], 0),
                                             ([1, 0], 0))

    # Deallocate slot
    component._release_slot(slot)

    # Allocate CUD to run multi process, non-mpi application
    cud = nompi()
    cud['cpu_processes']   = 2
    cud['cpu_threads']     = 1
    cud['lfs_per_process'] = 1024
    slot = component._allocate_slot(cud)
    assert slot == expected_slot(slot_node(3, 'c', [[0], [1]], 2048))
    assert component.nodes == expected_nodes(([1, 1], 3072),
                                             ([1, 1], 0),
                                             ([1, 1], 3072),
                                             ([1, 0], 0),
                                             ([1, 0], 0))

    tearDown()
def test_mpi_unit_with_continuous_scheduler(mocked_init, mocked_method,
                                            mocked_profiler, mocked_raise_on):
    """
    Exercise `Continuous._allocate_slot` / `_release_slot` for MPI units.

    Uses the mocked LRMS from `setUp()`: five nodes 'a'..'e', each with four
    cores, one GPU and 5120 units of lfs space (path 'abc'), scheduled in
    'scattered' mode so a single unit may span multiple nodes.
    """
    cfg, session = setUp()

    component = Continuous(cfg={'owner': 'parent'}, session=session)
    component._scattered           = True
    component._lrms_info           = cfg['lrms_info']
    component._lrms_lm_info        = cfg['lrms_info']['lm_info']
    component._lrms_node_list      = cfg['lrms_info']['node_list']
    component._lrms_cores_per_node = cfg['lrms_info']['cores_per_node']
    component._lrms_gpus_per_node  = cfg['lrms_info']['gpus_per_node']
    component._lrms_lfs_per_node   = cfg['lrms_info']['lfs_per_node']
    component._tag_history         = dict()
    component._log                 = ru.get_logger('test.component')

    # initial resource view: every core and gpu free, full lfs on each node
    component.nodes = [copy.deepcopy({
                           'name' : name,
                           'uid'  : uid,
                           'cores': [rpc.FREE] * component._lrms_cores_per_node,
                           'gpus' : [rpc.FREE] * component._lrms_gpus_per_node,
                           'lfs'  : component._lrms_lfs_per_node})
                       for name, uid in component._lrms_node_list]

    def exp_slot(nodes):
        # slot structure _allocate_slot is expected to return
        return {'lm_info'       : 'INFO',
                'cores_per_node': component._lrms_cores_per_node,
                'gpus_per_node' : component._lrms_gpus_per_node,
                'lfs_per_node'  : component._lrms_lfs_per_node,
                'nodes'         : nodes}

    def exp_nodes(states):
        # expected node list; one (cores, lfs_size) pair per node 'a'..'e'
        return [{'uid'  : uid,
                 'name' : name,
                 'cores': cores,
                 'gpus' : [0],
                 'lfs'  : {'size': size, 'path': 'abc'}}
                for (uid, name), (cores, size)
                 in zip(enumerate('abcde', 1), states)]

    # Allocate first CUD -- should land on first node
    cud = mpi()
    cud['cpu_processes']   = 2
    cud['cpu_threads']     = 1
    cud['lfs_per_process'] = 1024
    slot = component._allocate_slot(cud)
    assert slot == exp_slot([{'uid'     : 1,
                              'name'    : 'a',
                              'core_map': [[0], [1]],
                              'gpu_map' : [],
                              'lfs'     : {'size': 2048, 'path': 'abc'}}])

    # Assert resulting node list values after first CUD
    assert component.nodes == exp_nodes([([1, 1, 0, 0], 3072),
                                         ([0, 0, 0, 0], 5120),
                                         ([0, 0, 0, 0], 5120),
                                         ([0, 0, 0, 0], 5120),
                                         ([0, 0, 0, 0], 5120)])

    # Allocate second CUD -- should land on first node
    cud = mpi()
    cud['cpu_processes']   = 1
    cud['cpu_threads']     = 2
    cud['lfs_per_process'] = 1024
    slot = component._allocate_slot(cud)
    assert slot == exp_slot([{'uid'     : 1,
                              'name'    : 'a',
                              'core_map': [[2, 3]],
                              'gpu_map' : [],
                              'lfs'     : {'size': 1024, 'path': 'abc'}}])

    # Assert resulting node list values after second CUD
    assert component.nodes == exp_nodes([([1, 1, 1, 1], 2048),
                                         ([0, 0, 0, 0], 5120),
                                         ([0, 0, 0, 0], 5120),
                                         ([0, 0, 0, 0], 5120),
                                         ([0, 0, 0, 0], 5120)])

    # Allocate third CUD -- should land on second node since no cores are
    # available on the first
    cud = mpi()
    cud['cpu_processes']   = 1
    cud['cpu_threads']     = 1
    cud['lfs_per_process'] = 1024
    slot = component._allocate_slot(cud)
    assert slot == exp_slot([{'uid'     : 2,
                              'name'    : 'b',
                              'core_map': [[0]],
                              'gpu_map' : [],
                              'lfs'     : {'size': 1024, 'path': 'abc'}}])

    # Assert resulting node list values after third CUD
    assert component.nodes == exp_nodes([([1, 1, 1, 1], 2048),
                                         ([1, 0, 0, 0], 4096),
                                         ([0, 0, 0, 0], 5120),
                                         ([0, 0, 0, 0], 5120),
                                         ([0, 0, 0, 0], 5120)])

    # Allocate fourth CUD -- should land on second and third nodes
    cud = mpi()
    cud['cpu_processes']   = 2
    cud['cpu_threads']     = 2
    cud['lfs_per_process'] = 1024
    slot = component._allocate_slot(cud)
    assert slot == exp_slot([{'uid'     : 2,
                              'name'    : 'b',
                              'core_map': [[1, 2]],
                              'gpu_map' : [],
                              'lfs'     : {'size': 1024, 'path': 'abc'}},
                             {'uid'     : 3,
                              'name'    : 'c',
                              'core_map': [[0, 1]],
                              'gpu_map' : [],
                              'lfs'     : {'size': 1024, 'path': 'abc'}}])

    # Assert resulting node list values after fourth CUD
    assert component.nodes == exp_nodes([([1, 1, 1, 1], 2048),
                                         ([1, 1, 1, 0], 3072),
                                         ([1, 1, 0, 0], 4096),
                                         ([0, 0, 0, 0], 5120),
                                         ([0, 0, 0, 0], 5120)])

    # Deallocate slot -- fourth CUD's resources must be returned
    component._release_slot(slot)
    assert component.nodes == exp_nodes([([1, 1, 1, 1], 2048),
                                         ([1, 0, 0, 0], 4096),
                                         ([0, 0, 0, 0], 5120),
                                         ([0, 0, 0, 0], 5120),
                                         ([0, 0, 0, 0], 5120)])

    tearDown()