def test_get_local_nodelist_distrib_mode():
    """Check distributed-mode local node lists when this host is in the node file.

    A ``node_list`` file is written to the current working directory holding
    seven dummy node names, with this machine's hostname inserted after the
    fourth entry. ``Resources(central_mode=False)`` is then built (presumably
    picking up that file - confirm against ``Resources``) and the result of
    ``WorkerResources.get_local_nodelist`` is compared with the expected list
    for several (num_workers, workerID) combinations.
    """
    mynode = socket.gethostname()
    nodelist_in = ['knl-0020', 'knl-0021', 'knl-0022', 'knl-0036', 'knl-0137', 'knl-0138', 'knl-0139']
    with open('node_list', 'w') as f:
        for i, node in enumerate(nodelist_in):
            f.write(node + '\n')
            if i == 3:
                f.write(mynode + '\n')

    # Clean up in ``finally`` so a failing assertion cannot leave a stale
    # node_list file behind for whatever runs next in this directory.
    try:
        resources = Resources(central_mode=False)
        exp_node = mynode

        # The former check that workerID 4 (not in local_nodelist) raises was
        # removed: that case should now work.

        # Spoof current process as each worker and check nodelist.
        # Each case is (num_workers, workerID, expected local node list).
        cases = [
            (8, 5, [exp_node]),
            (1, 1, ['knl-0020', 'knl-0021', 'knl-0022', 'knl-0036', exp_node,
                    'knl-0137', 'knl-0138', 'knl-0139']),
            (4, 3, [exp_node, 'knl-0137']),
            # Sub-node workers
            (16, 9, [exp_node]),
            (16, 10, [exp_node]),
        ]
        for num_workers, workerID, exp_out in cases:
            local_nodelist = WorkerResources.get_local_nodelist(num_workers, workerID, resources)
            assert local_nodelist == exp_out, "local_nodelist returned does not match expected"
    finally:
        os.remove('node_list')
def test_get_local_nodelist_central_mode():
    """Check central-mode local node lists for 8, 4, 1 and 3 workers.

    The node list is supplied through the ``LIBE_RESOURCES_TEST_NODE_LIST``
    environment variable, whose name is handed to ``Resources`` as
    ``nodelist_env_slurm``. For every worker count, each worker ID (1-based)
    must receive the matching entry of the expected table.
    """
    env_var = "LIBE_RESOURCES_TEST_NODE_LIST"
    os.environ[env_var] = "knl-[0020-0022,0036,0137-0139,1234]"
    resources = Resources(nodelist_env_slurm=env_var, central_mode=True)

    # One expected table per worker count; entry k belongs to workerID k + 1.
    expected_tables = [
        (8, [['knl-0020'], ['knl-0021'], ['knl-0022'], ['knl-0036'],
             ['knl-0137'], ['knl-0138'], ['knl-0139'], ['knl-1234']]),
        (4, [['knl-0020', 'knl-0021'], ['knl-0022', 'knl-0036'],
             ['knl-0137', 'knl-0138'], ['knl-0139', 'knl-1234']]),
        (1, [['knl-0020', 'knl-0021', 'knl-0022', 'knl-0036',
              'knl-0137', 'knl-0138', 'knl-0139', 'knl-1234']]),
        # Test the best_split algorithm
        (3, [['knl-0020', 'knl-0021', 'knl-0022'],
             ['knl-0036', 'knl-0137', 'knl-0138'],
             ['knl-0139', 'knl-1234']]),
    ]

    # Spoof current process as each worker and check nodelist.
    for num_workers, per_worker in expected_tables:
        for workerID, expected in enumerate(per_worker, start=1):
            local_nodelist = WorkerResources.get_local_nodelist(num_workers, workerID, resources)
            assert local_nodelist == expected, "local_nodelist returned does not match expected"
def test_get_local_nodelist_distrib_mode_host_not_in_list():
    """Check distributed mode when the node list is made of dummy names only.

    The node list comes from the ``LIBE_RESOURCES_TEST_NODE_LIST`` environment
    variable and ``Resources`` is built with ``central_mode=False``. Worker 2
    of 4 is expected to receive ``['knl-0022', 'knl-0036']``.
    """
    env_var = "LIBE_RESOURCES_TEST_NODE_LIST"
    os.environ[env_var] = "knl-[0020-0022,0036,0137-0139,1234]"
    resources = Resources(nodelist_env_slurm=env_var, central_mode=False)

    expected = ['knl-0022', 'knl-0036']

    # Test running distributed mode without current host in list:
    # spoof the current process as worker 2 of 4.
    actual = WorkerResources.get_local_nodelist(4, 2, resources)

    # Now this should work
    assert actual == expected, "local_nodelist returned does not match expected"
def test_get_local_nodelist_central_mode_remove_libE_proc():
    """Check central-mode node lists when this host is registered as a libE node.

    A ``node_list`` file is written to the current working directory holding
    eight dummy node names, with this machine's hostname inserted after the
    fourth entry. ``Resources(central_mode=True)`` is built and this host is
    passed to ``add_comm_info`` as ``libE_nodes``. The expected lists for 8,
    4, 1 and 3 workers contain only the eight dummy names, not this host.
    """
    mynode = socket.gethostname()
    nodelist_in = ['knl-0020', 'knl-0021', 'knl-0022', 'knl-0036', 'knl-0137', 'knl-0138', 'knl-0139', 'knl-1234']
    with open('node_list', 'w') as f:
        for i, node in enumerate(nodelist_in):
            f.write(node + '\n')
            if i == 3:
                f.write(mynode + '\n')

    # Clean up in ``finally`` so a failing assertion cannot leave a stale
    # node_list file behind for whatever runs next in this directory.
    try:
        resources = Resources(central_mode=True)
        resources.add_comm_info(libE_nodes=[mynode])

        # One expected table per worker count; entry k belongs to workerID k + 1.
        expected_tables = [
            (8, [['knl-0020'], ['knl-0021'], ['knl-0022'], ['knl-0036'],
                 ['knl-0137'], ['knl-0138'], ['knl-0139'], ['knl-1234']]),
            (4, [['knl-0020', 'knl-0021'], ['knl-0022', 'knl-0036'],
                 ['knl-0137', 'knl-0138'], ['knl-0139', 'knl-1234']]),
            (1, [['knl-0020', 'knl-0021', 'knl-0022', 'knl-0036',
                  'knl-0137', 'knl-0138', 'knl-0139', 'knl-1234']]),
            # Test the best_split algorithm
            (3, [['knl-0020', 'knl-0021', 'knl-0022'],
                 ['knl-0036', 'knl-0137', 'knl-0138'],
                 ['knl-0139', 'knl-1234']]),
        ]

        # Spoof current process as each worker and check nodelist.
        for num_workers, per_worker in expected_tables:
            for workerID, expected in enumerate(per_worker, start=1):
                local_nodelist = WorkerResources.get_local_nodelist(num_workers, workerID, resources)
                assert local_nodelist == expected, "local_nodelist returned does not match expected"
    finally:
        os.remove('node_list')
def test_worker_resources():
    """Check the attributes of ``WorkerResources`` objects in central mode.

    The node list comes from the ``LIBE_RESOURCES_TEST_NODE_LIST`` environment
    variable. Three layouts are exercised, each with a ``Fake_comm`` built
    from the worker count: 8 workers (one node each), 2 workers (four nodes
    each) and 16 workers (two workers sharing each node).
    """
    env_var = "LIBE_RESOURCES_TEST_NODE_LIST"
    os.environ[env_var] = "knl-[0020-0022,0036,0137-0139,1234]"
    resources = Resources(nodelist_env_slurm=env_var, central_mode=True)

    # One worker per node
    single_nodes = [['knl-0020'], ['knl-0021'], ['knl-0022'], ['knl-0036'],
                    ['knl-0137'], ['knl-0138'], ['knl-0139'], ['knl-1234']]
    comm = Fake_comm(8)
    for workerID, expected in enumerate(single_nodes, start=1):
        worker = WorkerResources(workerID, comm, resources)
        assert worker.num_workers == 8, 'worker.num_workers does not match'
        assert worker.workerID == workerID, 'worker.workerID does not match'
        assert worker.local_nodelist == expected, 'worker.local_nodelist does not match'
        assert worker.local_node_count == 1, 'worker.local_node_count does not match'
        assert worker.workers_per_node == 1, 'worker.workers_per_node does not match'

    # Multiple nodes per worker
    node_groups = [['knl-0020', 'knl-0021', 'knl-0022', 'knl-0036'],
                   ['knl-0137', 'knl-0138', 'knl-0139', 'knl-1234']]
    comm2 = Fake_comm(2)
    for workerID, expected in enumerate(node_groups, start=1):
        worker = WorkerResources(workerID, comm2, resources)
        assert worker.num_workers == 2, 'worker.num_workers does not match'
        assert worker.workerID == workerID, 'worker.workerID does not match'
        assert worker.local_nodelist == expected, 'worker.local_nodelist does not match'
        assert worker.local_node_count == 4, 'worker.local_node_count does not match'
        assert worker.workers_per_node == 1, 'worker.workers_per_node does not match'

    # Multiple workers per node
    comm3 = Fake_comm(16)
    for workerID in range(1, 17):
        # Consecutive pairs of workers map onto the same single-node entry.
        expected = single_nodes[(workerID - 1) // 2]
        worker = WorkerResources(workerID, comm3, resources)
        assert worker.num_workers == 16, 'worker.num_workers does not match'
        assert worker.workerID == workerID, 'worker.workerID does not match'
        assert worker.local_nodelist == expected, 'worker.local_nodelist does not match'
        assert worker.local_node_count == 1, 'worker.local_node_count does not match'
        assert worker.workers_per_node == 2, 'worker.workers_per_node does not match'
def test_get_local_nodelist_distrib_mode_uneven_split():
    """Check distributed mode when the node file does not split evenly.

    A ``node_list`` file is written to the current working directory holding
    eight dummy node names, with this machine's hostname inserted after the
    fifth entry (nine lines in total). With ``Resources(central_mode=False)``
    and two workers, worker 2 is expected to receive
    ``['knl-0137', <this host>, 'knl-0138', 'knl-0139']``.
    """
    mynode = socket.gethostname()
    exp_node = mynode
    nodelist_in = ['knl-0020', 'knl-0021', 'knl-0022', 'knl-0036', 'knl-0137', 'knl-0138', 'knl-0139', 'knl-1234']
    with open('node_list', 'w') as f:
        for i, node in enumerate(nodelist_in):
            f.write(node + '\n')
            if i == 4:
                f.write(mynode + '\n')

    # Clean up in ``finally`` so a failing assertion cannot leave a stale
    # node_list file behind for whatever runs next in this directory.
    try:
        resources = Resources(central_mode=False)
        num_workers = 2

        # May not be at head of list - should perhaps be warning or enforced
        workerID = 2
        exp_out = ['knl-0137', exp_node, 'knl-0138', 'knl-0139']
        local_nodelist = WorkerResources.get_local_nodelist(num_workers, workerID, resources)
        assert local_nodelist == exp_out, "local_nodelist returned does not match expected"
    finally:
        os.remove('node_list')
# Example #7
# 0
 def set_worker_info(self, comm, workerid=None):
     """Sets worker info for this executor.

     Stores ``workerid`` as ``self.workerID``, creates a default
     ``Resources()`` when ``self.resources`` is falsy, and then passes the
     worker ID and ``comm`` to ``set_worker_resources`` on that object.
     """
     self.workerID = workerid
     resources = self.resources
     if not resources:
         # Nothing usable was supplied, so fall back to a default object.
         resources = self.resources = Resources()
     resources.set_worker_resources(self.workerID, comm)