Example #1
0
def test_colocator(tmpdir, ray_start_6_cpus):
    """Check that a CPU-only ``NodeColocator`` keeps its workers on one host.

    The colocator actor is created with ``num_cpus=4`` and asked for four
    worker slots without GPUs. The test then verifies that:

    * every worker handle reports the same hostname, and
    * exactly 2 CPUs remain available in the cluster afterwards
      (presumably 6 total per the ``ray_start_6_cpus`` fixture name, minus
      the 4 reserved by the colocator -- confirm against the fixture).

    Args:
        tmpdir: pytest temporary-directory fixture (not used by the body).
        ray_start_6_cpus: fixture expected to start the Ray cluster under
            test; it is requested only for its side effect.
    """
    SetColocator = NodeColocator.options(num_cpus=4)
    colocator = SetColocator.remote(
        node_rank=4, num_slots=4, world_size=5, use_gpu=False)

    # The create_workers result is not awaited; the blocking ray.get on
    # get_workers below is the only synchronization point.
    # NOTE(review): relies on the actor running calls in submission order.
    colocator.create_workers.remote()
    worker_handles = ray.get(colocator.get_workers.remote())

    # All workers must live on the same machine -> one distinct hostname.
    hostnames = ray.get([h.hostname.remote() for h in worker_handles])
    assert len(set(hostnames)) == 1

    resources = ray.available_resources()
    assert resources.get("CPU", 0) == 2, resources
Example #2
0
def test_colocator_gpu(tmpdir, ray_start_4_cpus_4_gpus):
    """Check that a GPU ``NodeColocator`` co-locates workers and shares GPUs.

    The colocator actor is created with ``num_cpus=4, num_gpus=4`` and asked
    for four GPU worker slots. The test then verifies that:

    * every worker handle reports the same hostname,
    * no CPU or GPU remains available in the cluster,
    * the ``node:<ip>`` resource has ``1 - 4 * 0.01`` left (presumably each
      of the four workers reserves 0.01 of it -- confirm against
      ``NodeColocator``), and
    * all workers see the same ``CUDA_VISIBLE_DEVICES`` value, which lists
      four comma-separated devices.

    Args:
        tmpdir: pytest temporary-directory fixture (not used by the body).
        ray_start_4_cpus_4_gpus: fixture expected to start the Ray cluster
            under test; it is requested only for its side effect.
    """
    import math

    SetColocator = NodeColocator.options(num_cpus=4, num_gpus=4)
    colocator = SetColocator.remote(
        node_rank=0, num_slots=4, world_size=4, use_gpu=True)

    # The create_workers result is not awaited; the blocking ray.get on
    # get_workers below is the only synchronization point.
    # NOTE(review): relies on the actor running calls in submission order.
    colocator.create_workers.remote()
    worker_handles = ray.get(colocator.get_workers.remote())

    # All workers must live on the same machine -> one distinct hostname.
    hostnames = ray.get([h.hostname.remote() for h in worker_handles])
    assert len(set(hostnames)) == 1

    resources = ray.available_resources()
    ip_address = ray.services.get_node_ip_address()
    assert resources.get("CPU", 0) == 0, resources
    assert resources.get("GPU", 0) == 0, resources
    # Compare with a tolerance: the expected value is the result of float
    # arithmetic, so exact equality could fail on representation error alone.
    node_remaining = resources.get(f"node:{ip_address}", 0)
    assert math.isclose(node_remaining, 1 - 4 * 0.01), resources

    all_envs = ray.get([h.env_vars.remote() for h in worker_handles])
    # Every worker must be given the identical device list ...
    assert len({ev["CUDA_VISIBLE_DEVICES"] for ev in all_envs}) == 1
    # ... and that list must name four devices.
    assert len(all_envs[0]["CUDA_VISIBLE_DEVICES"].split(",")) == 4