import pytest
import ray
from ray import services

from horovod.ray.runner import NodeColocator


def test_colocator(tmpdir, ray_start_6_cpus):
    SetColocator = NodeColocator.options(num_cpus=4)
    # node_rank must be 0 here: with num_slots=4 and world_size=5, only
    # node ranks 0 and 1 can produce valid worker ranks.
    colocator = SetColocator.remote(
        node_rank=0, num_slots=4, world_size=5, use_gpu=False)
    colocator.create_workers.remote()
    worker_handles = ray.get(colocator.get_workers.remote())
    # All workers created by one colocator must land on the same node.
    assert len(set(ray.get(
        [h.hostname.remote() for h in worker_handles]))) == 1

    resources = ray.available_resources()
    ip_address = services.get_node_ip_address()
    # The colocator reserves 4 of the fixture's 6 CPUs, leaving 2 free.
    assert resources.get("CPU", 0) == 2, resources
    # Each of the 4 workers holds 0.01 of the custom node resource.
    assert resources.get(f"node:{ip_address}", 0) == 1 - 4 * 0.01
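

# ray_start_6_cpus is not defined in this file; it is assumed to live in the
# suite's conftest.py. A minimal sketch of what such a fixture would do
# (bring up and tear down a local 6-CPU Ray instance around each test):
@pytest.fixture
def ray_start_6_cpus():
    ray.init(num_cpus=6)
    yield
    ray.shutdown()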


def test_colocator_gpu(tmpdir, ray_start_4_cpus_4_gpus):
    SetColocator = NodeColocator.options(num_cpus=4, num_gpus=4)
    colocator = SetColocator.remote(
        node_rank=0, num_slots=4, world_size=4, use_gpu=True)
    colocator.create_workers.remote()
    worker_handles = ray.get(colocator.get_workers.remote())
    # All workers created by one colocator must land on the same node.
    assert len(set(ray.get(
        [h.hostname.remote() for h in worker_handles]))) == 1

    resources = ray.available_resources()
    ip_address = services.get_node_ip_address()
    # The colocator reserves every CPU and GPU on the node.
    assert resources.get("CPU", 0) == 0, resources
    assert resources.get("GPU", 0) == 0, resources
    # Each of the 4 workers holds 0.01 of the custom node resource.
    assert resources.get(f"node:{ip_address}", 0) == 1 - 4 * 0.01

    # Colocated workers share a single CUDA_VISIBLE_DEVICES string that
    # exposes all 4 of the node's GPUs to every worker.
    all_envs = ray.get([h.env_vars.remote() for h in worker_handles])
    assert len({ev["CUDA_VISIBLE_DEVICES"] for ev in all_envs}) == 1
    assert len(all_envs[0]["CUDA_VISIBLE_DEVICES"].split(",")) == 4
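

# ray_start_4_cpus_4_gpus is likewise assumed to come from conftest.py; a
# minimal sketch (actually running the GPU test requires a 4-GPU machine):
@pytest.fixture
def ray_start_4_cpus_4_gpus():
    ray.init(num_cpus=4, num_gpus=4)
    yield
    ray.shutdown()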