def test_worker_creation_num_cpus(ray_start_2_cpus):
    """Check that a worker group requesting 2 CPUs per worker uses every CPU.

    The test expects two CPUs to be available beforehand, creates a
    ``WorkerGroup`` with ``num_cpus_per_worker=2``, and then verifies that
    exactly one worker exists and that no ``"CPU"`` entry remains in Ray's
    available resources.
    """
    # Precondition: two CPUs are free before any worker is created.
    initial_resources = ray.available_resources()
    assert initial_resources["CPU"] == 2

    group = WorkerGroup(num_cpus_per_worker=2)
    # Brief pause before inspecting cluster state; presumably this lets Ray's
    # resource accounting catch up with the new actor -- TODO confirm.
    time.sleep(1)

    worker_count = len(group.workers)
    assert worker_count == 1

    # Make sure both CPUs are being used by the actor.
    remaining_resources = ray.available_resources()
    assert "CPU" not in remaining_resources

    group.shutdown()
def test_worker_restart(ray_start_2_cpus):
    """Check that a worker group can be shut down, started again, and reused.

    Calling ``start()`` on a freshly constructed group is expected to raise
    ``RuntimeError``; after ``shutdown(0)`` the same call must succeed and
    the group must be able to execute a function.
    """
    group = WorkerGroup(num_workers=2)

    # start() directly after construction is expected to be rejected.
    with pytest.raises(RuntimeError):
        group.start()

    # Avoid race condition.
    time.sleep(1)

    group.shutdown(0)
    group.start()

    # The restarted group should accept work again.
    group.execute(lambda: 1)
def handle_failure(
    self,
    worker_group: WorkerGroup,
    failed_worker_indexes: List[int],
    backend_config: BackendConfig,
):
    """Recover from worker failures.

    The default strategy does not look at which workers failed: it shuts
    the whole group down, starts it again, and then re-runs ``on_start``.

    Args:
        worker_group: The group of workers to restart.
        failed_worker_indexes: Indexes of the workers that failed. Not
            used by this default implementation.
        backend_config: Configuration passed through to ``on_start``.
    """
    # Restart every worker, not only the failed ones.
    worker_group.shutdown()
    worker_group.start()

    # Re-run the start hook against the freshly started group.
    self.on_start(worker_group, backend_config)
def test_worker_shutdown(ray_start_2_cpus):
    """Check that shutting down a worker group frees its CPUs and disables it.

    With two workers running, no ``"CPU"`` entry should be available and two
    actors should be registered. After ``shutdown()`` both CPUs must be
    available again, and ``execute`` must raise ``RuntimeError``.
    """
    # Precondition: two CPUs are free before any worker is created.
    initial_resources = ray.available_resources()
    assert initial_resources["CPU"] == 2

    group = WorkerGroup(num_workers=2)
    # Brief pause before inspecting cluster state; presumably this lets Ray
    # register the actors and their resource usage -- TODO confirm.
    time.sleep(1)

    # Both CPUs are claimed while the two workers are alive.
    assert "CPU" not in ray.available_resources()
    assert len(ray.state.actors()) == 2

    group.shutdown()
    # Pause again before checking that the resources were returned.
    time.sleep(1)
    assert ray.available_resources()["CPU"] == 2

    # A group that has been shut down must refuse to execute functions.
    with pytest.raises(RuntimeError):
        group.execute(lambda: 1)