def test_worker_restart(ray_start_2_cpus):
    """Check that a worker group can be shut down and started again.

    The first explicit ``start()`` is expected to raise ``RuntimeError``
    (presumably because the group is already running right after
    construction -- confirm against ``WorkerGroup``). After a shutdown, a
    second ``start()`` must succeed and the group must be able to execute
    a function.
    """

    def _return_one():
        # Trivial payload; the test only needs the call to complete.
        return 1

    group = WorkerGroup(num_workers=2)

    with pytest.raises(RuntimeError):
        group.start()

    # Give things a moment to settle before shutting down, to avoid a
    # race condition.
    time.sleep(1)

    # NOTE(review): 0 is passed positionally; presumably a patience/timeout
    # value -- confirm against WorkerGroup.shutdown.
    group.shutdown(0)

    # Starting again after shutdown must work, and the fresh workers must
    # be usable.
    group.start()
    group.execute(_return_one)
def handle_failure(
        self,
        worker_group: WorkerGroup,
        failed_worker_indexes: List[int],
        backend_config: BackendConfig):
    """Recover from worker failures.

    The default strategy restarts every worker in the group, not just the
    ones that failed, and then re-runs the ``on_start`` hook.

    Args:
        worker_group: The group to shut down and start again.
        failed_worker_indexes: Indexes of the workers that failed. This
            implementation does not use them, since all workers are
            restarted.
        backend_config: Configuration passed through to ``on_start``.
    """
    # Full restart: tear the whole group down, then bring it back up.
    worker_group.shutdown()
    worker_group.start()

    # Run the start hook again on the restarted group.
    self.on_start(worker_group, backend_config)