def test_embedded_objectrefs(workflow_start_regular):
    """Check that object refs nested in a stored value survive a restart.

    A wrapper holding two ``ray.put`` refs is written with
    ``WorkflowStorage._put``. Ray is then shut down and stopped, the
    workflow module is re-initialised against the same storage URL, and the
    value is read back with ``_get``. The refs in the result must resolve to
    the original values.
    """
    # NOTE(review): the id is taken from ``test_workflow_storage`` rather than
    # from this test's own name -- confirm that sharing it is intended.
    workflow_id = test_workflow_storage.__name__

    class ObjectRefsWrapper:
        def __init__(self, refs):
            self.refs = refs

    global_storage = storage.get_global_storage()
    wf_storage = workflow_storage.WorkflowStorage(workflow_id, global_storage)
    url = global_storage.storage_url

    wrapped = ObjectRefsWrapper([ray.put(1), ray.put(2)])

    asyncio_run(wf_storage._put(["key"], wrapped))

    # Be extremely explicit about shutting down. We want to make sure the
    # `_get` call deserializes the full object and puts it in the object store.
    # Shutting down the cluster should guarantee we don't accidentally get the
    # old object and pass the test.
    ray.shutdown()
    # Argument list instead of a shell string: no shell is needed here.
    subprocess.check_output(["ray", "stop", "--force"])

    workflow.init(url)
    storage2 = get_workflow_storage(workflow_id)

    result = asyncio_run(storage2._get(["key"]))
    assert ray.get(result.refs) == [1, 2]
def test_workflow_lifetime_2(call_ray_start):
    """Case 2: the driver is killed; the workflow output must still be 20."""
    script = driver_script.format(100)
    driver_proc = run_string_as_driver_nonblocking(script)

    # Wait 10 seconds, kill the driver process, then wait one more second.
    time.sleep(10)
    driver_proc.kill()
    time.sleep(1)

    workflow.init()
    output_ref = workflow.get_output("driver_terminated")
    assert ray.get(output_ref) == 20
def test_recovery_cluster_failure():
    """Resume the ``cluster_failure`` workflow after the cluster is stopped.

    A head node is started with ``ray start --head`` and ``driver_script`` is
    launched in a separate driver process. After 10 seconds the cluster is
    stopped and the driver killed. ``workflow.resume("cluster_failure")``
    must then yield 20.
    """
    subprocess.check_call(["ray", "start", "--head"])
    time.sleep(1)
    proc = run_string_as_driver_nonblocking(driver_script)
    time.sleep(10)
    subprocess.check_call(["ray", "stop"])
    proc.kill()
    time.sleep(1)

    workflow.init()
    try:
        assert ray.get(workflow.resume("cluster_failure")) == 20
    finally:
        # Shut Ray down even when the assertion fails, so later tests do not
        # start against this test's Ray session.
        ray.shutdown()
def test_recovery_cluster_failure(reset_workflow, tmp_path):
    """Resume the ``cluster_failure`` workflow after the cluster is stopped.

    A head node is started with ``ray start --head`` and ``driver_script``
    (formatted with ``tmp_path``) is launched in a separate driver process.
    After 10 seconds the cluster is stopped and the driver killed. The
    workflow module is then initialised on ``tmp_path`` and
    ``workflow.resume("cluster_failure")`` must yield 20.
    """
    subprocess.check_call(["ray", "start", "--head"])
    time.sleep(1)
    proc = run_string_as_driver_nonblocking(
        driver_script.format(tmp_path=str(tmp_path)))
    time.sleep(10)
    subprocess.check_call(["ray", "stop"])
    proc.kill()
    time.sleep(1)

    workflow.init(str(tmp_path))
    try:
        assert ray.get(workflow.resume("cluster_failure")) == 20
    finally:
        # Run the cleanup even when the assertion fails, so the global
        # storage and the Ray session do not leak into later tests.
        workflow.storage.set_global_storage(None)
        ray.shutdown()
def _workflow_start(storage_url, shared, **kwargs):
    """Start Ray and the workflow module, yield, then tear both down.

    This is a generator: setup runs before the ``yield`` and teardown runs
    after it.

    Args:
        storage_url: Passed straight to ``workflow.init``.
        shared: Not used by this function; kept so existing callers work.
        **kwargs: Merged over ``get_default_fixture_ray_kwargs()`` and passed
            to ``ray.init``.

    Yields:
        The value returned by ``ray.init``.
    """
    init_kwargs = get_default_fixture_ray_kwargs()
    init_kwargs.update(kwargs)
    if ray.is_initialized():
        ray.shutdown()
    # Sometimes pytest does not clean up all global variables, so we have to
    # reset the workflow storage manually. This should not be an issue for
    # normal use cases, because global variables are freed after the driver
    # exits.
    storage.set_global_storage(None)
    address_info = ray.init(**init_kwargs)
    try:
        workflow.init(storage_url)
        yield address_info
    finally:
        # Teardown. ``finally`` ensures it also runs when ``workflow.init``
        # fails or an exception is thrown into the generator at the yield.
        ray.shutdown()
        storage.set_global_storage(None)
def test_recovery_cluster_failure_resume_all(reset_workflow, tmp_path):
    """``workflow.resume_all`` picks up a workflow cut off by a cluster stop.

    The driver script runs a single step that takes a file lock before
    returning 20. The test holds that same lock while the driver runs, then
    stops the cluster and kills the driver. After the lock is released,
    ``resume_all`` must report exactly one workflow, ``cluster_failure``,
    whose result is 20.
    """
    subprocess.check_call(["ray", "start", "--head"])
    time.sleep(1)
    workflow_dir = tmp_path / "workflow"
    lock_file = tmp_path / "lock_file"
    driver_script = f"""
import time
from ray.experimental import workflow
from filelock import FileLock
@workflow.step
def foo(x):
    with FileLock("{str(lock_file)}"):
        return 20

if __name__ == "__main__":
    workflow.init("{str(workflow_dir)}")
    assert foo.step(0).run(workflow_id="cluster_failure") == 20
"""
    # Hold the lock the driver's step waits on. The context manager releases
    # it even if starting or stopping the cluster raises.
    with FileLock(lock_file):
        proc = run_string_as_driver_nonblocking(driver_script)
        time.sleep(10)
        subprocess.check_call(["ray", "stop"])
        proc.kill()
        time.sleep(1)

    workflow.init(str(workflow_dir))
    try:
        resumed = workflow.resume_all()
        assert len(resumed) == 1
        wid, obj_ref = resumed[0]
        assert wid == "cluster_failure"
        assert ray.get(obj_ref) == 20
    finally:
        # Run the cleanup even when an assertion above fails.
        workflow.storage.set_global_storage(None)
        ray.shutdown()
def test_resume_different_storage(ray_start_regular, tmp_path, reset_workflow):
    """Run ``constant`` as workflow ``const`` on ``tmp_path``, then resume it.

    The resumed workflow must produce 31416.
    """
    storage_dir = str(tmp_path)
    workflow.init(storage=storage_dir)
    constant.step().run(workflow_id="const")

    resumed_ref = workflow.resume(workflow_id="const")
    assert ray.get(resumed_ref) == 31416

    # Clear the global workflow storage before returning.
    workflow.storage.set_global_storage(None)
def test_workflow_lifetime_1(call_ray_start):
    """Case 1: the driver exits normally; the workflow output must be 20."""
    script = driver_script.format(5)
    run_string_as_driver(script)

    workflow.init()
    output_ref = workflow.get_output("driver_terminated")
    assert ray.get(output_ref) == 20
def test_init_twice(call_ray_start, reset_workflow, tmp_path):
    """``workflow.init(str(tmp_path))`` after a bare ``workflow.init()``
    must raise ``RuntimeError``.
    """
    workflow.init()
    second_storage = str(tmp_path)
    with pytest.raises(RuntimeError):
        workflow.init(second_storage)
def test_init_twice_2(call_ray_start, reset_workflow, tmp_path):
    """After ``driver_script`` has run in a separate driver,
    ``workflow.init(str(tmp_path))`` must raise ``RuntimeError``.
    """
    run_string_as_driver(driver_script)
    second_storage = str(tmp_path)
    with pytest.raises(RuntimeError):
        workflow.init(second_storage)
def test_resume_different_storage(ray_start_regular):
    """Run ``constant`` as workflow ``const`` on a temp dir, then resume it.

    The resumed workflow must produce 31416.
    """
    # The context manager removes the directory even when the test fails;
    # the previous mkdtemp()/rmtree() pair left it behind in that case.
    with tempfile.TemporaryDirectory() as tmp_dir:
        workflow.init(storage=tmp_dir)
        constant.step().run(workflow_id="const")
        assert ray.get(workflow.resume(workflow_id="const")) == 31416
def test_init_twice(tmp_path):
    """``workflow.init(str(tmp_path))`` after a bare ``workflow.init()``
    must raise ``RuntimeError``.
    """
    workflow.init()
    second_storage = str(tmp_path)
    with pytest.raises(RuntimeError):
        workflow.init(second_storage)
def _alter_storage(new_storage):
    """Switch the workflow module over to ``new_storage``.

    Sets the global storage, shuts Ray down (``ray.shutdown()`` followed by
    ``ray stop --force``), then calls ``workflow.init(new_storage)``.
    """
    # Alter the storage first.
    set_global_storage(new_storage)

    # Then shut Ray down.
    ray.shutdown()
    stop_command = "ray stop --force"
    os.system(stop_command)

    workflow.init(new_storage)