def test_failure_with_storage(workflow_start_regular):
    """Resume a workflow from progressively larger replayed storage prefixes.

    Runs a workflow against a debug storage backend, then repeatedly wipes
    the storage, replays only the first ``j`` logged records, and checks
    that resuming either raises (too few records replayed) or reproduces
    the original result.
    """
    with tempfile.TemporaryDirectory() as temp_dir:
        debug_store = DebugStorage(temp_dir)
        # Consistency fix: siblings in this file install the debug storage
        # via utils._alter_storage; the bare _alter_storage call did not
        # match the module's convention.
        utils._alter_storage(debug_store)

        wf = construct_workflow(length=3)
        result = wf.run(workflow_id="complex_workflow")
        # First record index after the initial commit; fewer replayed
        # records than this cannot produce a resumable workflow.
        index = _locate_initial_commit(debug_store) + 1
        debug_store.log_off()

        def resume(num_records_replayed):
            # Wipe the workflow's storage prefix, then replay a prefix of
            # the logged records before attempting resume.
            key = debug_store.wrapped_storage.make_key("complex_workflow")
            asyncio_run(debug_store.wrapped_storage.delete_prefix(key))

            async def replay():
                # We need to replay one by one to avoid conflict
                for i in range(num_records_replayed):
                    await debug_store.replay(i)

            asyncio_run(replay())
            return ray.get(workflow.resume(workflow_id="complex_workflow"))

        with pytest.raises(ValueError):
            # in cases, the replayed records are too few to resume the
            # workflow.
            resume(index - 1)

        if isinstance(debug_store.wrapped_storage, FilesystemStorageImpl):
            # filesystem is faster, so we can cover all cases
            step_len = 1
        else:
            step_len = max((len(debug_store) - index) // 5, 1)
        for j in range(index, len(debug_store), step_len):
            assert resume(j) == result
def test_nested_workflow_no_download(workflow_start_regular):
    """Test that we _only_ load from storage on recovery.

    For a nested workflow step, we should checkpoint the input/output,
    but continue to reuse the in-memory value.
    """

    @ray.remote
    def recursive(ref, count):
        # Unwind the recursion as a chain of workflow continuations.
        if count == 0:
            return ref
        return workflow.continuation(recursive.bind(ref, count - 1))

    with tempfile.TemporaryDirectory() as temp_dir:
        debug_store = DebugStorage(temp_dir)
        utils._alter_storage(debug_store)

        ref = ray.put("hello")
        result = workflow.create(recursive.bind([ref], 10)).run()

        ops = debug_store._logged_storage.get_op_counter()
        # Count storage reads that touched the object store.
        get_objects_count = sum(1 for key in ops["get"] if "objects" in key)
        assert get_objects_count == 1, "We should only get once when resuming."
        # Count (and echo) storage writes that touched the object store.
        object_put_keys = [key for key in ops["put"] if "objects" in key]
        for key in object_put_keys:
            print(key)
        put_objects_count = len(object_put_keys)
        assert (
            put_objects_count == 1
        ), "We should detect the object exists before uploading"
        assert ray.get(result) == ["hello"]
def test_dedupe_download_raw_ref(workflow_start_regular):
    """A raw ObjectRef shared by many steps is downloaded only once.

    One hundred steps all take the same ObjectRef; the debug storage op
    counter must record exactly one object-store download.
    """
    with tempfile.TemporaryDirectory() as temp_dir:
        debug_store = DebugStorage(temp_dir)
        utils._alter_storage(debug_store)

        shared_ref = ray.put("hello")
        children = [identity.bind(shared_ref) for _ in range(100)]
        workflow.create(gather.bind(*children)).run()

        ops = debug_store._logged_storage.get_op_counter()
        object_gets = sum(1 for key in ops["get"] if "objects" in key)
        assert object_gets == 1
def test_dedupe_downloads_list(workflow_start_regular):
    """A list of ObjectRefs shared by many steps is downloaded once per ref.

    Five distinct objects are referenced by 100 workflow steps; the debug
    storage op counter must show exactly five object-store downloads.
    """
    with tempfile.TemporaryDirectory() as temp_dir:
        # Fix: every sibling test constructs DebugStorage with only the
        # temp directory; the extra get_global_storage() argument here used
        # a stale constructor signature.
        debug_store = DebugStorage(temp_dir)
        utils._alter_storage(debug_store)

        numbers = [ray.put(i) for i in range(5)]
        workflows = [identity.bind(numbers) for _ in range(100)]
        workflow.create(gather.bind(*workflows)).run()

        ops = debug_store._logged_storage.get_op_counter()
        get_objects_count = 0
        for key in ops["get"]:
            if "objects" in key:
                get_objects_count += 1
        assert get_objects_count == 5