def test_embedded_objectrefs(workflow_start_regular):
    """Object refs nested inside a stored object can be read back after the
    cluster has been torn down and the workflow re-initialized."""

    class ObjectRefsWrapper:
        def __init__(self, refs):
            self.refs = refs

    workflow_id = test_workflow_storage.__name__
    wf_storage = workflow_storage.WorkflowStorage(
        workflow_id, storage.get_global_storage())
    url = storage.get_global_storage().storage_url

    payload = ObjectRefsWrapper([ray.put(1), ray.put(2)])
    asyncio_run(wf_storage._put(["key"], payload))

    # Tear everything down explicitly. The `_get` below must deserialize the
    # whole object and put it into the object store again; with the cluster
    # gone, the test cannot pass by accidentally picking up the old object.
    ray.shutdown()
    subprocess.check_output("ray stop --force", shell=True)

    workflow.init(url)
    reopened = get_workflow_storage(workflow_id)
    restored = asyncio_run(reopened._get(["key"]))
    assert ray.get(restored.refs) == [1, 2]
def run(entry_workflow: Workflow,
        storage: Optional[Union[str, Storage]] = None,
        workflow_id: Optional[str] = None) -> ray.ObjectRef:
    """Run a workflow asynchronously. See "api.run()" for details.

    Args:
        entry_workflow: The workflow to commit and start.
        storage: A storage URL, a ``Storage`` instance, or None to keep the
            current global storage. Any non-None value replaces the global
            storage.
        workflow_id: ID of the run. When omitted, one is generated from the
            entry workflow ID and the current time.

    Returns:
        The value returned by ``recovery.resume_workflow_job``.

    Raises:
        TypeError: If ``storage`` is not None, a str, or a ``Storage``.
    """
    if workflow_id is None:
        # Workflow ID format: {Entry workflow UUID}.{Unix time to nanoseconds}
        workflow_id = f"{entry_workflow.id}.{time.time():.9f}"

    if storage is not None:
        if isinstance(storage, str):
            new_store = create_storage(storage)
        elif isinstance(storage, Storage):
            new_store = storage
        else:
            raise TypeError("'storage' should be None, str, or Storage type.")
        set_global_storage(new_store)

    storage_url = get_global_storage().storage_url
    logger.info(f"Workflow job created. [id=\"{workflow_id}\", storage_url="
                f"\"{storage_url}\"].")
    try:
        workflow_context.init_workflow_step_context(workflow_id, storage_url)
        commit_step(entry_workflow)
        # TODO(suquark): Move this to a detached named actor,
        # so the workflow shares fate with the actor.
        # The current plan is resuming the workflow on the detached named
        # actor. This is extremely simple to implement, but I am not sure
        # of its performance.
        job_output = recovery.resume_workflow_job(workflow_id,
                                                  get_global_storage())
        logger.info(f"Workflow job {workflow_id} started.")
        return job_output
    finally:
        # Always clear the step context, whether or not the launch succeeded.
        workflow_context.set_workflow_step_context(None)
def resume(workflow_id: str,
           storage: Optional[Union[str, Storage]] = None) -> ray.ObjectRef:
    """Resume a workflow asynchronously. See "api.resume()" for details.

    Args:
        workflow_id: ID of the workflow to resume.
        storage: A storage URL, a ``Storage`` instance, or None to use the
            global storage.

    Returns:
        The flattened workflow output.

    Raises:
        TypeError: If ``storage`` is not None, a str, or a ``Storage``.
    """
    if storage is None:
        store = get_global_storage()
    elif isinstance(storage, str):
        store = create_storage(storage)
    elif isinstance(storage, Storage):
        store = storage
    else:
        raise TypeError("'storage' should be None, str, or Storage type.")

    logger.info(f"Resuming workflow [id=\"{workflow_id}\", storage_url="
                f"\"{store.storage_url}\"].")

    try:
        manager = ray.get_actor(MANAGEMENT_ACTOR_NAME)
    except ValueError:
        # the actor does not exist
        manager = WorkflowManagementActor.options(
            name=MANAGEMENT_ACTOR_NAME, lifetime="detached").remote()

    # NOTE: It is important to 'ray.get' the returned output. This
    # ensures caller of 'run()' holds the reference to the workflow
    # result. Otherwise if the actor removes the reference of the
    # workflow output, the caller may fail to resolve the result.
    pending = manager.run_or_resume.remote(workflow_id, store.storage_url)
    direct_output = flatten_workflow_output(workflow_id, ray.get(pending))
    logger.info(f"Workflow job {workflow_id} resumed.")
    return direct_output
def test_failure_with_storage(workflow_start_regular):
    """Replay prefixes of the recorded storage operations and check that
    resuming either fails (too few records) or reproduces the result."""
    with tempfile.TemporaryDirectory() as temp_dir:
        debug_store = DebugStorage(get_global_storage(), temp_dir)
        _alter_storage(debug_store)

        wf = construct_workflow(length=3)
        result = wf.run(workflow_id="complex_workflow")
        index = _locate_initial_commit(debug_store) + 1

        def resume(num_records_replayed):
            # Wipe the workflow's data, replay the first N recorded
            # operations, then try to resume from what is left.
            inner = debug_store.wrapped_storage
            key = inner.make_key("complex_workflow")
            asyncio_run(inner.delete_prefix(key))
            replays = []
            for i in range(num_records_replayed):
                replays.append(debug_store.replay(i))
            asyncio_run(asyncio.gather(*replays))
            return ray.get(workflow.resume(workflow_id="complex_workflow"))

        # With too few replayed records the workflow cannot be resumed.
        with pytest.raises(ValueError):
            resume(index - 1)

        # filesystem is faster, so we can cover all cases; otherwise sample
        # roughly five evenly spaced replay lengths.
        on_filesystem = isinstance(debug_store.wrapped_storage,
                                   FilesystemStorageImpl)
        step_len = 1 if on_filesystem else max(
            (len(debug_store) - index) // 5, 1)

        for j in range(index, len(debug_store), step_len):
            assert resume(j) == result
def step(method_name, method, *args, **kwargs):
    """Build a ``Workflow`` that wraps one virtual actor method call.

    Args:
        method_name: Name of the actor method being invoked.
        method: The method object; its ``__virtual_actor_readonly__``
            attribute (default False) selects the readonly path.
        *args: Positional arguments for the method.
        **kwargs: Keyword arguments for the method.

    Returns:
        A ``Workflow`` built from the assembled ``WorkflowData``.
    """
    readonly = getattr(method, "__virtual_actor_readonly__", False)
    flattened_args = self.flatten_args(method_name, args, kwargs)
    actor_id = workflow_context.get_current_workflow_id()

    if not readonly:
        # "__init__" has no previous state; any other method receives a
        # reference to the entrypoint step recorded in storage.
        state_ref = None if method_name == "__init__" else WorkflowRef(
            WorkflowStorage(actor_id,
                            get_global_storage()).get_entrypoint_step_id())
        # This is a hack to insert a positional argument.
        flattened_args = [signature.DUMMY_TYPE, state_ref] + flattened_args

    workflow_inputs = serialization_context.make_workflow_inputs(
        flattened_args)

    if not readonly:
        _actor_method = _wrap_actor_method(self.cls, method_name)
        step_type = StepType.ACTOR_METHOD
    else:
        _actor_method = _wrap_readonly_actor_method(actor_id, self.cls,
                                                    method_name)
        step_type = StepType.READONLY_ACTOR_METHOD

    # TODO(suquark): Support actor options.
    return Workflow(
        WorkflowData(
            func_body=_actor_method,
            step_type=step_type,
            inputs=workflow_inputs,
            max_retries=1,
            catch_exceptions=False,
            ray_options={},
            name=None,
        ))
def run(entry_workflow: Workflow,
        workflow_id: Optional[str] = None,
        overwrite: bool = True) -> ray.ObjectRef:
    """Run a workflow asynchronously.

    Args:
        entry_workflow: The workflow to checkpoint and start.
        workflow_id: ID of the run. When omitted, one is generated from the
            entry workflow ID and the current time.
        overwrite: Accepted but not read anywhere in this function.

    Returns:
        The flattened persisted output for FUNCTION steps, otherwise the
        flattened volatile output.
    """
    # TODO(suquark): The current "run" always overwrite existing workflow.
    # We need to fix this later.
    store = get_global_storage()
    assert ray.is_initialized()
    if workflow_id is None:
        # Workflow ID format: {Entry workflow UUID}.{Unix time to nanoseconds}
        workflow_id = f"{entry_workflow.id}.{time.time():.9f}"
    logger.info(f"Workflow job created. [id=\"{workflow_id}\", storage_url="
                f"\"{store.storage_url}\"].")

    # checkpoint the workflow
    ws = workflow_storage.get_workflow_storage(workflow_id)
    commit_step(ws, "", entry_workflow)

    workflow_manager = get_or_create_management_actor()
    ignore_existing = (entry_workflow.data.step_type != StepType.FUNCTION)
    # NOTE: It is important to 'ray.get' the returned output. This
    # ensures caller of 'run()' holds the reference to the workflow
    # result. Otherwise if the actor removes the reference of the
    # workflow output, the caller may fail to resolve the result.
    pending = workflow_manager.run_or_resume.remote(workflow_id,
                                                    ignore_existing)
    result: "WorkflowExecutionResult" = ray.get(pending)

    if entry_workflow.data.step_type != StepType.FUNCTION:
        return flatten_workflow_output(workflow_id, result.volatile_output)
    return flatten_workflow_output(workflow_id, result.persisted_output)
def run(entry_workflow: Workflow,
        storage: Optional[Union[str, Storage]] = None,
        workflow_id: Optional[str] = None) -> ray.ObjectRef:
    """Run a workflow asynchronously. See "api.run()" for details.

    Args:
        entry_workflow: The workflow to commit and start.
        storage: A storage URL, a ``Storage`` instance, or None to keep the
            current global storage. Any non-None value replaces the global
            storage.
        workflow_id: ID of the run. When omitted, one is generated from the
            entry workflow ID and the current time.

    Returns:
        The flattened workflow output.

    Raises:
        TypeError: If ``storage`` is not None, a str, or a ``Storage``.
    """
    if workflow_id is None:
        # Workflow ID format: {Entry workflow UUID}.{Unix time to nanoseconds}
        workflow_id = f"{entry_workflow.id}.{time.time():.9f}"

    if storage is not None:
        if isinstance(storage, str):
            new_store = create_storage(storage)
        elif isinstance(storage, Storage):
            new_store = storage
        else:
            raise TypeError("'storage' should be None, str, or Storage type.")
        set_global_storage(new_store)

    storage_url = get_global_storage().storage_url
    logger.info(f"Workflow job created. [id=\"{workflow_id}\", storage_url="
                f"\"{storage_url}\"].")
    try:
        workflow_context.init_workflow_step_context(workflow_id, storage_url)
        commit_step(entry_workflow)
        try:
            manager = ray.get_actor(MANAGEMENT_ACTOR_NAME)
        except ValueError:
            # the actor does not exist
            manager = WorkflowManagementActor.options(
                name=MANAGEMENT_ACTOR_NAME, lifetime="detached").remote()
        # NOTE: It is important to 'ray.get' the returned output. This
        # ensures caller of 'run()' holds the reference to the workflow
        # result. Otherwise if the actor removes the reference of the
        # workflow output, the caller may fail to resolve the result.
        pending = manager.run_or_resume.remote(workflow_id, storage_url)
        return flatten_workflow_output(workflow_id, ray.get(pending))
    finally:
        # Always clear the step context, whether or not the launch succeeded.
        workflow_context.set_workflow_step_context(None)
def _get_storage_url(storage: Optional[Union[str, Storage]]) -> str:
    """Resolve a storage argument to a storage URL.

    Args:
        storage: A URL string (returned unchanged), a ``Storage`` instance
            (its ``storage_url`` is returned), or None (the global storage's
            URL is returned).

    Raises:
        TypeError: If ``storage`` is none of the above.
    """
    if isinstance(storage, str):
        return storage
    if isinstance(storage, Storage):
        return storage.storage_url
    if storage is None:
        return get_global_storage().storage_url
    raise TypeError("'storage' should be None, str, or Storage type.")
def s3_storage(aws_credentials, s3_server):
    """Yield the global storage after pointing it at a mocked S3 bucket."""
    with mock_s3():
        s3_client = boto3.client(
            "s3", region_name="us-west-2", endpoint_url=s3_server)
        s3_client.create_bucket(Bucket="test_bucket")
        bucket_url = "s3://test_bucket/workflow"
        query = f"?region_name=us-west-2&endpoint_url={s3_server}"
        s3_store = storage.create_storage(bucket_url + query)
        storage.set_global_storage(s3_store)
        yield storage.get_global_storage()
def __init__(self,
             workflow_id: Optional[str] = None,
             store: Optional[storage.Storage] = None):
    """Bind this object to a workflow ID and a storage backend.

    Args:
        workflow_id: The workflow ID; taken from the current workflow step
            context when None.
        store: The storage to use; the global storage when None.
    """
    if workflow_id is None:
        workflow_id = workflow_context.get_workflow_step_context().workflow_id
    self._storage = storage.get_global_storage() if store is None else store
    self._workflow_id = workflow_id
def get_actor(actor_id: str) -> "VirtualActor":
    """Get a virtual actor from the global storage.

    Args:
        actor_id: The ID of the actor.

    Returns:
        A virtual actor.
    """
    lookup = virtual_actor_class.get_actor
    return lookup(actor_id, storage_base.get_global_storage())
def _readonly_actor_method(*args, **kwargs):
    """Call ``method_name`` on a fresh instance restored from the latest
    stored output of the actor.

    Raises:
        VirtualActorNotInitializedError: If the latest output cannot be
            loaded for any reason.
    """
    storage = get_global_storage()
    # Bypass __init__; the state is installed via __setstate__ below.
    restored = cls.__new__(cls)
    try:
        latest_state = get_latest_output(actor_id, storage)
    except Exception as e:
        raise VirtualActorNotInitializedError(
            f"Virtual actor '{actor_id}' has not been initialized. "
            "We cannot get the latest state for the "
            "readonly virtual actor.") from e
    restored.__setstate__(latest_state)
    return getattr(restored, method_name)(*args, **kwargs)
def get_workflow_storage(workflow_id: Optional[str] = None) -> WorkflowStorage:
    """Get the storage for the workflow.

    Args:
        workflow_id: The workflow ID; taken from the current workflow step
            context when None.

    Returns:
        A workflow storage backed by the global storage.
    """
    store = storage.get_global_storage()
    if workflow_id is not None:
        return WorkflowStorage(workflow_id, store)
    context = workflow_context.get_workflow_step_context()
    return WorkflowStorage(context.workflow_id, store)
def init_management_actor() -> None:
    """Initialize WorkflowManagementActor.

    If the named management actor already exists, verify that it uses the
    same storage URL as the global storage. Otherwise create it as a
    detached actor bound to the global storage.

    Raises:
        RuntimeError: If the existing manager reports a storage URL that
            differs from the global storage's URL.
    """
    store = storage.get_global_storage()
    # Guard only the lookup: a ValueError surfacing from the storage-URL
    # query below must not be mistaken for "the actor does not exist",
    # which would trigger creating a second actor under the same name.
    try:
        workflow_manager = ray.get_actor(MANAGEMENT_ACTOR_NAME)
    except ValueError:
        logger.info("Initializing workflow manager...")
        # the actor does not exist
        WorkflowManagementActor.options(
            name=MANAGEMENT_ACTOR_NAME, lifetime="detached").remote(store)
        return

    storage_url = ray.get(workflow_manager.get_storage_url.remote())
    if storage_url != store.storage_url:
        raise RuntimeError("The workflow is using a storage "
                           f"({store.storage_url}) different from the "
                           f"workflow manager({storage_url}).")
def resume(workflow_id: str) -> ray.ObjectRef:
    """Resume a workflow asynchronously. See "api.resume()" for details.

    Args:
        workflow_id: ID of the workflow to resume.

    Returns:
        The flattened workflow output.
    """
    storage = get_global_storage()
    logger.info(f"Resuming workflow [id=\"{workflow_id}\", storage_url="
                f"\"{storage.storage_url}\"].")
    manager = get_or_create_management_actor()
    # NOTE: It is important to 'ray.get' the returned output. This
    # ensures caller of 'run()' holds the reference to the workflow
    # result. Otherwise if the actor removes the reference of the
    # workflow output, the caller may fail to resolve the result.
    pending = manager.run_or_resume.remote(workflow_id)
    direct_output = flatten_workflow_output(workflow_id, ray.get(pending))
    logger.info(f"Workflow job {workflow_id} resumed.")
    return direct_output
async def test_kv_storage(workflow_start_regular):
    """Exercise put/get/scan_prefix on the global key-value storage."""
    kv_store = storage.get_global_storage()
    json_data = {"hello": "world"}
    bin_data = (31416).to_bytes(8, "big")

    json_key = kv_store.make_key("aaa", "bbb", "ccc")
    bin_key = kv_store.make_key("aaa", "ddd")
    missing_key = kv_store.make_key("aaa", "eee")  # never written

    await kv_store.put(json_key, json_data, is_json=True)
    await kv_store.put(bin_key, bin_data, is_json=False)

    loaded_json = await kv_store.get(json_key, is_json=True)
    assert json_data == loaded_json
    loaded_bin = await kv_store.get(bin_key, is_json=False)
    assert bin_data == loaded_bin

    with pytest.raises(storage.KeyNotFoundError):
        await kv_store.get(missing_key)

    prefix = kv_store.make_key("aaa")
    under_prefix = await kv_store.scan_prefix(prefix)
    assert set(under_prefix) == {"bbb", "ddd"}
    at_root = await kv_store.scan_prefix(kv_store.make_key(""))
    assert set(at_root) == {"aaa"}
def get_actor(
        actor_id: str,
        storage: "Optional[Union[str, Storage]]" = None) -> "VirtualActor":
    """Get a virtual actor.

    Args:
        actor_id: The ID of the actor.
        storage: The storage of the actor. None selects the global storage;
            a string is turned into a storage via ``create_storage``; any
            other value is passed through unchanged.

    Returns:
        A virtual actor.
    """
    if isinstance(storage, str):
        resolved = storage_base.create_storage(storage)
    elif storage is None:
        resolved = storage_base.get_global_storage()
    else:
        resolved = storage
    return virtual_actor_class.get_actor(actor_id, resolved)
def resume(workflow_id: str,
           storage: Optional[Union[str, Storage]] = None) -> ray.ObjectRef:
    """Resume a workflow asynchronously. See "api.resume()" for details.

    Args:
        workflow_id: ID of the workflow to resume.
        storage: A storage URL, a ``Storage`` instance, or None to use the
            global storage.

    Returns:
        The value returned by ``recovery.resume_workflow_job``.

    Raises:
        TypeError: If ``storage`` is not None, a str, or a ``Storage``.
    """
    if storage is None:
        store = get_global_storage()
    elif isinstance(storage, str):
        store = create_storage(storage)
    elif isinstance(storage, Storage):
        store = storage
    else:
        raise TypeError("'storage' should be None, str, or Storage type.")

    logger.info(f"Resuming workflow [id=\"{workflow_id}\", storage_url="
                f"\"{store.storage_url}\"].")
    resumed = recovery.resume_workflow_job(workflow_id, store)
    logger.info(f"Workflow job {workflow_id} resumed.")
    return resumed
def get_or_create_management_actor() -> "ActorHandle":
    """Get or create WorkflowManagementActor.

    Returns:
        A handle to the named management actor; a new detached actor bound
        to the global storage is created when the lookup fails.
    """
    # TODO(suquark): We should not get the actor everytime. We also need to
    # resume the actor if it failed. Using a global variable to cache the
    # actor seems not enough to resume the actor, because there is no
    # aliveness detection for an actor.
    try:
        return ray.get_actor(MANAGEMENT_ACTOR_NAME)
    except ValueError:
        # the actor does not exist
        store = storage.get_global_storage()
        logger.warning("Cannot access workflow manager. It could because "
                       "the workflow manager exited unexpectedly. A new "
                       "workflow manager is being created with storage "
                       f"'{store}'.")
        actor_options = WorkflowManagementActor.options(
            name=MANAGEMENT_ACTOR_NAME, lifetime="detached")
        return actor_options.remote(store)
def run(entry_workflow: Workflow,
        workflow_id: Optional[str] = None) -> ray.ObjectRef:
    """Run a workflow asynchronously.

    Args:
        entry_workflow: The workflow to checkpoint (when needed) and start.
        workflow_id: ID of the run. When omitted, one is generated from a
            random UUID and the current time.

    Returns:
        The flattened persisted output for FUNCTION steps, otherwise the
        flattened volatile output.
    """
    store = get_global_storage()
    assert ray.is_initialized()
    if workflow_id is None:
        # Workflow ID format: {Entry workflow UUID}.{Unix time to nanoseconds}
        workflow_id = f"{str(uuid.uuid4())}.{time.time():.9f}"

    logger.info(f"Workflow job created. [id=\"{workflow_id}\", storage_url="
                f"\"{store.storage_url}\"].")

    with workflow_context.workflow_step_context(workflow_id,
                                                store.storage_url):
        # checkpoint the workflow
        ws = workflow_storage.get_workflow_storage(workflow_id)

        # Any failure to read the entrypoint step ID is treated as "this
        # workflow has not been stored yet".
        try:
            ws.get_entrypoint_step_id()
        except Exception:
            wf_exists = False
        else:
            wf_exists = True

        # We only commit for
        #  - virtual actor tasks: it's dynamic tasks, so we always add
        #  - it's a new workflow
        # TODO (yic): follow up with force rerun
        if entry_workflow.data.step_type != StepType.FUNCTION or not wf_exists:
            commit_step(ws, "", entry_workflow, None)

        workflow_manager = get_or_create_management_actor()
        ignore_existing = (entry_workflow.data.step_type != StepType.FUNCTION)
        # NOTE: It is important to 'ray.get' the returned output. This
        # ensures caller of 'run()' holds the reference to the workflow
        # result. Otherwise if the actor removes the reference of the
        # workflow output, the caller may fail to resolve the result.
        pending = workflow_manager.run_or_resume.remote(
            workflow_id, ignore_existing)
        result: "WorkflowExecutionResult" = ray.get(pending)

        if entry_workflow.data.step_type != StepType.FUNCTION:
            return flatten_workflow_output(workflow_id,
                                           result.volatile_output)
        return flatten_workflow_output(workflow_id, result.persisted_output)
def run(entry_workflow: Workflow,
        workflow_id: Optional[str] = None) -> ray.ObjectRef:
    """Run a workflow asynchronously. See "api.run()" for details.

    Args:
        entry_workflow: The workflow to checkpoint and start.
        workflow_id: ID of the run. When omitted, one is generated from the
            entry workflow ID and the current time.

    Returns:
        The flattened workflow output.
    """
    store = get_global_storage()
    assert ray.is_initialized()
    if workflow_id is None:
        # Workflow ID format: {Entry workflow UUID}.{Unix time to nanoseconds}
        workflow_id = f"{entry_workflow.id}.{time.time():.9f}"
    logger.info(f"Workflow job created. [id=\"{workflow_id}\", storage_url="
                f"\"{store.storage_url}\"].")

    # checkpoint the workflow
    checkpoint_storage = workflow_storage.WorkflowStorage(workflow_id, store)
    commit_step(checkpoint_storage, "", entry_workflow)

    manager = get_or_create_management_actor()
    # NOTE: It is important to 'ray.get' the returned output. This
    # ensures caller of 'run()' holds the reference to the workflow
    # result. Otherwise if the actor removes the reference of the
    # workflow output, the caller may fail to resolve the result.
    pending = manager.run_or_resume.remote(workflow_id)
    return flatten_workflow_output(workflow_id, ray.get(pending))
def init(storage: "Optional[Union[str, Storage]]" = None) -> None:
    """Initialize workflow.

    Args:
        storage: The external storage URL or a custom storage class. If not
            specified, the ``RAY_WORKFLOW_STORAGE`` environment variable is
            consulted, and ``/tmp/ray/workflow_data`` is used as the last
            resort.

    Raises:
        TypeError: If ``storage`` is not None, a str, or a ``Storage``.
        RuntimeError: If a global storage is already set and its URL differs
            from the requested one.
    """
    if storage is None:
        storage = os.environ.get("RAY_WORKFLOW_STORAGE")
        if storage is None:
            # We should use get_temp_dir_path, but for ray client, we don't
            # have this one. We need a flag to tell whether it's a client
            # or a driver to use the right dir.
            # For now, just use /tmp/ray/workflow_data
            logger.warning(
                "Using default local dir: `/tmp/ray/workflow_data`. "
                "This should only be used for testing purposes.")
            storage = "file:///tmp/ray/workflow_data"

    if isinstance(storage, str):
        storage = storage_base.create_storage(storage)
    elif not isinstance(storage, Storage):
        raise TypeError("'storage' should be None, str, or Storage type.")

    try:
        _storage = storage_base.get_global_storage()
    except RuntimeError:
        # No usable global storage yet; nothing to compare against.
        pass
    else:
        # The comparison lives in 'else' so the RuntimeError raised here is
        # not swallowed by the 'except' above.
        if _storage.storage_url != storage.storage_url:
            raise RuntimeError("Calling 'workflow.init()' again with a "
                               "different storage")
        logger.warning("Calling 'workflow.init()' again with the same "
                       "storage.")

    storage_base.set_global_storage(storage)
    workflow_access.init_management_actor()
def resume(workflow_id: str, workflow_root_dir=None) -> ray.ObjectRef:
    """Resume a workflow asynchronously. The workflow may have failed
    previously.

    Args:
        workflow_id: The ID of the workflow. The ID is used to identify
            the workflow.
        workflow_root_dir: The path of an external storage used for
            checkpointing. The global storage is used when None.

    Returns:
        The execution result of the workflow, represented by Ray ObjectRef.
    """
    assert ray.is_initialized()
    if workflow_root_dir is None:
        store = storage.get_global_storage()
    else:
        store = storage.create_storage(workflow_root_dir)

    recovered = recovery.resume_workflow_job(workflow_id, store)
    if not isinstance(recovered, ray.ObjectRef):
        # skip saving the DAG of a recovery workflow
        recovered.skip_saving_workflow_dag = True
        return run(recovered, workflow_root_dir, workflow_id)
    return recovered
def get_or_create(self, actor_id: str, *args, **kwargs):
    """Forward to ``actor_cls._get_or_create`` using the global storage.

    Args:
        actor_id: The ID of the actor.
        *args: Positional arguments, forwarded as ``args``.
        **kwargs: Keyword arguments, forwarded as ``kwargs``.
    """
    factory = actor_cls._get_or_create
    return factory(
        args=args,
        kwargs=kwargs,
        actor_id=actor_id,
        storage=get_global_storage(),
    )
def get_or_create(self, actor_id: str, *args, **kwargs) -> "VirtualActor":
    """Create an actor. See `VirtualActorClassBase.create()`.

    Args:
        actor_id: The ID of the actor.
        *args: Positional arguments, forwarded as ``args``.
        **kwargs: Keyword arguments, forwarded as ``kwargs``.
    """
    factory = self._get_or_create
    return factory(
        actor_id,
        args=args,
        kwargs=kwargs,
        storage=get_global_storage(),
    )
def filesystem_storage(tmp_path):
    """Yield the global storage after pointing it at a directory under
    ``tmp_path``."""
    data_dir = str(tmp_path) + "/workflow_data"
    fs_store = storage.create_storage(data_dir)
    storage.set_global_storage(fs_store)
    yield storage.get_global_storage()
def test_workflow_storage(workflow_start_regular):
    """Write step data through the raw storage layer and check what
    ``WorkflowStorage`` loads and what ``inspect_step`` reports."""
    raw_storage = workflow_storage._StorageImpl(storage.get_global_storage())
    workflow_id = test_workflow_storage.__name__
    step_id = "some_step"
    input_metadata = {
        "name": "test_basic_workflows.append1",
        "step_type": StepType.FUNCTION,
        "object_refs": ["abc"],
        "workflows": ["def"],
        "workflow_refs": ["some_ref"],
        "max_retries": 1,
        "catch_exceptions": False,
        "ray_options": {},
    }
    output_metadata = {
        "output_step_id": "a12423",
        "dynamic_output_step_id": "b1234"
    }
    flattened_args = [
        signature.DUMMY_TYPE, 1, signature.DUMMY_TYPE, "2", "k", b"543"
    ]
    args = signature.recover_args(flattened_args)
    output = ["the_answer"]
    object_resolved = 42
    obj_ref = ray.put(object_resolved)

    def expected_from_input_metadata(**flags):
        # Inspect result expected when only input-side data is stored: the
        # fields copied from the input metadata plus the given flags.
        return workflow_storage.StepInspectResult(
            step_type=StepType.FUNCTION,
            object_refs=input_metadata["object_refs"],
            workflows=input_metadata["workflows"],
            workflow_refs=input_metadata["workflow_refs"],
            ray_options={},
            **flags)

    # test basics
    for pending in (
            lambda: raw_storage.save_step_input_metadata(
                workflow_id, step_id, input_metadata),
            lambda: raw_storage.save_step_func_body(workflow_id, step_id,
                                                    some_func),
            lambda: raw_storage.save_step_args(workflow_id, step_id,
                                               flattened_args),
            lambda: raw_storage.save_object_ref(workflow_id, obj_ref),
            lambda: raw_storage.save_step_output_metadata(
                workflow_id, step_id, output_metadata),
            lambda: raw_storage.save_step_output(workflow_id, step_id,
                                                 output),
    ):
        asyncio_run(pending())

    wf_storage = workflow_storage.WorkflowStorage(workflow_id,
                                                  storage.get_global_storage())
    assert wf_storage.load_step_output(step_id) == output
    assert wf_storage.load_step_args(step_id, [], [], []) == args
    loaded_body = wf_storage.load_step_func_body(step_id)
    assert loaded_body(33) == 34
    loaded_ref = wf_storage.load_object_ref(obj_ref.hex())
    assert ray.get(loaded_ref) == object_resolved

    # test "inspect_step"
    # Everything stored, including the output object.
    inspect_result = wf_storage.inspect_step(step_id)
    assert inspect_result == workflow_storage.StepInspectResult(
        output_object_valid=True)
    assert inspect_result.is_recoverable()

    # Output metadata but no output object.
    step_id = "some_step2"
    asyncio_run(
        raw_storage.save_step_input_metadata(workflow_id, step_id,
                                             input_metadata))
    asyncio_run(
        raw_storage.save_step_func_body(workflow_id, step_id, some_func))
    asyncio_run(raw_storage.save_step_args(workflow_id, step_id, args))
    asyncio_run(
        raw_storage.save_step_output_metadata(workflow_id, step_id,
                                              output_metadata))
    inspect_result = wf_storage.inspect_step(step_id)
    assert inspect_result == workflow_storage.StepInspectResult(
        output_step_id=output_metadata["dynamic_output_step_id"])
    assert inspect_result.is_recoverable()

    # Input metadata, function body and args only.
    step_id = "some_step3"
    asyncio_run(
        raw_storage.save_step_input_metadata(workflow_id, step_id,
                                             input_metadata))
    asyncio_run(
        raw_storage.save_step_func_body(workflow_id, step_id, some_func))
    asyncio_run(raw_storage.save_step_args(workflow_id, step_id, args))
    inspect_result = wf_storage.inspect_step(step_id)
    assert inspect_result == expected_from_input_metadata(
        args_valid=True, func_body_valid=True)
    assert inspect_result.is_recoverable()

    # Input metadata and function body only.
    step_id = "some_step4"
    asyncio_run(
        raw_storage.save_step_input_metadata(workflow_id, step_id,
                                             input_metadata))
    asyncio_run(
        raw_storage.save_step_func_body(workflow_id, step_id, some_func))
    inspect_result = wf_storage.inspect_step(step_id)
    assert inspect_result == expected_from_input_metadata(
        func_body_valid=True)
    assert not inspect_result.is_recoverable()

    # Input metadata only.
    step_id = "some_step5"
    asyncio_run(
        raw_storage.save_step_input_metadata(workflow_id, step_id,
                                             input_metadata))
    inspect_result = wf_storage.inspect_step(step_id)
    assert inspect_result == expected_from_input_metadata()
    assert not inspect_result.is_recoverable()

    # Nothing stored for this step.
    step_id = "some_step6"
    inspect_result = wf_storage.inspect_step(step_id)
    print(inspect_result)
    assert inspect_result == workflow_storage.StepInspectResult()
    assert not inspect_result.is_recoverable()
def test_workflow_storage():
    """Write step data through the global storage and check what
    ``WorkflowStorage`` loads and what ``inspect_step`` reports.

    Ray is started at the top and always shut down at the end, even when an
    assertion fails, so a failure here does not leave Ray initialized for
    later tests.
    """
    ray.init()
    try:
        workflow_id = test_workflow_storage.__name__
        raw_storage = storage.get_global_storage()
        step_id = "some_step"
        input_metadata = {
            "name": "test_basic_workflows.append1",
            "object_refs": ["abc"],
            "workflows": ["def"]
        }
        output_metadata = {
            "output_step_id": "a12423",
            "dynamic_output_step_id": "b1234"
        }
        args = ([1, "2"], {"k": b"543"})
        output = ["the_answer"]
        object_resolved = 42
        rref = ray.put(object_resolved)

        # test basics
        raw_storage.save_step_input_metadata(workflow_id, step_id,
                                             input_metadata)
        raw_storage.save_step_func_body(workflow_id, step_id, some_func)
        raw_storage.save_step_args(workflow_id, step_id, args)
        raw_storage.save_object_ref(workflow_id, rref)
        raw_storage.save_step_output_metadata(workflow_id, step_id,
                                              output_metadata)
        raw_storage.save_step_output(workflow_id, step_id, output)

        wf_storage = workflow_storage.WorkflowStorage(workflow_id)
        assert wf_storage.load_step_output(step_id) == output
        assert wf_storage.load_step_args(step_id, [], []) == args
        assert wf_storage.load_step_func_body(step_id)(33) == 34
        assert ray.get(wf_storage.load_object_ref(
            rref.hex())) == object_resolved

        # test "inspect_step"
        # Everything stored, including the output object.
        inspect_result = wf_storage.inspect_step(step_id)
        assert inspect_result == workflow_storage.StepInspectResult(
            output_object_valid=True)
        assert inspect_result.is_recoverable()

        # Output metadata but no output object.
        step_id = "some_step2"
        raw_storage.save_step_input_metadata(workflow_id, step_id,
                                             input_metadata)
        raw_storage.save_step_func_body(workflow_id, step_id, some_func)
        raw_storage.save_step_args(workflow_id, step_id, args)
        raw_storage.save_step_output_metadata(workflow_id, step_id,
                                              output_metadata)
        inspect_result = wf_storage.inspect_step(step_id)
        assert inspect_result == workflow_storage.StepInspectResult(
            output_step_id=output_metadata["dynamic_output_step_id"])
        assert inspect_result.is_recoverable()

        # Input metadata, function body and args only.
        step_id = "some_step3"
        raw_storage.save_step_input_metadata(workflow_id, step_id,
                                             input_metadata)
        raw_storage.save_step_func_body(workflow_id, step_id, some_func)
        raw_storage.save_step_args(workflow_id, step_id, args)
        inspect_result = wf_storage.inspect_step(step_id)
        assert inspect_result == workflow_storage.StepInspectResult(
            args_valid=True,
            func_body_valid=True,
            object_refs=input_metadata["object_refs"],
            workflows=input_metadata["workflows"])
        assert inspect_result.is_recoverable()

        # Input metadata and function body only.
        step_id = "some_step4"
        raw_storage.save_step_input_metadata(workflow_id, step_id,
                                             input_metadata)
        raw_storage.save_step_func_body(workflow_id, step_id, some_func)
        inspect_result = wf_storage.inspect_step(step_id)
        assert inspect_result == workflow_storage.StepInspectResult(
            func_body_valid=True,
            object_refs=input_metadata["object_refs"],
            workflows=input_metadata["workflows"])
        assert not inspect_result.is_recoverable()

        # Input metadata only.
        step_id = "some_step5"
        raw_storage.save_step_input_metadata(workflow_id, step_id,
                                             input_metadata)
        inspect_result = wf_storage.inspect_step(step_id)
        assert inspect_result == workflow_storage.StepInspectResult(
            object_refs=input_metadata["object_refs"],
            workflows=input_metadata["workflows"])
        assert not inspect_result.is_recoverable()

        # Nothing stored for this step.
        step_id = "some_step6"
        inspect_result = wf_storage.inspect_step(step_id)
        assert inspect_result == workflow_storage.StepInspectResult()
        assert not inspect_result.is_recoverable()
    finally:
        ray.shutdown()
def test_raw_storage():
    """Save and load every kind of step data through the global storage,
    then overwrite each item and check the new values are returned.

    Ray is started at the top and always shut down at the end, even when an
    assertion fails, so a failure here does not leave Ray initialized for
    later tests.
    """
    ray.init()
    try:
        workflow_id = test_workflow_storage.__name__
        raw_storage = storage.get_global_storage()
        step_id = "some_step"
        input_metadata = {"2": "c"}
        output_metadata = {"a": 1}
        args = ([1, "2"], {"k": b"543"})
        output = ["the_answer"]
        object_resolved = 42
        rref = ray.put(object_resolved)

        # test creating normal objects
        raw_storage.save_step_input_metadata(workflow_id, step_id,
                                             input_metadata)
        raw_storage.save_step_func_body(workflow_id, step_id, some_func)
        raw_storage.save_step_args(workflow_id, step_id, args)
        raw_storage.save_object_ref(workflow_id, rref)
        raw_storage.save_step_output_metadata(workflow_id, step_id,
                                              output_metadata)
        raw_storage.save_step_output(workflow_id, step_id, output)

        step_status = raw_storage.get_step_status(workflow_id, step_id)
        assert step_status.args_exists
        assert step_status.output_object_exists
        assert step_status.output_metadata_exists
        assert step_status.input_metadata_exists
        assert step_status.func_body_exists

        assert raw_storage.load_step_input_metadata(workflow_id,
                                                    step_id) == input_metadata
        assert raw_storage.load_step_func_body(workflow_id, step_id)(33) == 34
        assert raw_storage.load_step_args(workflow_id, step_id) == args
        assert ray.get(raw_storage.load_object_ref(
            workflow_id, rref.hex())) == object_resolved
        assert raw_storage.load_step_output_metadata(
            workflow_id, step_id) == output_metadata
        assert raw_storage.load_step_output(workflow_id, step_id) == output

        # test overwrite
        input_metadata = [input_metadata, "overwrite"]
        output_metadata = [output_metadata, "overwrite"]
        args = (args, "overwrite")
        output = (output, "overwrite")
        object_resolved = (object_resolved, "overwrite")
        rref = ray.put(object_resolved)

        raw_storage.save_step_input_metadata(workflow_id, step_id,
                                             input_metadata)
        raw_storage.save_step_func_body(workflow_id, step_id, some_func2)
        raw_storage.save_step_args(workflow_id, step_id, args)
        raw_storage.save_object_ref(workflow_id, rref)
        raw_storage.save_step_output_metadata(workflow_id, step_id,
                                              output_metadata)
        raw_storage.save_step_output(workflow_id, step_id, output)

        assert raw_storage.load_step_input_metadata(workflow_id,
                                                    step_id) == input_metadata
        assert raw_storage.load_step_func_body(workflow_id, step_id)(33) == 32
        assert raw_storage.load_step_args(workflow_id, step_id) == args
        assert ray.get(raw_storage.load_object_ref(
            workflow_id, rref.hex())) == object_resolved
        assert raw_storage.load_step_output_metadata(
            workflow_id, step_id) == output_metadata
        assert raw_storage.load_step_output(workflow_id, step_id) == output
    finally:
        ray.shutdown()
async def test_raw_storage(workflow_start_regular):
    """Concurrently save and load every kind of step data through the raw
    storage layer, then overwrite each item and check the new values."""
    raw_storage = workflow_storage._StorageImpl(storage.get_global_storage())
    workflow_id = test_workflow_storage.__name__
    step_id = "some_step"

    async def save_everything(func_body, in_meta, out_meta, step_args,
                              step_output, ref, progress):
        # One gather with the saves in a fixed order.
        await asyncio.gather(
            raw_storage.save_step_input_metadata(workflow_id, step_id,
                                                 in_meta),
            raw_storage.save_step_func_body(workflow_id, step_id, func_body),
            raw_storage.save_step_args(workflow_id, step_id, step_args),
            raw_storage.save_object_ref(workflow_id, ref),
            raw_storage.save_step_output_metadata(workflow_id, step_id,
                                                  out_meta),
            raw_storage.save_step_output(workflow_id, step_id, step_output),
            raw_storage.save_workflow_progress(workflow_id, progress))

    async def load_everything(ref):
        # Results come back in the same order as the saves above.
        return await asyncio.gather(
            raw_storage.load_step_input_metadata(workflow_id, step_id),
            raw_storage.load_step_func_body(workflow_id, step_id),
            raw_storage.load_step_args(workflow_id, step_id),
            raw_storage.load_object_ref(workflow_id, ref.hex()),
            raw_storage.load_step_output_metadata(workflow_id, step_id),
            raw_storage.load_step_output(workflow_id, step_id),
            raw_storage.load_workflow_progress(workflow_id))

    input_metadata = {"2": "c"}
    output_metadata = {"a": 1}
    args = ([1, "2"], {"k": b"543"})
    output = ["the_answer"]
    object_resolved = 42
    obj_ref = ray.put(object_resolved)
    progress_metadata = {"step_id": "the_current_progress"}

    # test creating normal objects
    await save_everything(some_func, input_metadata, output_metadata, args,
                          output, obj_ref, progress_metadata)

    step_status = await raw_storage.get_step_status(workflow_id, step_id)
    assert step_status.args_exists
    assert step_status.output_object_exists
    assert step_status.output_metadata_exists
    assert step_status.input_metadata_exists
    assert step_status.func_body_exists

    (got_in_meta, got_func_body, got_args, got_ref, got_out_meta, got_output,
     got_progress) = await load_everything(obj_ref)
    assert got_in_meta == input_metadata
    assert got_func_body(33) == 34
    assert got_args == args
    assert ray.get(got_ref) == object_resolved
    assert got_out_meta == output_metadata
    assert got_output == output
    assert got_progress == progress_metadata

    # test overwrite
    input_metadata = [input_metadata, "overwrite"]
    output_metadata = [output_metadata, "overwrite"]
    progress_metadata = {"step_id": "overwrite"}
    args = (args, "overwrite")
    output = (output, "overwrite")
    object_resolved = (object_resolved, "overwrite")
    obj_ref = ray.put(object_resolved)

    await save_everything(some_func2, input_metadata, output_metadata, args,
                          output, obj_ref, progress_metadata)

    (got_in_meta, got_func_body, got_args, got_ref, got_out_meta, got_output,
     got_progress) = await load_everything(obj_ref)
    assert got_in_meta == input_metadata
    assert got_func_body(33) == 32
    assert got_args == args
    assert ray.get(got_ref) == object_resolved
    assert got_out_meta == output_metadata
    assert got_output == output
    assert got_progress == progress_metadata