def workflow_args_serialization_context(
        workflow_refs: List[WorkflowRef]) -> None:
    """
    Install a deduplicating serializer for 'WorkflowRef' while active.

    While this context is active, the serializer registered for
    'WorkflowRef' returns an integer index instead of the object itself.
    Every distinct 'WorkflowRef' is appended to ``workflow_refs`` exactly
    once; later occurrences of the same ref reuse the index of the first.

    Duplicates are collapsed because duplicated refs in the arguments are
    shared by reference, and they should still be shared by reference after
    deserialization. See "tests/test_object_deref.py:deref_shared" as an
    example.

    The deduplication works like this:
        Inputs: [A B A B C C A]
        Output List: [A B C]
        Index in placeholder: [0 1 0 1 2 2 0]

    The body yields once between registering and deregistering the
    serializer, so it is presumably wrapped by a context-manager decorator
    that is not visible in this block.
    NOTE(review): the '-> None' annotation does not describe a generator;
    confirm and adjust together with the file's typing imports.

    Args:
        workflow_refs: Output list of workflows or references to workflows.
            It is appended to in place, and the 'ref' attribute of each
            collected 'WorkflowRef' is reset to None.
    """
    index_by_ref: Dict[WorkflowRef, int] = {}

    def serializer(w):
        # The lookup must happen before 'w' is mutated below.
        known_index = index_by_ref.get(w)
        if known_index is not None:
            return known_index
        if isinstance(w, WorkflowRef):
            # When used as a workflow input, the ref is expected to be
            # resolved by the workflow management actor, so it is dropped
            # here.
            w.ref = None
        position = len(workflow_refs)
        workflow_refs.append(w)
        index_by_ref[w] = position
        return position

    register_serializer(
        WorkflowRef,
        serializer=serializer,
        deserializer=_resolve_workflow_refs,
    )

    try:
        yield
    finally:
        # Outside of this context, 'WorkflowRef' objects must not go through
        # the index-based serializer.
        deregister_serializer(WorkflowRef)
def workflow_args_serialization_context(
        workflows: List[Workflow], object_refs: List[ray.ObjectRef],
        workflow_refs: List[WorkflowRef]) -> None:
    """
    This serialization context reduces workflow input arguments to four
    parts:

    1. A workflow input placeholder. It is an object without 'Workflow',
       'ObjectRef' and 'WorkflowRef' objects. They are replaced with integer
       indices. During deserialization, we can refill the placeholder with
       the corresponding lists. This provides us great flexibility, for
       example, during recovery we can plug in alternative lists, since we
       lose the original ones.
    2. A list of 'Workflow'. There is no duplication in it.
    3. A list of 'ObjectRef'. There is no duplication in it.
    4. A list of 'WorkflowRef'. There is no duplication in it.

    We do not allow duplication because in the arguments duplicated
    workflows and refs are shared by reference. So when deserialized, we
    also want them to be shared by reference.
    See "tests/test_object_deref.py:deref_shared" as an example.

    The deduplication works like this:
        Inputs: [A B A B C C A]
        Output List: [A B C]
        Index in placeholder: [0 1 0 1 2 2 0]

    The body yields once between installing and removing the serializers,
    so it is presumably wrapped by a context-manager decorator that is not
    visible in this block.
    NOTE(review): the '-> None' annotation does not describe a generator;
    confirm and adjust together with the file's typing imports.

    Args:
        workflows: Workflow list output.
        object_refs: ObjectRef list output.
        workflow_refs: WorkflowRef list output.
    """
    workflow_deduplicator: Dict[Workflow, int] = {}
    objectref_deduplicator: Dict[ray.ObjectRef, int] = {}
    workflowref_deduplicator: Dict[WorkflowRef, int] = {}

    def make_dedup_serializer(outputs, index_table):
        # Build a serializer that maps an object to its index in 'outputs',
        # appending it only the first time it is seen. Each serializer is
        # bound to exactly one table, so the lookup table and the table that
        # records new entries can never differ.
        def serializer(obj):
            if obj in index_table:
                return index_table[obj]
            i = len(outputs)
            outputs.append(obj)
            index_table[obj] = i
            return i

        return serializer

    register_serializer(
        Workflow,
        serializer=make_dedup_serializer(workflows, workflow_deduplicator),
        deserializer=_resolve_workflow_outputs)

    # override the default ObjectRef serializer
    # TODO(suquark): We are using Ray internal APIs to access serializers.
    # This is only a workaround. We need alternatives later.
    ray_objectref_reducer_backup = ray.cloudpickle.CloudPickler.dispatch[
        ray.ObjectRef]
    register_serializer(
        ray.ObjectRef,
        serializer=make_dedup_serializer(object_refs, objectref_deduplicator),
        deserializer=_resolve_objectrefs)

    # Previously new indices were recorded in 'workflow_deduplicator', while
    # lookups used 'workflowref_deduplicator'; repeated WorkflowRefs were
    # therefore appended again instead of sharing one index.
    register_serializer(
        WorkflowRef,
        serializer=make_dedup_serializer(workflow_refs,
                                         workflowref_deduplicator),
        deserializer=_resolve_workflow_refs)

    try:
        yield
    finally:
        # we do not want to serialize Workflow objects in other places.
        deregister_serializer(Workflow)
        # restore original dispatch
        ray.cloudpickle.CloudPickler.dispatch[
            ray.ObjectRef] = ray_objectref_reducer_backup
        deregister_serializer(WorkflowRef)