def workflow_args_serialization_context(
        workflow_refs: List[WorkflowRef]) -> None:
    """
    Serialization context that swaps every 'WorkflowRef' found in workflow
    input arguments for an integer index.

    While the context is active, a custom serializer is registered for
    'WorkflowRef'. Serializing the arguments then yields two parts:

    1. A placeholder: the arguments with each 'WorkflowRef' replaced by its
       integer index. On deserialization the indices are handed to
       '_resolve_workflow_refs'.
    2. The list 'workflow_refs', filled in as a side effect. It contains
       each distinct 'WorkflowRef' exactly once.

    Duplicates are collapsed because a ref that occurs several times in the
    arguments is shared by reference, and it should still be shared after
    deserialization. See "tests/test_object_deref.py:deref_shared" as an
    example.

    Deduplication example:
        Inputs: [A B A B C C A]
        Output List: [A B C]
        Index in placeholder: [0 1 0 1 2 2 0]

    Note that the 'ref' attribute of each newly seen 'WorkflowRef' is set
    to None in place.

    This is a generator function with a single yield; presumably it is
    wrapped by a context-manager decorator where it is defined (the
    decorator is not visible here -- confirm).

    Args:
        workflow_refs: Output list of workflows or references to workflows.
    """
    # Maps an already-seen ref to its position in `workflow_refs`.
    index_by_ref: Dict[WorkflowRef, int] = {}

    def replace_with_index(w):
        known_index = index_by_ref.get(w)
        if known_index is not None:
            # Seen before: reuse the index so sharing is preserved.
            return known_index
        if isinstance(w, WorkflowRef):
            # The ref should be resolved by the workflow management actor
            # when treated as the input of a workflow, so we remove the ref
            # here.
            w.ref = None
        new_index = len(workflow_refs)
        workflow_refs.append(w)
        index_by_ref[w] = new_index
        return new_index

    register_serializer(
        WorkflowRef,
        serializer=replace_with_index,
        deserializer=_resolve_workflow_refs,
    )

    try:
        yield
    finally:
        # The custom serializer must not leak outside of this context.
        deregister_serializer(WorkflowRef)
Exemple #2
0
def workflow_args_serialization_context(
        workflows: List[Workflow], object_refs: List[ray.ObjectRef],
        workflow_refs: List[WorkflowRef]) -> None:
    """
    This serialization context reduces workflow input arguments to four
    parts:

    1. A workflow input placeholder. It is an object without 'Workflow',
       'ObjectRef' and 'WorkflowRef' objects. They are replaced with integer
       indices. During deserialization, we can refill the placeholder with
       the corresponding lists. This provides us great flexibility, for
       example, during recovery we can plug an alternative list of
       'Workflow' and 'ObjectRef', since we lose the original ones.
    2. A list of 'Workflow'. There is no duplication in it.
    3. A list of 'ObjectRef'. There is no duplication in it.
    4. A list of 'WorkflowRef'. There is no duplication in it.

    We do not allow duplication because in the arguments duplicated workflows
    and object refs are shared by reference. So when deserialized, we also
    want them to be shared by reference. See
    "tests/test_object_deref.py:deref_shared" as an example.

    The deduplication works like this:
        Inputs: [A B A B C C A]
        Output List: [A B C]
        Index in placeholder: [0 1 0 1 2 2 0]

    This is a generator function with a single yield; presumably it is
    wrapped by a context-manager decorator where it is defined (the
    decorator is not visible here -- confirm).

    Args:
        workflows: Workflow list output.
        object_refs: ObjectRef list output.
        workflow_refs: WorkflowRef list output.
    """
    # Each table maps an already-seen object to its index in the matching
    # output list.
    workflow_deduplicator: Dict[Workflow, int] = {}
    objectref_deduplicator: Dict[ray.ObjectRef, int] = {}
    workflowref_deduplicator: Dict[WorkflowRef, int] = {}

    def workflow_serializer(workflow):
        if workflow in workflow_deduplicator:
            return workflow_deduplicator[workflow]
        i = len(workflows)
        workflows.append(workflow)
        workflow_deduplicator[workflow] = i
        return i

    register_serializer(Workflow,
                        serializer=workflow_serializer,
                        deserializer=_resolve_workflow_outputs)

    def objectref_serializer(obj_ref):
        if obj_ref in objectref_deduplicator:
            return objectref_deduplicator[obj_ref]
        i = len(object_refs)
        object_refs.append(obj_ref)
        objectref_deduplicator[obj_ref] = i
        return i

    # override the default ObjectRef serializer
    # TODO(suquark): We are using Ray internal APIs to access serializers.
    # This is only a workaround. We need alternatives later.
    # Keep the original reducer so it can be restored on exit.
    ray_objectref_reducer_backup = ray.cloudpickle.CloudPickler.dispatch[
        ray.ObjectRef]
    register_serializer(ray.ObjectRef,
                        serializer=objectref_serializer,
                        deserializer=_resolve_objectrefs)

    def workflow_ref_serializer(workflow_ref):
        if workflow_ref in workflowref_deduplicator:
            return workflowref_deduplicator[workflow_ref]
        i = len(workflow_refs)
        workflow_refs.append(workflow_ref)
        # Record the index in the WorkflowRef table (the one the lookup
        # above reads), so a repeated ref reuses its index instead of being
        # appended again.
        workflowref_deduplicator[workflow_ref] = i
        return i

    register_serializer(WorkflowRef,
                        serializer=workflow_ref_serializer,
                        deserializer=_resolve_workflow_refs)

    try:
        yield
    finally:
        # we do not want to serialize Workflow objects in other places.
        deregister_serializer(Workflow)
        # restore original dispatch
        ray.cloudpickle.CloudPickler.dispatch[
            ray.ObjectRef] = ray_objectref_reducer_backup
        deregister_serializer(WorkflowRef)