Example #1
import numpy as np
import pyarrow as pa
import pytest


def test_torch_serialization(large_buffer):
    # importorskip skips the test when PyTorch is missing and returns
    # the imported module otherwise.
    torch = pytest.importorskip("torch")

    serialization_context = pa.default_serialization_context()
    pa.register_torch_serialization_handlers(serialization_context)

    # Dense tensors:

    # These are the only types that are supported for the
    # PyTorch to NumPy conversion
    for t in ["float32", "float64", "uint8", "int16", "int32", "int64"]:
        obj = torch.from_numpy(np.random.randn(1000).astype(t))
        serialization_roundtrip(obj,
                                large_buffer,
                                context=serialization_context)

    tensor_requiring_grad = torch.randn(10, 10, requires_grad=True)
    serialization_roundtrip(tensor_requiring_grad,
                            large_buffer,
                            context=serialization_context)

    # Sparse tensors:

    # These are the only types that are supported for the
    # PyTorch to NumPy conversion
    for t in ["float32", "float64", "uint8", "int16", "int32", "int64"]:
        i = torch.LongTensor([[0, 2], [1, 0], [1, 2]])
        v = torch.from_numpy(np.array([3, 4, 5]).astype(t))
        obj = torch.sparse_coo_tensor(i.t(), v, torch.Size([2, 3]))
        serialization_roundtrip(obj,
                                large_buffer,
                                context=serialization_context)
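The `serialization_roundtrip` helper and the `large_buffer` fixture come from the surrounding test module and are not shown in this excerpt. Below is a minimal sketch of what the helper plausibly does, assuming the legacy (pre-2.0) pyarrow serialization API and omitting the equality assertions the real helper performs:

import pyarrow as pa


def serialization_roundtrip(value, scratch_buffer, context=None):
    # Serialize into the preallocated scratch buffer...
    writer = pa.FixedSizeBufferWriter(scratch_buffer)
    pa.serialize_to(value, writer, context=context)

    # ...then read the payload back out of the same buffer.
    reader = pa.BufferReader(scratch_buffer)
    return pa.deserialize_from(reader, None, context=context)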
Example #2
import numpy as np
import pyarrow as pa
import pytest


def test_torch_serialization(large_buffer):
    torch = pytest.importorskip("torch")

    serialization_context = pa.default_serialization_context()
    pa.register_torch_serialization_handlers(serialization_context)
    # These are the only types that are supported for the
    # PyTorch to NumPy conversion
    for t in ["float32", "float64", "uint8", "int16", "int32", "int64"]:
        obj = torch.from_numpy(np.random.randn(1000).astype(t))
        serialization_roundtrip(obj,
                                large_buffer,
                                context=serialization_context)
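A plausible definition of the `large_buffer` fixture, assuming pyarrow's `allocate_buffer`:

import pytest

import pyarrow as pa


@pytest.fixture(scope="session")
def large_buffer():
    # 100 MiB scratch buffer reused across the serialization tests.
    return pa.allocate_buffer(100 * 1024 * 1024)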
Example #3
import numpy as np
import pyarrow as pa
import pytest


def test_torch_serialization(large_buffer):
    torch = pytest.importorskip("torch")

    serialization_context = pa.default_serialization_context()
    pa.register_torch_serialization_handlers(serialization_context)
    # These are the only types that are supported for the
    # PyTorch to NumPy conversion
    for t in ["float32", "float64",
              "uint8", "int16", "int32", "int64"]:
        obj = torch.from_numpy(np.random.randn(1000).astype(t))
        serialization_roundtrip(obj, large_buffer,
                                context=serialization_context)
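The registered handlers can also be exercised directly with the legacy `pa.serialize` / `pa.deserialize` API (removed in pyarrow 2.0); a quick roundtrip sketch:

import numpy as np
import pyarrow as pa
import torch

context = pa.default_serialization_context()
pa.register_torch_serialization_handlers(context)

t = torch.from_numpy(np.arange(10, dtype="float32"))
buf = pa.serialize(t, context=context).to_buffer()
restored = pa.deserialize(buf, context=context)
assert torch.equal(t, restored)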
Example #4
    def __init__(self, worker):
        # Runs once per Ray worker; `pickle` below refers to the
        # cloudpickle module bundled with Ray.
        self.worker = worker
        self.use_pickle = worker.use_pickle

        def actor_handle_serializer(obj):
            return obj._serialization_helper(True)

        def actor_handle_deserializer(serialized_obj):
            new_handle = ray.actor.ActorHandle.__new__(ray.actor.ActorHandle)
            new_handle._deserialization_helper(serialized_obj, True)
            return new_handle

        if not worker.use_pickle:
            serialization_context = pyarrow.default_serialization_context()
            # Tell the serialization context to use the cloudpickle version
            # that we ship with Ray.
            serialization_context.set_pickle(pickle.dumps, pickle.loads)
            pyarrow.register_torch_serialization_handlers(
                serialization_context)

            def id_serializer(obj):
                if isinstance(obj, ray.ObjectID) and obj.is_direct_call_type():
                    obj = self.worker.core_worker.promote_object_to_plasma(obj)
                return pickle.dumps(obj)

            def id_deserializer(serialized_obj):
                return pickle.loads(serialized_obj)

            for id_type in ray._raylet._ID_TYPES:
                serialization_context.register_type(
                    id_type,
                    "{}.{}".format(id_type.__module__, id_type.__name__),
                    custom_serializer=id_serializer,
                    custom_deserializer=id_deserializer)

            # We register this serializer on each worker instead of calling
            # _register_custom_serializer from the driver so that isinstance
            # still works.
            serialization_context.register_type(
                ray.actor.ActorHandle,
                "ray.ActorHandle",
                pickle=False,
                custom_serializer=actor_handle_serializer,
                custom_deserializer=actor_handle_deserializer)
            self.pyarrow_context = serialization_context
        else:
            self._register_cloudpickle_serializer(
                ray.actor.ActorHandle,
                custom_serializer=actor_handle_serializer,
                custom_deserializer=actor_handle_deserializer)

            def id_serializer(obj):
                if isinstance(obj, ray.ObjectID) and obj.is_direct_call_type():
                    obj = self.worker.core_worker.promote_object_to_plasma(obj)
                return obj.__reduce__()

            def id_deserializer(serialized_obj):
                return serialized_obj[0](*serialized_obj[1])

            for id_type in ray._raylet._ID_TYPES:
                self._register_cloudpickle_serializer(id_type, id_serializer,
                                                      id_deserializer)
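`_register_cloudpickle_serializer`, used in the `use_pickle` branch, is defined elsewhere on this class; in Ray of this era it amounts to installing a custom reducer in cloudpickle's dispatch table. A sketch under that assumption, with `pickle` again being Ray's bundled cloudpickle:

    def _register_cloudpickle_serializer(self, cls, custom_serializer,
                                         custom_deserializer):
        # Rebuild instances of `cls` by calling custom_deserializer on
        # whatever custom_serializer produced for them.
        def _reducer(obj):
            return custom_deserializer, (custom_serializer(obj),)

        pickle.CloudPickler.dispatch[cls] = _reducer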
Example #5
    def __init__(self, worker):
        self.worker = worker
        self.use_pickle = worker.use_pickle

        def actor_handle_serializer(obj):
            return obj._serialization_helper(True)

        def actor_handle_deserializer(serialized_obj):
            new_handle = ray.actor.ActorHandle.__new__(ray.actor.ActorHandle)
            new_handle._deserialization_helper(serialized_obj, True)
            return new_handle

        if not worker.use_pickle:
            serialization_context = pyarrow.default_serialization_context()
            # Tell the serialization context to use the cloudpickle version
            # that we ship with Ray.
            serialization_context.set_pickle(pickle.dumps, pickle.loads)
            pyarrow.register_torch_serialization_handlers(
                serialization_context)

            def id_serializer(obj):
                return pickle.dumps(obj)

            def id_deserializer(serialized_obj):
                return pickle.loads(serialized_obj)

            def object_id_serializer(obj):
                owner_id = ""
                owner_address = ""
                if obj.is_direct_call_type():
                    worker = ray.worker.get_global_worker()
                    worker.check_connected()
                    obj, owner_id, owner_address = (
                        worker.core_worker.serialize_and_promote_object_id(obj)
                    )
                obj = obj.__reduce__()
                owner_id = owner_id.__reduce__() if owner_id else owner_id
                return pickle.dumps((obj, owner_id, owner_address))

            def object_id_deserializer(serialized_obj):
                obj_id, owner_id, owner_address = pickle.loads(serialized_obj)
                # NOTE(swang): Must deserialize the object first before asking
                # the core worker to resolve the value. This is to make sure
                # that the ref count for the ObjectID is greater than 0 by the
                # time the core worker resolves the value of the object.
                deserialized_object_id = obj_id[0](obj_id[1][0])
                if owner_id:
                    worker = ray.worker.get_global_worker()
                    worker.check_connected()
                    # UniqueIDs are serialized as
                    # (class name, (unique bytes,)).
                    worker.core_worker.deserialize_and_register_object_id(
                        obj_id[1][0], owner_id[1][0], owner_address)
                return deserialized_object_id

            for id_type in ray._raylet._ID_TYPES:
                if id_type == ray._raylet.ObjectID:
                    serialization_context.register_type(
                        id_type,
                        "{}.{}".format(id_type.__module__, id_type.__name__),
                        custom_serializer=object_id_serializer,
                        custom_deserializer=object_id_deserializer)
                else:
                    serialization_context.register_type(
                        id_type,
                        "{}.{}".format(id_type.__module__, id_type.__name__),
                        custom_serializer=id_serializer,
                        custom_deserializer=id_deserializer)

            # We register this serializer on each worker instead of calling
            # _register_custom_serializer from the driver so that isinstance
            # still works.
            serialization_context.register_type(
                ray.actor.ActorHandle,
                "ray.ActorHandle",
                pickle=False,
                custom_serializer=actor_handle_serializer,
                custom_deserializer=actor_handle_deserializer)
            self.pyarrow_context = serialization_context
        else:
            self._register_cloudpickle_serializer(
                ray.actor.ActorHandle,
                custom_serializer=actor_handle_serializer,
                custom_deserializer=actor_handle_deserializer)

            def id_serializer(obj):
                return obj.__reduce__()

            def id_deserializer(serialized_obj):
                return serialized_obj[0](*serialized_obj[1])

            def object_id_serializer(obj):
                owner_id = ""
                owner_address = ""
                if obj.is_direct_call_type():
                    worker = ray.worker.get_global_worker()
                    worker.check_connected()
                    obj, owner_id, owner_address = (
                        worker.core_worker.serialize_and_promote_object_id(obj)
                    )
                obj = id_serializer(obj)
                owner_id = id_serializer(owner_id) if owner_id else owner_id
                return (obj, owner_id, owner_address)

            def object_id_deserializer(serialized_obj):
                obj_id, owner_id, owner_address = serialized_obj
                # NOTE(swang): Must deserialize the object first before asking
                # the core worker to resolve the value. This is to make sure
                # that the ref count for the ObjectID is greater than 0 by the
                # time the core worker resolves the value of the object.
                deserialized_object_id = id_deserializer(obj_id)
                if owner_id:
                    worker = ray.worker.get_global_worker()
                    worker.check_connected()
                    # UniqueIDs are serialized as
                    # (class name, (unique bytes,)).
                    worker.core_worker.deserialize_and_register_object_id(
                        obj_id[1][0], owner_id[1][0], owner_address)
                return deserialized_object_id

            for id_type in ray._raylet._ID_TYPES:
                if id_type == ray._raylet.ObjectID:
                    self._register_cloudpickle_serializer(
                        id_type, object_id_serializer, object_id_deserializer)
                else:
                    self._register_cloudpickle_serializer(
                        id_type, id_serializer, id_deserializer)
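Both branches lean on the `(callable, args)` convention of `__reduce__`: the serializer captures how to rebuild the ID, and the deserializer replays it. A tiny self-contained illustration with a hypothetical ID class:

class FakeID:
    # Hypothetical stand-in for a ray._raylet ID type.
    def __init__(self, binary):
        self.binary = binary

    def __reduce__(self):
        return FakeID, (self.binary,)


reduced = FakeID(b"\x00\x01").__reduce__()  # (FakeID, (b'\x00\x01',))
restored = reduced[0](*reduced[1])          # same as id_deserializer above
assert restored.binary == b"\x00\x01"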
Example #6
        'int': 3000,
        'float': 100.123456
    }


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument("task")
    args = parser.parse_args()

    serializers = [
        ("msgpack", MsgpackSerializer.dumps, MsgpackSerializer.loads),
        ("pyarrow-buf", PyarrowSerializer.dumps, PyarrowSerializer.loads),
        ("pyarrow-bytes", PyarrowSerializer.dumps_bytes, PyarrowSerializer.loads),
        ("pickle", PickleSerializer.dumps, PickleSerializer.loads),
        ("forking-pickle", ForkingPickler.dumps, ForkingPickler.loads),
    ]

    if args.task == "numpy":
        numpy_data = [
            np.random.rand(64, 224, 224, 3).astype("float32"),
            np.random.rand(64).astype("int32"),
        ]
        benchmark_all("numpy data", serializers, numpy_data)
    elif args.task == "json":
        benchmark_all("json data", serializers, fake_json_data(), num=50)
    elif args.task == "torch":
        import torch
        from pyarrow.lib import _default_serialization_context

        pa.register_torch_serialization_handlers(_default_serialization_context)
        torch_data = [
            torch.rand(64, 224, 224, 3),
            torch.rand(64).to(dtype=torch.int32),
        ]
        benchmark_all("torch data", serializers[1:], torch_data)