def test_torch_serialization(large_buffer):
    pytest.importorskip("torch")

    serialization_context = pa.default_serialization_context()
    pa.register_torch_serialization_handlers(serialization_context)

    # Dense tensors:

    # These are the only types that are supported for the
    # PyTorch to NumPy conversion
    for t in ["float32", "float64",
              "uint8", "int16", "int32", "int64"]:
        obj = torch.from_numpy(np.random.randn(1000).astype(t))
        serialization_roundtrip(obj, large_buffer,
                                context=serialization_context)

    tensor_requiring_grad = torch.randn(10, 10, requires_grad=True)
    serialization_roundtrip(tensor_requiring_grad, large_buffer,
                            context=serialization_context)

    # Sparse tensors:

    # These are the only types that are supported for the
    # PyTorch to NumPy conversion
    for t in ["float32", "float64",
              "uint8", "int16", "int32", "int64"]:
        i = torch.LongTensor([[0, 2], [1, 0], [1, 2]])
        v = torch.from_numpy(np.array([3, 4, 5]).astype(t))
        obj = torch.sparse_coo_tensor(i.t(), v, torch.Size([2, 3]))
        serialization_roundtrip(obj, large_buffer,
                                context=serialization_context)
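# Sparse COO tensors do not support a plain elementwise `==` comparison, so a
# round-trip check like the one above cannot assert equality naively. A minimal
# hedged sketch of one way to compare them (`sparse_tensors_equal` is a made-up
# helper, not part of this test suite), assuming both arguments are sparse COO
# tensors small enough to densify:

import torch


def sparse_tensors_equal(a, b):
    # Compare shapes first, then compare the densified values elementwise.
    return a.shape == b.shape and torch.equal(a.to_dense(), b.to_dense())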
def test_torch_serialization(large_buffer):
    pytest.importorskip("torch")

    serialization_context = pa.default_serialization_context()
    pa.register_torch_serialization_handlers(serialization_context)

    # These are the only types that are supported for the
    # PyTorch to NumPy conversion
    for t in ["float32", "float64",
              "uint8", "int16", "int32", "int64"]:
        obj = torch.from_numpy(np.random.randn(1000).astype(t))
        serialization_roundtrip(obj, large_buffer,
                                context=serialization_context)
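# `serialization_roundtrip` is a helper defined elsewhere in the test module.
# A minimal sketch of what it does, assuming the legacy pyarrow serialization
# API (pa.serialize_to / pa.deserialize, removed in pyarrow 2.0); the real
# helper also exercises a component-based round trip:

import pyarrow as pa


def serialization_roundtrip_sketch(value, scratch_buffer, context=None):
    # Serialize into the preallocated scratch buffer...
    sink = pa.FixedSizeBufferWriter(scratch_buffer)
    pa.serialize_to(value, sink, context=context)
    # ...then deserialize from the same buffer and hand back the result
    # so the caller can compare it against `value`.
    return pa.deserialize(scratch_buffer, context=context)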
def __init__(self, worker):
    self.worker = worker
    self.use_pickle = worker.use_pickle

    def actor_handle_serializer(obj):
        return obj._serialization_helper(True)

    def actor_handle_deserializer(serialized_obj):
        new_handle = ray.actor.ActorHandle.__new__(ray.actor.ActorHandle)
        new_handle._deserialization_helper(serialized_obj, True)
        return new_handle

    if not worker.use_pickle:
        serialization_context = pyarrow.default_serialization_context()
        # Tell the serialization context to use the cloudpickle version
        # that we ship with Ray.
        serialization_context.set_pickle(pickle.dumps, pickle.loads)
        pyarrow.register_torch_serialization_handlers(
            serialization_context)

        def id_serializer(obj):
            if isinstance(obj, ray.ObjectID) and obj.is_direct_call_type():
                obj = self.worker.core_worker.promote_object_to_plasma(obj)
            return pickle.dumps(obj)

        def id_deserializer(serialized_obj):
            return pickle.loads(serialized_obj)

        for id_type in ray._raylet._ID_TYPES:
            serialization_context.register_type(
                id_type,
                "{}.{}".format(id_type.__module__, id_type.__name__),
                custom_serializer=id_serializer,
                custom_deserializer=id_deserializer)

        # We register this serializer on each worker instead of calling
        # _register_custom_serializer from the driver so that isinstance
        # still works.
        serialization_context.register_type(
            ray.actor.ActorHandle,
            "ray.ActorHandle",
            pickle=False,
            custom_serializer=actor_handle_serializer,
            custom_deserializer=actor_handle_deserializer)
        self.pyarrow_context = serialization_context
    else:
        self._register_cloudpickle_serializer(
            ray.actor.ActorHandle,
            custom_serializer=actor_handle_serializer,
            custom_deserializer=actor_handle_deserializer)

        def id_serializer(obj):
            if isinstance(obj, ray.ObjectID) and obj.is_direct_call_type():
                obj = self.worker.core_worker.promote_object_to_plasma(obj)
            return obj.__reduce__()

        def id_deserializer(serialized_obj):
            return serialized_obj[0](*serialized_obj[1])

        for id_type in ray._raylet._ID_TYPES:
            self._register_cloudpickle_serializer(id_type, id_serializer,
                                                  id_deserializer)
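# The pickle branch above relies on `_register_cloudpickle_serializer`,
# defined elsewhere on this class. A hedged sketch of the underlying idea
# using the standard-library reduce machinery (`register_reducer_sketch` is
# illustrative only; Ray hooks into its bundled cloudpickle rather than
# copyreg):

import copyreg


def register_reducer_sketch(cls, custom_serializer, custom_deserializer):
    def _reducer(obj):
        # Pickle `obj` as (deserializer, (serialized payload,)) so that
        # unpickling calls custom_deserializer(custom_serializer(obj)).
        return custom_deserializer, (custom_serializer(obj),)
    copyreg.pickle(cls, _reducer)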
def __init__(self, worker):
    self.worker = worker
    self.use_pickle = worker.use_pickle

    def actor_handle_serializer(obj):
        return obj._serialization_helper(True)

    def actor_handle_deserializer(serialized_obj):
        new_handle = ray.actor.ActorHandle.__new__(ray.actor.ActorHandle)
        new_handle._deserialization_helper(serialized_obj, True)
        return new_handle

    if not worker.use_pickle:
        serialization_context = pyarrow.default_serialization_context()
        # Tell the serialization context to use the cloudpickle version
        # that we ship with Ray.
        serialization_context.set_pickle(pickle.dumps, pickle.loads)
        pyarrow.register_torch_serialization_handlers(
            serialization_context)

        def id_serializer(obj):
            return pickle.dumps(obj)

        def id_deserializer(serialized_obj):
            return pickle.loads(serialized_obj)

        def object_id_serializer(obj):
            owner_id = ""
            owner_address = ""
            if obj.is_direct_call_type():
                worker = ray.worker.get_global_worker()
                worker.check_connected()
                obj, owner_id, owner_address = (
                    worker.core_worker.serialize_and_promote_object_id(obj))
            obj = obj.__reduce__()
            owner_id = owner_id.__reduce__() if owner_id else owner_id
            return pickle.dumps((obj, owner_id, owner_address))

        def object_id_deserializer(serialized_obj):
            obj_id, owner_id, owner_address = pickle.loads(serialized_obj)
            # NOTE(swang): Must deserialize the object first before asking
            # the core worker to resolve the value. This is to make sure
            # that the ref count for the ObjectID is greater than 0 by the
            # time the core worker resolves the value of the object.
            deserialized_object_id = obj_id[0](obj_id[1][0])
            if owner_id:
                worker = ray.worker.get_global_worker()
                worker.check_connected()
                # UniqueIDs are serialized as
                # (class name, (unique bytes,)).
                worker.core_worker.deserialize_and_register_object_id(
                    obj_id[1][0], owner_id[1][0], owner_address)
            return deserialized_object_id

        for id_type in ray._raylet._ID_TYPES:
            if id_type == ray._raylet.ObjectID:
                serialization_context.register_type(
                    id_type,
                    "{}.{}".format(id_type.__module__, id_type.__name__),
                    custom_serializer=object_id_serializer,
                    custom_deserializer=object_id_deserializer)
            else:
                serialization_context.register_type(
                    id_type,
                    "{}.{}".format(id_type.__module__, id_type.__name__),
                    custom_serializer=id_serializer,
                    custom_deserializer=id_deserializer)

        # We register this serializer on each worker instead of calling
        # _register_custom_serializer from the driver so that isinstance
        # still works.
        serialization_context.register_type(
            ray.actor.ActorHandle,
            "ray.ActorHandle",
            pickle=False,
            custom_serializer=actor_handle_serializer,
            custom_deserializer=actor_handle_deserializer)
        self.pyarrow_context = serialization_context
    else:
        self._register_cloudpickle_serializer(
            ray.actor.ActorHandle,
            custom_serializer=actor_handle_serializer,
            custom_deserializer=actor_handle_deserializer)

        def id_serializer(obj):
            return obj.__reduce__()

        def id_deserializer(serialized_obj):
            return serialized_obj[0](*serialized_obj[1])

        def object_id_serializer(obj):
            owner_id = ""
            owner_address = ""
            if obj.is_direct_call_type():
                worker = ray.worker.get_global_worker()
                worker.check_connected()
                obj, owner_id, owner_address = (
                    worker.core_worker.serialize_and_promote_object_id(obj))
            obj = id_serializer(obj)
            owner_id = id_serializer(owner_id) if owner_id else owner_id
            return (obj, owner_id, owner_address)

        def object_id_deserializer(serialized_obj):
            obj_id, owner_id, owner_address = serialized_obj
            # NOTE(swang): Must deserialize the object first before asking
            # the core worker to resolve the value. This is to make sure
            # that the ref count for the ObjectID is greater than 0 by the
            # time the core worker resolves the value of the object.
            deserialized_object_id = id_deserializer(obj_id)
            if owner_id:
                worker = ray.worker.get_global_worker()
                worker.check_connected()
                # UniqueIDs are serialized as
                # (class name, (unique bytes,)).
                worker.core_worker.deserialize_and_register_object_id(
                    obj_id[1][0], owner_id[1][0], owner_address)
            return deserialized_object_id

        for id_type in ray._raylet._ID_TYPES:
            if id_type == ray._raylet.ObjectID:
                self._register_cloudpickle_serializer(
                    id_type, object_id_serializer, object_id_deserializer)
            else:
                self._register_cloudpickle_serializer(
                    id_type, id_serializer, id_deserializer)
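# For reference, a self-contained toy showing the register_type pattern used
# in both versions above, under the same legacy pyarrow API (pre-2.0).
# `Handle` is a made-up stand-in for ray.actor.ActorHandle, not a Ray type:

import pyarrow


class Handle:
    def __init__(self, actor_id):
        self.actor_id = actor_id


context = pyarrow.SerializationContext()
context.register_type(
    Handle,
    "Handle",
    custom_serializer=lambda obj: obj.actor_id,
    custom_deserializer=lambda actor_id: Handle(actor_id))

buf = pyarrow.serialize(Handle("abc"), context=context).to_buffer()
assert pyarrow.deserialize(buf, context=context).actor_id == "abc"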
    'int': 3000,
    'float': 100.123456
}


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument("task")
    args = parser.parse_args()

    serializers = [
        ("msgpack", MsgpackSerializer.dumps, MsgpackSerializer.loads),
        ("pyarrow-buf", PyarrowSerializer.dumps, PyarrowSerializer.loads),
        ("pyarrow-bytes", PyarrowSerializer.dumps_bytes,
         PyarrowSerializer.loads),
        ("pickle", PickleSerializer.dumps, PickleSerializer.loads),
        ("forking-pickle", ForkingPickler.dumps, ForkingPickler.loads),
    ]

    if args.task == "numpy":
        numpy_data = [np.random.rand(64, 224, 224, 3).astype("float32"),
                      np.random.rand(64).astype('int32')]
        benchmark_all("numpy data", serializers, numpy_data)
    elif args.task == "json":
        benchmark_all("json data", serializers, fake_json_data(), num=50)
    elif args.task == "torch":
        import torch
        from pyarrow.lib import _default_serialization_context
        pa.register_torch_serialization_handlers(
            _default_serialization_context)
        torch_data = [torch.rand(64, 224, 224, 3),
                      torch.rand(64).to(dtype=torch.int32)]
        benchmark_all("torch data", serializers[1:], torch_data)
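# `benchmark_all` and the serializer classes are defined earlier in this
# script. A hedged sketch of the shape they plausibly have (the names and the
# timing loop below are assumptions, not the script's actual implementation):

import pickle
import time


class PickleSerializerSketch:
    @staticmethod
    def dumps(data):
        return pickle.dumps(data, protocol=pickle.HIGHEST_PROTOCOL)

    @staticmethod
    def loads(data):
        return pickle.loads(data)


def benchmark_all_sketch(name, serializers, data, num=100):
    # Time `num` dumps/loads round trips for each (label, dumps, loads) triple.
    for label, dumps, loads in serializers:
        start = time.time()
        for _ in range(num):
            loads(dumps(data))
        print("{}: {} took {:.3f}s for {} round trips".format(
            name, label, time.time() - start, num))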