예제 #1
0
def test_set_pickle():
    """Check that ``set_pickle`` replaces the pickler used by a context.

    A locally defined class is registered with ``pickle=True``; two
    different dumps/loads pairs are then installed one after the other and
    each must determine the round-trip result.
    """
    # A custom type registered with pickle=True, so pickling is triggered.
    class Foo(object):
        pass

    ctx = pa.SerializationContext()
    ctx.register_type(Foo, 'Foo', pickle=True)

    instance = Foo()

    def make_pickler(suffix):
        # Build a fresh dumps/loads pair standing in for pickle; the
        # deserializer appends ``suffix`` to the serialized payload.
        def dumps(obj):
            return b'custom'

        def loads(serialized_obj):
            return serialized_obj + suffix

        return dumps, loads

    cases = (
        (b' serialization 1', b'custom serialization 1'),
        (b' serialization 2', b'custom serialization 2'),
    )

    # Installing each pickler in turn must change the observed behavior.
    for suffix, expected in cases:
        dumps, loads = make_pickler(suffix)
        ctx.set_pickle(dumps, loads)
        buf = pa.serialize(instance, context=ctx).to_buffer()
        result = pa.deserialize(buf.to_pybytes(), context=ctx)
        assert result == expected
예제 #2
0
import struct

import lz4.frame
import pyarrow
from scipy.sparse import csc_matrix

# The magic initial bytes which tell us that a given binary chunk is LZ4
# compressed data (0x184D2204 packed as a little-endian unsigned 32-bit int)
LZ4_MAGIC_NUMBER = struct.pack("<I", 0x184D2204)

# Serialization context created once at module level
context = pyarrow.SerializationContext()


def serialize_csc(matrix):
    """
    Break a Compressed Sparse Column matrix into basic pieces (tuples and
    numpy arrays) which PyArrow knows how to serialize.

    Returns a pair ``((data, indices, indptr), shape)`` read directly from
    the attributes of ``matrix``.
    """
    components = (matrix.data, matrix.indices, matrix.indptr)
    return (components, matrix.shape)


def deserialize_csc(args):
    """
    Reconstruct a Compressed Sparse Column matrix from its decomposed parts
    """
    # We construct a `csc_matrix` instance by directly assigning its members,
    # rather than using `__init__` which runs additional checks that
    # significantly slow down deserialization. Because we know these values
    # came from properly constructed matrices we can skip these checks
    (data, indices, indptr), shape = args
    matrix = csc_matrix.__new__(csc_matrix)
예제 #3
0
def test_serialization_deprecated_toplevel():
    """Instantiating the top-level serialization classes emits FutureWarning."""
    for class_name in ("SerializedPyObject", "SerializationContext"):
        # Look the class up and call it inside the warning check, so both
        # steps happen while warnings are being recorded.
        with pytest.warns(FutureWarning):
            getattr(pa, class_name)()
예제 #4
0
    return np.ma.MaskedArray(data,
                             mask=mask,
                             fill_value=fill_value,
                             hard_mask=hardmask)


def _serialize_numpy_masked_constant(obj):
    """Serialize a masked constant as a ``None`` payload.

    The argument itself is ignored. This is a workaround for the
    "Changing the dtype of a 0d array is only supported if the itemsize is
    unchanged" error.
    """
    return None


def _deserialize_numpy_masked_constant(obj):
    """Return ``np.ma.masked``; the serialized payload is not consulted."""
    masked_constant = np.ma.masked
    return masked_constant


# Module-level context: install pyarrow's default handlers first, then
# register the custom (de)serializer pairs for numpy's masked types.
serialization_context = pa.SerializationContext()
pa.register_default_serialization_handlers(serialization_context)

# Masked arrays go through the custom serializer/deserializer pair.
serialization_context.register_type(
    np.ma.MaskedArray,
    "numpy.ma.core.MaskedArray",
    custom_serializer=_serialize_numpy_masked_array,
    custom_deserializer=_deserialize_numpy_masked_array,
)

# MaskedConstant gets its own pair, registered separately from MaskedArray.
serialization_context.register_type(
    np.ma.core.MaskedConstant,
    "numpy.ma.core.MaskedConstant",
    custom_serializer=_serialize_numpy_masked_constant,
    custom_deserializer=_deserialize_numpy_masked_constant,
)
예제 #5
0
    msg.result = data["result"]
    msg.id = data["id"]
    msg.fields = data["fields"]
    msg.data = data["data"]
    return msg


def _serialize_StructDict(struct):
    """Return the state produced by the object's own ``__getstate__``."""
    state = struct.__getstate__()
    return state


def _deserialize_StructDict(data):
    """Rebuild a ``StructDict`` by passing the serialized data to its constructor."""
    restored = StructDict(data)
    return restored


# Module-level context with custom (de)serializer pairs registered for the
# Message and StructDict types.
pyarrow_context = pa.SerializationContext()
pyarrow_context.register_type(Message,
                              'Message',
                              custom_serializer=_serialize_Message,
                              custom_deserializer=_deserialize_Message)
pyarrow_context.register_type(StructDict,
                              'StructDict',
                              custom_serializer=_serialize_StructDict,
                              custom_deserializer=_deserialize_StructDict)


class RtMessagingClient:
    """Messaging client for connecting to a server and sending messages"""
    def __init__(self, serverAddr, serverPort):
        self.addr = serverAddr
        self.port = serverPort
예제 #6
0
def make_serialization_context():
    """Build a ``pa.SerializationContext`` with all handlers registered.

    Registers, each under its own 20-byte type id:

    * ``np.ndarray`` via a list / dtype-string custom serializer,
    * the plain classes ``Foo``, ``Bar``, ``Baz``, ``Qux``, ``SubQux``,
      ``Exception``, ``CustomError``, ``Point`` and ``NamedTupleExample``,
    * ``SubQuxPickle`` and the function type with ``pickle=True``,
    * ``int`` (and ``long`` on Python 2) via a string round trip,
    * ``OrderedDict`` and ``defaultdict`` via key/value lists.

    Type ids are kept unique so that one registration cannot shadow
    another registered under the same id.

    Returns:
        The populated ``pa.SerializationContext``.
    """

    def array_custom_serializer(obj):
        # Store the elements as a nested list plus the dtype string.
        return obj.tolist(), obj.dtype.str

    def array_custom_deserializer(serialized_obj):
        # Inverse of array_custom_serializer: (elements, dtype string).
        return np.array(serialized_obj[0], dtype=np.dtype(serialized_obj[1]))

    context = pa.SerializationContext()

    # This is for numpy arrays of "object" only; primitive types are handled
    # efficiently with Arrow's Tensor facilities (see python_to_arrow.cc)
    context.register_type(np.ndarray, 20 * b"\x00",
                          custom_serializer=array_custom_serializer,
                          custom_deserializer=array_custom_deserializer)

    context.register_type(Foo, 20 * b"\x01")
    context.register_type(Bar, 20 * b"\x02")
    context.register_type(Baz, 20 * b"\x03")
    context.register_type(Qux, 20 * b"\x04")
    context.register_type(SubQux, 20 * b"\x05")
    # SubQuxPickle previously reused SubQux's id (\x05); the second
    # registration then replaced the first under that id and flagged it for
    # pickling. It now has its own, otherwise unused, id.
    context.register_type(SubQuxPickle, 20 * b"\x15", pickle=True)
    context.register_type(Exception, 20 * b"\x06")
    context.register_type(CustomError, 20 * b"\x07")
    context.register_type(Point, 20 * b"\x08")
    context.register_type(NamedTupleExample, 20 * b"\x09")

    # TODO(pcm): This is currently a workaround until arrow supports
    # arbitrary precision integers. This is only called on long integers,
    # see the associated case in the append method in python_to_arrow.cc
    context.register_type(int, 20 * b"\x10", pickle=False,
                          custom_serializer=str,
                          custom_deserializer=int)

    if sys.version_info < (3, 0):
        # Python 2 only: ``long`` is a separate type and gets its own id.
        context.register_type(long, 20 * b"\x11", pickle=False,  # noqa: E501,F821
                              custom_serializer=str,
                              custom_deserializer=long)  # noqa: F821

    def ordered_dict_custom_serializer(obj):
        # Keys and values are stored as two parallel lists.
        return list(obj.keys()), list(obj.values())

    def ordered_dict_custom_deserializer(obj):
        return OrderedDict(zip(obj[0], obj[1]))

    context.register_type(OrderedDict, 20 * b"\x12", pickle=False,
                          custom_serializer=ordered_dict_custom_serializer,
                          custom_deserializer=ordered_dict_custom_deserializer)

    def default_dict_custom_serializer(obj):
        # Same layout as OrderedDict, plus the default factory.
        return list(obj.keys()), list(obj.values()), obj.default_factory

    def default_dict_custom_deserializer(obj):
        return defaultdict(obj[2], zip(obj[0], obj[1]))

    context.register_type(defaultdict, 20 * b"\x13", pickle=False,
                          custom_serializer=default_dict_custom_serializer,
                          custom_deserializer=default_dict_custom_deserializer)

    # Functions (the type of a lambda) are serialized via pickle.
    context.register_type(type(lambda: 0), 20 * b"\x14", pickle=True)

    return context