def test_compress_decompress():
    """Round-trip random bytes through ``pa.compress`` / ``pa.decompress``.

    For every codec in the list, the data is compressed once from a buffer
    built with ``pa.frombuffer`` and once from raw bytes with
    ``asbytes=True``; both results are decompressed and compared with the
    input. Calling ``pa.decompress`` without a size argument is expected to
    raise ``ValueError``.
    """
    INPUT_SIZE = 10000
    # ``tobytes`` replaces the deprecated ``ndarray.tostring`` alias, which
    # newer NumPy releases no longer provide; the produced bytes are the same.
    test_data = (np.random.randint(0, 255, size=INPUT_SIZE)
                 .astype(np.uint8)
                 .tobytes())
    test_buf = pa.frombuffer(test_data)

    codecs = ['lz4', 'snappy', 'gzip', 'zstd', 'brotli']
    for codec in codecs:
        compressed_buf = pa.compress(test_buf, codec=codec)
        compressed_bytes = pa.compress(test_data, codec=codec, asbytes=True)

        assert isinstance(compressed_bytes, bytes)

        decompressed_buf = pa.decompress(compressed_buf, INPUT_SIZE,
                                         codec=codec)
        decompressed_bytes = pa.decompress(compressed_bytes, INPUT_SIZE,
                                           codec=codec, asbytes=True)

        assert isinstance(decompressed_bytes, bytes)

        assert decompressed_buf.equals(test_buf)
        assert decompressed_bytes == test_data

        # No decompressed size given: the call must be rejected.
        with pytest.raises(ValueError):
            pa.decompress(compressed_bytes, codec=codec)
def test_buffer_numpy():
    """Check that a numpy array can be created from an arrow buffer."""
    source = bytearray(20)
    source[0] = 42

    arrow_buf = pa.frombuffer(source)
    view = np.frombuffer(arrow_buf, dtype="uint8")

    # First element mirrors the source bytes, and the array's base compares
    # equal to the arrow buffer it was built from.
    assert view[0] == source[0]
    assert view.base == arrow_buf
def test_buffer_bytearray():
    """Check that ``bytearray(buffer)`` reproduces the wrapped bytearray."""
    original = bytearray(b'some data')

    wrapped = pa.frombuffer(original)
    assert isinstance(wrapped, io.Buffer)

    round_tripped = bytearray(wrapped)
    assert round_tripped == original
def test_buffer_memoryview():
    """Check that a memoryview over the buffer compares equal to the input."""
    original = b'some data'

    wrapped = pa.frombuffer(original)
    assert isinstance(wrapped, io.Buffer)

    view = memoryview(wrapped)
    assert view == original
def test_buffer_bytes():
    """Check that ``to_pybytes()`` returns the bytes the buffer was built from."""
    original = b'some data'

    wrapped = pa.frombuffer(original)
    assert isinstance(wrapped, io.Buffer)

    as_bytes = wrapped.to_pybytes()
    assert as_bytes == original
def test_deserialize_buffer_in_different_process():
    """Serialize a buffer to a temporary file and hand it to a child process.

    The serialized bytes are written to a named temporary file whose path is
    passed to ``deserialize_buffer.py`` (located next to this file), run with
    the current interpreter. ``subprocess.check_call`` raises if the script
    exits with a non-zero status. The temporary file is removed afterwards,
    whether or not the child succeeded.
    """
    import tempfile
    import subprocess

    payload = pa.serialize(pa.frombuffer(b'hello')).to_buffer()

    # delete=False keeps the file on disk after it is closed; the ``with``
    # block guarantees the handle is closed before the path is passed on.
    with tempfile.NamedTemporaryFile(delete=False) as f:
        f.write(payload.to_pybytes())

    try:
        subprocess_env = _get_modified_env_with_pythonpath()

        dir_path = os.path.dirname(os.path.realpath(__file__))
        python_file = os.path.join(dir_path, 'deserialize_buffer.py')
        subprocess.check_call([sys.executable, python_file, f.name],
                              env=subprocess_env)
    finally:
        # The file was created with delete=False, so clean it up explicitly.
        os.remove(f.name)
def test_buffer_memoryview_is_immutable():
    """Check that bytes exposed by a buffer cannot be modified in place.

    Item assignment is attempted on a ``memoryview`` of the buffer and on
    ``bytes(buf)``; both must raise ``TypeError``.
    """
    val = b'some data'

    buf = pa.frombuffer(val)
    assert isinstance(buf, pa.Buffer)

    # The expected message is given through ``match`` so that it is really
    # checked: an assert written after the raising statement inside the
    # ``with`` block would never be executed.
    result = memoryview(buf)
    with pytest.raises(TypeError, match='cannot modify read-only'):
        result[0] = b'h'

    # Item assignment on a bytes object is reported as unsupported rather
    # than as a read-only violation, so a different message is expected.
    b = bytes(buf)
    with pytest.raises(TypeError, match='does not support item assignment'):
        b[0] = b'h'
def test_buffer_memoryview_is_immutable():
    """Check that bytes exposed by a buffer cannot be modified in place.

    Item assignment is attempted on a ``memoryview`` of the buffer and on
    ``bytes(buf)``; both must raise ``TypeError``.
    """
    val = b'some data'

    buf = pa.frombuffer(val)
    assert isinstance(buf, io.Buffer)

    # The expected message is given through ``match`` so that it is really
    # checked: an assert written after the raising statement inside the
    # ``with`` block would never be executed.
    result = memoryview(buf)
    with pytest.raises(TypeError, match='cannot modify read-only'):
        result[0] = b'h'

    # Item assignment on a bytes object is reported as unsupported rather
    # than as a read-only violation, so a different message is expected.
    b = bytes(buf)
    with pytest.raises(TypeError, match='does not support item assignment'):
        b[0] = b'h'
def test_serialize_to_components_invalid_cases():
    """Check that hand-built invalid component dicts are rejected.

    Each case is passed to ``pa.deserialize_components`` and must raise
    ``pa.ArrowException``.
    """
    buf = pa.frombuffer(b'hello')

    # (num_tensors, num_buffers, data) for every invalid combination
    invalid_cases = (
        (0, 1, [buf]),
        (1, 0, [buf, buf]),
    )

    for tensor_count, buffer_count, data in invalid_cases:
        components = {
            'num_tensors': tensor_count,
            'num_buffers': buffer_count,
            'data': data,
        }
        with pytest.raises(pa.ArrowException):
            pa.deserialize_components(components)
def make_buffer(bytes_obj):
    """Wrap ``bytes_obj`` with ``pa.frombuffer`` and return it as a bytearray."""
    wrapped = pa.frombuffer(bytes_obj)
    return bytearray(wrapped)
def test_buffer_hashing():
    """Check that hashing a buffer raises ``TypeError`` ("unhashable")."""
    # Buffer creation stays inside the ``raises`` block, as in the original.
    with pytest.raises(TypeError, match="unhashable"):
        candidate = pa.frombuffer(b'123')
        hash(candidate)
def test_buffer_invalid():
    """Check that ``pa.frombuffer(None)`` raises ``TypeError``."""
    # Either wording of the error message is accepted.
    expected_message = "(bytes-like object|buffer interface)"
    with pytest.raises(TypeError, match=expected_message):
        pa.frombuffer(None)
def serialize_buffer_class(obj):
    """Return a buffer wrapping the fixed bytes ``b"hello"``.

    ``obj`` is accepted but not used.
    """
    payload = b"hello"
    return pa.frombuffer(payload)
def _pyarrow_deserialize(bstring):
    """Wrap ``bstring`` with ``pyarrow.frombuffer`` and deserialize it.

    Returns whatever ``pyarrow.deserialize`` produces for the wrapped buffer.
    """
    wrapped = pyarrow.frombuffer(bstring)
    return pyarrow.deserialize(wrapped)