def testCompressIO(self):
    """Round-trip an array through CompressBufferReader (LZ4 compress on
    read) and DecompressBufferWriter (decompress on write), in 128-byte
    chunks, checking the data survives both directions."""
    if not np:
        return
    import pyarrow
    from numpy.testing import assert_array_equal

    raw = np.random.random((1000, 100))
    serialized = pyarrow.serialize(raw).to_buffer()

    # Drain the compressing reader in small chunks into a byte sink.
    chunks = BytesIO()
    reader = dataserializer.CompressBufferReader(
        pyarrow.py_buffer(serialized), dataserializer.COMPRESS_FLAG_LZ4)
    while True:
        piece = reader.read(128)
        if not piece:
            break
        chunks.write(piece)
    compressed = chunks.getvalue()
    assert_array_equal(raw, dataserializer.loads(compressed))

    # Feed the compressed bytes back through the decompressing writer,
    # again in 128-byte slices, into a pre-sized bytearray.
    sink = bytearray(len(serialized))
    view = memoryview(compressed)
    writer = dataserializer.DecompressBufferWriter(
        pyarrow.py_buffer(sink))
    offset, total = 0, len(compressed)
    while offset < total:
        stop = min(offset + 128, total)
        writer.write(view[offset:stop])
        offset = stop
    assert_array_equal(raw, pyarrow.deserialize(sink))
def _read_data_batch(reader):
    """Read *reader* to exhaustion in fixed-size chunks and deserialize
    the collected bytes with ``dataserializer.loads``.

    NOTE(review): ``io_size`` is a free variable — presumably a
    module-level chunk-size constant defined elsewhere in this file;
    verify against the surrounding module.
    """
    collected = BytesIO()
    with reader:  # ensure the reader is closed even if a read fails
        chunk = reader.read(io_size)
        while chunk:
            collected.write(chunk)
            chunk = reader.read(io_size)
    return dataserializer.loads(collected.getvalue())
def testArrowBufferIO(self):
    """Exercise ArrowBufferIO in read mode (whole, partial, and chunked
    reads, per supported compression codec) and in write mode (chunked
    decompressing writes back into a pre-sized buffer)."""
    if not np:
        return
    import pyarrow
    from numpy.testing import assert_array_equal

    supported = dataserializer.get_supported_compressions()
    for codec in (dataserializer.CompressType.LZ4,
                  dataserializer.CompressType.GZIP):
        if codec not in supported:
            continue

        raw = np.random.random((1000, 100))
        serialized = pyarrow.serialize(raw).to_buffer()

        # One-shot read of the whole compressed stream.
        reader = ArrowBufferIO(
            pyarrow.py_buffer(serialized), 'r', compress_out=codec)
        assert_array_equal(raw, dataserializer.loads(reader.read()))

        # A small prefix read followed by the remainder.
        reader = ArrowBufferIO(
            pyarrow.py_buffer(serialized), 'r', compress_out=codec)
        head = reader.read(128)
        tail = reader.read()
        assert_array_equal(raw, dataserializer.loads(head + tail))

        # Fixed-size chunked reads until exhaustion.
        chunks = BytesIO()
        reader = ArrowBufferIO(
            pyarrow.py_buffer(serialized), 'r', compress_out=codec)
        piece = reader.read(128)
        while piece:
            chunks.write(piece)
            piece = reader.read(128)
        compressed = chunks.getvalue()
        assert_array_equal(raw, dataserializer.loads(compressed))

        # Chunked writes of the compressed bytes into a write-mode buffer.
        sink = bytearray(len(serialized))
        view = memoryview(compressed)
        writer = ArrowBufferIO(pyarrow.py_buffer(sink), 'w')
        offset, total = 0, len(compressed)
        while offset < total:
            stop = min(offset + 128, total)
            writer.write(view[offset:stop])
            offset = stop
        assert_array_equal(raw, pyarrow.deserialize(sink))
def testFileBufferIO(self):
    """Exercise FileBufferIO across every pair of input/output codecs:
    whole-stream, partial, and chunked reads that re-compress on the fly,
    plus chunked writes that re-compress into a destination file."""
    if not np:
        return
    from numpy.testing import assert_array_equal

    codecs = [dataserializer.CompressType.NONE]
    codecs.extend(dataserializer.get_supported_compressions())

    for in_codec in codecs:
        for out_codec in codecs:
            raw = np.random.random((1000, 100))

            # One-shot read: source compressed with in_codec, output
            # re-compressed with out_codec.
            src = BytesIO(dataserializer.dumps(raw, compress=in_codec))
            reader = FileBufferIO(src, 'r', compress_out=out_codec)
            compressed = reader.read()
            self.assertEqual(
                out_codec,
                dataserializer.read_file_header(compressed).compress)
            assert_array_equal(raw, dataserializer.loads(compressed))

            # A small prefix read followed by the remainder.
            src = BytesIO(dataserializer.dumps(raw, compress=in_codec))
            reader = FileBufferIO(src, 'r', compress_out=out_codec)
            head = reader.read(128)
            tail = reader.read()
            assert_array_equal(raw, dataserializer.loads(head + tail))

            # Fixed-size chunked reads until exhaustion.
            chunks = BytesIO()
            src = BytesIO(dataserializer.dumps(raw, compress=in_codec))
            reader = FileBufferIO(src, 'r', compress_out=out_codec)
            piece = reader.read(128)
            while piece:
                chunks.write(piece)
                piece = reader.read(128)
            compressed = chunks.getvalue()
            self.assertEqual(
                out_codec,
                dataserializer.read_file_header(compressed).compress)
            assert_array_equal(raw, dataserializer.loads(compressed))

            # Chunked writes: stream the raw source file through a
            # write-mode FileBufferIO that compresses with out_codec.
            src.seek(0)
            dest = BytesIO()
            writer = FileBufferIO(
                dest, 'w', compress_in=out_codec, managed=False)
            piece = src.read(128)
            while piece:
                writer.write(piece)
                piece = src.read(128)
            writer.close()
            compressed = dest.getvalue()
            self.assertEqual(
                out_codec,
                dataserializer.read_file_header(compressed).compress)
            assert_array_equal(raw, dataserializer.loads(compressed))