def testCompressIO(self):
    if not np:
        return
    import pyarrow
    from numpy.testing import assert_array_equal

    data = np.random.random((1000, 100))
    serialized = pyarrow.serialize(data).to_buffer()

    # compress while reading, in 128-byte blocks
    bio = BytesIO()
    reader = dataserializer.CompressBufferReader(
        pyarrow.py_buffer(serialized), dataserializer.COMPRESS_FLAG_LZ4)
    while True:
        block = reader.read(128)
        if not block:
            break
        bio.write(block)

    compressed = bio.getvalue()
    assert_array_equal(data, dataserializer.loads(compressed))

    # decompress while writing, in 128-byte slices
    data_sink = bytearray(len(serialized))
    compressed_mv = memoryview(compressed)
    writer = dataserializer.DecompressBufferWriter(
        pyarrow.py_buffer(data_sink))
    pos = 0
    while pos < len(compressed):
        endpos = min(pos + 128, len(compressed))
        writer.write(compressed_mv[pos:endpos])
        pos = endpos

    assert_array_equal(data, pyarrow.deserialize(data_sink))
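# Note on the pair exercised above: CompressBufferReader compresses a raw
# serialized buffer as it is read, while DecompressBufferWriter decompresses
# compressed bytes as they are written into a pre-sized sink, so the two
# round-trip each other. Both tolerate arbitrary chunk boundaries (the test
# uses 128 bytes), which is what makes them usable for streaming transfers.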
def testDataSerialize(self):
    try:
        import numpy as np
        from numpy.testing import assert_array_equal
    except ImportError:
        np = None
    try:
        import scipy.sparse as sps
    except ImportError:
        sps = None

    if np:
        array = np.random.rand(1000, 100)
        assert_array_equal(array, dataserializer.loads(dataserializer.dumps(array)))
        assert_array_equal(array, dataserializer.loads(dataserializer.dumps(
            array, compress=dataserializer.COMPRESS_FLAG_LZ4)))

        array = np.random.rand(1000, 100)
        assert_array_equal(array, dataserializer.load(BytesIO(dataserializer.dumps(array))))
        assert_array_equal(array, dataserializer.load(BytesIO(dataserializer.dumps(
            array, compress=dataserializer.COMPRESS_FLAG_LZ4))))

        array = np.random.rand(1000, 100).T  # test non c-contiguous
        assert_array_equal(array, dataserializer.loads(dataserializer.dumps(array)))
        assert_array_equal(array, dataserializer.loads(dataserializer.dumps(
            array, compress=dataserializer.COMPRESS_FLAG_LZ4)))

        array = np.float64(0.2345)
        assert_array_equal(array, dataserializer.loads(dataserializer.dumps(array)))
        assert_array_equal(array, dataserializer.loads(dataserializer.dumps(
            array, compress=dataserializer.COMPRESS_FLAG_LZ4)))

        fn = os.path.join(tempfile.gettempdir(),
                          'test_dump_file_%d.bin' % id(self))
        try:
            array = np.random.rand(1000, 100).T  # test non c-contiguous
            with open(fn, 'wb') as dump_file:
                dataserializer.dump(array, dump_file)
            with open(fn, 'rb') as dump_file:
                assert_array_equal(array, dataserializer.load(dump_file))

            with open(fn, 'wb') as dump_file:
                dataserializer.dump(array, dump_file,
                                    compress=dataserializer.COMPRESS_FLAG_LZ4)
            with open(fn, 'rb') as dump_file:
                assert_array_equal(array, dataserializer.load(dump_file))
        finally:
            if os.path.exists(fn):
                os.unlink(fn)

    if sps:
        mat = sparse.SparseMatrix(sps.random(100, 100, 0.1, format='csr'))
        des_mat = dataserializer.loads(dataserializer.dumps(mat))
        self.assertTrue((mat.spmatrix != des_mat.spmatrix).nnz == 0)

        des_mat = dataserializer.loads(dataserializer.dumps(
            mat, compress=dataserializer.COMPRESS_FLAG_LZ4))
        self.assertTrue((mat.spmatrix != des_mat.spmatrix).nnz == 0)
def _read_data_batch(reader):
    # drain the reader in io_size chunks; io_size is expected to be
    # defined in the enclosing scope
    bio = BytesIO()
    with reader:
        while True:
            buf = reader.read(io_size)
            if buf:
                bio.write(buf)
            else:
                break
    return dataserializer.loads(bio.getvalue())
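# A minimal usage sketch (hedged): _read_data_batch accepts any file-like
# context manager whose read() yields a dataserializer-encoded payload.
# `open_reader` below is a hypothetical source of such readers, named only
# for illustration:
#
#     result = _read_data_batch(open_reader(session_id, chunk_key))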
def testArrowBufferIO(self):
    if not np:
        return
    import pyarrow
    from numpy.testing import assert_array_equal

    for compress in [dataserializer.CompressType.LZ4,
                     dataserializer.CompressType.GZIP]:
        if compress not in dataserializer.get_supported_compressions():
            continue

        data = np.random.random((1000, 100))
        serialized = pyarrow.serialize(data).to_buffer()

        # test complete read
        reader = ArrowBufferIO(
            pyarrow.py_buffer(serialized), 'r', compress_out=compress)
        assert_array_equal(data, dataserializer.loads(reader.read()))

        # test partial read
        reader = ArrowBufferIO(
            pyarrow.py_buffer(serialized), 'r', compress_out=compress)
        block = reader.read(128)
        data_left = reader.read()
        assert_array_equal(data, dataserializer.loads(block + data_left))

        # test read by chunks
        bio = BytesIO()
        reader = ArrowBufferIO(
            pyarrow.py_buffer(serialized), 'r', compress_out=compress)
        while True:
            block = reader.read(128)
            if not block:
                break
            bio.write(block)

        compressed = bio.getvalue()
        assert_array_equal(data, dataserializer.loads(compressed))

        # test write by chunks
        data_sink = bytearray(len(serialized))
        compressed_mv = memoryview(compressed)
        writer = ArrowBufferIO(pyarrow.py_buffer(data_sink), 'w')
        pos = 0
        while pos < len(compressed):
            endpos = min(pos + 128, len(compressed))
            writer.write(compressed_mv[pos:endpos])
            pos = endpos

        assert_array_equal(data, pyarrow.deserialize(data_sink))
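# As the test shows, ArrowBufferIO folds both directions of the older
# CompressBufferReader/DecompressBufferWriter pair into one class selected by
# mode: 'r' compresses an arrow buffer on the fly using compress_out, while
# 'w' decompresses incoming compressed bytes into the sink buffer.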
def create_data_writer(self, session_id, chunk_key, data_size, sender_ref,
                       ensure_cached=True, timeout=0, callback=None):
    from mars.compat import BytesIO

    query_key = (session_id, chunk_key)
    if query_key in self._data_metas and \
            self._data_metas[query_key].status in (ReceiveStatus.RECEIVED,
                                                   ReceiveStatus.RECEIVING):
        # data already received or being received: reply with the current
        # status so the sender can skip the transfer
        self.tell_promise(callback, self.address, self._data_metas[query_key].status)
        return

    self._data_metas[query_key] = ReceiverDataMeta(
        chunk_size=data_size, status=ReceiveStatus.RECEIVING)
    self._data_writers[query_key] = BytesIO()
    self.tell_promise(callback, self.address, None)
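# A hedged sketch of the caller side, assuming the promise-style actor API
# implied by tell_promise above; `receiver_ref` is a hypothetical actor
# reference used only for illustration. The callback receives the receiver's
# address and either an existing ReceiveStatus or None for a fresh writer:
#
#     def _on_writer_created(address, status):
#         if status in (ReceiveStatus.RECEIVED, ReceiveStatus.RECEIVING):
#             return  # data already on the receiver, skip the transfer
#         # otherwise start streaming data parts to the receiver
#
#     receiver_ref.create_data_writer(
#         session_id, chunk_key, data_size, sender_ref,
#         callback=_on_writer_created)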
def testDataSerialize(self):
    try:
        import numpy as np
        from numpy.testing import assert_array_equal
    except ImportError:
        np = None
    try:
        import scipy.sparse as sps
    except ImportError:
        sps = None

    if np:
        array = np.random.rand(1000, 100)
        assert_array_equal(array, dataserializer.loads(dataserializer.dumps(array)))
        assert_array_equal(array, dataserializer.loads(dataserializer.dumps(
            array, compress=dataserializer.CompressType.LZ4)))
        if not six.PY2:
            assert_array_equal(array, dataserializer.loads(dataserializer.dumps(
                array, compress=dataserializer.CompressType.GZIP)))

        array = np.random.rand(1000, 100)
        assert_array_equal(array, dataserializer.load(BytesIO(dataserializer.dumps(array))))
        assert_array_equal(array, dataserializer.load(BytesIO(dataserializer.dumps(
            array, compress=dataserializer.CompressType.LZ4))))
        if not six.PY2:
            assert_array_equal(array, dataserializer.load(BytesIO(dataserializer.dumps(
                array, compress=dataserializer.CompressType.GZIP))))

        array = np.random.rand(1000, 100).T  # test non c-contiguous
        assert_array_equal(array, dataserializer.loads(dataserializer.dumps(array)))
        assert_array_equal(array, dataserializer.loads(dataserializer.dumps(
            array, compress=dataserializer.CompressType.LZ4)))
        if not six.PY2:
            assert_array_equal(array, dataserializer.loads(dataserializer.dumps(
                array, compress=dataserializer.CompressType.GZIP)))

        array = np.float64(0.2345)
        assert_array_equal(array, dataserializer.loads(dataserializer.dumps(array)))
        assert_array_equal(array, dataserializer.loads(dataserializer.dumps(
            array, compress=dataserializer.CompressType.LZ4)))
        if not six.PY2:
            assert_array_equal(array, dataserializer.loads(dataserializer.dumps(
                array, compress=dataserializer.CompressType.GZIP)))

        # test structured arrays
        rec_dtype = np.dtype([('a', 'int64'), ('b', 'double'), ('c', '<U8')])
        array = np.ones((100,), dtype=rec_dtype)
        array_loaded = dataserializer.loads(dataserializer.dumps(array))
        self.assertEqual(array.dtype, array_loaded.dtype)
        assert_array_equal(array, array_loaded)

        fn = os.path.join(tempfile.gettempdir(),
                          'test_dump_file_%d.bin' % id(self))
        try:
            array = np.random.rand(1000, 100).T  # test non c-contiguous
            with open(fn, 'wb') as dump_file:
                dataserializer.dump(array, dump_file)
            with open(fn, 'rb') as dump_file:
                assert_array_equal(array, dataserializer.load(dump_file))

            with open(fn, 'wb') as dump_file:
                dataserializer.dump(array, dump_file,
                                    compress=dataserializer.CompressType.LZ4)
            with open(fn, 'rb') as dump_file:
                assert_array_equal(array, dataserializer.load(dump_file))

            if not six.PY2:
                with open(fn, 'wb') as dump_file:
                    dataserializer.dump(array, dump_file,
                                        compress=dataserializer.CompressType.GZIP)
                with open(fn, 'rb') as dump_file:
                    assert_array_equal(array, dataserializer.load(dump_file))
        finally:
            if os.path.exists(fn):
                os.unlink(fn)

    if sps:
        mat = sparse.SparseMatrix(sps.random(100, 100, 0.1, format='csr'))
        des_mat = dataserializer.loads(dataserializer.dumps(mat))
        self.assertTrue((mat.spmatrix != des_mat.spmatrix).nnz == 0)

        des_mat = dataserializer.loads(dataserializer.dumps(
            mat, compress=dataserializer.CompressType.LZ4))
        self.assertTrue((mat.spmatrix != des_mat.spmatrix).nnz == 0)

        if not six.PY2:
            des_mat = dataserializer.loads(dataserializer.dumps(
                mat, compress=dataserializer.CompressType.GZIP))
            self.assertTrue((mat.spmatrix != des_mat.spmatrix).nnz == 0)

        vector = sparse.SparseVector(sps.csr_matrix(np.random.rand(2)), shape=(2,))
        des_vector = dataserializer.loads(dataserializer.dumps(vector))
        self.assertTrue((vector.spmatrix != des_vector.spmatrix).nnz == 0)

        des_vector = dataserializer.loads(dataserializer.dumps(
            vector, compress=dataserializer.CompressType.LZ4))
        self.assertTrue((vector.spmatrix != des_vector.spmatrix).nnz == 0)

        if not six.PY2:
            des_vector = dataserializer.loads(dataserializer.dumps(
                vector, compress=dataserializer.CompressType.GZIP))
            self.assertTrue((vector.spmatrix != des_vector.spmatrix).nnz == 0)
def testFileBufferIO(self):
    if not np:
        return
    from numpy.testing import assert_array_equal

    compressions = [dataserializer.CompressType.NONE] + \
        list(dataserializer.get_supported_compressions())

    for c1 in compressions:
        for c2 in compressions:
            data = np.random.random((1000, 100))

            # test complete read
            compressed_read_file = BytesIO(
                dataserializer.dumps(data, compress=c1))
            reader = FileBufferIO(compressed_read_file, 'r', compress_out=c2)
            compressed = reader.read()
            self.assertEqual(c2, dataserializer.read_file_header(compressed).compress)
            assert_array_equal(data, dataserializer.loads(compressed))

            # test partial read
            compressed_read_file = BytesIO(
                dataserializer.dumps(data, compress=c1))
            reader = FileBufferIO(compressed_read_file, 'r', compress_out=c2)
            block = reader.read(128)
            data_left = reader.read()
            assert_array_equal(data, dataserializer.loads(block + data_left))

            # test read by chunks
            bio = BytesIO()
            compressed_read_file = BytesIO(
                dataserializer.dumps(data, compress=c1))
            reader = FileBufferIO(compressed_read_file, 'r', compress_out=c2)
            while True:
                block = reader.read(128)
                if not block:
                    break
                bio.write(block)

            compressed = bio.getvalue()
            self.assertEqual(c2, dataserializer.read_file_header(compressed).compress)
            assert_array_equal(data, dataserializer.loads(compressed))

            # test write by chunks
            compressed_read_file.seek(0)
            compressed_write_file = BytesIO()
            writer = FileBufferIO(compressed_write_file, 'w', compress_in=c2,
                                  managed=False)
            while True:
                block = compressed_read_file.read(128)
                if not block:
                    break
                writer.write(block)
            writer.close()

            compressed = compressed_write_file.getvalue()
            self.assertEqual(c2, dataserializer.read_file_header(compressed).compress)
            assert_array_equal(data, dataserializer.loads(compressed))
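# A minimal sketch built only from the API exercised above (assuming LZ4 and
# GZIP are both in get_supported_compressions()): recompress an LZ4 payload
# into GZIP by reading it back through a FileBufferIO with a different
# compress_out.

def _recompress_lz4_to_gzip_sketch():
    import numpy as np
    from io import BytesIO

    data = np.random.random((10, 10))
    lz4_file = BytesIO(dataserializer.dumps(
        data, compress=dataserializer.CompressType.LZ4))
    reader = FileBufferIO(
        lz4_file, 'r', compress_out=dataserializer.CompressType.GZIP)
    gzip_bytes = reader.read()

    # the header reflects the output compression, and the payload still
    # round-trips to the original array
    assert dataserializer.read_file_header(gzip_bytes).compress == \
        dataserializer.CompressType.GZIP
    assert np.array_equal(data, dataserializer.loads(gzip_bytes))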