def testDataSerialize(self):
    """Round-trip a variety of objects through ``dataserializer``.

    Covers dense numpy arrays (contiguous, transposed, scalar, structured),
    file-based ``dump``/``load``, sparse matrices and vectors (when scipy is
    available), wrapped groupby objects, categoricals and interval indexes.
    Array and sparse checks are repeated without compression, with LZ4 and
    with GZIP.
    """
    # An empty dict stands for "call without a ``compress`` argument".
    compress_options = (
        {},
        {'compress': dataserializer.CompressType.LZ4},
        {'compress': dataserializer.CompressType.GZIP},
    )

    def round_trip(obj, **dump_kwargs):
        # Serialize to bytes and immediately deserialize again.
        return dataserializer.loads(dataserializer.dumps(obj, **dump_kwargs))

    def check_groupby(grouped, touch_indices=False):
        # Optionally read ``indices`` before dumping; the value is discarded.
        if touch_indices:
            getattr(grouped, 'indices')
        restored = round_trip(grouped)
        assert_groupby_equal(grouped, restored.groupby_obj)

    # bytes -> loads
    array = np.random.rand(1000, 100)
    for dump_kwargs in compress_options:
        assert_array_equal(array, round_trip(array, **dump_kwargs))

    # bytes wrapped in a file-like object -> load
    array = np.random.rand(1000, 100)
    for dump_kwargs in compress_options:
        payload = dataserializer.dumps(array, **dump_kwargs)
        assert_array_equal(array, dataserializer.load(BytesIO(payload)))

    array = np.random.rand(1000, 100).T  # test non c-contiguous
    for dump_kwargs in compress_options:
        assert_array_equal(array, round_trip(array, **dump_kwargs))

    array = np.float64(0.2345)
    for dump_kwargs in compress_options:
        assert_array_equal(array, round_trip(array, **dump_kwargs))

    # test structured arrays.
    rec_dtype = np.dtype([('a', 'int64'), ('b', 'double'), ('c', '<U8')])
    array = np.ones((100,), dtype=rec_dtype)
    array_loaded = round_trip(array)
    self.assertEqual(array.dtype, array_loaded.dtype)
    assert_array_equal(array, array_loaded)

    # dump/load through a real file on disk
    fn = os.path.join(tempfile.gettempdir(),
                      'test_dump_file_%d.bin' % id(self))
    try:
        array = np.random.rand(1000, 100).T  # test non c-contiguous
        for dump_kwargs in compress_options:
            with open(fn, 'wb') as dump_file:
                dataserializer.dump(array, dump_file, **dump_kwargs)
            with open(fn, 'rb') as dump_file:
                assert_array_equal(array, dataserializer.load(dump_file))
    finally:
        if os.path.exists(fn):
            os.unlink(fn)

    # test sparse
    if sps:
        mat = sparse.SparseMatrix(sps.random(100, 100, 0.1, format='csr'))
        for dump_kwargs in compress_options:
            des_mat = round_trip(mat, **dump_kwargs)
            self.assertTrue((mat.spmatrix != des_mat.spmatrix).nnz == 0)

        vector = sparse.SparseVector(
            sps.csr_matrix(np.random.rand(2)), shape=(2,))
        for dump_kwargs in compress_options:
            des_vector = round_trip(vector, **dump_kwargs)
            self.assertTrue(
                (vector.spmatrix != des_vector.spmatrix).nnz == 0)

    # test groupby
    df1 = pd.DataFrame({
        'a': [3, 4, 5, 3, 5, 4, 1, 2, 3],
        'b': [1, 3, 4, 5, 6, 5, 4, 4, 4],
        'c': list('aabaaddce'),
    })
    check_groupby(wrapped_groupby(df1, 'b'))
    check_groupby(wrapped_groupby(df1, 'b').c)
    check_groupby(wrapped_groupby(df1, 'b'), touch_indices=True)
    check_groupby(wrapped_groupby(df1.b, lambda x: x % 2))
    check_groupby(wrapped_groupby(df1.b, lambda x: x % 2),
                  touch_indices=True)

    # test categorical
    s = np.random.RandomState(0).random(10)
    cat = pd.cut(s, [0.3, 0.5, 0.8])
    self.assertIsInstance(cat, pd.Categorical)
    des_cat = round_trip(cat)
    self.assertEqual(len(cat), len(des_cat))
    for c, dc in zip(cat, des_cat):
        np.testing.assert_equal(c, dc)

    # test IntervalIndex
    s = pd.interval_range(10, 100, 3)
    dest_s = round_trip(s)
    pd.testing.assert_index_equal(s, dest_s)
def testDataSerialize(self):
    """Round-trip a variety of objects through ``dataserializer``.

    Dense arrays are checked for every combination of serial type and
    compress type (including ``None`` for each). The remaining sections
    cover a non-serializable object, structured arrays, file-based
    ``dump``/``load``, sparse containers, wrapped groupby objects,
    categoricals, interval indexes, complex scalars, a picklable class,
    negatively-strided arrays, Arrow-backed frames and sparse-dtype pandas
    objects.
    """
    # An empty dict stands for "call without a ``compress`` argument".
    compress_options = (
        {},
        {'compress': dataserializer.CompressType.LZ4},
        {'compress': dataserializer.CompressType.GZIP},
    )

    def round_trip(obj, **dump_kwargs):
        # Serialize to bytes and immediately deserialize again.
        return dataserializer.loads(dataserializer.dumps(obj, **dump_kwargs))

    def check_groupby(grouped, touch_indices=False):
        # Optionally read ``indices`` before dumping; the value is discarded.
        if touch_indices:
            getattr(grouped, 'indices')
        restored = round_trip(grouped)
        assert_groupby_equal(grouped, restored.groupby_obj)

    serial_types = (None,) + tuple(
        dataserializer.SerialType.__members__.values())
    compress_types = (None,) + tuple(
        dataserializer.CompressType.__members__.values())
    for type_, compress in itertools.product(serial_types, compress_types):
        options = dict(serial_type=type_, compress=compress)

        array = np.random.rand(1000, 100)
        assert_array_equal(array, round_trip(array, **options))

        array = np.random.rand(1000, 100)
        payload = dataserializer.dumps(array, **options)
        assert_array_equal(array, dataserializer.load(BytesIO(payload)))

        array = np.random.rand(1000, 100).T  # test non c-contiguous
        assert_array_equal(array, round_trip(array, **options))

        array = np.float64(0.2345)
        assert_array_equal(array, round_trip(array, **options))

    # test non-serializable object
    if pyarrow:
        non_serial = type('non_serial', (object,), {'nbytes': 10})
        with self.assertRaises(SerializationFailed):
            dataserializer.dumps(non_serial())

    # test structured arrays.
    rec_dtype = np.dtype([('a', 'int64'), ('b', 'double'), ('c', '<U8')])
    array = np.ones((100,), dtype=rec_dtype)
    array_loaded = round_trip(array)
    self.assertEqual(array.dtype, array_loaded.dtype)
    assert_array_equal(array, array_loaded)

    # dump/load through a real file on disk
    fn = os.path.join(tempfile.gettempdir(),
                      f'test_dump_file_{id(self)}.bin')
    try:
        array = np.random.rand(1000, 100).T  # test non c-contiguous
        for dump_kwargs in compress_options:
            with open(fn, 'wb') as dump_file:
                dataserializer.dump(array, dump_file, **dump_kwargs)
            with open(fn, 'rb') as dump_file:
                assert_array_equal(array, dataserializer.load(dump_file))
    finally:
        if os.path.exists(fn):
            os.unlink(fn)

    # test sparse
    if sps:
        mat = sparse.SparseMatrix(sps.random(100, 100, 0.1, format='csr'))
        for dump_kwargs in compress_options:
            des_mat = round_trip(mat, **dump_kwargs)
            self.assertTrue((mat.spmatrix != des_mat.spmatrix).nnz == 0)

        vector = sparse.SparseVector(
            sps.csr_matrix(np.random.rand(2)), shape=(2,))
        for dump_kwargs in compress_options:
            des_vector = round_trip(vector, **dump_kwargs)
            self.assertTrue(
                (vector.spmatrix != des_vector.spmatrix).nnz == 0)

    # test groupby
    df1 = pd.DataFrame({
        'a': [3, 4, 5, 3, 5, 4, 1, 2, 3],
        'b': [1, 3, 4, 5, 6, 5, 4, 4, 4],
        'c': list('aabaaddce'),
    })
    check_groupby(wrapped_groupby(df1, 'b'))
    check_groupby(wrapped_groupby(df1, 'b').c)
    check_groupby(wrapped_groupby(df1, 'b'), touch_indices=True)
    check_groupby(wrapped_groupby(df1.b, lambda x: x % 2))
    check_groupby(wrapped_groupby(df1.b, lambda x: x % 2),
                  touch_indices=True)

    # test categorical
    s = np.random.RandomState(0).random(10)
    cat = pd.cut(s, [0.3, 0.5, 0.8])
    self.assertIsInstance(cat, pd.Categorical)
    des_cat = round_trip(cat)
    self.assertEqual(len(cat), len(des_cat))
    for c, dc in zip(cat, des_cat):
        np.testing.assert_equal(c, dc)

    # test IntervalIndex
    s = pd.interval_range(10, 100, 3)
    dest_s = round_trip(s)
    pd.testing.assert_index_equal(s, dest_s)

    # test complex
    for s in (complex(10 + 5j), np.complex64(10 + 5j)):
        dest_s = round_trip(s)
        self.assertIs(type(s), type(dest_s))
        self.assertEqual(s, dest_s)

    # test pickle
    d = ClassToPickle(dict(a=1, b='uvw'))
    dest_d = round_trip(d)
    self.assertIs(type(d), type(dest_d))
    self.assertEqual(d.a, dest_d.a)

    # test ndarray with negative strides
    arr = np.zeros((5, 6, 3))
    arr2 = arr[:, :, ::-1]
    dest_arr2 = round_trip(arr2)
    np.testing.assert_array_equal(arr2, dest_arr2)

    # test ArrowArray
    df = pd.DataFrame({
        'a': ['s1', 's2', 's3'],
        'b': [['s1', 's2'], ['s3'], ['s4', 's5']],
    })
    df['a'] = df['a'].astype(ArrowStringDtype())
    df['b'] = df['b'].astype(ArrowListDtype(str))
    dest_df = round_trip(df)
    self.assertIs(type(df), type(dest_df))
    pd.testing.assert_frame_equal(df, dest_df)

    # test DataFrame with SparseDtype
    sparse_dtype = pd.SparseDtype(np.dtype(np.float64), np.nan)
    s = pd.Series([1, 2, np.nan, np.nan, 3]).astype(sparse_dtype)
    dest_s = round_trip(s)
    pd.testing.assert_series_equal(s, dest_s)
    df = pd.DataFrame({'s': s})
    dest_df = round_trip(df)
    pd.testing.assert_frame_equal(df, dest_df)
def testDataSerialize(self):
    """Round-trip dense and sparse data through ``dataserializer``.

    numpy and scipy are imported lazily; the matching sections are skipped
    when the import fails. Each check runs without compression and with
    LZ4; GZIP is added only when ``six.PY2`` is false.
    """
    try:
        import numpy as np
        from numpy.testing import assert_array_equal
    except ImportError:
        np = None
    try:
        import scipy.sparse as sps
    except ImportError:
        sps = None

    # An empty dict stands for "call without a ``compress`` argument".
    compress_options = [{}, {'compress': dataserializer.CompressType.LZ4}]
    if not six.PY2:
        compress_options.append(
            {'compress': dataserializer.CompressType.GZIP})

    def round_trip(obj, **dump_kwargs):
        # Serialize to bytes and immediately deserialize again.
        return dataserializer.loads(dataserializer.dumps(obj, **dump_kwargs))

    if np:
        # bytes -> loads
        array = np.random.rand(1000, 100)
        for dump_kwargs in compress_options:
            assert_array_equal(array, round_trip(array, **dump_kwargs))

        # bytes wrapped in a file-like object -> load
        array = np.random.rand(1000, 100)
        for dump_kwargs in compress_options:
            payload = dataserializer.dumps(array, **dump_kwargs)
            assert_array_equal(array, dataserializer.load(BytesIO(payload)))

        array = np.random.rand(1000, 100).T  # test non c-contiguous
        for dump_kwargs in compress_options:
            assert_array_equal(array, round_trip(array, **dump_kwargs))

        array = np.float64(0.2345)
        for dump_kwargs in compress_options:
            assert_array_equal(array, round_trip(array, **dump_kwargs))

        # test structured arrays.
        rec_dtype = np.dtype([('a', 'int64'), ('b', 'double'), ('c', '<U8')])
        array = np.ones((100,), dtype=rec_dtype)
        array_loaded = round_trip(array)
        self.assertEqual(array.dtype, array_loaded.dtype)
        assert_array_equal(array, array_loaded)

        # dump/load through a real file on disk
        fn = os.path.join(tempfile.gettempdir(),
                          'test_dump_file_%d.bin' % id(self))
        try:
            array = np.random.rand(1000, 100).T  # test non c-contiguous
            for dump_kwargs in compress_options:
                with open(fn, 'wb') as dump_file:
                    dataserializer.dump(array, dump_file, **dump_kwargs)
                with open(fn, 'rb') as dump_file:
                    assert_array_equal(array, dataserializer.load(dump_file))
        finally:
            if os.path.exists(fn):
                os.unlink(fn)

    if sps:
        mat = sparse.SparseMatrix(sps.random(100, 100, 0.1, format='csr'))
        for dump_kwargs in compress_options:
            des_mat = round_trip(mat, **dump_kwargs)
            self.assertTrue((mat.spmatrix != des_mat.spmatrix).nnz == 0)

        vector = sparse.SparseVector(
            sps.csr_matrix(np.random.rand(2)), shape=(2,))
        for dump_kwargs in compress_options:
            des_vector = round_trip(vector, **dump_kwargs)
            self.assertTrue(
                (vector.spmatrix != des_vector.spmatrix).nnz == 0)
def testDataSerialize(self):
    """Round-trip dense and sparse data through ``dataserializer``.

    numpy and scipy are imported lazily; the matching sections are skipped
    when the import fails. Each check runs once without a ``compress``
    argument and once with ``COMPRESS_FLAG_LZ4``.
    """
    try:
        import numpy as np
        from numpy.testing import assert_array_equal
    except ImportError:
        np = None
    try:
        import scipy.sparse as sps
    except ImportError:
        sps = None

    # An empty dict stands for "call without a ``compress`` argument".
    compress_options = (
        {},
        {'compress': dataserializer.COMPRESS_FLAG_LZ4},
    )

    def round_trip(obj, **dump_kwargs):
        # Serialize to bytes and immediately deserialize again.
        return dataserializer.loads(dataserializer.dumps(obj, **dump_kwargs))

    if np:
        # bytes -> loads
        array = np.random.rand(1000, 100)
        for dump_kwargs in compress_options:
            assert_array_equal(array, round_trip(array, **dump_kwargs))

        # bytes wrapped in a file-like object -> load
        array = np.random.rand(1000, 100)
        for dump_kwargs in compress_options:
            payload = dataserializer.dumps(array, **dump_kwargs)
            assert_array_equal(array, dataserializer.load(BytesIO(payload)))

        array = np.random.rand(1000, 100).T  # test non c-contiguous
        for dump_kwargs in compress_options:
            assert_array_equal(array, round_trip(array, **dump_kwargs))

        array = np.float64(0.2345)
        for dump_kwargs in compress_options:
            assert_array_equal(array, round_trip(array, **dump_kwargs))

        # dump/load through a real file on disk
        fn = os.path.join(tempfile.gettempdir(),
                          'test_dump_file_%d.bin' % id(self))
        try:
            array = np.random.rand(1000, 100).T  # test non c-contiguous
            for dump_kwargs in compress_options:
                with open(fn, 'wb') as dump_file:
                    dataserializer.dump(array, dump_file, **dump_kwargs)
                with open(fn, 'rb') as dump_file:
                    assert_array_equal(array, dataserializer.load(dump_file))
        finally:
            if os.path.exists(fn):
                os.unlink(fn)

    if sps:
        mat = sparse.SparseMatrix(sps.random(100, 100, 0.1, format='csr'))
        for dump_kwargs in compress_options:
            des_mat = round_trip(mat, **dump_kwargs)
            self.assertTrue((mat.spmatrix != des_mat.spmatrix).nnz == 0)

        vector = sparse.SparseVector(
            sps.csr_matrix(np.random.rand(2)), shape=(2,))
        for dump_kwargs in compress_options:
            des_vector = round_trip(vector, **dump_kwargs)
            self.assertTrue(
                (vector.spmatrix != des_vector.spmatrix).nnz == 0)