def testSparseMinimum(self):
    """Element-wise minimum of two sparse matrices must match scipy's."""
    lhs = SparseNDArray(self.s1)
    rhs = SparseNDArray(self.s2)

    actual = lhs.minimum(rhs).toarray()
    expected = self.s1.minimum(self.s2).toarray()
    np.testing.assert_array_equal(actual, expected)
def testSparseSubtract(self):
    """Check ``-`` on sparse matrices and sparse vectors.

    Sparse, dense and scalar right/left operands are covered.
    """
    mat1 = SparseNDArray(self.s1)
    mat2 = SparseNDArray(self.s2)

    self.assertArrayEqual(mat1 - mat2, self.s1 - self.s2)
    self.assertArrayEqual(mat1 - self.d1, self.s1 - self.d1)
    self.assertArrayEqual(self.d1 - mat1, self.d1 - self.s1)

    # Scalar operand: the reference applies the scalar to the stored data
    # only and reuses the index structure of the fixture.
    expected = sps.csr_matrix(
        (self.s1.data - 1, self.s1.indices, self.s1.indptr),
        shape=self.s1.shape)
    self.assertArrayEqual(mat1 - 1, expected)
    expected = sps.csr_matrix(
        (1 - self.s1.data, self.s1.indices, self.s1.indptr),
        shape=self.s1.shape)
    self.assertArrayEqual(1 - mat1, expected)

    # Same operand combinations on a sparse vector of shape (3,).
    vec = SparseNDArray(self.v1, shape=(3,))
    self.assertArrayEqual(vec - vec, self.v1_data - self.v1_data)
    self.assertArrayEqual(vec - self.d1, self.v1_data - self.d1)
    self.assertArrayEqual(self.d1 - vec, self.d1 - self.v1_data)

    expected = sps.csr_matrix(
        (self.v1.data - 1, self.v1.indices, self.v1.indptr),
        shape=self.v1.shape)
    self.assertArrayEqual(vec - 1, expected.toarray().reshape(3))
    expected = sps.csr_matrix(
        (1 - self.v1.data, self.v1.indices, self.v1.indptr),
        shape=self.v1.shape)
    self.assertArrayEqual(1 - vec, expected.toarray().reshape(3))
def testSparseDot(self):
    """Check ``dot`` / ``tensordot`` for matrix and vector operands."""
    mat1 = SparseNDArray(self.s1)
    mat2 = SparseNDArray(self.s2)
    vec1 = SparseNDArray(self.v1, shape=(3,))
    vec2 = SparseNDArray(self.v2, shape=(2,))

    # matrix . matrix, with sparse and dense operands
    self.assertArrayEqual(mls.dot(mat1, mat2.T), self.s1.dot(self.s2.T))
    self.assertArrayEqual(mat1.dot(self.d1), self.s1.dot(self.d1))
    self.assertArrayEqual(self.d1.dot(mat1.T),
                          self.d1.dot(self.s1.T.toarray()))

    # tensordot over explicitly chosen axes
    self.assertArrayEqual(mls.tensordot(mat1, mat2.T, axes=(1, 0)),
                          self.s1.dot(self.s2.T))
    self.assertArrayEqual(mls.tensordot(mat1, self.d1, axes=(1, -1)),
                          self.s1.dot(self.d1))
    self.assertArrayEqual(mls.tensordot(self.d1, mat1.T, axes=(0, 0)),
                          self.d1.dot(self.s1.T.toarray()))

    # matrix . vector and vector . matrix
    self.assertArrayEqual(mls.dot(mat1, vec1), self.s1.dot(self.v1_data))
    self.assertArrayEqual(mls.dot(mat2, vec1), self.s2.dot(self.v1_data))
    self.assertArrayEqual(mls.dot(vec2, mat1), self.v2_data.dot(self.s1.A))
    self.assertArrayEqual(mls.dot(vec2, mat2), self.v2_data.dot(self.s2.A))

    # vector . vector
    self.assertArrayEqual(mls.dot(vec1, vec1),
                          self.v1_data.dot(self.v1_data))
    self.assertArrayEqual(mls.dot(vec2, vec2),
                          self.v2_data.dot(self.v2_data))

    # same products requested with sparse=False
    self.assertArrayEqual(mls.dot(vec2, mat1, sparse=False),
                          self.v2_data.dot(self.s1.A))
    self.assertArrayEqual(mls.dot(vec1, vec1, sparse=False),
                          self.v1_data.dot(self.v1_data))
def testSparseAdd(self):
    """Check ``+`` on sparse matrices and sparse vectors.

    Sparse, dense and scalar right/left operands are covered.
    """
    mat1 = SparseNDArray(self.s1)
    mat2 = SparseNDArray(self.s2)

    self.assertArrayEqual(mat1 + mat2, self.s1 + self.s2)
    self.assertArrayEqual(mat1 + self.d1, self.s1 + self.d1)
    self.assertArrayEqual(self.d1 + mat1, self.d1 + self.s1)

    # Scalar operand: the reference applies the scalar to the stored data
    # only and reuses the index structure of the fixture.
    expected = sps.csr_matrix(
        (self.s1.data + 1, self.s1.indices, self.s1.indptr),
        shape=self.s1.shape)
    self.assertArrayEqual(mat1 + 1, expected)
    expected = sps.csr_matrix(
        (1 + self.s1.data, self.s1.indices, self.s1.indptr),
        shape=self.s1.shape)
    self.assertArrayEqual(1 + mat1, expected)

    # Same operand combinations on a sparse vector of shape (3,).
    vec = SparseNDArray(self.v1, shape=(3,))
    self.assertArrayEqual(vec + vec, self.v1_data + self.v1_data)
    self.assertArrayEqual(vec + self.d1, self.v1_data + self.d1)
    self.assertArrayEqual(self.d1 + vec, self.d1 + self.v1_data)

    expected = sps.csr_matrix(
        (self.v1.data + 1, self.v1.indices, self.v1.indptr),
        shape=self.v1.shape)
    self.assertArrayEqual(vec + 1, expected.toarray().reshape(3))
    expected = sps.csr_matrix(
        (1 + self.v1.data, self.v1.indices, self.v1.indptr),
        shape=self.v1.shape)
    self.assertArrayEqual(1 + vec, expected.toarray().reshape(3))
def testSparseBin(self):
    """Compare binary functions in ``mls`` with their NumPy namesakes."""
    mat1 = SparseNDArray(self.s1)
    mat2 = SparseNDArray(self.s2)
    vec = SparseNDArray(self.v1, shape=(3,))

    binary_ops = ('fmod', 'logaddexp', 'logaddexp2', 'equal', 'not_equal',
                  'less', 'less_equal', 'greater', 'greater_equal',
                  'hypot', 'arctan2')
    for name in binary_ops:
        sparse_fn = getattr(mls, name)
        numpy_fn = getattr(np, name)

        self.assertArrayEqual(
            sparse_fn(mat1, mat2),
            numpy_fn(self.s1.toarray(), self.s2.toarray()))
        self.assertArrayEqual(
            sparse_fn(mat1, self.d1),
            numpy_fn(self.s1.toarray(), self.d1))
        self.assertArrayEqual(
            sparse_fn(self.d1, mat1),
            numpy_fn(self.d1, self.s1.toarray()))

        # Scalar operand: the reference applies the function to the
        # stored data only and keeps the fixture's index structure.
        expected = sps.csr_matrix(
            (numpy_fn(self.s1.data, 2), self.s1.indices, self.s1.indptr),
            shape=self.s1.shape)
        self.assertArrayEqual(sparse_fn(mat1, 2), expected)
        expected = sps.csr_matrix(
            (numpy_fn(2, self.s1.data), self.s1.indices, self.s1.indptr),
            shape=self.s1.shape)
        self.assertArrayEqual(sparse_fn(2, mat1), expected)

        # test sparse vector
        self.assertArrayEqual(sparse_fn(vec, vec),
                              numpy_fn(self.v1_data, self.v1_data))
        self.assertArrayEqual(sparse_fn(vec, self.d1),
                              numpy_fn(self.v1_data, self.d1))
        self.assertArrayEqual(sparse_fn(self.d1, vec),
                              numpy_fn(self.d1, self.v1_data))
        self.assertArrayEqual(sparse_fn(vec, 2),
                              numpy_fn(self.v1_data, 2))
        self.assertArrayEqual(sparse_fn(2, vec),
                              numpy_fn(2, self.v1_data))
def testSparseAdd(self):
    """Check ``+`` on sparse matrices with sparse, dense and scalar operands."""
    mat1 = SparseNDArray(self.s1)
    mat2 = SparseNDArray(self.s2)

    self.assertArrayEqual(mat1 + mat2, self.s1 + self.s2)
    self.assertArrayEqual(mat1 + self.d1, self.s1 + self.d1)
    self.assertArrayEqual(self.d1 + mat1, self.d1 + self.s1)

    # Scalar operand is compared against the fully densified fixture.
    self.assertArrayEqual(mat1 + 1, self.s1.toarray() + 1)
    self.assertArrayEqual(1 + mat1, self.s1.toarray() + 1)
def testSparseSubtract(self):
    """Check ``-`` on sparse matrices with sparse, dense and scalar operands."""
    mat1 = SparseNDArray(self.s1)
    mat2 = SparseNDArray(self.s2)

    self.assertArrayEqual(mat1 - mat2, self.s1 - self.s2)
    self.assertArrayEqual(mat1 - self.d1, self.s1 - self.d1)
    self.assertArrayEqual(self.d1 - mat1, self.d1 - self.s1)

    # Scalar operand is compared against the fully densified fixture.
    self.assertArrayEqual(mat1 - 1, self.s1.toarray() - 1)
    self.assertArrayEqual(1 - mat1, 1 - self.s1.toarray())
def testSparseMultiply(self):
    """Check element-wise ``*`` against scipy's ``multiply``."""
    mat1 = SparseNDArray(self.s1)
    mat2 = SparseNDArray(self.s2)

    self.assertArrayEqual(mat1 * mat2, self.s1.multiply(self.s2))
    self.assertArrayEqual(mat1 * self.d1, self.s1.multiply(self.d1))
    self.assertArrayEqual(self.d1 * mat1, self.s1.multiply(self.d1))

    # Scalar on either side is expected to give the same result.
    self.assertArrayEqual(mat1 * 2, self.s1 * 2)
    self.assertArrayEqual(2 * mat1, self.s1 * 2)
def test_sparse_sum():
    """Check ``sum`` on a sparse matrix and a sparse vector.

    Expected values are computed from the reference fixtures (``s1_data``,
    ``v1_data``) rather than from the object under test, so the assertions
    cannot pass trivially.
    """
    s1 = SparseNDArray(s1_data)
    v = SparseNDArray(v1, shape=(3, ))

    assert s1.sum() == s1_data.sum()
    # The reference axis sums are flattened to 1-d before comparison.
    np.testing.assert_array_equal(
        s1.sum(axis=1), np.asarray(s1_data.sum(axis=1)).reshape(2))
    np.testing.assert_array_equal(
        s1.sum(axis=0), np.asarray(s1_data.sum(axis=0)).reshape(3))
    np.testing.assert_array_equal(v.sum(), np.asarray(v1_data.sum()))
def testSparseFloorDivide(self):
    """Check ``//`` against NumPy floor division on the dense fixtures."""
    mat1 = SparseNDArray(self.s1)
    mat2 = SparseNDArray(self.s2)

    self.assertArrayEqual(mat1 // mat2,
                          self.s1.toarray() // self.s2.toarray())
    self.assertArrayEqual(mat1 // self.d1, self.s1.toarray() // self.d1)
    self.assertArrayEqual(self.d1 // mat1, self.d1 // self.s1.toarray())

    # scalar operand on either side
    self.assertArrayEqual(mat1 // 2, self.s1.toarray() // 2)
    self.assertArrayEqual(2 // mat1, 2 // self.s1.toarray())
def testSparsePower(self):
    """Check ``**`` against NumPy / scipy power on the fixtures."""
    mat1 = SparseNDArray(self.s1)
    mat2 = SparseNDArray(self.s2)

    self.assertArrayEqual(mat1 ** mat2,
                          self.s1.toarray() ** self.s2.toarray())
    self.assertArrayEqual(mat1 ** self.d1, self.s1.toarray() ** self.d1)
    self.assertArrayEqual(self.d1 ** mat1, self.d1 ** self.s1.toarray())

    # scalar exponent is checked against scipy's ``power``; scalar base
    # against the densified fixture
    self.assertArrayEqual(mat1 ** 2, self.s1.power(2))
    self.assertArrayEqual(2 ** mat1, 2 ** self.s1.toarray())
def testSparseMod(self):
    """Check ``%`` against NumPy modulo on the dense fixtures."""
    mat1 = SparseNDArray(self.s1)
    mat2 = SparseNDArray(self.s2)

    self.assertArrayEqual(mat1 % mat2,
                          self.s1.toarray() % self.s2.toarray())
    self.assertArrayEqual(mat1 % self.d1, self.s1.toarray() % self.d1)
    self.assertArrayEqual(self.d1 % mat1, self.d1 % self.s1.toarray())

    # scalar operand on either side
    self.assertArrayEqual(mat1 % 2, self.s1.toarray() % 2)
    self.assertArrayEqual(2 % mat1, 2 % self.s1.toarray())
def test_sparse_minimum():
    """Check ``minimum`` on a sparse matrix pair and a sparse vector.

    The matrix result is compared with scipy's ``minimum`` on the reference
    fixtures instead of with itself, so the assertion is meaningful.
    """
    s1 = SparseNDArray(s1_data)
    s2 = SparseNDArray(s2_data)

    np.testing.assert_array_equal(
        s1.minimum(s2).toarray(), s1_data.minimum(s2_data).toarray())

    # sparse vector against a dense operand
    v = SparseVector(v1, shape=(3, ))
    np.testing.assert_array_equal(v.minimum(d1), np.minimum(v1_data, d1))
def testEuclideanDistancesExecution(self):
    """Compare ``euclidean_distances`` with ``sk_euclidean_distances``.

    The same checks are run for a dense input pair and a sparse input pair:
    plain call, call with caller-supplied norms, ``squared=True`` on
    float32 inputs, and the single-argument form.
    """
    dense_raw_x = np.random.rand(30, 10)
    dense_raw_y = np.random.rand(40, 10)
    sparse_raw_x = SparseNDArray(
        sps.random(30, 10, density=0.5, format='csr'))
    sparse_raw_y = SparseNDArray(
        sps.random(40, 10, density=0.5, format='csr'))

    for raw_x, raw_y in [(dense_raw_x, dense_raw_y),
                         (sparse_raw_x, sparse_raw_y)]:
        # Chunk sizes 9 and 7 do not divide the row counts (30, 40) evenly.
        x = mt.tensor(raw_x, chunk_size=9)
        y = mt.tensor(raw_y, chunk_size=7)

        distance = euclidean_distances(x, y)

        result = self.executor.execute_tensor(distance, concat=True)[0]
        expected = sk_euclidean_distances(raw_x, Y=raw_y)
        np.testing.assert_almost_equal(result, expected)

        # The values passed as "norm squared" are plain row sums; the same
        # values are given to both implementations, so only agreement
        # between them is checked here.
        x_norm = x.sum(axis=1)[..., np.newaxis]
        y_norm = y.sum(axis=1)[np.newaxis, ...]
        distance = euclidean_distances(x, y, X_norm_squared=x_norm,
                                       Y_norm_squared=y_norm)
        x_raw_norm = raw_x.sum(axis=1)[..., np.newaxis]
        y_raw_norm = raw_y.sum(axis=1)[np.newaxis, ...]

        result = self.executor.execute_tensor(distance, concat=True)[0]
        expected = sk_euclidean_distances(raw_x, raw_y,
                                          X_norm_squared=x_raw_norm,
                                          Y_norm_squared=y_raw_norm)
        np.testing.assert_almost_equal(result, expected)

        # float32 inputs with squared=True; compared to 6 decimals.
        x_sq = (x**2).astype(np.float32)
        y_sq = (y**2).astype(np.float32)

        distance = euclidean_distances(x_sq, y_sq, squared=True)

        x_raw_sq = (raw_x**2).astype(np.float32)
        y_raw_sq = (raw_y**2).astype(np.float32)

        result = self.executor.execute_tensor(distance, concat=True)[0]
        expected = sk_euclidean_distances(x_raw_sq, y_raw_sq, squared=True)
        np.testing.assert_almost_equal(result, expected, decimal=6)

        # test x is y
        distance = euclidean_distances(x)

        result = self.executor.execute_tensor(distance, concat=True)[0]
        expected = sk_euclidean_distances(raw_x)

        np.testing.assert_almost_equal(result, expected)
def testSparseMinimum(self):
    """Check ``minimum`` for a sparse matrix pair and a sparse vector."""
    lhs = SparseNDArray(self.s1)
    rhs = SparseNDArray(self.s2)

    actual = lhs.minimum(rhs).toarray()
    expected = self.s1.minimum(self.s2).toarray()
    np.testing.assert_array_equal(actual, expected)

    # sparse vector against a dense operand
    vec = SparseVector(self.v1, shape=(3,))
    np.testing.assert_array_equal(vec.minimum(self.d1),
                                  np.minimum(self.v1_data, self.d1))
def testSparseSum(self):
    """Check total and per-axis ``sum`` against the scipy / NumPy fixtures."""
    mat = SparseNDArray(self.s1)
    vec = SparseNDArray(self.v1, shape=(3,))

    self.assertEqual(mat.sum(), self.s1.sum())

    # The reference axis sums are flattened to 1-d before comparison.
    row_sums = np.asarray(self.s1.sum(axis=1)).reshape(2)
    np.testing.assert_array_equal(mat.sum(axis=1), row_sums)
    col_sums = np.asarray(self.s1.sum(axis=0)).reshape(3)
    np.testing.assert_array_equal(mat.sum(axis=0), col_sums)

    np.testing.assert_array_equal(vec.sum(),
                                  np.asarray(self.v1_data.sum()))
def testSparseBin(self):
    """Compare binary functions in ``mls`` with their NumPy namesakes."""
    mat1 = SparseNDArray(self.s1)
    mat2 = SparseNDArray(self.s2)

    binary_ops = ('fmod', 'logaddexp', 'logaddexp2', 'equal', 'not_equal',
                  'less', 'less_equal', 'greater', 'greater_equal', 'hypot')
    for name in binary_ops:
        sparse_fn = getattr(mls, name)
        numpy_fn = getattr(np, name)

        self.assertArrayEqual(
            sparse_fn(mat1, mat2),
            numpy_fn(self.s1.toarray(), self.s2.toarray()))
        self.assertArrayEqual(
            sparse_fn(mat1, self.d1),
            numpy_fn(self.s1.toarray(), self.d1))
        self.assertArrayEqual(
            sparse_fn(self.d1, mat1),
            numpy_fn(self.d1, self.s1.toarray()))

        # scalar operand on either side
        self.assertArrayEqual(sparse_fn(mat1, 2),
                              numpy_fn(self.s1.toarray(), 2))
        self.assertArrayEqual(sparse_fn(2, mat1),
                              numpy_fn(2, self.s1.toarray()))
def test_sparse_creation():
    """Check that ``SparseNDArray`` builds the right matrix / vector type.

    A 2-d input must yield a ``SparseMatrix``; passing ``shape=(3,)`` must
    yield a ``SparseVector`` with ``ndim == 1`` and the requested shape.
    """
    s = SparseNDArray(s1_data)
    assert s.ndim == 2
    assert isinstance(s, SparseMatrix)
    assertArrayEqual(s.toarray(), s1_data.A)
    assertArrayEqual(s.todense(), s1_data.A)

    v = SparseNDArray(v1, shape=(3, ))
    # Check the vector's own dimensionality; a bare ``assert s.ndim`` only
    # tested truthiness of the matrix's ndim.
    assert v.ndim == 1
    assert isinstance(v, SparseVector)
    assert v.shape == (3, )
    assertArrayEqual(v.todense(), v1_data)
    assertArrayEqual(v.toarray(), v1_data)
    assertArrayEqual(v, v1_data)
def testSparseCreation(self):
    """Check that ``SparseNDArray`` builds the right matrix / vector type.

    A 2-d input must yield a ``SparseMatrix``; passing ``shape=(3,)`` must
    yield a ``SparseVector`` with ``ndim == 1`` and the requested shape.
    """
    s = SparseNDArray(self.s1)
    self.assertEqual(s.ndim, 2)
    self.assertIsInstance(s, SparseMatrix)
    self.assertArrayEqual(s.toarray(), self.s1.A)
    self.assertArrayEqual(s.todense(), self.s1.A)

    v = SparseNDArray(self.v1, shape=(3,))
    # ``assertTrue(s.ndim, 1)`` treated ``1`` as the failure message and
    # looked at the matrix; compare the vector's ndim for equality instead.
    self.assertEqual(v.ndim, 1)
    self.assertIsInstance(v, SparseVector)
    self.assertEqual(v.shape, (3,))
    self.assertArrayEqual(v.todense(), self.v1_data)
    self.assertArrayEqual(v.toarray(), self.v1_data)
    self.assertArrayEqual(v, self.v1_data)
def test_sparse_unary():
    """Compare unary functions in ``mls`` with their NumPy namesakes."""
    mat = SparseNDArray(s1_data)
    vec = SparseNDArray(v1, shape=(3, ))

    unary_ops = (
        'negative', 'positive', 'absolute', 'abs', 'fabs', 'rint', 'sign',
        'conj', 'exp', 'exp2', 'log', 'log2', 'log10', 'expm1', 'log1p',
        'sqrt', 'square', 'cbrt', 'reciprocal', 'sin', 'cos', 'tan',
        'arcsin', 'arccos', 'arctan', 'arcsinh', 'arccosh', 'arctanh',
        'deg2rad', 'rad2deg', 'angle', 'isnan', 'isinf', 'signbit', 'sinc',
        'isreal', 'isfinite',
    )
    for name in unary_ops:
        sparse_fn = getattr(mls, name)
        numpy_fn = getattr(np, name)

        # The matrix reference applies the function to the stored data only
        # and keeps the index structure.
        expected = sps.csr_matrix(
            (numpy_fn(mat.data), mat.indices, mat.indptr), shape=mat.shape)
        assertArrayEqual(sparse_fn(mat), expected)
        assertArrayEqual(sparse_fn(vec), numpy_fn(v1_data))
def testSparseAdd(self):
    """Check ``+`` on sparse matrices and on a sparse vector."""
    mat1 = SparseNDArray(self.s1)
    mat2 = SparseNDArray(self.s2)

    self.assertArrayEqual(mat1 + mat2, self.s1 + self.s2)
    self.assertArrayEqual(mat1 + self.d1, self.s1 + self.d1)
    self.assertArrayEqual(self.d1 + mat1, self.d1 + self.s1)

    # Scalar operand is compared against the fully densified fixture.
    self.assertArrayEqual(mat1 + 1, self.s1.toarray() + 1)
    self.assertArrayEqual(1 + mat1, self.s1.toarray() + 1)

    # sparse vector of shape (3,) with sparse and dense operands
    vec = SparseNDArray(self.v1, shape=(3,))
    self.assertArrayEqual(vec + vec, self.v1_data + self.v1_data)
    self.assertArrayEqual(vec + self.d1, self.v1_data + self.d1)
    self.assertArrayEqual(self.d1 + vec, self.d1 + self.v1_data)
def testSparseSubtract(self):
    """Check ``-`` on sparse matrices and on a sparse vector."""
    mat1 = SparseNDArray(self.s1)
    mat2 = SparseNDArray(self.s2)

    self.assertArrayEqual(mat1 - mat2, self.s1 - self.s2)
    self.assertArrayEqual(mat1 - self.d1, self.s1 - self.d1)
    self.assertArrayEqual(self.d1 - mat1, self.d1 - self.s1)

    # Scalar operand is compared against the fully densified fixture.
    self.assertArrayEqual(mat1 - 1, self.s1.toarray() - 1)
    self.assertArrayEqual(1 - mat1, 1 - self.s1.toarray())

    # sparse vector of shape (3,) with sparse and dense operands
    vec = SparseNDArray(self.v1, shape=(3,))
    self.assertArrayEqual(vec - vec, self.v1_data - self.v1_data)
    self.assertArrayEqual(vec - self.d1, self.v1_data - self.d1)
    self.assertArrayEqual(self.d1 - vec, self.d1 - self.v1_data)
def testSparseDot(self):
    """Check ``dot`` / ``tensordot`` for sparse matrix operands."""
    mat1 = SparseNDArray(self.s1)
    mat2 = SparseNDArray(self.s2)

    # matrix . matrix, with sparse and dense operands
    self.assertArrayEqual(mls.dot(mat1, mat2.T), self.s1.dot(self.s2.T))
    self.assertArrayEqual(mat1.dot(self.d1), self.s1.dot(self.d1))
    self.assertArrayEqual(self.d1.dot(mat1.T),
                          self.d1.dot(self.s1.T.toarray()))

    # tensordot over explicitly chosen axes
    self.assertArrayEqual(mls.tensordot(mat1, mat2.T, axes=(1, 0)),
                          self.s1.dot(self.s2.T))
    self.assertArrayEqual(mls.tensordot(mat1, self.d1, axes=(1, -1)),
                          self.s1.dot(self.d1))
    self.assertArrayEqual(mls.tensordot(self.d1, mat1.T, axes=(0, 0)),
                          self.d1.dot(self.s1.T.toarray()))
def test_sparse_multiply():
    """Check element-wise ``*`` on sparse matrices and a sparse vector.

    Scalar results are compared with the scipy fixture (``s1_data * 2``)
    rather than with the same expression on the object under test.
    """
    s1 = SparseNDArray(s1_data)
    s2 = SparseNDArray(s2_data)

    assertArrayEqual(s1 * s2, s1_data.multiply(s2_data))
    assertArrayEqual(s1 * d1, s1_data.multiply(d1))
    assertArrayEqual(d1 * s1, s1_data.multiply(d1))
    assertArrayEqual(s1 * 2, s1_data * 2)
    assertArrayEqual(2 * s1, s1_data * 2)

    # test sparse vector
    v = SparseNDArray(v1, shape=(3, ))
    assertArrayEqual(v * v, v1_data * v1_data)
    assertArrayEqual(v * d1, v1_data * d1)
    assertArrayEqual(d1 * v, d1 * v1_data)

    # Scalar operand: the reference applies the scalar to the stored data
    # only, then densifies and flattens to length 3.
    r = sps.csr_matrix(((v1.data * 1), v1.indices, v1.indptr), v1.shape)
    assertArrayEqual(v * 1, r.toarray().reshape(3))
    r = sps.csr_matrix(((1 * v1.data), v1.indices, v1.indptr), v1.shape)
    assertArrayEqual(1 * v, r.toarray().reshape(3))
def test_sparse_power():
    """Check ``**`` on sparse matrices and a sparse vector.

    Dense reference values are taken from the scipy fixtures
    (``s1_data`` / ``s2_data``), not from ``toarray()`` of the objects
    under test, so a faulty ``toarray`` cannot mask a faulty operator.
    """
    s1 = SparseNDArray(s1_data)
    s2 = SparseNDArray(s2_data)
    dense1 = s1_data.toarray()
    dense2 = s2_data.toarray()

    assertArrayEqual(s1**s2, dense1**dense2)
    assertArrayEqual(s1**d1, dense1**d1)
    assertArrayEqual(d1**s1, d1**dense1)
    assertArrayEqual(s1**2, s1_data.power(2))
    assertArrayEqual(2**s1, 2**dense1)

    # test sparse vector
    v = SparseNDArray(v1, shape=(3, ))
    assertArrayEqual(v**v, v1_data**v1_data)
    assertArrayEqual(v**d1, v1_data**d1)
    assertArrayEqual(d1**v, d1**v1_data)

    # Scalar operand: the reference applies the scalar to the stored data
    # only, then densifies and flattens to length 3.
    r = sps.csr_matrix(((v1.data**1), v1.indices, v1.indptr), v1.shape)
    assertArrayEqual(v**1, r.toarray().reshape(3))
    r = sps.csr_matrix(((1**v1.data), v1.indices, v1.indptr), v1.shape)
    assertArrayEqual(1**v, r.toarray().reshape(3))
def test_sparse_mod():
    """Check ``%`` on sparse matrices and a sparse vector.

    Dense reference values are taken from the scipy fixtures
    (``s1_data`` / ``s2_data``), not from ``toarray()`` of the objects
    under test, so a faulty ``toarray`` cannot mask a faulty operator.
    """
    s1 = SparseNDArray(s1_data)
    s2 = SparseNDArray(s2_data)
    dense1 = s1_data.toarray()
    dense2 = s2_data.toarray()

    assertArrayEqual(s1 % s2, dense1 % dense2)
    assertArrayEqual(s1 % d1, dense1 % d1)
    assertArrayEqual(d1 % s1, d1 % dense1)
    assertArrayEqual(s1 % 2, dense1 % 2)
    assertArrayEqual(2 % s1, 2 % dense1)

    # test sparse vector
    v = SparseNDArray(v1, shape=(3, ))
    assertArrayEqual(v % v, v1_data % v1_data)
    assertArrayEqual(v % d1, v1_data % d1)
    assertArrayEqual(d1 % v, d1 % v1_data)

    # Scalar operand: the reference applies the scalar to the stored data
    # only, then densifies and flattens to length 3.
    r = sps.csr_matrix(((v1.data % 1), v1.indices, v1.indptr), v1.shape)
    assertArrayEqual(v % 1, r.toarray().reshape(3))
    r = sps.csr_matrix(((1 % v1.data), v1.indices, v1.indptr), v1.shape)
    assertArrayEqual(1 % v, r.toarray().reshape(3))
def execute(cls, ctx, op):
    """Read the parquet files in ``op.paths`` and store one array in ``ctx``.

    Every file is loaded into a pandas DataFrame and the frames are
    concatenated. The columns named in ``op.dim_cols`` are used as
    coordinates and ``op.value_col`` as values. The result is stored under
    ``ctx[op.outputs[0].key]`` as a ``SparseNDArray`` when ``op.sparse`` is
    set, otherwise as a NumPy array.
    """
    import pyarrow.parquet as pq
    import pandas as pd
    import scipy.sparse as sps
    from mars.lib.sparse import SparseNDArray
    from ..io import open as fs_open

    dfs = []
    storage_opts = json.loads(op.storage_options)
    for p in op.paths:
        with fs_open(p, 'rb', **storage_opts) as inp_file:
            # The whole file is read into memory before parsing.
            f = inp_file.read()
            dfs.append(pq.read_table(BytesIO(f)).to_pandas())

    chunk = op.outputs[0]

    # Sparse output with no input files: emit an all-zero sparse array.
    if op.sparse and len(dfs) == 0:
        if len(chunk.shape) == 1:
            # A 1-d chunk is backed by an (n, 1) csr matrix.
            csr_array = sps.csr_matrix((chunk.shape[0], 1))
            ctx[chunk.key] = SparseNDArray(csr_array, shape=chunk.shape)
        else:
            csr_array = sps.csr_matrix(chunk.shape)
            ctx[chunk.key] = SparseNDArray(csr_array)
        return

    # NOTE(review): for dense output with no files, ``dfs`` is empty here
    # and pd.concat would raise -- confirm that case cannot occur.
    df_merged = pd.concat(dfs, ignore_index=True)
    dim_arrays = [df_merged[col] for col in op.dim_cols]
    value_array = df_merged[op.value_col].astype(chunk.dtype)
    # Drop the merged frame once the needed columns have been extracted.
    del df_merged

    if op.sparse:
        if len(chunk.shape) == 1:
            # Add an all-zero column coordinate so the 1-d data fits the
            # (n, 1) csr matrix.
            dim_arrays.append(np.zeros((len(dim_arrays[0]))))
            csr_array = sps.csr_matrix((value_array, tuple(dim_arrays)),
                                       shape=(chunk.shape[0], 1))
        else:
            csr_array = sps.csr_matrix((value_array, tuple(dim_arrays)),
                                       shape=chunk.shape)
        del dim_arrays, value_array
        ctx[chunk.key] = SparseNDArray(csr_array, shape=chunk.shape)
    else:
        # np.empty does not initialise memory; this presumably relies on
        # the coordinates covering every element -- TODO confirm.
        arr = np.empty(chunk.shape, dtype=value_array.dtype)
        arr[tuple(dim_arrays)] = value_array
        ctx[chunk.key] = arr
def testSparseUnary(self):
    """Compare unary functions in ``mls`` with their NumPy namesakes."""
    mat = SparseNDArray(self.s1)

    unary_ops = (
        'negative', 'positive', 'absolute', 'abs', 'fabs', 'rint', 'sign',
        'conj', 'exp', 'exp2', 'log', 'log2', 'log10', 'expm1', 'log1p',
        'sqrt', 'square', 'cbrt', 'reciprocal', 'sin', 'cos', 'tan',
        'arcsin', 'arccos', 'arctan', 'arcsinh', 'arccosh', 'arctanh',
        'deg2rad', 'rad2deg',
    )
    for name in unary_ops:
        sparse_fn = getattr(mls, name)
        numpy_fn = getattr(np, name)
        # The reference is computed on the fully densified fixture.
        self.assertArrayEqual(sparse_fn(mat), numpy_fn(self.s1.toarray()))
def test_sparse_floor_divide():
    """Check ``//`` on sparse matrices and a sparse vector.

    Dense reference values are taken from the scipy fixtures
    (``s1_data`` / ``s2_data``), not from ``toarray()`` of the objects
    under test, so a faulty ``toarray`` cannot mask a faulty operator.
    """
    s1 = SparseNDArray(s1_data)
    s2 = SparseNDArray(s2_data)
    dense1 = s1_data.toarray()
    dense2 = s2_data.toarray()

    assertArrayEqual(s1 // s2, dense1 // dense2)
    assertArrayEqual(s1 // d1, dense1 // d1)
    assertArrayEqual(d1 // s1, d1 // dense1)
    assertArrayEqual(s1 // 2, dense1 // 2)
    assertArrayEqual(2 // s1, 2 // dense1)

    # test sparse vector
    v = SparseNDArray(v1, shape=(3, ))
    assertArrayEqual(v // v, v1_data // v1_data)
    assertArrayEqual(v // d1, v1_data // d1)
    assertArrayEqual(d1 // v, d1 // v1_data)

    # Scalar operand: the reference applies the scalar to the stored data
    # only, then densifies and flattens to length 3.
    r = sps.csr_matrix(((v1.data // 1), v1.indices, v1.indptr), v1.shape)
    assertArrayEqual(v // 1, r.toarray().reshape(3))
    r = sps.csr_matrix(((1 // v1.data), v1.indices, v1.indptr), v1.shape)
    assertArrayEqual(1 // v, r.toarray().reshape(3))
async def test_base_operations(storage_context):
    """Exercise the basic operations of a storage backend.

    Covers put/get round-trips for a NumPy array, a pandas DataFrame and a
    sparse matrix, ``object_info``, ``list``/``delete`` (skipped for some
    backends) and the chunked writer / reader API.
    """
    storage = storage_context

    # Round-trip a NumPy array.
    data1 = np.random.rand(10, 10)
    put_info1 = await storage.put(data1)
    get_data1 = await storage.get(put_info1.object_id)
    np.testing.assert_array_equal(data1, get_data1)

    info1 = await storage.object_info(put_info1.object_id)
    # Size equality is only enforced on linux; elsewhere the assertion
    # always passes.
    # FIXME: remove os check when size issue fixed
    assert info1.size == put_info1.size or not sys.platform.startswith('linux')

    # Round-trip a DataFrame with int, str and float columns.
    data2 = pd.DataFrame(
        {
            'col1': np.arange(10),
            'col2': [f'str{i}' for i in range(10)],
            'col3': np.random.rand(10)
        },
    )
    put_info2 = await storage.put(data2)
    get_data2 = await storage.get(put_info2.object_id)
    pd.testing.assert_frame_equal(data2, get_data2)

    info2 = await storage.object_info(put_info2.object_id)
    # FIXME: remove os check when size issue fixed
    assert info2.size == put_info2.size or not sys.platform.startswith('linux')

    # NOTE(review): the extent of this ``if`` body is reconstructed from a
    # whitespace-collapsed source -- confirm that ``delete`` belongs inside.
    # FIXME: remove when list functionality is ready for vineyard.
    if not isinstance(storage, (VineyardStorage, SharedMemoryStorage, RayStorage)):
        # Exactly the two objects stored above are expected to be listed.
        num = len(await storage.list())
        assert num == 2
        await storage.delete(info2.object_id)

    # test SparseMatrix
    s1 = sps.csr_matrix([[1, 0, 1], [0, 0, 1]])
    s = SparseNDArray(s1)
    put_info3 = await storage.put(s)
    get_data3 = await storage.get(put_info3.object_id)
    assert isinstance(get_data3, SparseMatrix)
    np.testing.assert_array_equal(get_data3.toarray(), s1.A)
    np.testing.assert_array_equal(get_data3.todense(), s1.A)

    # test writer and reader
    t = np.random.random(10)
    b = dataserializer.dumps(t)
    async with await storage.open_writer(size=len(b)) as writer:
        # Write the payload in two halves to exercise multiple writes.
        split = len(b) // 2
        await writer.write(b[:split])
        await writer.write(b[split:])

    async with await storage.open_reader(writer.object_id) as reader:
        content = await reader.read()
        t2 = dataserializer.loads(content)

    np.testing.assert_array_equal(t, t2)