def test_pandas():
    """Pandas Series and DataFrame values survive a serialization round trip."""
    series = pd.Series([1, 2, 3, 4])
    pd.testing.assert_series_equal(series, deserialize(*serialize(series)))

    frame = pd.DataFrame({
        'a': np.random.rand(1000),
        'b': np.random.choice(list('abcd'), size=(1000, )),
        'c': np.random.randint(0, 100, size=(1000, )),
    })
    pd.testing.assert_frame_equal(frame, deserialize(*serialize(frame)))
def test_nested_dict():
    """Self-referencing dicts round-trip with identity preserved."""
    # plain dict containing itself as a value
    data = {i: 'b' * 100 for i in range(10)}
    data[0] = data
    restored = deserialize(*serialize(data))
    assert restored[0] is restored

    # KeyedDict containing itself as both key and value
    data = KeyedDict(abcd='efgh')
    data[data] = data
    restored = deserialize(*serialize(data))
    assert restored[data] is restored
async def test_cuda_backend():
    """Exercise put/get/delete and the reader/writer paths of CudaStorage."""
    params, teardown_params = await CudaStorage.setup()
    storage = CudaStorage(**params)
    assert storage.level == StorageLevel.GPU

    # put/get/delete a cupy ndarray
    data1 = cupy.asarray(np.random.rand(10, 10))
    put_info1 = await storage.put(data1)
    get_data1 = await storage.get(put_info1.object_id)
    cupy.testing.assert_array_equal(data1, get_data1)

    info1 = await storage.object_info(put_info1.object_id)
    assert info1.size == put_info1.size
    await storage.delete(put_info1.object_id)

    # put/get a cudf DataFrame
    data2 = cudf.DataFrame(
        pd.DataFrame(
            {
                'col1': np.arange(10),
                'col2': [f'str{i}' for i in range(10)],
                'col3': np.random.rand(10)
            },
        ))
    put_info2 = await storage.put(data2)
    get_data2 = await storage.get(put_info2.object_id)
    cudf.testing.assert_frame_equal(data2, get_data2)

    info2 = await storage.object_info(put_info2.object_id)
    assert info2.size == put_info2.size

    # test writer and reader
    t = np.random.random(10)
    b = dataserializer.dumps(t)
    async with await storage.open_writer(size=len(b)) as writer:
        split = len(b) // 2
        await writer.write(b[:split])
        await writer.write(b[split:])
    async with await storage.open_reader(writer.object_id) as reader:
        content = await reader.read()
        b = content.to_host_array().tobytes()
        t2 = dataserializer.loads(b)
    np.testing.assert_array_equal(t, t2)

    # write cupy array
    t = cupy.random.random((10, ))
    headers, buffers = serialize(t)
    # BUG FIX: size the writer by the cupy buffers actually written, not by
    # len(b) left over from the previous numpy round trip
    # (assumes each buffer exposes nbytes or supports len() — TODO confirm)
    total_size = sum(getattr(buffer, 'nbytes', None) or len(buffer)
                     for buffer in buffers)
    async with await storage.open_writer(size=total_size) as writer:
        for buffer in buffers:
            await writer.write(buffer.data)
    async with await storage.open_reader(writer.object_id) as reader:
        b2 = await reader.read()
        t2 = deserialize(headers, [b2])
    cupy.testing.assert_array_equal(t, t2)

    # BUG FIX: tear down exactly once, after all storage use — the original
    # called teardown mid-test, kept using the storage, then tore down again
    await CudaStorage.teardown(**teardown_params)
def test_cudf():
    """A cudf DataFrame survives a serialization round trip."""
    source = pd.DataFrame({
        'a': np.random.rand(1000),
        'b': np.random.choice(list('abcd'), size=(1000, )),
        'c': np.random.randint(0, 100, size=(1000, )),
    })
    gpu_frame = cudf.DataFrame(source)
    cudf.testing.assert_frame_equal(gpu_frame,
                                    deserialize(*serialize(gpu_frame)))
async def recv(self):
    """Receive the next message from the in-queue and deserialize it.

    Raises ChannelClosed if the channel is already closed; a RuntimeError
    raised while the channel is still open is propagated to the caller.
    """
    if self._closed.is_set():  # pragma: no cover
        # BUG FIX: this is the receive side — the message previously said
        # 'cannot write message' (copy-paste from the send side)
        raise ChannelClosed('Channel already closed, cannot recv message')
    try:
        return deserialize(*(await self._in_queue.get()))
    except RuntimeError:  # pragma: no cover
        # the queue may raise RuntimeError during shutdown; only surface it
        # when the channel was not deliberately closed
        if not self._closed.is_set():
            raise
def deserialize_serializable(ser_serializable):
    """Rebuild a serializable object from its packed bytes form.

    Layout: 8-byte unsigned header length, then the pickled header,
    then the raw buffers whose sizes the header records.
    """
    from .serialization import deserialize

    stream = io.BytesIO(ser_serializable)
    (header_length,) = struct.unpack('Q', stream.read(8))
    header = pickle.loads(stream.read(header_length))
    buffers = [stream.read(size) for size in header['buf_sizes']]
    return deserialize(header, buffers)
def testCupy(self):
    """Cupy arrays (C- and F-ordered) round-trip with exact type and data."""
    samples = [
        cupy.array(np.random.rand(100, 100)),
        cupy.array(np.random.rand(100, 100).T),
    ]
    for sample in samples:
        restored = deserialize(*serialize(sample))
        self.assertEqual(type(sample), type(restored))
        cupy.testing.assert_array_equal(sample, restored)
async def recv(self):
    """Await the next ray object ref from the in-queue and deserialize it."""
    if self._closed.is_set():  # pragma: no cover
        # BUG FIX: receive side previously reported 'cannot write message'
        raise ChannelClosed('Channel already closed, cannot recv message')
    try:
        # Wait on ray object ref
        object_ref = await self._in_queue.get()
        return deserialize(*(await object_ref))
    except RuntimeError:
        if not self._closed.is_set():
            # bare raise preserves the original traceback; 'raise e' re-anchored it
            raise
def test_dag_serialization(graph_type):
    """A graph with nodes and an edge survives a serialization round trip."""
    node_a = MySerializable(_id=1)
    node_b = MySerializable(_id=2)

    dag = graph_type([node_b])
    dag.add_node(node_a)
    dag.add_node(node_b)
    dag.add_edge(node_a, node_b)

    header, buffers = serialize(dag)
    restored = deserialize(header, buffers)
    assert len(dag) == len(restored) > 0
def testNumpy(self):
    """Numpy arrays (C/F order and object dtype) round-trip intact."""
    samples = [
        np.array(np.random.rand(100, 100)),
        np.array(np.random.rand(100, 100).T),
        np.array(['a', 'bcd', None]),
    ]
    for sample in samples:
        restored = deserialize(*serialize(sample))
        self.assertEqual(type(sample), type(restored))
        np.testing.assert_equal(sample, restored)
        # Fortran-contiguity must be preserved by the round trip
        if sample.flags.f_contiguous:
            self.assertTrue(restored.flags.f_contiguous)
async def recv(self):
    """Await the next result from the in-queue, re-raising remote exceptions.

    A RayChannelException shipped by the peer is rethrown with its original
    traceback; otherwise the result is deserialized and returned.
    """
    if self._closed.is_set():  # pragma: no cover
        raise ChannelClosed('Channel already closed, cannot recv message')
    try:
        # Wait on ray object ref
        object_ref = await self._in_queue.get()
        result = await object_ref
        if isinstance(result, RayChannelException):
            # the remote side packaged its exception; rethrow as-is
            raise result.exc_value.with_traceback(result.exc_traceback)
        return deserialize(*result)
    except (RuntimeError, ServerClosed):  # pragma: no cover
        if not self._closed.is_set():
            # bare raise keeps the original traceback; 'raise e' re-anchored it
            raise
def test_arrow():
    """Arrow RecordBatch and Table values round-trip through serialization."""
    frame = pd.DataFrame({
        'a': np.random.rand(1000),
        'b': np.random.choice(list('abcd'), size=(1000, )),
        'c': np.random.randint(0, 100, size=(1000, )),
    })
    for value in (pa.RecordBatch.from_pandas(frame),
                  pa.Table.from_pandas(frame)):
        restored = deserialize(*serialize(value))
        assert type(value) is type(restored)
        np.testing.assert_equal(value, restored)
def testCore(self):
    """Builtin scalars and containers keep both exact type and value."""
    samples = [
        False,
        123,
        3.567,
        3.5 + 4.3j,
        b'abcd',
        'abcd',
        ['uvw', ('mno', 'sdaf'), 4, 6.7],
        CustomList([3, 4, CustomList([5, 6])]),
        {'abc': 5.6, 'def': [3.4]},
        OrderedDict([('abcd', 5.6)]),
    ]
    for sample in samples:
        restored = deserialize(*serialize(sample))
        self.assertEqual(type(sample), type(restored))
        self.assertEqual(sample, restored)
async def recv(self):
    """Await the next (message, object_ref) pair and deserialize the result.

    Translates a RayActorError into EOFError (matching SocketChannel) and
    rethrows remote RayChannelException payloads with their tracebacks.
    """
    if self._closed.is_set():  # pragma: no cover
        raise ChannelClosed('Channel already closed, cannot recv message')
    try:
        # Wait on ray object ref
        message, object_ref = await self._in_queue.get()
        with debug_async_timeout('ray_object_retrieval_timeout',
                                 'Client sent message is %s', message):
            result = await object_ref
        if isinstance(result, RayChannelException):
            # the remote side packaged its exception; rethrow as-is
            raise result.exc_value.with_traceback(result.exc_traceback)
        return deserialize(*result)
    except ray.exceptions.RayActorError:
        if not self._closed.is_set():
            # raise a EOFError as the SocketChannel does
            raise EOFError('Server may be closed')
    except (RuntimeError, ServerClosed):  # pragma: no cover
        if not self._closed.is_set():
            # bare raise preserves the original traceback; 'raise e' re-anchored it
            raise
def execute_graph(self, graph, keys, **kw):
    """Execute ``graph``, first round-tripping it through serialization
    to exercise serialize/deserialize in tests.

    Set the ``NO_SERIALIZE_IN_TEST_EXECUTOR`` environment variable to
    skip the serialization round trip.
    """
    if 'NO_SERIALIZE_IN_TEST_EXECUTOR' not in os.environ:
        raw_graph = graph
        # execute the deserialized copy so serialization bugs surface here
        graph = deserialize(*serialize(raw_graph))
        self._check_graph(graph)
        if kw.get('compose', True):
            # decompose the raw graph
            # due to the reason that, now after fuse,
            # the inputs of node's op may be fuse,
            # call optimize to decompose back
            raw_graph.decompose()
            Fusion.check_graph(raw_graph)
    # record shapes generated in tile
    for n in graph:
        self._raw_chunk_shapes[n.key] = getattr(n, 'shape', None)
    return super().execute_graph(graph, keys, **kw)
def test_serializable():
    """Build a MySerializable exercising every supported field type and
    verify a serialize/deserialize round trip preserves all of them."""
    my_serializable = MySerializable(
        _id='1',
        _any_val='any_value',
        _bool_val=True,
        _int8_val=-8,
        _int16_val=np.int16(-16),
        _int32_val=-32,
        _int64_val=-64,
        _uint8_val=8,
        _uint16_val=16,
        _uint32_val=np.uint32(32),
        _uint64_val=64,
        _float16_val=1.,
        _float32_val=np.float32(2.),
        _float64_val=2.,
        _complex64_val=np.complex64(1+2j),
        _complex128_val=1+2j,
        _string_val='string_value',
        _bytes_val=b'bytes_value',
        _key_val=MyHasKey('aaa'),
        _ndarray_val=np.random.rand(4, 3),
        _datetime64_val=pd.Timestamp(123),
        _timedelta64_val=pd.Timedelta(days=1),
        _datatype_val=np.dtype(np.int32),
        _index_val=pd.Index([1, 2]),
        _series_val=pd.Series(['a', 'b']),
        _dataframe_val=pd.DataFrame({'a': [1, 2, 3]}),
        _interval_array_val=pd.arrays.IntervalArray([]),
        _slice_val=slice(1, 10, 2),
        _function_val=lambda x: x + 1,
        _named_tuple_val=my_namedtuple(a=1, b=2),
        _tzinfo_val=timezone.utc,
        _list_val=[1, 2],
        _tuple_val=('a', 'b'),
        _dict_val={'a': b'bytes_value'},
        _ref_val=MySerializable(),
        _oneof_val=MySerializable(_id='2')
    )
    header, buffers = serialize(my_serializable)
    my_serializable2 = deserialize(header, buffers)
    # field-by-field equality check, including the nested serializables
    _assert_serializable_eq(my_serializable, my_serializable2)
def test_dict_without_init_args():
    """dict subclasses that take no __init__ arguments still round-trip."""
    original = DictWithoutInitArgs()
    original['a'] = 'b'
    restored = deserialize(*serialize(original))
    assert restored == original
def test_nested_list():
    """A list that contains itself round-trips with identity preserved."""
    original = ['a' * 100] * 100
    original[0] = original
    restored = deserialize(*serialize(original))
    # the self-reference must point at the restored list itself
    assert restored[0] is restored
    assert original[1:] == restored[1:]
def test_core(val):
    """Round-trip ``val`` and check exact type and value equality."""
    restored = deserialize(*serialize(val))
    assert type(restored) == type(val)
    assert restored == val
def test_mars_sparse():
    """SparseMatrix wrappers round-trip with identical contents."""
    matrix = SparseMatrix(sps.random(100, 100, 0.1, format='csr'))
    restored = deserialize(*serialize(matrix))
    # element-wise inequality of equal matrices has no stored entries
    assert (matrix.spmatrix != restored.spmatrix).nnz == 0
def testMarsSparse(self):
    """SparseMatrix wrappers round-trip with identical contents."""
    matrix = SparseMatrix(sps.random(100, 100, 0.1, format='csr'))
    restored = deserialize(*serialize(matrix))
    # element-wise inequality of equal matrices has no stored entries
    self.assertEqual((matrix.spmatrix != restored.spmatrix).nnz, 0)
def test_cupy(np_val):
    """A cupy array built from ``np_val`` round-trips with exact type."""
    gpu_val = cupy.array(np_val)
    restored = deserialize(*serialize(gpu_val))
    assert type(gpu_val) is type(restored)
    cupy.testing.assert_array_equal(gpu_val, restored)
def testScipySparse(self):
    """scipy CSR matrices round-trip with identical contents."""
    matrix = sps.random(100, 100, 0.1, format='csr')
    restored = deserialize(*serialize(matrix))
    # element-wise inequality of equal matrices has no stored entries
    self.assertEqual((matrix != restored).nnz, 0)
def test_numpy(val):
    """Round-trip a numpy array; type, contents and F-order must survive."""
    restored = deserialize(*serialize(val))
    assert type(restored) == type(val)
    np.testing.assert_equal(val, restored)
    # Fortran-contiguity must be preserved by the round trip
    if val.flags.f_contiguous:
        assert restored.flags.f_contiguous
def test_scipy_sparse():
    """scipy CSR matrices round-trip with identical contents."""
    matrix = sps.random(100, 100, 0.1, format='csr')
    restored = deserialize(*serialize(matrix))
    # element-wise inequality of equal matrices has no stored entries
    assert (matrix != restored).nnz == 0
def deserializer(to_deserialize):
    """Adapter: unpack a serialized sequence (presumably (header, buffers)
    — confirm against callers) into a deserialize() call."""
    return deserialize(*to_deserialize)