def testTensorSerialize(self):
    """Serialize tensors with the pb/json helpers and check the restored fields."""
    from mars.tensor import split

    raw_chunk_size = (3, 3, 5)
    t = ones((10, 10, 8), chunk_size=raw_chunk_size)

    # the raw chunk size has to survive the protobuf round trip ...
    payload = self._pb_serial(t)
    restored = self._pb_deserial(payload)[t.data]
    self.assertEqual(restored.extra_params.raw_chunk_size, raw_chunk_size)

    # ... and the JSON round trip
    payload = self._json_serial(t)
    restored = self._json_deserial(payload)[t.data]
    self.assertEqual(restored.extra_params.raw_chunk_size, raw_chunk_size)

    # split by an integer: the op keeps the plain number
    t2, _ = split(t, 2)
    payload = self._pb_serial(t2)
    restored = self._pb_deserial(payload)[t2.data]
    self.assertEqual(restored.op.indices_or_sections, 2)

    # split by a tensor: the op's indices_or_sections must be one of its inputs
    t2, _, _ = split(t, ones(2, chunk_size=2))
    payload = self._pb_serial(t2)
    restored = self._pb_deserial(payload)[t2.data]
    with build_mode():
        self.assertIn(restored.op.indices_or_sections, restored.inputs)
def testMultipleOutputExecute(self):
    """Execute multi-output operands, fetching all outputs or only the first one."""
    data = np.random.random((5, 9))

    # test multiple outputs
    t1 = mt.tensor(data.copy(), chunk_size=3)
    actual = mt.modf(t1).execute()
    wanted = np.modf(data)
    for idx in (0, 1):
        np.testing.assert_array_equal(actual[idx], wanted[idx])

    # test 1 output
    t2 = mt.tensor(data.copy(), chunk_size=3)
    actual = ((t2 + 1) * 2).execute()
    wanted = (data + 1) * 2
    np.testing.assert_array_equal(actual, wanted)

    # test multiple outputs, but only execute 1 (split)
    t3 = mt.tensor(data.copy(), chunk_size=3)
    outputs = mt.split(t3, 3, axis=1)
    actual = outputs[0].execute()
    wanted = np.split(data, 3, axis=1)[0]
    np.testing.assert_array_equal(actual, wanted)

    # test multiple outputs, but only execute 1 (qr)
    data = np.random.randint(0, 10, (5, 5))
    t3 = mt.tensor(data)
    outputs = mt.linalg.qr(t3)
    actual = outputs[0].execute()
    wanted = np.linalg.qr(data)[0]
    np.testing.assert_array_almost_equal(actual, wanted)
def testSessionExecute(self):
    """Execute a reduction plus single- and multi-output tensors via ``execute()``."""
    rand_tensor = mt.random.rand(10, 20)
    total = rand_tensor.sum().execute()
    self.assertTrue(np.isscalar(total))
    self.assertLess(total, 200)

    data = np.random.random((5, 9))

    # test multiple outputs
    t1 = mt.tensor(data.copy(), chunks=3)
    actual = mt.modf(t1).execute()
    wanted = np.modf(data)
    for idx in (0, 1):
        np.testing.assert_array_equal(actual[idx], wanted[idx])

    # test 1 output
    t2 = mt.tensor(data.copy(), chunks=3)
    actual = ((t2 + 1) * 2).execute()
    wanted = (data + 1) * 2
    np.testing.assert_array_equal(actual, wanted)

    # test multiple outputs, but only execute 1
    t3 = mt.tensor(data.copy(), chunks=3)
    outputs = mt.split(t3, 3, axis=1)
    actual = outputs[0].execute()
    wanted = np.split(data, 3, axis=1)[0]
    np.testing.assert_array_equal(actual, wanted)
def _prepare_test_graph(self, session_id, graph_key, mock_workers):
    """Build a small split graph, set up the actors around it and yield them.

    Yields a ``(pool, graph_ref)`` pair while the actor pool is still open;
    the graph has been prepared and analyzed, and its operand actors are
    created with ``_start=False``.
    """
    addr = f'127.0.0.1:{get_next_port()}'

    lhs = mt.random.random((100,))
    rhs = mt.random.random((100,))
    total = lhs + rhs
    v1, v2 = mt.split(total, 2)

    graph = TileableGraph([v1.data, v2.data])
    builder = TileableGraphBuilder(graph)
    next(iter(builder.build()))

    with create_actor_pool(n_process=1, backend='gevent', address=addr) as pool:
        pool.create_actor(
            SchedulerClusterInfoActor,
            [pool.cluster_info.address],
            uid=SchedulerClusterInfoActor.default_uid(),
        )
        resource_ref = pool.create_actor(
            ResourceActor, uid=ResourceActor.default_uid())
        pool.create_actor(ChunkMetaActor, uid=ChunkMetaActor.default_uid())
        pool.create_actor(AssignerActor, uid=AssignerActor.gen_uid(session_id))
        graph_ref = pool.create_actor(
            GraphActor,
            session_id,
            graph_key,
            serialize_graph(graph),
            uid=GraphActor.gen_uid(session_id, graph_key),
        )

        # register every mock worker with its own freshly built meta dict
        for worker in mock_workers:
            worker_meta = {'hardware': {'cpu': 4, 'cpu_total': 4, 'memory': 1600}}
            resource_ref.set_worker_meta(worker, worker_meta)

        graph_ref.prepare_graph()
        graph_ref.analyze_graph()
        graph_ref.create_operand_actors(_start=False)

        yield pool, graph_ref
def _prepare_test_graph(self, session_id, graph_key, mock_workers):
    """Build a small split graph, set up the actors around it and yield them.

    Yields a ``(pool, graph_ref)`` pair while the actor pool is still open;
    the graph has been prepared and analyzed, and its operand actors are
    created with ``_start=False``.
    """
    addr = '127.0.0.1:%d' % get_next_port()

    lhs = mt.random.random((100,))
    rhs = mt.random.random((100,))
    total = lhs + rhs
    v1, v2 = mt.split(total, 2)

    # both split outputs are added to the same DAG
    graph = DAG()
    v1.build_graph(graph=graph, compose=False)
    v2.build_graph(graph=graph, compose=False)

    with create_actor_pool(n_process=1, backend='gevent', address=addr) as pool:
        pool.create_actor(
            ClusterInfoActor,
            [pool.cluster_info.address],
            uid=ClusterInfoActor.default_name(),
        )
        resource_ref = pool.create_actor(
            ResourceActor, uid=ResourceActor.default_name())
        pool.create_actor(ChunkMetaActor, uid=ChunkMetaActor.default_name())
        pool.create_actor(AssignerActor, uid=AssignerActor.default_name())
        graph_ref = pool.create_actor(
            GraphActor,
            session_id,
            graph_key,
            serialize_graph(graph),
            uid=GraphActor.gen_name(session_id, graph_key),
        )

        # register every mock worker with its own freshly built meta dict
        for worker in mock_workers:
            worker_meta = {'hardware': {'cpu_total': 4}}
            resource_ref.set_worker_meta(worker, worker_meta)

        graph_ref.prepare_graph()
        graph_ref.analyze_graph()
        graph_ref.create_operand_actors(_start=False)

        yield pool, graph_ref
def testIndexTensorExecute(self, *_):
    """Run index get/set tensors on a ``new_cluster`` cluster through several sessions."""
    with new_cluster(scheduler_n_process=2, worker_n_process=2,
                     shared_memory='20M') as cluster:
        session = cluster.session

        # scalar assignment to a region, read back via split and via index
        tensor = mt.random.rand(10, 5)
        region = slice(0, 5), slice(0, 5)
        tensor[region] = 2
        halves = mt.split(tensor, 2)
        got1, got2 = session.run(halves[0], tensor[region],
                                 timeout=_exec_timeout)
        np.testing.assert_array_equal(got1, got2)
        np.testing.assert_array_equal(got1, np.ones((5, 5)) * 2)

        # tensor assignment to a region
        with new_session(cluster.endpoint) as session2:
            tensor = mt.random.rand(10, 5)
            region = slice(0, 5), slice(0, 5)
            tensor[region] = mt.ones((5, 5)) * 2
            got = session2.run(tensor[region], timeout=_exec_timeout)
            np.testing.assert_array_equal(got, np.ones((5, 5)) * 2)

        # two slices fetched together with their source tensor
        with new_session(cluster.endpoint) as session3:
            tensor = mt.random.rand(100, 5)
            head = tensor[:10]
            tail = tensor[10:20]
            got1, got2, expected = session3.run(head, tail, tensor,
                                                timeout=_exec_timeout)
            np.testing.assert_array_equal(got1, expected[:10])
            np.testing.assert_array_equal(got2, expected[10:20])

        # two consecutive slice assignments on the same tensor
        with new_session(cluster.endpoint) as session4:
            tensor = mt.random.rand(100, 5)
            tensor[:10] = mt.ones((10, 5))
            tensor[10:20] = 2
            got = session4.run(tensor, timeout=_exec_timeout)
            np.testing.assert_array_equal(got[:10], np.ones((10, 5)))
            np.testing.assert_array_equal(got[10:20], np.ones((10, 5)) * 2)

        # indexing with tensors as indices
        with new_session(cluster.endpoint) as session5:
            raw = np.random.rand(10, 10)
            tensor = mt.tensor(raw, chunk_size=(5, 4))
            picked = tensor[tensor.argmin(axis=1), mt.tensor(np.arange(10))]
            got = session5.run(picked, timeout=_exec_timeout, compose=False)
            np.testing.assert_array_equal(
                got, raw[raw.argmin(axis=1), np.arange(10)])
def test_multiple_output_execute(setup):
    """Execute multi-output operands, fetching all outputs or only some of them."""
    data = np.random.random((5, 9))

    # test multiple outputs
    t1 = mt.tensor(data.copy(), chunk_size=3)
    actual = mt.modf(t1).execute().fetch()
    wanted = np.modf(data)
    for idx in (0, 1):
        np.testing.assert_array_equal(actual[idx], wanted[idx])

    # test 1 output
    t2 = mt.tensor(data.copy(), chunk_size=3)
    actual = ((t2 + 1) * 2).to_numpy()
    wanted = (data + 1) * 2
    np.testing.assert_array_equal(actual, wanted)

    # test multiple outputs, but only execute 1 (split)
    t3 = mt.tensor(data.copy(), chunk_size=3)
    outputs = mt.split(t3, 3, axis=1)
    actual = outputs[0].to_numpy()
    wanted = np.split(data, 3, axis=1)[0]
    np.testing.assert_array_equal(actual, wanted)

    # test multiple outputs, but only execute 1 (qr on a derived tensor);
    # the same output is consumed twice by different follow-up expressions
    data = np.random.randint(0, 10, (5, 5))
    t3 = (mt.tensor(data) + 1) * 2
    outputs = mt.linalg.qr(t3)

    actual = (outputs[0] + 1).to_numpy()
    wanted = np.linalg.qr((data + 1) * 2)[0] + 1
    np.testing.assert_array_almost_equal(actual, wanted)

    actual = (outputs[0] + 2).to_numpy()
    wanted = np.linalg.qr((data + 1) * 2)[0] + 2
    np.testing.assert_array_almost_equal(actual, wanted)

    # shape of a scalar
    shape_result = mt.shape(0)
    actual = shape_result.execute().fetch()
    wanted = np.shape(0)
    assert actual == wanted
def testMultipleOutputExecute(self):
    """With ``eager_mode`` on, fetch every output of multi-output operands."""
    with option_context({'eager_mode': True}):
        data = np.random.random((5, 9))

        # modf: both outputs are fetched and compared
        t1 = mt.tensor(data.copy(), chunk_size=3)
        actual = mt.modf(t1)
        wanted = np.modf(data)
        for idx in (0, 1):
            np.testing.assert_array_equal(actual[idx].fetch(), wanted[idx])

        # split: all three pieces are fetched and compared
        t3 = mt.tensor(data.copy(), chunk_size=3)
        piece1, piece2, piece3 = mt.split(t3, 3, axis=1)
        wanted = np.split(data, 3, axis=1)
        np.testing.assert_array_equal(piece1.fetch(), wanted[0])
        np.testing.assert_array_equal(piece2.fetch(), wanted[1])
        np.testing.assert_array_equal(piece3.fetch(), wanted[2])
def testIndexTensorExecute(self):
    """Run index get/set tensors on a ``new_cluster`` cluster through several sessions."""
    with new_cluster(scheduler_n_process=2, worker_n_process=2,
                     shared_memory='20M') as cluster:
        session = cluster.session

        # scalar assignment to a region, read back via split and via index
        tensor = mt.random.rand(10, 5)
        region = slice(0, 5), slice(0, 5)
        tensor[region] = 2
        halves = mt.split(tensor, 2)
        got1, got2 = session.run(halves[0], tensor[region])
        np.testing.assert_array_equal(got1, got2)
        np.testing.assert_array_equal(got1, np.ones((5, 5)) * 2)

        # tensor assignment to a region
        with new_session(cluster.endpoint) as session2:
            tensor = mt.random.rand(10, 5)
            region = slice(0, 5), slice(0, 5)
            tensor[region] = mt.ones((5, 5)) * 2
            got = session2.run(tensor[region])
            np.testing.assert_array_equal(got, np.ones((5, 5)) * 2)

        # two slices fetched together with their source tensor
        with new_session(cluster.endpoint) as session3:
            tensor = mt.random.rand(100, 5)
            head = tensor[:10]
            tail = tensor[10:20]
            got1, got2, expected = session3.run(head, tail, tensor)
            np.testing.assert_array_equal(got1, expected[:10])
            np.testing.assert_array_equal(got2, expected[10:20])

        # two consecutive slice assignments on the same tensor
        with new_session(cluster.endpoint) as session4:
            tensor = mt.random.rand(100, 5)
            tensor[:10] = mt.ones((10, 5))
            tensor[10:20] = 2
            got = session4.run(tensor)
            np.testing.assert_array_equal(got[:10], np.ones((10, 5)))
            np.testing.assert_array_equal(got[10:20], np.ones((10, 5)) * 2)
def testDepths(self):
    """Check the depths ``GraphAnalyzer.calc_depths`` assigns in a ones/split/add graph."""
    from mars.tensor.arithmetic import TensorAdd
    from mars.tensor.base import TensorSplit
    from mars.tensor.datasource import TensorOnes

    source = mt.ones(12, chunk_size=4)
    pieces = mt.split(source, 2)
    summed = pieces[0] + pieces[1]

    graph = summed.build_graph(fuse_enabled=False, tiled=True)
    analyzer = GraphAnalyzer(graph, {})
    depths = analyzer.calc_depths()

    for node in graph:
        op = node.op
        if isinstance(op, TensorOnes):
            # data sources are expected at depth 0
            self.assertEqual(0, depths[op.key])
        elif isinstance(op, TensorSplit):
            # splits are expected at depth 1
            self.assertEqual(1, depths[op.key])
        elif isinstance(op, TensorAdd):
            # additions are expected at depth 2 or deeper
            self.assertLessEqual(2, depths[op.key])
def testIndex(self):
    """With ``eager_mode`` on, check slice assignment, split and bool indexing."""
    with option_context({'eager_mode': True}):
        # slice assignment, then read back directly and through split
        tensor = mt.random.rand(10, 5, chunk_size=5)
        region = slice(0, 5), slice(0, 5)
        tensor[region] = 1
        np.testing.assert_array_equal(tensor.fetch()[region], np.ones((5, 5)))

        upper, lower = mt.split(tensor, 2)
        np.testing.assert_array_equal(upper.fetch(), np.ones((5, 5)))

        # test bool indexing
        tensor = mt.random.rand(8, 8, chunk_size=4)
        set_value = mt.ones((2, 2)) * 2
        tensor[4:6, 4:6] = set_value

        selected = tensor[tensor > 1]
        self.assertEqual(selected.shape, (4, ))
        np.testing.assert_array_equal(selected.fetch(), np.ones((4, )) * 2)

        reshaped = selected.reshape((2, 2))
        self.assertEqual(reshaped.shape, (2, 2))
        np.testing.assert_array_equal(reshaped.fetch(), np.ones((2, 2)) * 2)
def testIndexTensorExecute(self):
    """Run index get/set tensors on a ``new_cluster`` cluster through two sessions."""
    with new_cluster(scheduler_n_process=2, worker_n_process=2) as cluster:
        session = cluster.session

        # scalar assignment to a region, read back via split and via index
        tensor = mt.random.rand(10, 5)
        region = slice(0, 5), slice(0, 5)
        tensor[region] = 2
        halves = mt.split(tensor, 2)
        got1, got2 = session.run(halves[0], tensor[region])
        np.testing.assert_array_equal(got1, got2)
        np.testing.assert_array_equal(got1, np.ones((5, 5)) * 2)

        # tensor assignment to a region, run in a second session
        with new_session(cluster.endpoint) as session2:
            tensor = mt.random.rand(10, 5)
            region = slice(0, 5), slice(0, 5)
            tensor[region] = mt.ones((5, 5)) * 2
            got = session2.run(tensor[region])
            np.testing.assert_array_equal(got, np.ones((5, 5)) * 2)
def testSplitPreparation(self, *_):
    """Prepare a graph that sums the two halves of a split tensor."""
    source = mt.ones(12, chunk_size=4)
    pieces = mt.split(source, 2)
    summed = pieces[0] + pieces[1]

    # entering and leaving the context is the whole test; nothing is asserted
    with self.prepare_graph_in_pool(summed, clean_io_meta=False):
        pass
def testGraphWithSplit(self):
    """Drive a GraphActor over a split graph and check the operand info it reports."""
    session_id = str(uuid.uuid4())
    graph_key = str(uuid.uuid4())

    source = mt.ones(12, chunk_size=4)
    pieces = mt.split(source, 2)
    summed = pieces[0] + pieces[1]

    graph = summed.build_graph(compose=False)
    serialized_graph = serialize_graph(graph)
    chunked_graph = summed.build_graph(compose=False, tiled=True)

    addr = '127.0.0.1:%d' % get_next_port()
    with create_actor_pool(n_process=1, backend='gevent', address=addr) as pool:
        pool.create_actor(
            ClusterInfoActor,
            [pool.cluster_info.address],
            uid=ClusterInfoActor.default_name(),
        )
        resource_ref = pool.create_actor(
            ResourceActor, uid=ResourceActor.default_name())
        pool.create_actor(ChunkMetaActor, uid=ChunkMetaActor.default_name())
        pool.create_actor(AssignerActor, uid=AssignerActor.gen_name(session_id))
        graph_ref = pool.create_actor(
            GraphActor,
            session_id,
            graph_key,
            serialized_graph,
            uid=GraphActor.gen_name(session_id, graph_key),
        )
        graph_ref.prepare_graph(compose=False)

        # the chunk graph held by the actor has as many nodes as a local tiling
        fetched_graph = graph_ref.get_chunk_graph()
        self.assertIsNotNone(fetched_graph)
        self.assertEqual(len(chunked_graph), len(fetched_graph))

        # after scanning, every node carries the three optimize fields
        graph_ref.scan_node()
        op_infos = graph_ref.get_operand_info()
        for node in fetched_graph:
            optimize_info = op_infos[node.op.key]['optimize']
            for field in ('depth', 'successor_size', 'descendant_size'):
                self.assertIsNotNone(optimize_info[field])

        for worker in ('localhost:12345', 'localhost:23456'):
            resource_ref.set_worker_meta(
                worker, dict(hardware=dict(cpu_total=4)))

        # nodes without predecessors must have been given a target worker
        graph_ref.place_initial_chunks()
        op_infos = graph_ref.get_operand_info()
        for node in fetched_graph:
            if fetched_graph.count_predecessors(node) == 0:
                target_worker = op_infos[node.op.key]['target_worker']
                self.assertIsNotNone(target_worker)

        # io_meta of each operand must match what the graph itself says
        graph_ref.create_operand_actors(_clean_io_meta=False)
        op_infos = graph_ref.get_operand_info()
        for node in fetched_graph:
            info = op_infos[node.op.key]
            self.assertEqual(info['op_name'], type(node.op).__name__)

            io_meta = info['io_meta']
            # expectations are all computed before the first comparison
            expected_predecessors = {
                pn.op.key for pn in fetched_graph.iter_predecessors(node)}
            expected_successors = {
                sn.op.key for sn in fetched_graph.iter_successors(node)}
            expected_input_chunks = {
                pn.key for pn in fetched_graph.iter_predecessors(node)}
            expected_chunks = {c.key for c in node.op.outputs}

            self.assertSetEqual(set(io_meta['predecessors']),
                                expected_predecessors)
            self.assertSetEqual(set(io_meta['successors']),
                                expected_successors)
            self.assertSetEqual(set(io_meta['input_chunks']),
                                expected_input_chunks)
            self.assertSetEqual(set(io_meta['chunks']), expected_chunks)

            self.assertEqual(info['output_size'],
                             sum(ch.nbytes for ch in node.op.outputs))
def testSessionExecute(self):
    """Execute reductions, multi-output tensors and repeated runs via ``execute()``."""
    rand_tensor = mt.random.rand(10, 20)
    total = rand_tensor.sum().execute()
    self.assertTrue(np.isscalar(total))
    self.assertLess(total, 200)

    data = np.random.random((5, 9))

    # test multiple outputs
    t1 = mt.tensor(data.copy(), chunk_size=3)
    actual = mt.modf(t1).execute()
    wanted = np.modf(data)
    for idx in (0, 1):
        np.testing.assert_array_equal(actual[idx], wanted[idx])

    # test 1 output
    t2 = mt.tensor(data.copy(), chunk_size=3)
    actual = ((t2 + 1) * 2).execute()
    wanted = (data + 1) * 2
    np.testing.assert_array_equal(actual, wanted)

    # test multiple outputs, but only execute 1
    t3 = mt.tensor(data.copy(), chunk_size=3)
    outputs = mt.split(t3, 3, axis=1)
    actual = outputs[0].execute()
    wanted = np.split(data, 3, axis=1)[0]
    np.testing.assert_array_equal(actual, wanted)

    # test run the same tensor
    t4 = mt.tensor(data.copy(), chunk_size=3) + 1
    first_run = t4.execute()
    wanted = data + 1
    np.testing.assert_array_equal(first_run, wanted)
    second_run = t4.execute()
    np.testing.assert_array_equal(first_run, second_run)

    # test run the same tensor with single chunk
    t4 = mt.tensor(data.copy())
    first_run = t4.execute()
    wanted = data
    np.testing.assert_array_equal(first_run, wanted)
    second_run = t4.execute()
    np.testing.assert_array_equal(first_run, second_run)

    # test run same key tensor: the first tensor is deleted before an
    # identically-built one is executed
    t5 = mt.ones((10, 10), chunk_size=3)
    first_run = t5.execute()
    del t5
    t6 = mt.ones((10, 10), chunk_size=3)
    second_run = t6.execute()
    np.testing.assert_array_equal(first_run, second_run)
def testGraphWithSplit(self):
    """Run the expression suite on the sum of the two halves of a split tensor."""
    source = mt.ones(12, chunk_size=4)
    pieces = mt.split(source, 2)
    summed = pieces[0] + pieces[1]
    self.run_expr_suite(summed)