def testInitialAssignsWithInputs(self):
    """Check which input chunks the analyzer reports as external.

    ``n2`` is referenced through ``n3.op._inputs`` but is never added to
    the graph, so it is the only chunk expected for ``n3`` in the
    non-initial result; the initial result is expected to be empty.
    """
    import numpy as np
    from mars.tensor.random import TensorRandint
    from mars.tensor.arithmetic import TensorTreeAdd

    chunk_shape = (10, 10)

    def random_chunk(seed):
        op = TensorRandint(state=np.random.RandomState(seed),
                           dtype=np.float32())
        return op.new_chunk(None, shape=chunk_shape)

    def add_chunk(op_inputs):
        chunk = TensorTreeAdd(dtype=np.float32()).new_chunk(
            None, shape=chunk_shape)
        chunk.op._inputs = op_inputs
        return chunk

    n1 = random_chunk(0)
    n2 = random_chunk(1)
    n3 = add_chunk([n1, n2])
    n4 = add_chunk([n3])

    graph = DAG()
    # n2 is not added to the graph; only n1 -> n3 -> n4 is wired up.
    for node in (n1, n3, n4):
        graph.add_node(node)
    for source, target in ((n1, n3), (n3, n4)):
        graph.add_edge(source, target)

    analyzer = GraphAnalyzer(graph, {})

    external = analyzer.collect_external_input_chunks(initial=False)
    self.assertListEqual(external[n3.op.key], [n2.key])

    initial_external = analyzer.collect_external_input_chunks(initial=True)
    self.assertEqual(len(initial_external), 0)
def _build_chunk_dag(node_str, edge_str):
    """Build a chunk DAG from a textual description of nodes and edges.

    Args:
        node_str: comma-separated node names, e.g. ``'a, b, c'``. Each name
            is stripped and used as the ``_key`` of the operand created
            for it.
        edge_str: comma-separated ``src -> dst`` pairs, e.g.
            ``'a -> c, b -> c'``. Blank entries (an empty string or a
            trailing comma) are ignored.

    Returns:
        A ``(chunk_dag, str_to_chunk)`` tuple: the DAG of chunk data
        objects, and a dict mapping each node name to its chunk data.
    """
    from mars.tensor.random import TensorRandint
    from mars.tensor.arithmetic import TensorTreeAdd

    char_dag = DAG()
    for name in node_str.split(','):
        char_dag.add_node(name.strip())
    for edge in edge_str.split(','):
        if not edge.strip():
            # ''.split(',') yields [''], which cannot be unpacked into a
            # (src, dst) pair; an edge-less graph is a legitimate input.
            continue
        src, dst = edge.split('->')
        char_dag.add_edge(src.strip(), dst.strip())

    chunk_dag = DAG()
    str_to_chunk = {}
    # Topological order guarantees predecessors are already in str_to_chunk.
    for name in char_dag.topological_iter():
        if char_dag.count_predecessors(name):
            # Nodes with predecessors become tree-add chunks over them.
            c = TensorTreeAdd(args=[], _key=name,
                              dtype=np.dtype(np.float32())) \
                .new_chunk(None, shape=(10, 10)).data
            inputs = c.op._inputs = [
                str_to_chunk[pred] for pred in char_dag.predecessors(name)
            ]
        else:
            # Nodes without predecessors become random-int source chunks.
            c = TensorRandint(_key=name, dtype=np.dtype(np.float32())) \
                .new_chunk(None, shape=(10, 10)).data
            inputs = []
        str_to_chunk[name] = c
        chunk_dag.add_node(c)
        for inp in inputs:
            chunk_dag.add_edge(inp, c)
    return chunk_dag, str_to_chunk
async def test_assign_gpu_tasks(actor_pool):
    """Assign a subtask whose add operand is flagged ``gpu=True``.

    Both fetch inputs have their meta registered on
    ``('address0', 'numa-0')``; the band returned by the assigner is
    expected to have a name starting with ``'gpu'``.
    """
    _pool, session_id, assigner_ref, _cluster_api, meta_api = actor_pool

    fetch_chunks = [
        TensorFetch(key=name, source_key=name,
                    dtype=np.dtype(int)).new_chunk([])
        for name in ('a', 'b')
    ]
    add_op = TensorTreeAdd(args=list(fetch_chunks), gpu=True)
    result_chunk = add_op.new_chunk(list(fetch_chunks))

    chunk_graph = ChunkGraph([result_chunk])
    for node in (*fetch_chunks, result_chunk):
        chunk_graph.add_node(node)
    for source in fetch_chunks:
        chunk_graph.add_edge(source, result_chunk)

    for source in fetch_chunks:
        await meta_api.set_chunk_meta(
            source, memory_size=200, store_size=200,
            bands=[('address0', 'numa-0')])

    subtask = Subtask('test_task', session_id, chunk_graph=chunk_graph)
    assigned = await assigner_ref.assign_subtasks([subtask])
    [band] = assigned
    assert band[1].startswith('gpu')
def testSameKeyAssign(self):
    """Assign initial operands when both input chunks of a group share a key.

    Six groups are built; in each, two ``TensorRandint`` chunks whose ops
    carry the same ``_key`` feed one ``TensorTreeAdd`` chunk. Exactly six
    assignments are expected.
    """
    import numpy as np
    from mars.tensor.random import TensorRandint
    from mars.tensor.arithmetic import TensorTreeAdd

    graph = DAG()

    # Proper initial allocation should divide the graph like
    #
    #   U   U  |  U   U  |  U   U
    #   |   |  |  |   |  |  |   |
    #   U   U  |  U   U  |  U   U
    group_count = 6
    shape = (10, 10)

    inputs = []
    for i in range(group_count):
        pair = []
        for _ in range(2):
            rand_op = TensorRandint(_key=str(i), dtype=np.float32())
            pair.append(rand_op.new_chunk(None, shape=shape))
        inputs.append(tuple(pair))

    results = []
    for _ in range(group_count):
        add_op = TensorTreeAdd(dtype=np.float32())
        results.append(add_op.new_chunk(None, shape=shape))

    for sources, target in zip(inputs, results):
        target.op._inputs = list(sources)
        graph.add_node(target)
        for source in sources:
            graph.add_node(source)
            graph.add_edge(source, target)

    worker_slots = {'w1': 24, 'w2': 24, 'w3': 24}
    analyzer = GraphAnalyzer(graph, worker_slots)
    initial_keys = analyzer.get_initial_operand_keys()
    assignments = analyzer.calc_operand_assignments(initial_keys)
    self.assertEqual(len(assignments), 6)
async def test_execute_tensor(actor_pool):
    """Run an add subtask over two stored arrays and verify its outputs.

    Two random 10x10 arrays are put into storage under the keys of two
    fetch chunks, a tree-add chunk over them is executed on band
    ``'numa-0'``, and the test then checks the stored result, the quota
    request recorded for the subtask, and the result chunk's meta.
    """
    pool, session_id, meta_api, storage_api, execution_ref = actor_pool

    data1 = np.random.rand(10, 10)
    data2 = np.random.rand(10, 10)

    # NOTE(review): input1 is declared with source_key='input2'; this looks
    # like a copy/paste slip but is left untouched -- confirm intent.
    input1 = TensorFetch(key='input1', source_key='input2',
                         dtype=np.dtype(int)).new_chunk([])
    input2 = TensorFetch(key='input2', source_key='input2',
                         dtype=np.dtype(int)).new_chunk([])
    result_chunk = TensorTreeAdd(args=[input1, input2]) \
        .new_chunk([input1, input2], shape=data1.shape, dtype=data1.dtype)

    await meta_api.set_chunk_meta(
        input1, memory_size=data1.nbytes, store_size=data1.nbytes,
        bands=[(pool.external_address, 'numa-0')])
    # Both sizes of input2 are taken from data2 (memory_size previously
    # used data1.nbytes, which only matched because the shapes are equal).
    await meta_api.set_chunk_meta(
        input2, memory_size=data2.nbytes, store_size=data2.nbytes,
        bands=[(pool.external_address, 'numa-0')])
    # todo use different storage level when storage ready
    await storage_api.put(input1.key, data1)
    await storage_api.put(input2.key, data2)

    chunk_graph = ChunkGraph([result_chunk])
    chunk_graph.add_node(input1)
    chunk_graph.add_node(input2)
    chunk_graph.add_node(result_chunk)
    chunk_graph.add_edge(input1, result_chunk)
    chunk_graph.add_edge(input2, result_chunk)

    subtask = Subtask('test_task', session_id=session_id,
                      chunk_graph=chunk_graph)
    await execution_ref.run_subtask(subtask, 'numa-0', pool.external_address)

    # check if results are correct
    result = await storage_api.get(result_chunk.key)
    np.testing.assert_array_equal(data1 + data2, result)

    # check if quota computations are correct
    quota_ref = await mo.actor_ref(QuotaActor.gen_uid('numa-0'),
                                   address=pool.external_address)
    [quota] = await quota_ref.get_batch_quota_reqs()
    assert quota[(subtask.subtask_id, subtask.subtask_id)] == data1.nbytes

    # check if metas are correct
    result_meta = await meta_api.get_chunk_meta(result_chunk.key)
    assert result_meta['object_id'] == result_chunk.key
    assert result_meta['shape'] == result.shape
def testMockExecuteSize(self):
    """Check the sizes and peak memory reported by mock execution.

    Covers a single add over two fetched int64 100x100 chunks, the same
    graph extended by a chain of three unary adds, and a small tensor
    expression executed through ``execute_tensor``.
    """
    import mars.tensor as mt
    from mars.core.graph import DAG
    from mars.tensor.fetch import TensorFetch
    from mars.tensor.arithmetic import TensorTreeAdd

    shape = (100, 100)

    def int64():
        return np.dtype('int64')

    def mock_run(graph, target):
        # A fresh executor per run, as mock_max_memory is read from it.
        runner = Executor()
        sizes = runner.execute_graph(
            graph, [target.key], compose=False, mock=True)[0]
        return runner, sizes

    graph_add = DAG()
    input_chunks = [
        TensorFetch(dtype=int64()).new_chunk(None, shape=shape).data
        for _ in range(2)
    ]

    add_op = TensorTreeAdd(args=input_chunks, dtype=int64())
    add_chunk = add_op.new_chunk(
        input_chunks, shape=shape, dtype=int64()).data
    graph_add.add_node(add_chunk)
    for source in input_chunks:
        graph_add.add_node(source)
        graph_add.add_edge(source, add_chunk)

    executor, res = mock_run(graph_add, add_chunk)
    self.assertEqual(res, (80000, 80000))
    self.assertEqual(executor.mock_max_memory, 80000)

    # Append three single-input adds, each consuming the previous tail.
    tail = add_chunk
    for _ in range(3):
        next_op = TensorTreeAdd(args=[tail], dtype=int64())
        next_chunk = next_op.new_chunk(
            [tail], shape=shape, dtype=int64()).data
        graph_add.add_node(next_chunk)
        graph_add.add_edge(tail, next_chunk)
        tail = next_chunk

    executor, res = mock_run(graph_add, tail)
    self.assertEqual(res, (80000, 80000))
    self.assertEqual(executor.mock_max_memory, 160000)

    a = mt.random.rand(10, 10, chunk_size=10)
    b = a[:, mt.newaxis, :] - a
    r = mt.triu(mt.sqrt(b**2).sum(axis=2))

    executor = Executor()
    res = executor.execute_tensor(r, concat=False, mock=True)
    # larger than maximal memory size in calc procedure
    first_size = res[0][0]
    self.assertGreaterEqual(first_size, 800)
    self.assertGreaterEqual(executor.mock_max_memory, 8000)
def _build_test_graph(data_list):
    """Build a DAG that tree-adds one fetch chunk per array in ``data_list``.

    Each fetch chunk is keyed ``chunk-<index>`` and takes its dtype and
    shape from the corresponding array; the add chunk takes them from
    the first array.

    Returns:
        ``(exec_graph, inputs, add_chunk)``.
    """
    from mars.tensor.fetch import TensorFetch
    from mars.tensor.arithmetic import TensorTreeAdd

    def _fetch_chunk(position, array):
        key = 'chunk-%d' % position
        fetch_op = TensorFetch(to_fetch_key=key, dtype=array.dtype)
        return fetch_op.new_chunk([], shape=array.shape, _key=key)

    inputs = [_fetch_chunk(pos, arr) for pos, arr in enumerate(data_list)]

    head = data_list[0]
    add_chunk = TensorTreeAdd(head.dtype).new_chunk(inputs, shape=head.shape)

    exec_graph = DAG()
    exec_graph.add_node(add_chunk)
    for fetch_chunk in inputs:
        exec_graph.add_node(fetch_chunk)
        exec_graph.add_edge(fetch_chunk, add_chunk)
    return exec_graph, inputs, add_chunk
def _build_test_graph(data_list):
    """Build a ``ChunkGraph`` that tree-adds one fetch chunk per array.

    Each fetch chunk key combines a random integer in ``[0, 999]`` with
    the array's index. The graph holds the ``.data`` of every chunk and
    uses the add chunk's data as its result.

    Returns:
        ``(exec_graph, inputs, add_chunk)`` where ``inputs`` and
        ``add_chunk`` are the chunk objects, not their ``.data``.
    """
    from mars.tensor.fetch import TensorFetch
    from mars.tensor.arithmetic import TensorTreeAdd

    def _fetch_chunk(idx, array):
        chunk_key = f'chunk-{random.randint(0, 999)}-{idx}'
        fetch_op = TensorFetch(to_fetch_key=chunk_key, dtype=array.dtype)
        return fetch_op.new_chunk([], shape=array.shape, _key=chunk_key)

    inputs = [_fetch_chunk(idx, arr) for idx, arr in enumerate(data_list)]

    head = data_list[0]
    add_op = TensorTreeAdd(args=inputs, dtype=head.dtype)
    add_chunk = add_op.new_chunk(inputs, shape=head.shape)

    add_data = add_chunk.data
    exec_graph = ChunkGraph([add_data])
    exec_graph.add_node(add_data)
    for fetch_chunk in inputs:
        fetch_data = fetch_chunk.data
        exec_graph.add_node(fetch_data)
        exec_graph.add_edge(fetch_data, add_data)
    return exec_graph, inputs, add_chunk
async def test_assigner(actor_pool):
    """Assign a three-input add subtask and check the chosen band.

    The inputs' metas are registered with sizes 200/400/400 on
    ``address0``/``address1``/``address2``; the assigned band is expected
    to be one of the two holding a 400-sized input.
    """
    _pool, session_id, assigner_ref, meta_api = actor_pool

    # (chunk key, registered size, address holding the chunk)
    layout = (
        ('a', 200, 'address0'),
        ('b', 400, 'address1'),
        ('c', 400, 'address2'),
    )

    fetch_chunks = [
        TensorFetch(key=key, source_key=key,
                    dtype=np.dtype(int)).new_chunk([])
        for key, _size, _address in layout
    ]
    add_op = TensorTreeAdd(args=list(fetch_chunks))
    result_chunk = add_op.new_chunk(list(fetch_chunks))

    chunk_graph = ChunkGraph([result_chunk])
    for node in (*fetch_chunks, result_chunk):
        chunk_graph.add_node(node)
    for source in fetch_chunks:
        chunk_graph.add_edge(source, result_chunk)

    for source, (_key, size, address) in zip(fetch_chunks, layout):
        await meta_api.set_chunk_meta(
            source, memory_size=size, store_size=size,
            bands=[(address, 'numa-0')])

    subtask = Subtask('test_task', session_id, chunk_graph=chunk_graph)
    assigned = await assigner_ref.assign_subtasks([subtask])
    [result] = assigned
    assert result in (('address1', 'numa-0'), ('address2', 'numa-0'))
def testCompose(self):
    """Test compose both on a plain graph and through the optimizer.

    The first two scenarios call ``DirectedGraph.compose`` directly; the
    remaining three go through ``NeOptimizer.compose`` with a slice chunk
    in the chain.
    """
    # graph(@: node, #: composed_node):
    #
    #   @ --> @ --> @   ========>   #
    chunks = [
        TensorTreeAdd(_key=str(n)).new_chunk(None, None) for n in range(3)
    ]
    graph = DirectedGraph()
    for chunk in chunks[:3]:
        graph.add_node(chunk)
    graph.add_edge(chunks[0], chunks[1])
    graph.add_edge(chunks[1], chunks[2])

    composed_nodes = graph.compose()
    self.assertEqual(composed_nodes[0].composed, chunks[:3])

    # make the middle one as result chunk, thus the graph cannot be composed
    composed_nodes = graph.compose(keys=[chunks[1].key])
    self.assertEqual(len(composed_nodes), 0)

    # graph(@: node, #: composed_node):
    #
    #   @             @              @       @
    #     \         /                  \   /
    #       @ --> @       ========>      #
    #     /         \                  /   \
    #   @             @              @       @
    chunks = [
        TensorTreeAdd(_key=str(n)).new_chunk(None, None) for n in range(6)
    ]
    graph = DirectedGraph()
    for chunk in chunks[:6]:
        graph.add_node(chunk)

    chunks[2].op._inputs = [chunks[0], chunks[1]]
    chunks[3].op._inputs = [chunks[2]]
    chunks[4].op._inputs = [chunks[3]]
    chunks[5].op._inputs = [chunks[3]]

    graph.add_edge(chunks[0], chunks[2])
    graph.add_edge(chunks[1], chunks[2])
    graph.add_edge(chunks[2], chunks[3])
    graph.add_edge(chunks[3], chunks[4])
    graph.add_edge(chunks[3], chunks[5])

    composed_nodes = graph.compose()
    composed = composed_nodes[0]
    self.assertEqual(composed.composed, chunks[2:4])

    # to make sure the predecessors and successors of compose are right
    # 0 and 1's successors must be composed
    self.assertIn(composed, graph.successors(chunks[0]))
    self.assertIn(composed, graph.successors(chunks[1]))
    # check composed's inputs
    composed_input_keys = [n.key for n in composed.inputs]
    self.assertIn(chunks[0].key, composed_input_keys)
    self.assertIn(chunks[1].key, composed_input_keys)
    # check composed's predecessors
    self.assertIn(chunks[0], graph.predecessors(composed))
    self.assertIn(chunks[1], graph.predecessors(composed))
    # check 4 and 5's inputs: every successor of the composed node must
    # list it as an input (the old code inspected successor 0 twice and
    # never looked at the second one)
    successors = graph.successors(composed)
    self.assertEqual(len(successors), 2)
    for succ in successors:
        self.assertIn(composed.key, [n.key for n in succ.inputs])
    # check 4 and 5's predecessors
    self.assertIn(composed, graph.predecessors(chunks[4]))
    self.assertIn(composed, graph.predecessors(chunks[5]))

    # test optimizer compose
    #
    # graph(@: node, S: Slice Chunk, #: composed_node):
    #
    #   @                   @            @             @
    #     \               /                \         /
    #       @ --> @ --> S     ========>      # --> S
    #     /               \                /         \
    #   @                   @            @             @
    #
    # compose stopped at S, because numexpr don't support Slice op
    chunks = [
        TensorTreeAdd(_key=str(n)).new_chunk(None, None) for n in range(6)
    ]
    chunk_slice = TensorSlice().new_chunk([None], None)
    graph = DirectedGraph()
    for chunk in chunks[:6]:
        graph.add_node(chunk)
    graph.add_node(chunk_slice)
    graph.add_edge(chunks[0], chunks[2])
    graph.add_edge(chunks[1], chunks[2])
    graph.add_edge(chunks[2], chunks[3])
    graph.add_edge(chunks[3], chunk_slice)
    graph.add_edge(chunk_slice, chunks[4])
    graph.add_edge(chunk_slice, chunks[5])
    optimizer = NeOptimizer(graph)
    composed_nodes = optimizer.compose()
    self.assertEqual(composed_nodes[0].composed, chunks[2:4])

    # graph(@: node, S: Slice Chunk, #: composed_node):
    #
    #   @ --> @ --> S --> @   ========>   # --> S --> @
    #
    # compose stopped at S, because numexpr don't support Slice op
    chunks = [
        TensorTreeAdd(_key=str(n)).new_chunk(None, None) for n in range(4)
    ]
    graph = DirectedGraph()
    # Only the first three chunks take part in this graph; chunk_slice
    # is the same slice chunk object created for the previous scenario.
    for chunk in chunks[:3]:
        graph.add_node(chunk)
    graph.add_node(chunk_slice)
    graph.add_edge(chunks[0], chunks[1])
    graph.add_edge(chunks[1], chunk_slice)
    graph.add_edge(chunk_slice, chunks[2])
    optimizer = NeOptimizer(graph)
    composed_nodes = optimizer.compose()
    self.assertEqual(composed_nodes[0].composed, chunks[:2])
    self.assertEqual(len(composed_nodes), 1)

    # graph(@: node, S: Slice Chunk, #: composed_node):
    #
    #   @ --> @ --> S --> @ --> @   ========>   # --> S --> #
    #
    # compose stopped at S, because numexpr don't support Slice op
    chunks = [
        TensorTreeAdd(_key=str(n)).new_chunk(None, None) for n in range(4)
    ]
    graph = DirectedGraph()
    for chunk in chunks[:4]:
        graph.add_node(chunk)
    graph.add_node(chunk_slice)
    graph.add_edge(chunks[0], chunks[1])
    graph.add_edge(chunks[1], chunk_slice)
    graph.add_edge(chunk_slice, chunks[2])
    graph.add_edge(chunks[2], chunks[3])
    optimizer = NeOptimizer(graph)
    composed_nodes = optimizer.compose()
    self.assertEqual(composed_nodes[0].composed, chunks[:2])
    self.assertEqual(composed_nodes[1].composed, chunks[2:4])
def testAssignOnWorkerLost(self):
    """Re-analyze states and assignments after chunks are reported lost.

    Six groups of two inputs fully connected to two results are built.
    Groups 0-3 are pre-assigned alternately to ``w1``/``w2``, the chunks
    of input groups 0 and 2 are passed as lost, and only ``w2`` and
    ``w3`` remain in the worker metrics.
    """
    import numpy as np
    from mars.scheduler import OperandState
    from mars.tensor.random import TensorRandint
    from mars.tensor.arithmetic import TensorTreeAdd

    graph = DAG()

    # Proper initial allocation should divide the graph like
    #
    #   FL  FL   F   F   R   R  |  FL  FL   F   F   R   R
    #   | x |    | x |   | x |  |  | x |    | x |   | x |
    #   R   R    R   R   U   U  |  R   R    R   R   U   U
    #
    # U: UNSCHEDULED  F: FINISHED  R: READY  L: LOST
    shape = (10, 10)
    op_states = {}

    inputs = []
    for _ in range(6):
        inputs.append(tuple(
            TensorRandint(dtype=np.float32()).new_chunk(None, shape=shape)
            for _ in range(2)
        ))

    results = []
    for i in range(6):
        results.append(tuple(
            TensorTreeAdd(_key=f'{i}_{j}', dtype=np.float32()).new_chunk(
                None, shape=shape)
            for j in range(2)
        ))

    # Every input of a group feeds every result of the same group.
    for sources, targets in zip(inputs, results):
        for target in targets:
            target.op._inputs = list(sources)
            op_states[target.op.key] = OperandState.UNSCHEDULED
            graph.add_node(target)

        for source in sources:
            op_states[source.op.key] = OperandState.UNSCHEDULED
            graph.add_node(source)
            for target in targets:
                graph.add_edge(source, target)

    fixed_assigns = {}
    for idx in range(4):
        worker = f'w{idx % 2 + 1}'
        for source, target in zip(inputs[idx], results[idx]):
            fixed_assigns[source.op.key] = worker
            op_states[source.op.key] = OperandState.FINISHED
            fixed_assigns[target.op.key] = worker
            op_states[target.op.key] = OperandState.READY

    # Inputs without a fixed assignment are marked ready.
    for sources in inputs:
        for source in sources:
            if source.op.key not in fixed_assigns:
                op_states[source.op.key] = OperandState.READY

    lost_inputs = (inputs[0], inputs[2])
    lost_results = (results[0], results[2])
    lost_chunks = [chunk.key for group in lost_inputs for chunk in group]

    worker_metrics = {'w2': 24, 'w3': 24}
    analyzer = GraphAnalyzer(graph, worker_metrics, fixed_assigns,
                             op_states, lost_chunks)
    changed_states = analyzer.analyze_state_changes()

    self.assertEqual(len(changed_states), 8)
    self.assertTrue(all(
        changed_states[chunk.op.key] == OperandState.READY
        for group in lost_inputs for chunk in group
    ))
    self.assertTrue(all(
        changed_states[chunk.op.key] == OperandState.UNSCHEDULED
        for group in lost_results for chunk in group
    ))

    initial_keys = analyzer.get_initial_operand_keys()
    assignments = analyzer.calc_operand_assignments(initial_keys)
    for sources in inputs:
        if any(source.op.key in fixed_assigns for source in sources):
            continue
        # Both inputs of a free group must land on a single worker.
        workers = {assignments[source.op.key] for source in sources}
        self.assertEqual(1, len(workers))

    worker_assigns = dict.fromkeys(worker_metrics, 0)
    for worker in assignments.values():
        worker_assigns[worker] += 1
    self.assertEqual(2, worker_assigns['w2'])
    self.assertEqual(6, worker_assigns['w3'])
def testAssignWithPreviousData(self):
    """Assign initial operands given where their input data already lives.

    Three groups of two ``TensorRandint`` chunks (op keys ``'0'``..``'5'``)
    each feed one ``TensorTreeAdd`` chunk. Assignments are computed twice
    with different ``input_chunk_metas`` and compared to fixed
    expectations.
    """
    import numpy as np
    from mars.scheduler.chunkmeta import WorkerMeta
    from mars.tensor.random import TensorRandint
    from mars.tensor.arithmetic import TensorTreeAdd

    graph = DAG()

    # Proper initial allocation should divide the graph like
    #
    #   U   U  |  U   U  |  U   U
    #    \ /   |   \ /   |   \ /
    #     U    |    U    |    U
    shape = (10, 10)

    def meta(size, *workers):
        return WorkerMeta(chunk_size=size, workers=workers)

    def assign(chunk_metas):
        # A fresh analyzer per run over the same graph and worker slots.
        analyzer = GraphAnalyzer(graph, {'w1': 24, 'w2': 24, 'w3': 24})
        return analyzer.calc_operand_assignments(
            analyzer.get_initial_operand_keys(),
            input_chunk_metas=chunk_metas)

    def check(assignments, expected):
        self.assertEqual(len(assignments), 6)
        for op_key, worker in expected:
            self.assertEqual(assignments[op_key], worker)

    inputs = []
    for i in range(3):
        inputs.append(tuple(
            TensorRandint(_key=str(i * 2 + j), dtype=np.float32()).new_chunk(
                None, shape=shape)
            for j in range(2)
        ))
    results = []
    for _ in range(3):
        results.append(
            TensorTreeAdd(dtype=np.float32()).new_chunk(None, shape=shape))

    for sources, target in zip(inputs, results):
        target.op._inputs = list(sources)
        graph.add_node(target)
        for source in sources:
            graph.add_node(source)
            graph.add_edge(source, target)

    expected = (
        ('0', 'w1'), ('1', 'w1'),
        ('2', 'w3'), ('3', 'w3'),
        ('4', 'w2'), ('5', 'w2'),
    )

    # assign with partial mismatch
    data_dist = {
        '0': {'c00': meta(5, 'w1'), 'c01': meta(5, 'w2')},
        '1': {'c10': meta(10, 'w1')},
        '2': {'c20': meta(10, 'w3')},
        '3': {'c30': meta(10, 'w3')},
        '4': {'c40': meta(7, 'w3')},
    }
    # explanation of the result:
    # for '1', all data are in w1, hence assigned to w1
    # '0' assigned to w1 according to connectivity
    # '2' and '3' assigned to w3 according to connectivity
    # '4' assigned to w2 because it has fewer data, and the slots of w3
    # are used up
    check(assign(data_dist), expected)

    # assign with full mismatch
    data_dist = {
        '0': {'c00': meta(5, 'w1'), 'c01': meta(5, 'w1', 'w2')},
        '1': {'c10': meta(10, 'w1')},
        '2': {'c20': meta(10, 'w3')},
        '3': {'c30': meta(10, 'w3')},
        '4': {'c40': meta(7, 'w2')},
        '5': {'c50': meta(7, 'w2')},
    }
    check(assign(data_dist), expected)
def testAssignOnWorkerAdd(self):
    """Assign remaining operands when some groups are already pinned.

    Six groups of two inputs fully connected to two results are built.
    Groups 0 and 1 are pinned to ``w1`` and ``w2``; the analyzer is then
    given three workers and the per-worker assignment counts are checked.
    """
    import numpy as np
    from mars.scheduler import OperandState
    from mars.tensor.random import TensorRandint
    from mars.tensor.arithmetic import TensorTreeAdd

    graph = DAG()

    # Proper initial allocation should divide the graph like
    #
    #   F   F   R   R  |  F   F   R   R  |  R   R   R   R
    #   | x |   | x |  |  | x |   | x |  |  | x |   | x |
    #   R   R   U   U  |  R   R   U   U  |  U   U   U   U
    #
    # U: UNSCHEDULED  F: FINISHED  R: READY
    shape = (10, 10)

    inputs = []
    for _ in range(6):
        inputs.append(tuple(
            TensorRandint(dtype=np.float32()).new_chunk(None, shape=shape)
            for _ in range(2)
        ))

    results = []
    for i in range(6):
        results.append(tuple(
            TensorTreeAdd(_key='%d_%d' % (i, j),
                          dtype=np.float32()).new_chunk(None, shape=shape)
            for j in range(2)
        ))

    # Every input of a group feeds every result of the same group.
    for sources, targets in zip(inputs, results):
        for target in targets:
            target.op._inputs = list(sources)
            graph.add_node(target)

        for source in sources:
            graph.add_node(source)
            for target in targets:
                graph.add_edge(source, target)

    # mark initial assigns
    fixed_assigns = {}
    op_states = {}
    for idx in range(2):
        for source, target in zip(inputs[idx], results[idx]):
            fixed_assigns[source.op.key] = 'w%d' % (idx + 1)
            op_states[target.op.key] = OperandState.READY
            fixed_assigns[target.op.key] = 'w%d' % (idx + 1)

    # Inputs without a fixed assignment are marked ready.
    for sources in inputs:
        for source in sources:
            if source.op.key not in fixed_assigns:
                op_states[source.op.key] = OperandState.READY

    worker_metrics = {'w1': 24, 'w2': 24, 'w3': 24}
    analyzer = GraphAnalyzer(graph, worker_metrics, fixed_assigns, op_states)
    initial_keys = analyzer.get_initial_operand_keys()
    assignments = analyzer.calc_operand_assignments(initial_keys)

    for sources in inputs:
        if any(source.op.key in fixed_assigns for source in sources):
            continue
        # Both inputs of a free group must land on a single worker.
        workers = {assignments[source.op.key] for source in sources}
        self.assertEqual(1, len(workers))

    worker_assigns = dict.fromkeys(worker_metrics, 0)
    for worker in assignments.values():
        worker_assigns[worker] += 1
    self.assertEqual(2, worker_assigns['w1'])
    self.assertEqual(2, worker_assigns['w2'])
    self.assertEqual(4, worker_assigns['w3'])
def test_fuse():
    """Test chunk fusion on a linear chain and on a fan-in/fan-out graph."""
    # graph(@: node, #: composed_node):
    #
    #   @ --> @ --> @   ========>   #
    chunks = [
        TensorTreeAdd(args=[], _key=str(n)).new_chunk(None, None).data
        for n in range(3)
    ]
    graph = ChunkGraph([])
    for c in chunks:
        graph.add_node(c)
    graph.add_edge(chunks[0], chunks[1])
    graph.add_edge(chunks[1], chunks[2])

    # Each scenario fuses its own copy with a different result chunk.
    graph2 = graph.copy()
    graph2._result_chunks = [chunks[2]]
    _, fused_nodes = Fusion(graph2).fuse()
    assert fused_nodes[0].composed == chunks[:3]

    # make the middle one the result chunk, thus only the first two
    # chunks are expected to be fused
    graph3 = graph.copy()
    graph3._result_chunks = [chunks[1]]
    _, fused_nodes = Fusion(graph3).fuse()
    assert fused_nodes[0].composed == chunks[:2]

    # graph(@: node, #: composed_node):
    #
    #   @             @              @       @
    #     \         /                  \   /
    #       @ --> @       ========>      #
    #     /         \                  /   \
    #   @             @              @       @
    chunks = [
        TensorTreeAdd(args=[], _key=str(n)).new_chunk(None, None).data
        for n in range(6)
    ]
    graph = ChunkGraph([chunks[4], chunks[5]])
    for c in chunks:
        graph.add_node(c)

    chunks[2].op._inputs = [chunks[0], chunks[1]]
    chunks[3].op._inputs = [chunks[2]]
    chunks[4].op._inputs = [chunks[3]]
    chunks[5].op._inputs = [chunks[3]]

    graph.add_edge(chunks[0], chunks[2])
    graph.add_edge(chunks[1], chunks[2])
    graph.add_edge(chunks[2], chunks[3])
    graph.add_edge(chunks[3], chunks[4])
    graph.add_edge(chunks[3], chunks[5])

    _, fused_nodes = Fusion(graph).fuse()
    fused = fused_nodes[0]
    assert fused.composed == chunks[2:4]

    # to make sure the predecessors and successors of compose are right
    # 0 and 1's successors must be composed
    assert fused in graph.successors(chunks[0])
    assert fused in graph.successors(chunks[1])
    # check composed's inputs
    assert chunks[0] in fused.inputs
    assert chunks[1] in fused.inputs
    # check composed's predecessors
    assert chunks[0] in graph.predecessors(fused)
    assert chunks[1] in graph.predecessors(fused)
    # check 4 and 5's inputs: every successor of the fused node must list
    # it as an input (the old code inspected successor 0 twice and never
    # looked at the second one)
    successors = graph.successors(fused)
    assert len(successors) == 2
    for succ in successors:
        assert fused in succ.inputs
    # check 4 and 5's predecessors
    assert fused in graph.predecessors(chunks[4])
    assert fused in graph.predecessors(chunks[5])
async def test_assign_cpu_tasks(actor_pool):
    """Assign a three-input add subtask under several band constraints.

    ``address1`` and ``address3`` are set to ``STOPPING`` before any
    assignment. The subtask is assigned without and with
    ``expect_bands``, and finally with the result op flagged ``gpu``,
    which is expected to raise ``NoMatchingSlots``.
    """
    _pool, session_id, assigner_ref, cluster_api, meta_api = actor_pool

    band0 = ('address0', 'numa-0')
    band1 = ('address1', 'numa-0')
    band2 = ('address2', 'numa-0')

    async def assign_one(task):
        [band] = await assigner_ref.assign_subtasks([task])
        return band

    # (chunk key, registered size, band holding the chunk)
    layout = (('a', 200, band0), ('b', 400, band1), ('c', 400, band2))

    fetch_chunks = [
        TensorFetch(key=key, source_key=key,
                    dtype=np.dtype(int)).new_chunk([])
        for key, _size, _band in layout
    ]
    add_op = TensorTreeAdd(args=list(fetch_chunks))
    result_chunk = add_op.new_chunk(list(fetch_chunks))

    chunk_graph = ChunkGraph([result_chunk])
    for node in (*fetch_chunks, result_chunk):
        chunk_graph.add_node(node)
    for source in fetch_chunks:
        chunk_graph.add_edge(source, result_chunk)

    for source, (_key, size, band) in zip(fetch_chunks, layout):
        await meta_api.set_chunk_meta(
            source, memory_size=size, store_size=size, bands=[band])

    for address in ('address1', 'address3'):
        await cluster_api.set_node_status(
            node=address, role=NodeRole.WORKER, status=NodeStatus.STOPPING)

    subtask = Subtask('test_task', session_id, chunk_graph=chunk_graph)
    result = await assign_one(subtask)
    assert result in (band0, band2)

    subtask.expect_bands = [band0]
    result = await assign_one(subtask)
    assert result == band0

    subtask.expect_bands = [band0, band1]
    result = await assign_one(subtask)
    assert result == band0

    subtask.expect_bands = [band1]
    result = await assign_one(subtask)
    assert result in (band0, band2)

    result_chunk.op.gpu = True
    subtask = Subtask('test_task', session_id, chunk_graph=chunk_graph)
    with pytest.raises(NoMatchingSlots) as err:
        await assigner_ref.assign_subtasks([subtask])
    assert 'gpu' in str(err.value)