예제 #1
0
    def testInitialAssignsWithInputs(self):
        """Verify external-input detection when an operand input is not in the graph.

        ``extra_rand`` is registered as an input of ``first_add`` but is never
        added to the graph, so its key must be reported as an external input
        of that operand when ``initial=False``; the collection for
        ``initial=True`` must be empty.
        """
        import numpy as np
        from mars.tensor.random import TensorRandint
        from mars.tensor.arithmetic import TensorTreeAdd

        def _rand_chunk(seed):
            rand_op = TensorRandint(state=np.random.RandomState(seed),
                                    dtype=np.float32())
            return rand_op.new_chunk(None, shape=(10, 10))

        def _add_chunk(input_chunks):
            chunk = TensorTreeAdd(dtype=np.float32()).new_chunk(
                None, shape=(10, 10))
            chunk.op._inputs = input_chunks
            return chunk

        graph_rand = _rand_chunk(0)
        extra_rand = _rand_chunk(1)
        first_add = _add_chunk([graph_rand, extra_rand])
        second_add = _add_chunk([first_add])

        # extra_rand is intentionally left out of the graph
        graph = DAG()
        for node in (graph_rand, first_add, second_add):
            graph.add_node(node)
        for src, dst in ((graph_rand, first_add), (first_add, second_add)):
            graph.add_edge(src, dst)

        analyzer = GraphAnalyzer(graph, {})
        external = analyzer.collect_external_input_chunks(initial=False)
        self.assertListEqual(external[first_add.op.key], [extra_rand.key])

        initial_external = analyzer.collect_external_input_chunks(initial=True)
        self.assertEqual(len(initial_external), 0)
예제 #2
0
    def _build_chunk_dag(node_str, edge_str):
        """Build a chunk DAG from textual node and edge descriptions.

        Args:
            node_str: Comma-separated node names, e.g. ``'a, b, c'``.
            edge_str: Comma-separated edges written as ``'src -> dst'``.
                Blank entries are skipped, so an empty string describes a
                graph without edges.

        Returns:
            A ``(chunk_dag, str_to_chunk)`` tuple: the DAG whose nodes are
            chunk data objects, and a dict mapping every node name to the
            chunk data created for it.
        """
        from mars.tensor.random import TensorRandint
        from mars.tensor.arithmetic import TensorTreeAdd

        char_dag = DAG()
        for name in node_str.split(','):
            char_dag.add_node(name.strip())
        for edge in edge_str.split(','):
            if not edge.strip():
                # ''.split(',') yields [''], which cannot be unpacked below
                continue
            src, dst = edge.split('->')
            char_dag.add_edge(src.strip(), dst.strip())

        chunk_dag = DAG()
        str_to_chunk = dict()
        # topological order guarantees predecessors are already in str_to_chunk
        for s in char_dag.topological_iter():
            if char_dag.count_predecessors(s):
                # nodes with predecessors become add operands over them
                c = TensorTreeAdd(args=[],
                                  _key=s,
                                  dtype=np.dtype(np.float32())).new_chunk(
                                      None, shape=(10, 10)).data
                inputs = c.op._inputs = [
                    str_to_chunk[ps] for ps in char_dag.predecessors(s)
                ]
            else:
                # source nodes become random operands without inputs
                c = TensorRandint(_key=s, dtype=np.dtype(
                    np.float32())).new_chunk(None, shape=(10, 10)).data
                inputs = []
            str_to_chunk[s] = c
            chunk_dag.add_node(c)
            for inp in inputs:
                chunk_dag.add_edge(inp, c)
        return chunk_dag, str_to_chunk
예제 #3
0
async def test_assign_gpu_tasks(actor_pool):
    """Assign a subtask whose result operand is created with ``gpu=True``.

    The second element of the returned assignment is expected to start
    with ``'gpu'``.
    """
    pool, session_id, assigner_ref, cluster_api, meta_api = actor_pool

    input1, input2 = [
        TensorFetch(key=name, source_key=name,
                    dtype=np.dtype(int)).new_chunk([])
        for name in ('a', 'b')
    ]
    gpu_add_op = TensorTreeAdd(args=[input1, input2], gpu=True)
    result_chunk = gpu_add_op.new_chunk([input1, input2])

    chunk_graph = ChunkGraph([result_chunk])
    for node in (input1, input2, result_chunk):
        chunk_graph.add_node(node)
    for source in (input1, input2):
        chunk_graph.add_edge(source, result_chunk)

    # both inputs are registered on the same band with the same sizes
    for source in (input1, input2):
        await meta_api.set_chunk_meta(source,
                                      memory_size=200,
                                      store_size=200,
                                      bands=[('address0', 'numa-0')])

    subtask = Subtask('test_task', session_id, chunk_graph=chunk_graph)
    assigned = await assigner_ref.assign_subtasks([subtask])
    [result] = assigned
    assert result[1].startswith('gpu')
예제 #4
0
    def testSameKeyAssign(self):
        r"""Assign initial operands when both inputs of a pair share one key.

        Proper initial allocation should divide the graph like

         U   U   |   U   U   |   U   U
        | | | |  |  | | | |  |  | | | |
         U   U   |   U   U   |   U   U
        """
        import numpy as np
        from mars.tensor.random import TensorRandint
        from mars.tensor.arithmetic import TensorTreeAdd

        graph = DAG()

        # six pairs of random chunks; the two chunks of pair i both use key str(i)
        inputs = []
        for i in range(6):
            pair = tuple(
                TensorRandint(_key=str(i), dtype=np.float32()).new_chunk(
                    None, shape=(10, 10))
                for _ in range(2))
            inputs.append(pair)

        results = []
        for _ in range(6):
            add_op = TensorTreeAdd(dtype=np.float32())
            results.append(add_op.new_chunk(None, shape=(10, 10)))

        for pair, add_chunk in zip(inputs, results):
            add_chunk.op._inputs = list(pair)

            graph.add_node(add_chunk)
            for rand_chunk in pair:
                graph.add_node(rand_chunk)
                graph.add_edge(rand_chunk, add_chunk)

        worker_slots = {'w1': 24, 'w2': 24, 'w3': 24}
        analyzer = GraphAnalyzer(graph, worker_slots)
        initial_keys = analyzer.get_initial_operand_keys()
        assignments = analyzer.calc_operand_assignments(initial_keys)
        self.assertEqual(len(assignments), 6)
예제 #5
0
async def test_execute_tensor(actor_pool):
    """Run a two-input tree-add subtask and verify result, quota and meta.

    Two random 10x10 arrays are stored under the keys of two fetch chunks,
    a subtask adding them is run on band ``'numa-0'``, and the test then
    checks the stored result, the recorded quota request and the result
    chunk's meta.
    """
    pool, session_id, meta_api, storage_api, execution_ref = actor_pool

    data1 = np.random.rand(10, 10)
    data2 = np.random.rand(10, 10)

    # each fetch chunk names itself as its source (the original pointed
    # input1 at 'input2', a copy-paste slip)
    input1 = TensorFetch(key='input1',
                         source_key='input1',
                         dtype=np.dtype(int)).new_chunk([])
    input2 = TensorFetch(key='input2',
                         source_key='input2',
                         dtype=np.dtype(int)).new_chunk([])
    result_chunk = TensorTreeAdd(args=[input1, input2]) \
        .new_chunk([input1, input2], shape=data1.shape, dtype=data1.dtype)

    await meta_api.set_chunk_meta(input1,
                                  memory_size=data1.nbytes,
                                  store_size=data1.nbytes,
                                  bands=[(pool.external_address, 'numa-0')])
    # sizes of input2 are taken from data2, the array stored under its key
    await meta_api.set_chunk_meta(input2,
                                  memory_size=data2.nbytes,
                                  store_size=data2.nbytes,
                                  bands=[(pool.external_address, 'numa-0')])
    # todo use different storage level when storage ready
    await storage_api.put(input1.key, data1)
    await storage_api.put(input2.key, data2)

    chunk_graph = ChunkGraph([result_chunk])
    chunk_graph.add_node(input1)
    chunk_graph.add_node(input2)
    chunk_graph.add_node(result_chunk)
    chunk_graph.add_edge(input1, result_chunk)
    chunk_graph.add_edge(input2, result_chunk)

    subtask = Subtask('test_task',
                      session_id=session_id,
                      chunk_graph=chunk_graph)
    await execution_ref.run_subtask(subtask, 'numa-0', pool.external_address)

    # check if results are correct
    result = await storage_api.get(result_chunk.key)
    np.testing.assert_array_equal(data1 + data2, result)

    # check if quota computations are correct
    quota_ref = await mo.actor_ref(QuotaActor.gen_uid('numa-0'),
                                   address=pool.external_address)
    [quota] = await quota_ref.get_batch_quota_reqs()
    assert quota[(subtask.subtask_id, subtask.subtask_id)] == data1.nbytes

    # check if metas are correct
    result_meta = await meta_api.get_chunk_meta(result_chunk.key)
    assert result_meta['object_id'] == result_chunk.key
    assert result_meta['shape'] == result.shape
예제 #6
0
    def testMockExecuteSize(self):
        """Check the sizes reported by the executor when run with ``mock=True``.

        Covers a single add over two fetched int64 chunks, the same graph
        extended by a chain of three further adds, and a small tensor
        expression executed through ``execute_tensor``.
        """
        import mars.tensor as mt
        from mars.core.graph import DAG
        from mars.tensor.fetch import TensorFetch
        from mars.tensor.arithmetic import TensorTreeAdd

        graph_add = DAG()
        input_chunks = [
            TensorFetch(dtype=np.dtype('int64')).new_chunk(
                None, shape=(100, 100)).data
            for _ in range(2)
        ]

        first_add_op = TensorTreeAdd(args=input_chunks,
                                     dtype=np.dtype('int64'))
        tail_chunk = first_add_op.new_chunk(input_chunks,
                                            shape=(100, 100),
                                            dtype=np.dtype('int64')).data
        graph_add.add_node(tail_chunk)
        for fetched in input_chunks:
            graph_add.add_node(fetched)
            graph_add.add_edge(fetched, tail_chunk)

        executor = Executor()
        sizes = executor.execute_graph(graph_add, [tail_chunk.key],
                                       compose=False,
                                       mock=True)
        self.assertEqual(sizes[0], (80000, 80000))
        self.assertEqual(executor.mock_max_memory, 80000)

        # append three chained adds, each consuming the previous tail
        for _ in range(3):
            chained_op = TensorTreeAdd(args=[tail_chunk],
                                       dtype=np.dtype('int64'))
            chained_chunk = chained_op.new_chunk(
                [tail_chunk], shape=(100, 100),
                dtype=np.dtype('int64')).data
            graph_add.add_node(chained_chunk)
            graph_add.add_edge(tail_chunk, chained_chunk)

            tail_chunk = chained_chunk

        executor = Executor()
        sizes = executor.execute_graph(graph_add, [tail_chunk.key],
                                       compose=False,
                                       mock=True)
        self.assertEqual(sizes[0], (80000, 80000))
        self.assertEqual(executor.mock_max_memory, 160000)

        a = mt.random.rand(10, 10, chunk_size=10)
        b = a[:, mt.newaxis, :] - a
        r = mt.triu(mt.sqrt(b**2).sum(axis=2))

        executor = Executor()
        tensor_sizes = executor.execute_tensor(r, concat=False, mock=True)
        # larger than maximal memory size in calc procedure
        self.assertGreaterEqual(tensor_sizes[0][0], 800)
        self.assertGreaterEqual(executor.mock_max_memory, 8000)
예제 #7
0
    def _build_test_graph(data_list):
        """Build a DAG that adds one fetch chunk per item of ``data_list``.

        Each item contributes a fetch chunk keyed ``'chunk-<index>'`` with the
        item's dtype and shape; a single add chunk shaped like the first item
        consumes all of them.

        Returns:
            ``(exec_graph, inputs, add_chunk)``.
        """
        from mars.tensor.fetch import TensorFetch
        from mars.tensor.arithmetic import TensorTreeAdd

        def _fetch_chunk(position, data):
            key = 'chunk-%d' % position
            fetch_op = TensorFetch(to_fetch_key=key, dtype=data.dtype)
            return fetch_op.new_chunk([], shape=data.shape, _key=key)

        inputs = [
            _fetch_chunk(position, data)
            for position, data in enumerate(data_list)
        ]
        add_op = TensorTreeAdd(data_list[0].dtype)
        add_chunk = add_op.new_chunk(inputs, shape=data_list[0].shape)

        exec_graph = DAG()
        exec_graph.add_node(add_chunk)
        for fetched in inputs:
            exec_graph.add_node(fetched)
            exec_graph.add_edge(fetched, add_chunk)
        return exec_graph, inputs, add_chunk
예제 #8
0
    def _build_test_graph(data_list):
        """Build a ``ChunkGraph`` that adds one fetch chunk per item of ``data_list``.

        Fetch chunk keys have the form ``'chunk-<random 0..999>-<index>'``.
        The graph is populated with the ``.data`` of every chunk and has the
        add chunk's data as its only result.

        Returns:
            ``(exec_graph, inputs, add_chunk)`` where ``inputs`` and
            ``add_chunk`` are the chunk objects (not their ``.data``).
        """
        from mars.tensor.fetch import TensorFetch
        from mars.tensor.arithmetic import TensorTreeAdd

        def _fetch_chunk(position, data):
            key = f'chunk-{random.randint(0, 999)}-{position}'
            fetch_op = TensorFetch(to_fetch_key=key, dtype=data.dtype)
            return fetch_op.new_chunk([], shape=data.shape, _key=key)

        inputs = [
            _fetch_chunk(position, data)
            for position, data in enumerate(data_list)
        ]
        add_op = TensorTreeAdd(args=inputs, dtype=data_list[0].dtype)
        add_chunk = add_op.new_chunk(inputs, shape=data_list[0].shape)

        exec_graph = ChunkGraph([add_chunk.data])
        exec_graph.add_node(add_chunk.data)
        for fetched in inputs:
            exec_graph.add_node(fetched.data)
            exec_graph.add_edge(fetched.data, add_chunk.data)
        return exec_graph, inputs, add_chunk
예제 #9
0
async def test_assigner(actor_pool):
    """Assign a three-input subtask whose inputs live on different addresses.

    The inputs are registered with sizes 200, 400 and 400 on ``address0``,
    ``address1`` and ``address2``; the assignment must be one of the two
    bands holding a 400-sized input.
    """
    pool, session_id, assigner_ref, meta_api = actor_pool

    input1, input2, input3 = [
        TensorFetch(key=name, source_key=name,
                    dtype=np.dtype(int)).new_chunk([])
        for name in ('a', 'b', 'c')
    ]
    add_op = TensorTreeAdd(args=[input1, input2, input3])
    result_chunk = add_op.new_chunk([input1, input2, input3])

    chunk_graph = ChunkGraph([result_chunk])
    for node in (input1, input2, input3, result_chunk):
        chunk_graph.add_node(node)
    for source in (input1, input2, input3):
        chunk_graph.add_edge(source, result_chunk)

    # (chunk, size used for both memory and store, address of its band)
    placements = (
        (input1, 200, 'address0'),
        (input2, 400, 'address1'),
        (input3, 400, 'address2'),
    )
    for source, size, address in placements:
        await meta_api.set_chunk_meta(source,
                                      memory_size=size,
                                      store_size=size,
                                      bands=[(address, 'numa-0')])

    subtask = Subtask('test_task', session_id, chunk_graph=chunk_graph)
    assigned = await assigner_ref.assign_subtasks([subtask])
    [result] = assigned
    assert result in (('address1', 'numa-0'), ('address2', 'numa-0'))
예제 #10
0
    def testCompose(self):
        """
        test compose in build graph and optimize

        Covers ``DirectedGraph.compose`` on a linear chain and on a graph
        with fan-in/fan-out, then ``NeOptimizer.compose`` on graphs that
        contain a slice chunk at which composition has to stop.
        """
        r"""
        graph(@: node, #: composed_node):

        @ --> @ --> @   ========>    #
        """
        chunks = [
            TensorTreeAdd(_key=str(n)).new_chunk(None, None) for n in range(3)
        ]
        graph = DirectedGraph()
        lmap(graph.add_node, chunks[:3])
        graph.add_edge(chunks[0], chunks[1])
        graph.add_edge(chunks[1], chunks[2])

        composed_nodes = graph.compose()
        self.assertEqual(composed_nodes[0].composed, chunks[:3])

        # make the middle one as result chunk, thus the graph cannot be composed
        composed_nodes = graph.compose(keys=[chunks[1].key])
        self.assertEqual(len(composed_nodes), 0)
        r"""
        graph(@: node, #: composed_node):

        @             @              @       @
          \         /                  \   /
            @ --> @       ========>      #
          /         \                  /   \
        @             @              @       @
        """
        chunks = [
            TensorTreeAdd(_key=str(n)).new_chunk(None, None) for n in range(6)
        ]
        graph = DirectedGraph()
        lmap(graph.add_node, chunks[:6])

        chunks[2].op._inputs = [chunks[0], chunks[1]]
        chunks[3].op._inputs = [chunks[2]]
        chunks[4].op._inputs = [chunks[3]]
        chunks[5].op._inputs = [chunks[3]]

        graph.add_edge(chunks[0], chunks[2])
        graph.add_edge(chunks[1], chunks[2])
        graph.add_edge(chunks[2], chunks[3])
        graph.add_edge(chunks[3], chunks[4])
        graph.add_edge(chunks[3], chunks[5])

        composed_nodes = graph.compose()
        self.assertEqual(composed_nodes[0].composed, chunks[2:4])

        # to make sure the predecessors and successors of compose are right
        # 0 and 1's successors must be composed
        self.assertIn(composed_nodes[0], graph.successors(chunks[0]))
        self.assertIn(composed_nodes[0], graph.successors(chunks[1]))
        # check composed's inputs
        self.assertIn(chunks[0].key, [n.key for n in composed_nodes[0].inputs])
        self.assertIn(chunks[1].key, [n.key for n in composed_nodes[0].inputs])
        # check composed's predecessors
        self.assertIn(chunks[0], graph.predecessors(composed_nodes[0]))
        self.assertIn(chunks[1], graph.predecessors(composed_nodes[0]))
        # check 4 and 5's inputs: every successor of the composed node must
        # list it as an input (previously only the first successor was
        # checked, twice)
        for successor in graph.successors(composed_nodes[0]):
            self.assertIn(composed_nodes[0].key,
                          [n.key for n in successor.inputs])
        # check 4 and 5's predecessors
        self.assertIn(composed_nodes[0], graph.predecessors(chunks[4]))
        self.assertIn(composed_nodes[0], graph.predecessors(chunks[5]))

        # test optimizer compose
        r"""
        graph(@: node, S: Slice Chunk, #: composed_node):

        @                   @              @             @
          \               /                  \         /
            @ --> @ --> S      ========>       # --> S
          /               \                  /         \
        @                   @              @             @

        compose stopped at S, because numexpr don't support Slice op
        """
        chunks = [
            TensorTreeAdd(_key=str(n)).new_chunk(None, None) for n in range(6)
        ]
        chunk_slice = TensorSlice().new_chunk([None], None)
        graph = DirectedGraph()
        lmap(graph.add_node, chunks[:6])
        graph.add_node(chunk_slice)
        graph.add_edge(chunks[0], chunks[2])
        graph.add_edge(chunks[1], chunks[2])
        graph.add_edge(chunks[2], chunks[3])
        graph.add_edge(chunks[3], chunk_slice)
        graph.add_edge(chunk_slice, chunks[4])
        graph.add_edge(chunk_slice, chunks[5])
        optimizer = NeOptimizer(graph)
        composed_nodes = optimizer.compose()
        self.assertEqual(composed_nodes[0].composed, chunks[2:4])
        r"""
            graph(@: node, S: Slice Chunk, #: composed_node):

            @ --> @ --> S --> @  ========>  # --> S --> @

        compose stopped at S, because numexpr don't support Slice op
        """
        # four chunks are created but only the first three join the graph
        chunks = [
            TensorTreeAdd(_key=str(n)).new_chunk(None, None) for n in range(4)
        ]
        graph = DirectedGraph()
        lmap(graph.add_node, chunks[:3])
        graph.add_node(chunk_slice)
        graph.add_edge(chunks[0], chunks[1])
        graph.add_edge(chunks[1], chunk_slice)
        graph.add_edge(chunk_slice, chunks[2])
        optimizer = NeOptimizer(graph)
        composed_nodes = optimizer.compose()
        self.assertEqual(composed_nodes[0].composed, chunks[:2])
        self.assertEqual(len(composed_nodes), 1)
        r"""
            graph(@: node, S: Slice Chunk, #: composed_node):

            @ --> @ --> S --> @ --> @   ========>  # --> S --> #

        compose stopped at S, because numexpr don't support Slice op
        """
        chunks = [
            TensorTreeAdd(_key=str(n)).new_chunk(None, None) for n in range(4)
        ]
        graph = DirectedGraph()
        lmap(graph.add_node, chunks[:4])
        graph.add_node(chunk_slice)
        graph.add_edge(chunks[0], chunks[1])
        graph.add_edge(chunks[1], chunk_slice)
        graph.add_edge(chunk_slice, chunks[2])
        graph.add_edge(chunks[2], chunks[3])
        optimizer = NeOptimizer(graph)
        composed_nodes = optimizer.compose()
        self.assertEqual(composed_nodes[0].composed, chunks[:2])
        self.assertEqual(composed_nodes[1].composed, chunks[2:4])
예제 #11
0
    def testAssignOnWorkerLost(self):
        r"""Analyze state changes and reassignment after chunks are lost.

        Proper initial allocation should divide the graph like

        FL  FL F   F R   R  |  FL  FL F   F R   R
        | x |  | x | | x |  |  | x |  | x | | x |
        R   R  R   R U   U  |  R   R  R   R U   U

        U: UNSCHEDULED  F: FINISHED  R: READY  L: LOST
        """
        import numpy as np
        from mars.scheduler import OperandState
        from mars.tensor.random import TensorRandint
        from mars.tensor.arithmetic import TensorTreeAdd

        graph = DAG()

        op_states = dict()
        inputs = []
        for _ in range(6):
            pair = tuple(
                TensorRandint(dtype=np.float32()).new_chunk(
                    None, shape=(10, 10))
                for _ in range(2))
            inputs.append(pair)
        results = []
        for i in range(6):
            pair = tuple(
                TensorTreeAdd(_key=f'{i}_{j}', dtype=np.float32()).new_chunk(
                    None, shape=(10, 10))
                for j in range(2))
            results.append(pair)

        # every input of a group feeds every output of the same group
        for in_group, out_group in zip(inputs, results):
            for out_chunk in out_group:
                out_chunk.op._inputs = list(in_group)
                op_states[out_chunk.op.key] = OperandState.UNSCHEDULED
                graph.add_node(out_chunk)

            for in_chunk in in_group:
                op_states[in_chunk.op.key] = OperandState.UNSCHEDULED
                graph.add_node(in_chunk)
                for out_chunk in out_group:
                    graph.add_edge(in_chunk, out_chunk)

        # the first four groups are pinned to w1/w2 alternately:
        # their inputs are finished and their outputs are ready
        fixed_assigns = dict()
        for idx in range(4):
            worker = f'w{idx % 2 + 1}'
            for i in range(2):
                in_key = inputs[idx][i].op.key
                fixed_assigns[in_key] = worker
                op_states[in_key] = OperandState.FINISHED
                out_key = results[idx][i].op.key
                fixed_assigns[out_key] = worker
                op_states[out_key] = OperandState.READY

        for in_group in inputs:
            for in_chunk in in_group:
                if in_chunk.op.key not in fixed_assigns:
                    op_states[in_chunk.op.key] = OperandState.READY

        lost_chunks = []
        for in_group in (inputs[0], inputs[2]):
            lost_chunks.extend(c.key for c in in_group)

        # w1 is absent from the worker metrics passed to the analyzer
        worker_metrics = {'w2': 24, 'w3': 24}
        analyzer = GraphAnalyzer(graph, worker_metrics, fixed_assigns,
                                 op_states, lost_chunks)
        changed_states = analyzer.analyze_state_changes()

        self.assertEqual(len(changed_states), 8)
        lost_inputs_ready = all(
            changed_states[c.op.key] == OperandState.READY
            for in_group in (inputs[0], inputs[2]) for c in in_group)
        self.assertTrue(lost_inputs_ready)
        lost_outputs_unscheduled = all(
            changed_states[c.op.key] == OperandState.UNSCHEDULED
            for out_group in (results[0], results[2]) for c in out_group)
        self.assertTrue(lost_outputs_unscheduled)

        initial_keys = analyzer.get_initial_operand_keys()
        assignments = analyzer.calc_operand_assignments(initial_keys)
        for in_group in inputs:
            if any(n.op.key in fixed_assigns for n in in_group):
                continue
            # both inputs of an unpinned group must land on one worker
            group_workers = {assignments[n.op.key] for n in in_group}
            self.assertEqual(1, len(group_workers))
        worker_assigns = dict.fromkeys(worker_metrics, 0)
        for worker in assignments.values():
            worker_assigns[worker] += 1
        self.assertEqual(2, worker_assigns['w2'])
        self.assertEqual(6, worker_assigns['w3'])
예제 #12
0
    def testAssignWithPreviousData(self):
        r"""Assign initial operands given metas of already-placed input data.

        Proper initial allocation should divide the graph like

         U   U  |  U   U  |  U   U
          \ /   |   \ /   |   \ /
           U    |    U    |    U
        """
        import numpy as np
        from mars.scheduler.chunkmeta import WorkerMeta
        from mars.tensor.random import TensorRandint
        from mars.tensor.arithmetic import TensorTreeAdd

        graph = DAG()

        # three pairs of random chunks keyed '0'..'5'
        inputs = []
        for i in range(3):
            pair = tuple(
                TensorRandint(_key=str(i * 2 + j),
                              dtype=np.float32()).new_chunk(
                                  None, shape=(10, 10))
                for j in range(2))
            inputs.append(pair)
        results = []
        for _ in range(3):
            add_op = TensorTreeAdd(dtype=np.float32())
            results.append(add_op.new_chunk(None, shape=(10, 10)))

        for pair, add_chunk in zip(inputs, results):
            add_chunk.op._inputs = list(pair)

            graph.add_node(add_chunk)
            for rand_chunk in pair:
                graph.add_node(rand_chunk)
                graph.add_edge(rand_chunk, add_chunk)

        # both scenarios below expect the same operand -> worker mapping
        expected_workers = (
            ('0', 'w1'),
            ('1', 'w1'),
            ('2', 'w3'),
            ('3', 'w3'),
            ('4', 'w2'),
            ('5', 'w2'),
        )

        # assign with partial mismatch
        data_dist = {
            '0': {
                'c00': WorkerMeta(chunk_size=5, workers=('w1', )),
                'c01': WorkerMeta(chunk_size=5, workers=('w2', )),
            },
            '1': {'c10': WorkerMeta(chunk_size=10, workers=('w1', ))},
            '2': {'c20': WorkerMeta(chunk_size=10, workers=('w3', ))},
            '3': {'c30': WorkerMeta(chunk_size=10, workers=('w3', ))},
            '4': {'c40': WorkerMeta(chunk_size=7, workers=('w3', ))},
        }
        analyzer = GraphAnalyzer(graph, {'w1': 24, 'w2': 24, 'w3': 24})
        initial_keys = analyzer.get_initial_operand_keys()
        assignments = analyzer.calc_operand_assignments(
            initial_keys, input_chunk_metas=data_dist)

        self.assertEqual(len(assignments), 6)

        # explanation of the result:
        # for '1', all data are in w1, hence assigned to w1
        # '0' assigned to w1 according to connectivity
        # '2' and '3' assigned to w3 according to connectivity
        # '4' assigned to w2 because it has fewer data, and the slots of w3 is used up

        for op_key, worker in expected_workers:
            self.assertEqual(assignments[op_key], worker)

        # assign with full mismatch
        data_dist = {
            '0': {
                'c00': WorkerMeta(chunk_size=5, workers=('w1', )),
                'c01': WorkerMeta(chunk_size=5, workers=('w1', 'w2')),
            },
            '1': {'c10': WorkerMeta(chunk_size=10, workers=('w1', ))},
            '2': {'c20': WorkerMeta(chunk_size=10, workers=('w3', ))},
            '3': {'c30': WorkerMeta(chunk_size=10, workers=('w3', ))},
            '4': {'c40': WorkerMeta(chunk_size=7, workers=('w2', ))},
            '5': {'c50': WorkerMeta(chunk_size=7, workers=('w2', ))},
        }
        analyzer = GraphAnalyzer(graph, {'w1': 24, 'w2': 24, 'w3': 24})
        initial_keys = analyzer.get_initial_operand_keys()
        assignments = analyzer.calc_operand_assignments(
            initial_keys, input_chunk_metas=data_dist)

        self.assertEqual(len(assignments), 6)
        for op_key, worker in expected_workers:
            self.assertEqual(assignments[op_key], worker)
예제 #13
0
    def testAssignOnWorkerAdd(self):
        r"""Assign remaining operands when part of the graph is already pinned.

        Proper initial allocation should divide the graph like

        F   F R   R  |  F   F R   R  |  R   R R   R
        | x | | x |  |  | x | | x |  |  | x | | x |
        R   R U   U  |  R   R U   U  |  U   U U   U

        U: UNSCHEDULED  F: FINISHED  R: READY
        """
        import numpy as np
        from mars.scheduler import OperandState
        from mars.tensor.random import TensorRandint
        from mars.tensor.arithmetic import TensorTreeAdd

        graph = DAG()

        inputs = []
        for _ in range(6):
            pair = tuple(
                TensorRandint(dtype=np.float32()).new_chunk(
                    None, shape=(10, 10))
                for _ in range(2))
            inputs.append(pair)
        results = []
        for i in range(6):
            pair = tuple(
                TensorTreeAdd(_key='%d_%d' % (i, j),
                              dtype=np.float32()).new_chunk(
                                  None, shape=(10, 10))
                for j in range(2))
            results.append(pair)

        # every input of a group feeds every output of the same group
        for in_group, out_group in zip(inputs, results):
            for out_chunk in out_group:
                out_chunk.op._inputs = list(in_group)
                graph.add_node(out_chunk)

            for in_chunk in in_group:
                graph.add_node(in_chunk)
                for out_chunk in out_group:
                    graph.add_edge(in_chunk, out_chunk)

        # mark initial assigns
        fixed_assigns = dict()
        op_states = dict()
        for idx in range(2):
            worker = 'w%d' % (idx + 1)
            for i in range(2):
                fixed_assigns[inputs[idx][i].op.key] = worker
                op_states[results[idx][i].op.key] = OperandState.READY
                fixed_assigns[results[idx][i].op.key] = worker

        for in_group in inputs:
            for in_chunk in in_group:
                if in_chunk.op.key not in fixed_assigns:
                    op_states[in_chunk.op.key] = OperandState.READY

        worker_metrics = {'w1': 24, 'w2': 24, 'w3': 24}
        analyzer = GraphAnalyzer(graph, worker_metrics, fixed_assigns,
                                 op_states)
        initial_keys = analyzer.get_initial_operand_keys()
        assignments = analyzer.calc_operand_assignments(initial_keys)
        for in_group in inputs:
            if any(n.op.key in fixed_assigns for n in in_group):
                continue
            # both inputs of an unpinned group must land on one worker
            group_workers = {assignments[n.op.key] for n in in_group}
            self.assertEqual(1, len(group_workers))
        worker_assigns = dict.fromkeys(worker_metrics, 0)
        for worker in assignments.values():
            worker_assigns[worker] += 1
        self.assertEqual(2, worker_assigns['w1'])
        self.assertEqual(2, worker_assigns['w2'])
        self.assertEqual(4, worker_assigns['w3'])
예제 #14
0
파일: test_fusion.py 프로젝트: qinxuye/mars
def test_fuse():
    """
    test compose in build graph and optimize

    Runs ``Fusion(...).fuse()`` on a linear chain (with the tail, then the
    middle chunk as result chunk) and on a graph with fan-in/fan-out, and
    checks which chunks were fused and how the fused node is wired in.
    """
    r"""
    graph(@: node, #: composed_node):

    @ --> @ --> @   ========>    #
    """
    chunks = [
        TensorTreeAdd(args=[], _key=str(n)).new_chunk(None, None).data
        for n in range(3)
    ]
    graph = ChunkGraph([])
    for c in chunks:
        graph.add_node(c)
    graph.add_edge(chunks[0], chunks[1])
    graph.add_edge(chunks[1], chunks[2])

    graph2 = graph.copy()
    graph2._result_chunks = [chunks[2]]
    _, fused_nodes = Fusion(graph2).fuse()
    assert fused_nodes[0].composed == chunks[:3]

    # make the middle one as result chunk, thus only the first two chunks
    # are fused
    graph3 = graph.copy()
    graph3._result_chunks = [chunks[1]]
    _, fused_nodes = Fusion(graph3).fuse()
    assert fused_nodes[0].composed == chunks[:2]
    r"""
    graph(@: node, #: composed_node):

    @             @              @       @
      \         /                  \   /
        @ --> @       ========>      #
      /         \                  /   \
    @             @              @       @
    """
    chunks = [
        TensorTreeAdd(args=[], _key=str(n)).new_chunk(None, None).data
        for n in range(6)
    ]
    graph = ChunkGraph([chunks[4], chunks[5]])
    for c in chunks:
        graph.add_node(c)

    chunks[2].op._inputs = [chunks[0], chunks[1]]
    chunks[3].op._inputs = [chunks[2]]
    chunks[4].op._inputs = [chunks[3]]
    chunks[5].op._inputs = [chunks[3]]

    graph.add_edge(chunks[0], chunks[2])
    graph.add_edge(chunks[1], chunks[2])
    graph.add_edge(chunks[2], chunks[3])
    graph.add_edge(chunks[3], chunks[4])
    graph.add_edge(chunks[3], chunks[5])

    _, fused_nodes = Fusion(graph).fuse()
    assert fused_nodes[0].composed == chunks[2:4]

    # to make sure the predecessors and successors of compose are right
    # 0 and 1's successors must be composed
    assert fused_nodes[0] in graph.successors(chunks[0])
    assert fused_nodes[0] in graph.successors(chunks[1])
    # check composed's inputs
    assert chunks[0] in fused_nodes[0].inputs
    assert chunks[1] in fused_nodes[0].inputs
    # check composed's predecessors
    assert chunks[0] in graph.predecessors(fused_nodes[0])
    assert chunks[1] in graph.predecessors(fused_nodes[0])
    # check 4 and 5's inputs: every successor of the fused node must list it
    # as an input (previously only the first successor was checked, twice)
    for successor in graph.successors(fused_nodes[0]):
        assert fused_nodes[0] in successor.inputs
    # check 4 and 5's predecessors
    assert fused_nodes[0] in graph.predecessors(chunks[4])
    assert fused_nodes[0] in graph.predecessors(chunks[5])
예제 #15
0
async def test_assign_cpu_tasks(actor_pool):
    """Assign a CPU subtask while some nodes are marked as stopping.

    ``address1`` and ``address3`` are set to ``STOPPING``; the test then
    checks assignments without expected bands, with several
    ``expect_bands`` settings, and finally that switching the result
    operand to ``gpu`` raises ``NoMatchingSlots``.
    """
    pool, session_id, assigner_ref, cluster_api, meta_api = actor_pool

    input1, input2, input3 = [
        TensorFetch(key=name, source_key=name,
                    dtype=np.dtype(int)).new_chunk([])
        for name in ('a', 'b', 'c')
    ]
    add_op = TensorTreeAdd(args=[input1, input2, input3])
    result_chunk = add_op.new_chunk([input1, input2, input3])

    chunk_graph = ChunkGraph([result_chunk])
    for node in (input1, input2, input3, result_chunk):
        chunk_graph.add_node(node)
    for source in (input1, input2, input3):
        chunk_graph.add_edge(source, result_chunk)

    # (chunk, size used for both memory and store, address of its band)
    placements = (
        (input1, 200, 'address0'),
        (input2, 400, 'address1'),
        (input3, 400, 'address2'),
    )
    for source, size, address in placements:
        await meta_api.set_chunk_meta(source,
                                      memory_size=size,
                                      store_size=size,
                                      bands=[(address, 'numa-0')])

    for stopping_node in ('address1', 'address3'):
        await cluster_api.set_node_status(node=stopping_node,
                                          role=NodeRole.WORKER,
                                          status=NodeStatus.STOPPING)

    band0 = ('address0', 'numa-0')
    band1 = ('address1', 'numa-0')
    band2 = ('address2', 'numa-0')

    subtask = Subtask('test_task', session_id, chunk_graph=chunk_graph)
    [result] = await assigner_ref.assign_subtasks([subtask])
    assert result in (band0, band2)

    subtask.expect_bands = [band0]
    [result] = await assigner_ref.assign_subtasks([subtask])
    assert result == band0

    subtask.expect_bands = [band0, band1]
    [result] = await assigner_ref.assign_subtasks([subtask])
    assert result == band0

    # the only expected band belongs to a node marked as stopping
    subtask.expect_bands = [band1]
    [result] = await assigner_ref.assign_subtasks([subtask])
    assert result in (band0, band2)

    result_chunk.op.gpu = True
    subtask = Subtask('test_task', session_id, chunk_graph=chunk_graph)
    with pytest.raises(NoMatchingSlots) as err:
        await assigner_ref.assign_subtasks([subtask])
    assert 'gpu' in str(err.value)