コード例 #1
0
ファイル: test_core.py プロジェクト: zzxx-husky/mars
    def testTensorGraphSerialize(self):
        """Round-trip an untiled tensor graph through protobuf and JSON.

        For each format the restored graph must contain as many nodes as the
        original, and the first original node that has inputs must match the
        restored node with the same key (operand, shape and input keys).
        """
        lhs = ones((10, 3), chunks=(5, 2))
        rhs = tensor(np.random.random((10, 3)), chunks=(5, 2))
        summed = lhs + rhs
        graph = summed.build_graph(tiled=False)

        round_trips = (
            (graph.to_pb, DAG.from_pb),
            (graph.to_json, DAG.from_json),
        )
        for dump, load in round_trips:
            restored = load(dump())
            self.assertEqual(len(graph), len(restored))

            expected = next(node for node in graph if node.inputs)
            actual = next(node for node in restored
                          if node.key == expected.key)
            # Intended to make sure outputs are all weak references.
            # NOTE(review): assertTrue() takes (expr, msg), so ReferenceType is
            # only the failure message here and no type check is performed.
            self.assertTrue(actual.op.outputs[0], ReferenceType)
            self.assertBaseEqual(expected.op, actual.op)
            self.assertEqual(expected.shape, actual.shape)
            expected_input_keys = sorted(inp.key for inp in expected.inputs)
            actual_input_keys = sorted(inp.key for inp in actual.inputs)
            self.assertEqual(expected_input_keys, actual_input_keys)
コード例 #2
0
ファイル: test_train.py プロジェクト: ueshin/mars
    def testSerializeLocalTrain(self):
        """Check that tiled XGBTrain graphs survive a JSON round trip.

        Two ToDMatrix inputs are trained on and serialized: one built from
        ``self.X``/``self.y`` and one from ``self.X_df``/``self.y_series`` with
        a DataFrame output type. Finally a ToDMatrix fed by the results of a
        multi-output ToDMatrix is tiled and must consist of a single chunk.
        """
        sess = new_session()

        with LocalContext(sess._sess):

            def round_trip_train_graph(train_matrix):
                # Build the tiled training graph and push it through JSON.
                trained = XGBTrain(dtrain=train_matrix)()
                tiled_graph = trained.build_graph(tiled=True)
                DAG.from_json(tiled_graph.to_json())

            round_trip_train_graph(ToDMatrix(data=self.X, label=self.y)())

            frame_matrix_op = ToDMatrix(data=self.X_df,
                                        label=self.y_series,
                                        output_types=[OutputType.dataframe])
            round_trip_train_graph(frame_matrix_op())

            raw_X = mt.random.rand(1000, 10, chunk_size=(1000, 5))
            multi_output_op = ToDMatrix(data=raw_X,
                                        label=self.y,
                                        multi_output=True)
            new_X, new_y = multi_output_op()
            tiled_matrix = ToDMatrix(data=new_X, label=new_y)().tiles()

            self.assertEqual(len(tiled_matrix.chunks), 1)
コード例 #3
0
ファイル: test_datasource.py プロジェクト: tomzhang/mars-1
    def testDataFrameGraphSerialize(self):
        """Round-trip DataFrame graphs through protobuf and JSON.

        First an untiled DataFrame whose columns are a timedelta range and
        whose index is a date range, then a graph holding one tiled DataFrame.
        """
        raw = pd.DataFrame(np.random.rand(10, 10),
                           columns=pd.timedelta_range(start='1 day', periods=10),
                           index=pd.date_range('2020-1-1', periods=10))
        df = from_pandas_df(raw)
        graph = df.build_graph(tiled=False)

        for dump, load in ((graph.to_pb, DAG.from_pb),
                           (graph.to_json, DAG.from_json)):
            restored = load(dump())
            self.assertEqual(len(graph), len(restored))

            expected = next(iter(graph))
            actual = next(iter(restored))
            # Intended to make sure outputs are all weak references.
            # NOTE(review): assertTrue() takes (expr, msg), so ReferenceType is
            # only the failure message here and no type check is performed.
            self.assertTrue(actual.op.outputs[0], ReferenceType)
            self.assertBaseEqual(expected.op, actual.op)
            self.assertEqual(expected.shape, actual.shape)
            self.assertEqual(sorted(inp.key for inp in expected.inputs),
                             sorted(inp.key for inp in actual.inputs))
            pd.testing.assert_index_equal(actual.index_value.to_pandas(),
                                          expected.index_value.to_pandas())
            pd.testing.assert_index_equal(actual.columns_value.to_pandas(),
                                          expected.columns_value.to_pandas())

        # test graph with tiled DataFrame
        tiled = from_pandas_df(pd.DataFrame(np.random.rand(10, 10)),
                               chunk_size=(5, 4)).tiles()
        graph = DAG()
        graph.add_node(tiled)

        for dump, load in ((graph.to_pb, DAG.from_pb),
                           (graph.to_json, DAG.from_json)):
            restored = load(dump())
            self.assertEqual(len(graph), len(restored))

            chunks = next(iter(restored)).chunks
            self.assertEqual(len(chunks), 6)
            first_chunk = chunks[0]
            self.assertIsInstance(first_chunk, DataFrameChunk)
            self.assertEqual(first_chunk.index, tiled.chunks[0].index)
            self.assertBaseEqual(first_chunk.op, tiled.chunks[0].op)
            pd.testing.assert_index_equal(
                first_chunk.index_value.to_pandas(),
                tiled.chunks[0].index_value.to_pandas())
            pd.testing.assert_index_equal(
                first_chunk.columns_value.to_pandas(),
                tiled.chunks[0].columns_value.to_pandas())
コード例 #4
0
    def testTensorGraphSerialize(self):
        """Round-trip tensor graphs through protobuf and JSON.

        Covers the untiled graph of a sum of two tensors, then a graph that
        holds a single tiled tensor whose restored node must expose 6 chunks.
        """
        lhs = ones((10, 3), chunk_size=(5, 2))
        rhs = tensor(np.random.random((10, 3)), chunk_size=(5, 2))
        summed = lhs + rhs
        graph = summed.build_graph(tiled=False)

        for dump, load in ((graph.to_pb, DAG.from_pb),
                           (graph.to_json, DAG.from_json)):
            restored = load(dump())
            self.assertEqual(len(graph), len(restored))

            expected = next(node for node in graph if node.inputs)
            actual = next(node for node in restored
                          if node.key == expected.key)
            # Intended to make sure outputs are all weak references.
            # NOTE(review): assertTrue() takes (expr, msg), so ReferenceType is
            # only the failure message here and no type check is performed.
            self.assertTrue(actual.op.outputs[0], ReferenceType)
            self.assertBaseEqual(expected.op, actual.op)
            self.assertEqual(expected.shape, actual.shape)
            self.assertEqual(sorted(inp.key for inp in expected.inputs),
                             sorted(inp.key for inp in actual.inputs))

        # test graph with tiled tensor
        tiled = ones((10, 10), chunk_size=(5, 4)).tiles()
        graph = DAG()
        graph.add_node(tiled)

        for dump, load in ((graph.to_pb, DAG.from_pb),
                           (graph.to_json, DAG.from_json)):
            restored = load(dump())
            self.assertEqual(len(graph), len(restored))

            chunks = next(iter(restored)).chunks
            self.assertEqual(len(chunks), 6)
            first_chunk = chunks[0]
            self.assertIsInstance(first_chunk, TensorChunk)
            self.assertEqual(first_chunk.index, tiled.chunks[0].index)
            self.assertBaseEqual(first_chunk.op, tiled.chunks[0].op)
コード例 #5
0
    def testDataFrameGraphSerialize(self):
        """Round-trip DataFrame graphs through protobuf and JSON.

        Covers the untiled graph of a random 10x10 DataFrame, then a graph
        holding one tiled DataFrame whose restored node must expose 6 chunks.
        """
        df = from_pandas(pd.DataFrame(np.random.rand(10, 10)))
        graph = df.build_graph(tiled=False)

        for dump, load in ((graph.to_pb, DAG.from_pb),
                           (graph.to_json, DAG.from_json)):
            restored = load(dump())
            self.assertEqual(len(graph), len(restored))

            expected = next(iter(graph))
            actual = next(iter(restored))
            # Intended to make sure outputs are all weak references.
            # NOTE(review): assertTrue() takes (expr, msg), so ReferenceType is
            # only the failure message here and no type check is performed.
            self.assertTrue(actual.op.outputs[0], ReferenceType)
            self.assertBaseEqual(expected.op, actual.op)
            self.assertEqual(expected.shape, actual.shape)
            self.assertEqual(sorted(inp.key for inp in expected.inputs),
                             sorted(inp.key for inp in actual.inputs))

        # test graph with tiled DataFrame
        tiled = from_pandas(pd.DataFrame(np.random.rand(10, 10)),
                            chunk_size=(5, 4)).tiles()
        graph = DAG()
        graph.add_node(tiled)

        for dump, load in ((graph.to_pb, DAG.from_pb),
                           (graph.to_json, DAG.from_json)):
            restored = load(dump())
            self.assertEqual(len(graph), len(restored))

            chunks = next(iter(restored)).chunks
            self.assertEqual(len(chunks), 6)
            first_chunk = chunks[0]
            self.assertIsInstance(first_chunk, DataFrameChunk)
            self.assertEqual(first_chunk.index, tiled.chunks[0].index)
            self.assertBaseEqual(first_chunk.op, tiled.chunks[0].op)
コード例 #6
0
ファイル: test_analyzer.py プロジェクト: yyaaa1/mars
    def testSameKeyAssign(self):
        """Expect six operand assignments when paired inputs share a ``_key``.

        Six TensorTreeAdd chunks are built; each consumes two TensorRandint
        chunks created with the same explicit ``_key`` (``str(i)``).
        """
        import numpy as np
        from mars.tensor.random import TensorRandint
        from mars.tensor.arithmetic import TensorTreeAdd

        graph = DAG()
        r"""
        Proper initial allocation should divide the graph like

         U   U   |   U   U   |   U   U
        | | | |  |  | | | |  |  | | | |
         U   U   |   U   U   |   U   U
        """

        # Create every input pair first, then every result chunk.
        inputs = []
        for i in range(6):
            pair = tuple(
                TensorRandint(_key=str(i), dtype=np.float32()).new_chunk(
                    None, shape=(10, 10))
                for _ in range(2))
            inputs.append(pair)

        results = []
        for _ in range(6):
            add_op = TensorTreeAdd(dtype=np.float32())
            results.append(add_op.new_chunk(None, shape=(10, 10)))

        for pair, result in zip(inputs, results):
            result.op._inputs = list(pair)

            graph.add_node(result)
            for input_chunk in pair:
                graph.add_node(input_chunk)
                graph.add_edge(input_chunk, result)

        worker_slots = dict(w1=24, w2=24, w3=24)
        analyzer = GraphAnalyzer(graph, worker_slots)
        initial_keys = analyzer.get_initial_operand_keys()
        assignments = analyzer.calc_operand_assignments(initial_keys)
        self.assertEqual(len(assignments), 6)
コード例 #7
0
    def testRegister(self):
        """Register fake handlers for FakeOperand and run them via Executor.

        Real execution must return the fake result and mock execution the fake
        size. A SubFakeOperand chunk is then executed and must also produce
        the fake result.
        """
        from mars.graph import DAG

        fake_result = np.random.rand(10, 10)
        doubled_nbytes = fake_result.nbytes * 2
        fake_size = (doubled_nbytes, doubled_nbytes)

        def fake_execute(ctx, op):
            ctx[op.outputs[0].key] = fake_result

        def fake_estimate(ctx, op):
            ctx[op.outputs[0].key] = fake_size

        register(FakeOperand, fake_execute, fake_estimate)

        def single_chunk_graph(operand_cls):
            # A graph containing only the data of one fresh chunk.
            new_graph = DAG()
            new_chunk = operand_cls().new_chunk(None, shape=(10, 10))
            new_graph.add_node(new_chunk.data)
            return new_graph, new_chunk

        graph, chunk = single_chunk_graph(FakeOperand)
        executor = Executor()
        executed = executor.execute_graph(graph, keys=[chunk.key])
        np.testing.assert_array_equal(executed[0], fake_result)
        estimated = executor.execute_graph(graph, keys=[chunk.key], mock=True)
        self.assertEqual(estimated[0], fake_size)

        graph, chunk = single_chunk_graph(SubFakeOperand)
        executor = Executor()
        executed = executor.execute_graph(graph, keys=[chunk.key])
        np.testing.assert_array_equal(executed[0], fake_result)
コード例 #8
0
    def testFullInitialAssign(self):
        """Both inputs of every add chunk must get the same initial assignment.

        Six TensorTreeAdd chunks are built, each fed by two TensorRandint
        chunks; for every input pair the assignments looked up by operand key
        must collapse to a single value.
        """
        import numpy as np
        from mars.tensor.expressions.random import TensorRandint
        from mars.tensor.expressions.arithmetic import TensorTreeAdd

        graph = DAG()
        r"""
        Proper initial allocation should divide the graph like
        
        U   U U   U  |  U   U U   U  |  U   U U   U 
         \ /   \ /   |   \ /   \ /   |   \ /   \ /  
          U     U    |    U     U    |    U     U   
        """

        # Create every input pair first, then every result chunk.
        inputs = []
        for _ in range(6):
            pair = tuple(
                TensorRandint(dtype=np.float32()).new_chunk(None, (10, 10))
                for _ in range(2))
            inputs.append(pair)

        results = []
        for _ in range(6):
            add_op = TensorTreeAdd(dtype=np.float32())
            results.append(add_op.new_chunk(None, (10, 10)))

        for pair, result in zip(inputs, results):
            result.op._inputs = list(pair)

            graph.add_node(result)
            for input_chunk in pair:
                graph.add_node(input_chunk)
                graph.add_edge(input_chunk, result)

        worker_slots = dict(w1=24, w2=24, w3=24)
        analyzer = GraphAnalyzer(graph, worker_slots)
        assignments = analyzer.calc_initial_assignments()
        for pair in inputs:
            assigned = set(assignments[chunk.op.key] for chunk in pair)
            self.assertEqual(1, len(assigned))
コード例 #9
0
ファイル: test_analyzer.py プロジェクト: yyaaa1/mars
    def testInitialAssignsWithInputs(self):
        """Check which input chunks the analyzer reports as external.

        ``n3`` consumes ``n1`` and ``n2`` but only ``n1`` is added to the
        graph, so ``n2`` must be listed as an external input of ``n3`` when
        ``initial=False``; with ``initial=True`` nothing must be reported.
        """
        import numpy as np
        from mars.tensor.random import TensorRandint
        from mars.tensor.arithmetic import TensorTreeAdd

        def new_randint_chunk(seed):
            randint_op = TensorRandint(state=np.random.RandomState(seed),
                                       dtype=np.float32())
            return randint_op.new_chunk(None, shape=(10, 10))

        def new_add_chunk():
            add_op = TensorTreeAdd(dtype=np.float32())
            return add_op.new_chunk(None, shape=(10, 10))

        n1 = new_randint_chunk(0)
        n2 = new_randint_chunk(1)

        n3 = new_add_chunk()
        n3.op._inputs = [n1, n2]
        n4 = new_add_chunk()
        n4.op._inputs = [n3]

        # n2 is deliberately left out of the graph.
        graph = DAG()
        for node in (n1, n3, n4):
            graph.add_node(node)
        for source, target in ((n1, n3), (n3, n4)):
            graph.add_edge(source, target)

        analyzer = GraphAnalyzer(graph, {})
        ext_chunks = analyzer.collect_external_input_chunks(initial=False)
        self.assertListEqual(ext_chunks[n3.op.key], [n2.key])
        initial_ext_chunks = analyzer.collect_external_input_chunks(
            initial=True)
        self.assertEqual(len(initial_ext_chunks), 0)
コード例 #10
0
    def _prepare_test_graph(self, session_id, graph_key, mock_workers):
        """Yield ``(pool, graph_ref)`` for a prepared two-way split graph.

        Builds the graph of ``mt.split(a1 + a2, 2)``, starts a single-process
        gevent actor pool with the scheduler actors, registers metadata for
        every worker in ``mock_workers`` and prepares/analyzes the graph
        without starting the operand actors. The yield happens inside the
        pool's ``with`` block, so the pool stays open while the caller runs.
        NOTE(review): presumably wrapped by a context-manager decorator that
        is outside this view -- confirm.
        """
        addr = f'127.0.0.1:{get_next_port()}'
        first_operand = mt.random.random((100,))
        second_operand = mt.random.random((100,))
        total = first_operand + second_operand
        v1, v2 = mt.split(total, 2)

        graph = DAG()
        for part in (v1, v2):
            part.build_graph(graph=graph, compose=False)

        with create_actor_pool(n_process=1, backend='gevent', address=addr) as pool:
            pool.create_actor(SchedulerClusterInfoActor, [pool.cluster_info.address],
                              uid=SchedulerClusterInfoActor.default_uid())
            resource_ref = pool.create_actor(ResourceActor, uid=ResourceActor.default_uid())
            pool.create_actor(ChunkMetaActor, uid=ChunkMetaActor.default_uid())
            pool.create_actor(AssignerActor, uid=AssignerActor.gen_uid(session_id))
            graph_uid = GraphActor.gen_uid(session_id, graph_key)
            graph_ref = pool.create_actor(GraphActor, session_id, graph_key,
                                          serialize_graph(graph), uid=graph_uid)

            for worker in mock_workers:
                hardware = dict(cpu=4, cpu_total=4, memory=1600)
                resource_ref.set_worker_meta(worker, dict(hardware=hardware))

            graph_ref.prepare_graph()
            graph_ref.analyze_graph()
            graph_ref.create_operand_actors(_start=False)

            yield pool, graph_ref
コード例 #11
0
    def testEmptyGraph(self, *_):
        """Executing an empty graph must end in ``GraphState.SUCCEEDED``.

        Extra positional arguments are accepted and ignored.
        """
        session_id = str(uuid.uuid4())

        addr = '127.0.0.1:%d' % get_next_port()
        with create_actor_pool(n_process=1, backend='gevent',
                               address=addr) as pool:
            pool.create_actor(SchedulerClusterInfoActor,
                              [pool.cluster_info.address],
                              uid=SchedulerClusterInfoActor.default_uid())
            resource_ref = pool.create_actor(ResourceActor,
                                             uid=ResourceActor.default_uid())
            pool.create_actor(ChunkMetaActor, uid=ChunkMetaActor.default_uid())
            pool.create_actor(AssignerActor,
                              uid=AssignerActor.gen_uid(session_id))

            # Register two workers with identical hardware metadata.
            for worker in ('localhost:12345', 'localhost:23456'):
                resource_ref.set_worker_meta(
                    worker, dict(hardware=dict(cpu_total=4)))

            graph_key = str(uuid.uuid4())
            serialized_graph = serialize_graph(DAG())

            graph_uid = GraphActor.gen_uid(session_id, graph_key)
            graph_ref = pool.create_actor(GraphActor,
                                          session_id,
                                          graph_key,
                                          serialized_graph,
                                          uid=graph_uid)
            graph_ref.execute_graph()
            self.assertEqual(graph_ref.get_state(), GraphState.SUCCEEDED)
コード例 #12
0
    def testTensorGraphTiledSerialize(self):
        """Round-trip tiled tensor graphs through protobuf and JSON.

        The first graph compares operand, index, shape and input keys of a
        chunk that has inputs; the second compares the operand and the keys
        of the ``composed`` members of the graph's first chunk.
        """
        lhs = ones((10, 3), chunk_size=(5, 2))
        rhs = tensor(np.random.random((10, 3)), chunk_size=(5, 2))
        summed = lhs + rhs
        graph = summed.build_graph(tiled=True)

        for dump, load in ((graph.to_pb, DAG.from_pb),
                           (graph.to_json, DAG.from_json)):
            restored = load(dump())
            self.assertEqual(len(graph), len(restored))

            expected = next(node for node in graph if node.inputs)
            actual = next(node for node in restored
                          if node.key == expected.key)
            self.assertBaseEqual(expected.op, actual.op)
            self.assertEqual(expected.index, actual.index)
            self.assertEqual(expected.shape, actual.shape)
            self.assertEqual(sorted(inp.key for inp in expected.inputs),
                             sorted(inp.key for inp in actual.inputs))

        plus_two = ones((10, 3), chunk_size=((3, 5, 2), 2)) + 2
        graph = plus_two.build_graph(tiled=True)

        for dump, load in ((graph.to_pb, DAG.from_pb),
                           (graph.to_json, DAG.from_json)):
            restored = load(dump())

            expected = next(iter(graph))
            actual = next(node for node in restored
                          if node.key == expected.key)
            self.assertBaseEqual(expected.op, actual.op)
            self.assertEqual(sorted(part.key for part in expected.composed),
                             sorted(part.key for part in actual.composed))
コード例 #13
0
ファイル: test_datasource.py プロジェクト: summerskyhk/mars
    def testDataFrameGraphSerialize(self):
        """Round-trip an untiled DataFrame graph through protobuf and JSON.

        For each format the restored graph must have the same node count and
        its first node must match the original's first node in operand, shape
        and input keys.
        """
        df = from_pandas(pd.DataFrame(np.random.rand(10, 10)))
        graph = df.build_graph(tiled=False)

        round_trips = (
            (graph.to_pb, DAG.from_pb),
            (graph.to_json, DAG.from_json),
        )
        for dump, load in round_trips:
            restored = load(dump())
            self.assertEqual(len(graph), len(restored))

            expected = next(iter(graph))
            actual = next(iter(restored))
            # Intended to make sure outputs are all weak references.
            # NOTE(review): assertTrue() takes (expr, msg), so ReferenceType is
            # only the failure message here and no type check is performed.
            self.assertTrue(actual.op.outputs[0], ReferenceType)
            self.assertBaseEqual(expected.op, actual.op)
            self.assertEqual(expected.shape, actual.shape)
            self.assertEqual(sorted(inp.key for inp in expected.inputs),
                             sorted(inp.key for inp in actual.inputs))
コード例 #14
0
    def _build_test_graph(data_list):
        """Build a graph where one add chunk consumes a fetch chunk per item.

        Every item of ``data_list`` gets a TensorFetch chunk keyed
        ``chunk-<position>`` with the item's dtype and shape; a single
        TensorTreeAdd chunk, using the first item's dtype and shape, takes all
        of them as inputs.

        Returns a tuple ``(graph, fetch_chunks, add_chunk)``.
        """
        from mars.tensor.fetch import TensorFetch
        from mars.tensor.arithmetic import TensorTreeAdd

        def make_fetch_chunk(position, data):
            key = 'chunk-%d' % position
            fetch_op = TensorFetch(to_fetch_key=key, dtype=data.dtype)
            return fetch_op.new_chunk([], shape=data.shape, _key=key)

        fetch_chunks = [make_fetch_chunk(position, data)
                        for position, data in enumerate(data_list)]
        add_op = TensorTreeAdd(data_list[0].dtype)
        add_chunk = add_op.new_chunk(fetch_chunks, shape=data_list[0].shape)

        exec_graph = DAG()
        exec_graph.add_node(add_chunk)
        for fetch_chunk in fetch_chunks:
            exec_graph.add_node(fetch_chunk)
            exec_graph.add_edge(fetch_chunk, add_chunk)
        return exec_graph, fetch_chunks, add_chunk
コード例 #15
0
ファイル: test_graph.py プロジェクト: zzxx-husky/mars
    def testDAG(self):
        r"""Check successors, traversal orders and cycle detection of ``DAG``.

        The graph under test::

            1 --- 4
            2 --- 6
              \  /
               5
             /
            3
        """
        # The docstring is a raw string because the diagram contains a
        # backslash followed by a space, which is an invalid escape sequence
        # in a normal string literal.

        dag = DAG()
        for node in range(1, 7):
            dag.add_node(node)
        dag.add_edge(1, 4)
        dag.add_edge(2, 6)
        dag.add_edge(2, 5)
        dag.add_edge(5, 6)
        dag.add_edge(3, 5)

        self.assertEqual(set(dag[2]), {5, 6})
        self.assertEqual(list(dag.topological_iter()), [3, 2, 5, 6, 1, 4])

        self.assertEqual(list(dag.dfs()), [3, 2, 5, 6, 1, 4])
        self.assertEqual(list(dag.bfs()), [1, 2, 3, 4, 5, 6])

        # 6 -> 1 -> 2 together with the existing 2 -> 6 closes a cycle.
        dag.add_edge(6, 1)
        dag.add_edge(1, 2)

        with self.assertRaises(GraphContainsCycleError):
            list(dag.topological_iter())
コード例 #16
0
    def testPrepushGraph(self):
        """Enqueue an add graph before its predecessor graphs have finished.

        Two single-chunk input tensors are added together. The add graph is
        rebuilt by hand so that its inputs are TensorFetch chunks carrying the
        keys of the input chunks. Two scenarios run against a fresh actor
        pool each:

        1. the add graph is enqueued while all predecessors are unfinished;
        2. the add graph is enqueued after the first predecessor has been
           executed but before the second one.

        In both cases ``_validate`` compares the stored result chunk with the
        sum of the raw input arrays.
        """
        import mars.tensor as mt
        from mars.graph import DAG
        from mars.tensor.expressions.datasource import TensorFetch

        data_inputs = [np.random.random((4, )) for _ in range(2)]

        arr_inputs = [mt.tensor(di, chunk_size=4) for di in data_inputs]
        arr_add = arr_inputs[0] + arr_inputs[1]

        graph_inputs = [a.build_graph(tiled=True) for a in arr_inputs]
        graph_input_op_keys = [a.chunks[0].op.key for a in arr_inputs]
        # Return value unused; the call is needed so arr_add.chunks exists.
        arr_add.build_graph(tiled=True)

        # Re-create the add chunk on top of fetch chunks that reuse the keys
        # of the input chunks.
        graph_add = DAG()
        input_chunks = []
        for a in arr_inputs:
            fetch_op = TensorFetch(dtype=a.dtype)
            inp_chunk = fetch_op.new_chunk(None, a.shape,
                                           _key=a.chunks[0].key).data
            input_chunks.append(inp_chunk)

        new_op = arr_add.chunks[0].op.copy()
        new_add_chunk = new_op.new_chunk(input_chunks,
                                         arr_add.shape,
                                         index=arr_add.chunks[0].index,
                                         dtype=arr_add.dtype,
                                         _key=arr_add.chunks[0].key)
        graph_add.add_node(new_add_chunk)
        for inp_chunk in input_chunks:
            graph_add.add_node(inp_chunk)
            graph_add.add_edge(inp_chunk, new_add_chunk)
        graph_add_key = arr_add.chunks[0].op.key

        pool_address = '127.0.0.1:%d' % get_next_port()
        session_id = str(uuid.uuid4())

        def _validate(_):
            # test_actor and session_id are looked up when this runs, so the
            # same callback serves both scenarios below.
            data = test_actor._chunk_store.get(session_id,
                                               arr_add.chunks[0].key)
            assert_array_equal(data, data_inputs[0] + data_inputs[1])

        # mkdtemp's first positional parameter is the suffix; the name with a
        # trailing dash is meant as a prefix, so pass it by keyword.
        options.worker.spill_directory = tempfile.mkdtemp(
            prefix='mars_worker_prep_spilled-')

        # register when all predecessors unfinished
        with create_actor_pool(n_process=1,
                               backend='gevent',
                               address=pool_address) as pool:
            self.create_standard_actors(pool,
                                        pool_address,
                                        with_daemon=False,
                                        with_status=False)
            pool.create_actor(SpillActor)
            pool.create_actor(CpuCalcActor)

            with self.run_actor_test(pool) as test_actor:
                execution_ref = test_actor.promise_ref(
                    ExecutionActor.default_name())
                execution_ref.enqueue_graph(
                    session_id, graph_add_key, serialize_graph(graph_add),
                    dict(chunks=[new_add_chunk.key]), None,
                    pred_keys=graph_input_op_keys, _promise=True) \
                    .then(lambda *_: execution_ref.start_execution(session_id, graph_add_key, _promise=True)) \
                    .then(_validate) \
                    .then(lambda *_: test_actor.set_result(None)) \
                    .catch(lambda *exc: test_actor.set_result(exc, False))

                for ginput, op_key, gtensor in zip(graph_inputs,
                                                   graph_input_op_keys,
                                                   arr_inputs):

                    # session_id and op_key are bound through functools.partial
                    # below instead of being captured from the loop scope.
                    def _start_exec_promise(session_id, op_key, *_):
                        return execution_ref.start_execution(session_id,
                                                             op_key,
                                                             _promise=True)

                    execution_ref.enqueue_graph(
                        session_id, op_key, serialize_graph(ginput),
                        dict(chunks=[gtensor.chunks[0].key]), None,
                        succ_keys=[new_add_chunk.op.key], _promise=True) \
                        .then(functools.partial(_start_exec_promise, session_id, op_key))

                self.get_result()

        pool_address = '127.0.0.1:%d' % get_next_port()
        session_id = str(uuid.uuid4())

        # register when part of predecessors unfinished
        with create_actor_pool(n_process=1,
                               backend='gevent',
                               address=pool_address) as pool:
            self.create_standard_actors(pool,
                                        pool_address,
                                        with_daemon=False,
                                        with_status=False)
            pool.create_actor(SpillActor)
            pool.create_actor(CpuCalcActor)

            with self.run_actor_test(pool) as test_actor:
                execution_ref = test_actor.promise_ref(
                    ExecutionActor.default_name())

                # Run the first predecessor to completion before the add
                # graph is enqueued.
                execution_ref.enqueue_graph(
                    session_id, graph_input_op_keys[0], serialize_graph(graph_inputs[0]),
                    dict(chunks=[input_chunks[0].key]), None,
                    succ_keys=[new_add_chunk.op.key], _promise=True) \
                    .then(lambda *_: execution_ref.start_execution(session_id, graph_input_op_keys[0], _promise=True)) \
                    .then(lambda *_: test_actor.set_result(None, destroy=False)) \
                    .catch(lambda *exc: test_actor.set_result(exc, False))
                self.get_result()

                execution_ref.enqueue_graph(
                    session_id, graph_add_key, serialize_graph(graph_add),
                    dict(chunks=[new_add_chunk.key]), None,
                    pred_keys=graph_input_op_keys, _promise=True) \
                    .then(lambda *_: execution_ref.start_execution(session_id, graph_add_key, _promise=True)) \
                    .then(_validate) \
                    .then(lambda *_: test_actor.set_result(None)) \
                    .catch(lambda *exc: test_actor.set_result(exc, False))

                execution_ref.enqueue_graph(
                    session_id, graph_input_op_keys[1], serialize_graph(graph_inputs[1]),
                    dict(chunks=[input_chunks[1].key]), None,
                    succ_keys=[new_add_chunk.op.key], _promise=True) \
                    .then(lambda *_: execution_ref.start_execution(session_id, graph_input_op_keys[1], _promise=True))

                self.get_result()
コード例 #17
0
ファイル: test_graph.py プロジェクト: tomzhang/mars-1
    def testDAG(self):
        r"""Exercise the ``DAG`` API: edges, traversal, cycles and derived graphs.

        The graph under test::

            1 --- 4
            2 --- 6
              \  /
               5
             /
            3
        """

        dag = DAG()
        for node in range(1, 7):
            dag.add_node(node)
        dag.add_edge(1, 4)
        dag.add_edge(2, 6)
        dag.add_edge(2, 5)
        dag.add_edge(5, 6)
        dag.add_edge(3, 5)

        # Edges touching an unknown node (10) must be rejected.
        with self.assertRaises(KeyError):
            dag.add_edge(1, 10)
        with self.assertRaises(KeyError):
            dag.add_edge(10, 1)

        self.assertEqual(set(dag[2]), {5, 6})
        self.assertEqual(list(dag.topological_iter()), [3, 2, 5, 6, 1, 4])

        self.assertEqual(list(dag.dfs()), [3, 2, 5, 6, 1, 4])
        self.assertEqual(list(dag.bfs()), [1, 2, 3, 4, 5, 6])

        # 6 -> 1 -> 2 together with the existing 2 -> 6 closes a cycle.
        dag.add_edge(6, 1)
        dag.add_edge(1, 2)

        with self.assertRaises(KeyError):
            for _ in dag.iter_predecessors(-1):
                pass

        with self.assertRaises(KeyError):
            for _ in dag.iter_successors(-1):
                pass

        with self.assertRaises(GraphContainsCycleError):
            list(dag.topological_iter())

        dag.remove_edge(2, 5)
        self.assertFalse(dag.has_successor(2, 5))
        # Removing the same edge a second time must fail.
        with self.assertRaises(KeyError):
            dag.remove_edge(2, 5)

        rev_dag = dag.build_reversed()
        for n in dag:
            self.assertIn(n, rev_dag)
            self.assertTrue(
                all(
                    rev_dag.has_successor(n, pred)
                    for pred in dag.predecessors(n)))

        undigraph = dag.build_undirected()
        for n in dag:
            self.assertIn(n, undigraph)
            self.assertTrue(
                all(
                    undigraph.has_predecessor(pred, n)
                    for pred in dag.predecessors(n)))
            self.assertTrue(
                all(
                    undigraph.has_successor(n, pred)
                    for pred in dag.predecessors(n)))

        dag_copy = dag.copy()
        for n in dag:
            self.assertIn(n, dag_copy)
            # Take the predecessors from the original graph so that an edge
            # missing in the copy is detected; iterating the copy's own
            # predecessors would check the copy against itself.
            self.assertTrue(
                all(
                    dag_copy.has_successor(pred, n)
                    for pred in dag.predecessors(n)))
コード例 #18
0
    def testCompose(self):
        """
        Test compose in graph building and in the runtime optimizer.

        Every scenario below wires a small chunk graph by hand, runs
        ``compose()`` (on the graph itself or through ``NeRuntimeOptimizer``)
        and checks which chunks ended up fused into a composed node.
        """
        r"""
        graph(@: node, #: composed_node):

        @ --> @ --> @   ========>    #
        """
        chunks = [
            TensorTreeAdd(args=[], _key=str(n)).new_chunk(None, None)
            for n in range(3)
        ]
        graph = DAG()
        for chunk in chunks[:3]:
            graph.add_node(chunk)
        graph.add_edge(chunks[0], chunks[1])
        graph.add_edge(chunks[1], chunks[2])

        composed_nodes = graph.compose()
        self.assertEqual(composed_nodes[0].composed, chunks[:3])

        # make the middle one as result chunk, thus the graph cannot be composed
        composed_nodes = graph.compose(keys=[chunks[1].key])
        self.assertEqual(len(composed_nodes), 0)
        r"""
        graph(@: node, #: composed_node):

        @             @              @       @
          \         /                  \   /
            @ --> @       ========>      #
          /         \                  /   \
        @             @              @       @
        """
        chunks = [
            TensorTreeAdd(args=[], _key=str(n)).new_chunk(None, None)
            for n in range(6)
        ]
        graph = DAG()
        for chunk in chunks[:6]:
            graph.add_node(chunk)

        # inputs are set by hand so that compose can rewire them afterwards
        chunks[2].op._inputs = [chunks[0], chunks[1]]
        chunks[3].op._inputs = [chunks[2]]
        chunks[4].op._inputs = [chunks[3]]
        chunks[5].op._inputs = [chunks[3]]

        graph.add_edge(chunks[0], chunks[2])
        graph.add_edge(chunks[1], chunks[2])
        graph.add_edge(chunks[2], chunks[3])
        graph.add_edge(chunks[3], chunks[4])
        graph.add_edge(chunks[3], chunks[5])

        composed_nodes = graph.compose()
        self.assertEqual(composed_nodes[0].composed, chunks[2:4])

        # to make sure the predecessors and successors of compose are right
        # 0 and 1's successors must be composed
        self.assertIn(composed_nodes[0], graph.successors(chunks[0]))
        self.assertIn(composed_nodes[0], graph.successors(chunks[1]))
        # check composed's inputs
        self.assertIn(chunks[0].data, composed_nodes[0].inputs)
        self.assertIn(chunks[1].data, composed_nodes[0].inputs)
        # check composed's predecessors
        self.assertIn(chunks[0], graph.predecessors(composed_nodes[0]))
        self.assertIn(chunks[1], graph.predecessors(composed_nodes[0]))
        # check 4 and 5's inputs: each of the two successors of the composed
        # node must consume the composed node's data
        composed_successors = graph.successors(composed_nodes[0])
        self.assertIn(composed_nodes[0].data, composed_successors[0].inputs)
        self.assertIn(composed_nodes[0].data, composed_successors[1].inputs)
        # check 4 and 5's predecessors
        self.assertIn(composed_nodes[0], graph.predecessors(chunks[4]))
        self.assertIn(composed_nodes[0], graph.predecessors(chunks[5]))

        # test optimizer compose
        r"""
        graph(@: node, S: Slice Chunk, #: composed_node):

        @                   @              @             @
          \               /                  \         /
            @ --> @ --> S      ========>       # --> S
          /               \                  /         \
        @                   @              @             @

        compose stopped at S, because numexpr don't support Slice op
        """
        chunks = [
            TensorTreeAdd(args=[], _key=str(n)).new_chunk(None, None)
            for n in range(6)
        ]
        chunk_slice = TensorSlice().new_chunk([None], None)
        graph = DAG()
        for chunk in chunks[:6]:
            graph.add_node(chunk)
        graph.add_node(chunk_slice)
        graph.add_edge(chunks[0], chunks[2])
        graph.add_edge(chunks[1], chunks[2])
        graph.add_edge(chunks[2], chunks[3])
        graph.add_edge(chunks[3], chunk_slice)
        graph.add_edge(chunk_slice, chunks[4])
        graph.add_edge(chunk_slice, chunks[5])
        optimizer = NeRuntimeOptimizer(graph)
        composed_nodes = optimizer.compose()
        self.assertEqual(composed_nodes[0].composed, chunks[2:4])
        r"""
            graph(@: node, S: Slice Chunk, #: composed_node):

            @ --> @ --> S --> @  ========>  # --> S --> @

        compose stopped at S, because numexpr don't support Slice op
        """
        chunks = [
            TensorTreeAdd(args=[], _key=str(n)).new_chunk(None, None)
            for n in range(4)
        ]
        graph = DAG()
        # only the first three chunks take part in this scenario
        for chunk in chunks[:3]:
            graph.add_node(chunk)
        # the slice chunk created for the previous scenario is reused here
        graph.add_node(chunk_slice)
        graph.add_edge(chunks[0], chunks[1])
        graph.add_edge(chunks[1], chunk_slice)
        graph.add_edge(chunk_slice, chunks[2])
        optimizer = NeRuntimeOptimizer(graph)
        composed_nodes = optimizer.compose()
        self.assertEqual(composed_nodes[0].composed, chunks[:2])
        self.assertEqual(len(composed_nodes), 1)
        r"""
            graph(@: node, S: Slice Chunk, #: composed_node):

            @ --> @ --> S --> @ --> @   ========>  # --> S --> #

        compose stopped at S, because numexpr don't support Slice op
        """
        chunks = [
            TensorTreeAdd(args=[], _key=str(n)).new_chunk(None, None)
            for n in range(4)
        ]
        graph = DAG()
        for chunk in chunks[:4]:
            graph.add_node(chunk)
        graph.add_node(chunk_slice)
        graph.add_edge(chunks[0], chunks[1])
        graph.add_edge(chunks[1], chunk_slice)
        graph.add_edge(chunk_slice, chunks[2])
        graph.add_edge(chunks[2], chunks[3])
        optimizer = NeRuntimeOptimizer(graph)
        composed_nodes = optimizer.compose()
        self.assertEqual(composed_nodes[0].composed, chunks[:2])
        self.assertEqual(composed_nodes[1].composed, chunks[2:4])
コード例 #19
0
ファイル: test_analyzer.py プロジェクト: ueshin/mars
    def testAssignOnWorkerAdd(self):
        """
        Check initial operand assignment when part of the graph is pinned.

        Six groups are built, each with two random source chunks feeding two
        add chunks.  The first two groups are pinned to ``w1`` and ``w2``;
        the test then verifies that each unpinned source pair lands on a
        single worker and that the assignments are spread 2 / 2 / 4 over
        ``w1`` / ``w2`` / ``w3``.
        """
        import numpy as np
        from mars.scheduler import OperandState
        from mars.tensor.random import TensorRandint
        from mars.tensor.arithmetic import TensorTreeAdd

        r"""
        Proper initial allocation should divide the graph like

        F   F R   R  |  F   F R   R  |  R   R R   R
        | x | | x |  |  | x | | x |  |  | x | | x |
        R   R U   U  |  R   R U   U  |  U   U U   U

        U: UNSCHEDULED  F: FINISHED  R: READY
        """

        # source chunks: six pairs of random chunks
        source_groups = []
        for _ in range(6):
            pair = []
            for _ in range(2):
                rand_op = TensorRandint(dtype=np.float32())
                pair.append(rand_op.new_chunk(None, shape=(10, 10)))
            source_groups.append(tuple(pair))

        # sink chunks: six pairs of add chunks keyed "<group>_<slot>"
        sink_groups = []
        for i in range(6):
            pair = []
            for j in range(2):
                add_op = TensorTreeAdd(_key='%d_%d' % (i, j),
                                       dtype=np.float32())
                pair.append(add_op.new_chunk(None, shape=(10, 10)))
            sink_groups.append(tuple(pair))

        # wire every source of a group to every sink of the same group
        dag = DAG()
        for sources, sinks in zip(source_groups, sink_groups):
            for sink in sinks:
                sink.op._inputs = list(sources)
                dag.add_node(sink)

            for source in sources:
                dag.add_node(source)
                for sink in sinks:
                    dag.add_edge(source, sink)

        # mark initial assigns: the first two groups are pinned to w1 / w2
        pinned = {}
        states = {}
        for worker_no, (sources, sinks) in enumerate(
                zip(source_groups[:2], sink_groups[:2]), start=1):
            worker = 'w%d' % worker_no
            for source, sink in zip(sources, sinks):
                pinned[source.op.key] = worker
                states[sink.op.key] = OperandState.READY
                pinned[sink.op.key] = worker

        # every source that is not pinned is ready to be scheduled
        for sources in source_groups:
            for source in sources:
                if source.op.key not in pinned:
                    states[source.op.key] = OperandState.READY

        capacities = {'w1': 24, 'w2': 24, 'w3': 24}
        analyzer = GraphAnalyzer(dag, capacities, pinned, states)
        initial_keys = analyzer.get_initial_operand_keys()
        assignments = analyzer.calc_operand_assignments(initial_keys)

        # sources of an unpinned group must share one worker
        for sources in source_groups:
            if any(source.op.key in pinned for source in sources):
                continue
            chosen = {assignments[source.op.key] for source in sources}
            self.assertEqual(1, len(chosen))

        per_worker = dict.fromkeys(capacities, 0)
        for worker in assignments.values():
            per_worker[worker] += 1
        for worker, expected in (('w1', 2), ('w2', 2), ('w3', 4)):
            self.assertEqual(expected, per_worker[worker])
コード例 #20
0
    def testMockExecuteSize(self):
        """
        Check the sizes reported by the executor when running in mock mode.

        A hand-built graph (two fetch chunks summed by an add chunk, later
        extended by a chain of three more adds) and a small tensor expression
        are executed with ``mock=True``; the returned sizes and
        ``mock_max_memory`` are compared with the expected numbers.
        """
        import mars.tensor as mt
        from mars.graph import DAG
        from mars.tensor.fetch import TensorFetch
        from mars.tensor.arithmetic import TensorTreeAdd

        int64 = np.dtype('int64')
        chunk_shape = (100, 100)

        # two fetch chunks feeding one add chunk
        sources = [
            TensorFetch(dtype=int64).new_chunk(None, shape=chunk_shape).data
            for _ in range(2)
        ]
        tail = TensorTreeAdd(dtype=int64).new_chunk(
            sources, shape=chunk_shape, dtype=int64).data

        dag = DAG()
        dag.add_node(tail)
        for source in sources:
            dag.add_node(source)
            dag.add_edge(source, tail)

        mock_executor = Executor()
        sizes = mock_executor.execute_graph(
            dag, [tail.key], compose=False, mock=True)[0]
        # presumably 100 * 100 int64 elements * 8 bytes -- TODO confirm
        self.assertEqual(sizes, (80000, 80000))
        self.assertEqual(mock_executor.mock_max_memory, 80000)

        # append a chain of three more adds behind the current tail
        for _ in range(3):
            link = TensorTreeAdd(dtype=int64).new_chunk(
                [tail], shape=chunk_shape, dtype=int64).data
            dag.add_node(link)
            dag.add_edge(tail, link)
            tail = link

        mock_executor = Executor()
        sizes = mock_executor.execute_graph(
            dag, [tail.key], compose=False, mock=True)[0]
        self.assertEqual(sizes, (80000, 80000))
        self.assertEqual(mock_executor.mock_max_memory, 160000)

        base = mt.random.rand(10, 10, chunk_size=10)
        diff = base[:, mt.newaxis, :] - base
        upper = mt.triu(mt.sqrt(diff**2).sum(axis=2))

        mock_executor = Executor()
        sizes = mock_executor.execute_tensor(upper, concat=False, mock=True)
        # larger than maximal memory size in calc procedure
        self.assertGreaterEqual(sizes[0][0], 800)
        self.assertGreaterEqual(mock_executor.mock_max_memory, 8000)
コード例 #21
0
    def _build_chunk_dag(node_str, edge_str):
        """
        Build a chunk DAG from a textual description.

        :param node_str: comma-separated node names, e.g. ``'a, b, c'``
        :param edge_str: comma-separated edges written as ``'a -> b'``
        :return: tuple ``(chunk_dag, str_to_chunk)`` where ``chunk_dag`` holds
            one chunk per name and ``str_to_chunk`` maps names to chunks

        Names without predecessors become ``TensorRandint`` chunks, all other
        names become ``TensorTreeAdd`` chunks whose inputs are the chunks of
        their predecessors.
        """
        from mars.tensor.random import TensorRandint
        from mars.tensor.arithmetic import TensorTreeAdd

        # first build a DAG over the plain names
        name_graph = DAG()
        for raw_name in node_str.split(','):
            name_graph.add_node(raw_name.strip())
        for raw_edge in edge_str.split(','):
            src, dst = raw_edge.split('->')
            name_graph.add_edge(src.strip(), dst.strip())

        # then mirror it with chunks, visiting names in topological order so
        # that the chunks of all predecessors already exist
        chunk_graph = DAG()
        chunk_by_name = {}
        for name in name_graph.topological_iter():
            if not name_graph.count_predecessors(name):
                chunk = TensorRandint(
                    _key=name, dtype=np.float32()).new_chunk(None,
                                                             shape=(10, 10))
                parents = []
            else:
                chunk = TensorTreeAdd(
                    _key=name, dtype=np.float32()).new_chunk(None,
                                                             shape=(10, 10))
                parents = [
                    chunk_by_name[parent]
                    for parent in name_graph.predecessors(name)
                ]
                chunk.op._inputs = parents
            chunk_by_name[name] = chunk
            chunk_graph.add_node(chunk)
            for parent in parents:
                chunk_graph.add_edge(parent, chunk)
        return chunk_graph, chunk_by_name
コード例 #22
0
ファイル: test_analyzer.py プロジェクト: yyaaa1/mars
    def testAssignWithPreviousData(self):
        """
        Check operand assignment when input chunk metas are supplied.

        Three add chunks each consume a pair of random chunks keyed ``'0'``
        to ``'5'``.  Two different ``input_chunk_metas`` layouts are passed
        to the analyzer and both must yield the same worker assignment.
        """
        import numpy as np
        from mars.scheduler.chunkmeta import WorkerMeta
        from mars.tensor.random import TensorRandint
        from mars.tensor.arithmetic import TensorTreeAdd

        r"""
        Proper initial allocation should divide the graph like

         U   U  |  U   U  |  U   U
          \ /   |   \ /   |   \ /
           U    |    U    |    U
        """

        def _meta(size, *workers):
            # shorthand for the WorkerMeta records used in both layouts
            return WorkerMeta(chunk_size=size, workers=workers)

        expected = {
            '0': 'w1',
            '1': 'w1',
            '2': 'w3',
            '3': 'w3',
            '4': 'w2',
            '5': 'w2',
        }

        # three pairs of random chunks, keyed '0' .. '5'
        source_pairs = []
        for i in range(3):
            pair = []
            for j in range(2):
                rand_op = TensorRandint(_key=str(i * 2 + j),
                                        dtype=np.float32())
                pair.append(rand_op.new_chunk(None, shape=(10, 10)))
            source_pairs.append(tuple(pair))

        # one add chunk per pair
        sums = []
        for _ in range(3):
            add_op = TensorTreeAdd(dtype=np.float32())
            sums.append(add_op.new_chunk(None, shape=(10, 10)))

        dag = DAG()
        for pair, total in zip(source_pairs, sums):
            total.op._inputs = list(pair)

            dag.add_node(total)
            for source in pair:
                dag.add_node(source)
                dag.add_edge(source, total)

        def _check(chunk_metas):
            # run a fresh analyzer over the graph and compare the outcome
            analyzer = GraphAnalyzer(dag, {'w1': 24, 'w2': 24, 'w3': 24})
            assignments = analyzer.calc_operand_assignments(
                analyzer.get_initial_operand_keys(),
                input_chunk_metas=chunk_metas)

            self.assertEqual(len(assignments), 6)
            for op_key, worker in expected.items():
                self.assertEqual(assignments[op_key], worker)

        # assign with partial mismatch
        #
        # explanation of the result:
        # for '1', all data are in w1, hence assigned to w1
        # '0' assigned to w1 according to connectivity
        # '2' and '3' assigned to w3 according to connectivity
        # '4' assigned to w2 because it has fewer data, and the slots of w3
        # are used up
        _check({
            '0': {'c00': _meta(5, 'w1'), 'c01': _meta(5, 'w2')},
            '1': {'c10': _meta(10, 'w1')},
            '2': {'c20': _meta(10, 'w3')},
            '3': {'c30': _meta(10, 'w3')},
            '4': {'c40': _meta(7, 'w3')},
        })

        # assign with full mismatch
        _check({
            '0': {'c00': _meta(5, 'w1'), 'c01': _meta(5, 'w1', 'w2')},
            '1': {'c10': _meta(10, 'w1')},
            '2': {'c20': _meta(10, 'w3')},
            '3': {'c30': _meta(10, 'w3')},
            '4': {'c40': _meta(7, 'w2')},
            '5': {'c50': _meta(7, 'w2')},
        })
コード例 #23
0
ファイル: test_analyzer.py プロジェクト: yyaaa1/mars
    def testAssignOnWorkerLost(self):
        """
        Check state changes and re-assignment when chunks are reported lost.

        Six groups of two random source chunks feeding two add chunks are
        built.  The first four groups are pinned alternately to ``w1`` and
        ``w2``; the source chunks of groups 0 and 2 are reported as lost and
        only ``w2`` and ``w3`` are offered to the analyzer.
        """
        import numpy as np
        from mars.scheduler import OperandState
        from mars.tensor.random import TensorRandint
        from mars.tensor.arithmetic import TensorTreeAdd

        r"""
        Proper initial allocation should divide the graph like

        FL  FL F   F R   R  |  FL  FL F   F R   R
        | x |  | x | | x |  |  | x |  | x | | x |
        R   R  R   R U   U  |  R   R  R   R U   U

        U: UNSCHEDULED  F: FINISHED  R: READY  L: LOST
        """

        # source chunks: six pairs of random chunks
        source_groups = []
        for _ in range(6):
            pair = []
            for _ in range(2):
                rand_op = TensorRandint(dtype=np.float32())
                pair.append(rand_op.new_chunk(None, shape=(10, 10)))
            source_groups.append(tuple(pair))

        # sink chunks: six pairs of add chunks keyed "<group>_<slot>"
        sink_groups = []
        for i in range(6):
            pair = []
            for j in range(2):
                add_op = TensorTreeAdd(_key=f'{i}_{j}', dtype=np.float32())
                pair.append(add_op.new_chunk(None, shape=(10, 10)))
            sink_groups.append(tuple(pair))

        # wire the graph; every operand starts out as UNSCHEDULED
        dag = DAG()
        states = {}
        for sources, sinks in zip(source_groups, sink_groups):
            for sink in sinks:
                sink.op._inputs = list(sources)
                states[sink.op.key] = OperandState.UNSCHEDULED
                dag.add_node(sink)

            for source in sources:
                states[source.op.key] = OperandState.UNSCHEDULED
                dag.add_node(source)
                for sink in sinks:
                    dag.add_edge(source, sink)

        # the first four groups are pinned alternately to w1 and w2, with
        # their sources finished and their sinks ready
        pinned = {}
        for idx in range(4):
            worker = f'w{idx % 2 + 1}'
            for source, sink in zip(source_groups[idx], sink_groups[idx]):
                pinned[source.op.key] = worker
                states[source.op.key] = OperandState.FINISHED
                pinned[sink.op.key] = worker
                states[sink.op.key] = OperandState.READY

        # every source that is not pinned is ready to be scheduled
        for sources in source_groups:
            for source in sources:
                if source.op.key not in pinned:
                    states[source.op.key] = OperandState.READY

        # the sources of groups 0 and 2 are the lost chunks
        lost_sources = source_groups[0] + source_groups[2]
        orphaned_sinks = sink_groups[0] + sink_groups[2]
        lost_chunks = [chunk.key for chunk in lost_sources]

        capacities = {'w2': 24, 'w3': 24}
        analyzer = GraphAnalyzer(dag, capacities, pinned, states, lost_chunks)
        changed_states = analyzer.analyze_state_changes()

        self.assertEqual(len(changed_states), 8)
        self.assertTrue(
            all(changed_states[chunk.op.key] == OperandState.READY
                for chunk in lost_sources))
        self.assertTrue(
            all(changed_states[chunk.op.key] == OperandState.UNSCHEDULED
                for chunk in orphaned_sinks))

        initial_keys = analyzer.get_initial_operand_keys()
        assignments = analyzer.calc_operand_assignments(initial_keys)

        # sources of an unpinned group must share one worker
        for sources in source_groups:
            if any(source.op.key in pinned for source in sources):
                continue
            chosen = {assignments[source.op.key] for source in sources}
            self.assertEqual(1, len(chosen))

        per_worker = dict.fromkeys(capacities, 0)
        for worker in assignments.values():
            per_worker[worker] += 1
        for worker, expected in (('w2', 2), ('w3', 6)):
            self.assertEqual(expected, per_worker[worker])