Example #1
    def testRemoteFunctionInLocalCluster(self):
        with new_cluster(scheduler_n_process=2, worker_n_process=3,
                         shared_memory='20M', modules=[__name__], web=True) as cluster:
            session = cluster.session

            def f(x):
                return x + 1

            def g(x, y):
                return x * y

            a = mr.spawn(f, 3)
            b = mr.spawn(f, 4)
            c = mr.spawn(g, (a, b))

            r = session.run(c, timeout=_exec_timeout)
            self.assertEqual(r, 20)

            e = mr.spawn(f, mr.spawn(f, 2))

            r = session.run(e, timeout=_exec_timeout)
            self.assertEqual(r, 4)

            session2 = new_session(cluster.endpoint)
            expect_session_id = session2.session_id

            def f2():
                session = Session.default
                assert isinstance(session._sess, ClusterSession)
                assert session._sess.session_id == expect_session_id

                t = mt.ones((3, 2))
                return t.sum().to_numpy()

            self.assertEqual(cloudpickle.loads(cloudpickle.dumps(Session.default)).session_id,
                             session.session_id)
            self.assertIsInstance(serialize_function(f2), bytes)

            d = mr.spawn(f2, retry_when_fail=False)

            r = session2.run(d, timeout=_exec_timeout)
            self.assertEqual(r, 6)

            # test input tileable
            def f(t, x):
                return (t * x).sum().to_numpy()

            rs = np.random.RandomState(0)
            raw = rs.rand(5, 4)

            t1 = mt.tensor(raw, chunk_size=3)
            t2 = t1.sum(axis=0)
            s = mr.spawn(f, args=(t2, 3), retry_when_fail=False)

            r = session.run(s, timeout=_exec_timeout)
            expected = (raw.sum(axis=0) * 3).sum()
            self.assertAlmostEqual(r, expected)

            # test named tileable
            session3 = new_session(cluster.endpoint)
            t = mt.ones((10, 10), chunk_size=3)
            session3.run(t, name='t_name')

            def f3():
                import mars.tensor as mt

                s = mt.named_tensor(name='t_name')
                return (s + 1).to_numpy()

            d = mr.spawn(f3, retry_when_fail=False)
            r = session3.run(d, timeout=_exec_timeout)
            np.testing.assert_array_equal(r, np.ones((10, 10)) + 1)

            # test a tileable that has already been executed
            session4 = new_session(cluster.endpoint)
            df1 = md.DataFrame(raw, chunk_size=3)
            df1 = df1[df1.iloc[:, 0] < 1.5]

            def f4(input_df):
                bonus = input_df.iloc[:, 0].fetch().sum()
                return input_df.sum().to_pandas() + bonus

            d = mr.spawn(f4, args=(df1,), retry_when_fail=False)
            r = session4.run(d, timeout=_exec_timeout)
            expected = pd.DataFrame(raw).sum() + raw[:, 0].sum()
            pd.testing.assert_series_equal(r, expected)

            # test a tileable with unknown shape
            session5 = new_session(cluster.endpoint)

            def f5(t, x):
                assert all(not np.isnan(s) for s in t.shape)
                return (t * x).sum().to_numpy()

            rs = np.random.RandomState(0)
            raw = rs.rand(5, 4)

            t1 = mt.tensor(raw, chunk_size=3)
            t2 = t1[t1 < 0.5]
            s = mr.spawn(f5, args=(t2, 3))
            result = session5.run(s, timeout=_exec_timeout)
            expected = (raw[raw < 0.5] * 3).sum()
            self.assertAlmostEqual(result, expected)
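For orientation, a minimal sketch of the mr.spawn / session.run pattern exercised above, assuming mr aliases mars.remote and new_cluster comes from mars.deploy.local as in the Mars releases these tests target:

import mars.remote as mr
from mars.deploy.local import new_cluster

def add_one(x):
    return x + 1

with new_cluster(scheduler_n_process=2, worker_n_process=2,
                 shared_memory='20M') as cluster:
    session = cluster.session
    r = mr.spawn(add_one, 41)  # builds the remote-function node lazily
    print(session.run(r))      # executes on the cluster and prints 42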
Example #2
    def testSplitPreparation(self, *_):
        arr = mt.ones(12, chunk_size=4)
        arr_split = mt.split(arr, 2)
        arr_sum = arr_split[0] + arr_split[1]
        with self.prepare_graph_in_pool(arr_sum, clean_io_meta=False):
            pass
Example #3
    def testShufflePreparation(self, *_):
        a = mt.ones((31, 27), chunk_size=10)
        b = a.reshape(27, 31)
        b.op.extra_params['_reshape_with_shuffle'] = True
        with self.prepare_graph_in_pool(b, compose=False):
            pass
Example #4
    def testMainTensorWithoutEtcd(self):
        self.start_processes()

        session_id = uuid.uuid1()
        actor_client = new_client()

        session_ref = actor_client.actor_ref(
            self.session_manager_ref.create_session(session_id))

        a = mt.ones((100, 100), chunk_size=30) * 2 * 1 + 1
        b = mt.ones((100, 100), chunk_size=30) * 2 * 1 + 1
        c = (a * b * 2 + 1).sum()
        graph = c.build_graph()
        targets = [c.key]
        graph_key = uuid.uuid1()
        session_ref.submit_tileable_graph(json.dumps(graph.to_json()),
                                          graph_key,
                                          target_tileables=targets)

        state = self.wait_for_termination(actor_client, session_ref, graph_key)
        self.assertEqual(state, GraphState.SUCCEEDED)

        result = session_ref.fetch_result(graph_key, c.key)
        expected = (np.ones(a.shape) * 2 * 1 + 1)**2 * 2 + 1
        assert_allclose(loads(result), expected.sum())

        a = mt.ones((100, 50), chunk_size=35) * 2 + 1
        b = mt.ones((50, 200), chunk_size=35) * 2 + 1
        c = a.dot(b)
        graph = c.build_graph()
        targets = [c.key]
        graph_key = uuid.uuid1()
        session_ref.submit_tileable_graph(json.dumps(graph.to_json()),
                                          graph_key,
                                          target_tileables=targets)

        state = self.wait_for_termination(actor_client, session_ref, graph_key)
        self.assertEqual(state, GraphState.SUCCEEDED)
        result = session_ref.fetch_result(graph_key, c.key)
        assert_allclose(loads(result), np.ones((100, 200)) * 450)

        base_arr = np.random.random((100, 100))
        a = mt.array(base_arr)
        sumv = reduce(operator.add, [a[:10, :10] for _ in range(10)])
        graph = sumv.build_graph()
        targets = [sumv.key]
        graph_key = uuid.uuid1()
        session_ref.submit_tileable_graph(json.dumps(graph.to_json()),
                                          graph_key,
                                          target_tileables=targets)

        state = self.wait_for_termination(actor_client, session_ref, graph_key)
        self.assertEqual(state, GraphState.SUCCEEDED)

        expected = reduce(operator.add,
                          [base_arr[:10, :10] for _ in range(10)])
        result = session_ref.fetch_result(graph_key, sumv.key)
        assert_allclose(loads(result), expected)

        a = mt.ones((31, 27), chunk_size=10)
        b = a.reshape(27, 31)
        b.op.extra_params['_reshape_with_shuffle'] = True
        r = b.sum(axis=1)
        graph = r.build_graph()
        targets = [r.key]
        graph_key = uuid.uuid1()
        session_ref.submit_tileable_graph(json.dumps(graph.to_json()),
                                          graph_key,
                                          target_tileables=targets)

        state = self.wait_for_termination(actor_client, session_ref, graph_key)
        self.assertEqual(state, GraphState.SUCCEEDED)

        result = session_ref.fetch_result(graph_key, r.key)
        assert_allclose(loads(result), np.ones((27, 31)).sum(axis=1))

        raw = np.random.RandomState(0).rand(10, 10)
        a = mt.tensor(raw, chunk_size=(5, 4))
        b = a[a.argmin(axis=1), mt.tensor(np.arange(10))]
        graph = b.build_graph()
        targets = [b.key]
        graph_key = uuid.uuid1()
        session_ref.submit_tileable_graph(json.dumps(graph.to_json()),
                                          graph_key,
                                          target_tileables=targets)

        state = self.wait_for_termination(actor_client, session_ref, graph_key)
        self.assertEqual(state, GraphState.SUCCEEDED)

        result = session_ref.fetch_result(graph_key, b.key)

        np.testing.assert_array_equal(loads(result), raw[raw.argmin(axis=1),
                                                         np.arange(10)])
Example #5
    def testFromTensorExecution(self):
        tensor = mt.random.rand(10, 10, chunk_size=5)
        df = dataframe_from_tensor(tensor)
        tensor_res = self.executor.execute_tensor(tensor, concat=True)[0]
        pdf_expected = pd.DataFrame(tensor_res)
        df_result = self.executor.execute_dataframe(df, concat=True)[0]
        pd.testing.assert_index_equal(df_result.index, pd.RangeIndex(0, 10))
        pd.testing.assert_index_equal(df_result.columns, pd.RangeIndex(0, 10))
        pd.testing.assert_frame_equal(df_result, pdf_expected)

        # test converted with specified index_value and columns
        tensor2 = mt.random.rand(2, 2, chunk_size=1)
        df2 = dataframe_from_tensor(tensor2,
                                    index=pd.Index(['a', 'b']),
                                    columns=pd.Index([3, 4]))
        df_result = self.executor.execute_dataframe(df2, concat=True)[0]
        pd.testing.assert_index_equal(df_result.index, pd.Index(['a', 'b']))
        pd.testing.assert_index_equal(df_result.columns, pd.Index([3, 4]))

        # test converted from 1-d tensor
        tensor3 = mt.array([1, 2, 3])
        df3 = dataframe_from_tensor(tensor3)
        result3 = self.executor.execute_dataframe(df3, concat=True)[0]
        pdf_expected = pd.DataFrame(np.array([1, 2, 3]))
        pd.testing.assert_frame_equal(pdf_expected, result3)

        # test converted from identical chunks
        tensor4 = mt.ones((10, 10), chunk_size=3)
        df4 = dataframe_from_tensor(tensor4)
        result4 = self.executor.execute_dataframe(df4, concat=True)[0]
        pdf_expected = pd.DataFrame(
            self.executor.execute_tensor(tensor4, concat=True)[0])
        pd.testing.assert_frame_equal(pdf_expected, result4)

        # from tensor with given index
        tensor5 = mt.ones((10, 10), chunk_size=3)
        df5 = dataframe_from_tensor(tensor5, index=np.arange(0, 20, 2))
        result5 = self.executor.execute_dataframe(df5, concat=True)[0]
        pdf_expected = pd.DataFrame(self.executor.execute_tensor(
            tensor5, concat=True)[0],
                                    index=np.arange(0, 20, 2))
        pd.testing.assert_frame_equal(pdf_expected, result5)

        # from tensor with given index that is a tensor
        raw7 = np.random.rand(10, 10)
        tensor7 = mt.tensor(raw7, chunk_size=3)
        index_raw7 = np.random.rand(10)
        index7 = mt.tensor(index_raw7, chunk_size=4)
        df7 = dataframe_from_tensor(tensor7, index=index7)
        result7 = self.executor.execute_dataframe(df7, concat=True)[0]
        pdf_expected = pd.DataFrame(raw7, index=index_raw7)
        pd.testing.assert_frame_equal(pdf_expected, result7)

        # from tensor with a given index that is an md.Index
        raw10 = np.random.rand(10, 10)
        tensor10 = mt.tensor(raw10, chunk_size=3)
        index10 = md.date_range('2020-1-1', periods=10, chunk_size=3)
        df10 = dataframe_from_tensor(tensor10, index=index10)
        result10 = self.executor.execute_dataframe(df10, concat=True)[0]
        pdf_expected = pd.DataFrame(raw10,
                                    index=pd.date_range('2020-1-1',
                                                        periods=10))
        pd.testing.assert_frame_equal(pdf_expected, result10)

        # from tensor with given columns
        tensor6 = mt.ones((10, 10), chunk_size=3)
        df6 = dataframe_from_tensor(tensor6, columns=list('abcdefghij'))
        result6 = self.executor.execute_dataframe(df6, concat=True)[0]
        pdf_expected = pd.DataFrame(self.executor.execute_tensor(
            tensor6, concat=True)[0],
                                    columns=list('abcdefghij'))
        pd.testing.assert_frame_equal(pdf_expected, result6)

        # from 1d tensors
        raws8 = [('a', np.random.rand(8)), ('b', np.random.randint(10,
                                                                   size=8)),
                 ('c', [
                     ''.join(np.random.choice(list(printable), size=6))
                     for _ in range(8)
                 ])]
        tensors8 = OrderedDict(
            (r[0], mt.tensor(r[1], chunk_size=3)) for r in raws8)
        raws8.append(('d', 1))
        raws8.append(('e', pd.date_range('2020-1-1', periods=8)))
        tensors8['d'] = 1
        tensors8['e'] = raws8[-1][1]
        df8 = dataframe_from_1d_tileables(tensors8,
                                          columns=[r[0] for r in raws8])
        result = self.executor.execute_dataframe(df8, concat=True)[0]
        pdf_expected = pd.DataFrame(OrderedDict(raws8))
        pd.testing.assert_frame_equal(result, pdf_expected)

        # from 1d tensors and specify index with a tensor
        index_raw9 = np.random.rand(8)
        index9 = mt.tensor(index_raw9, chunk_size=4)
        df9 = dataframe_from_1d_tileables(tensors8,
                                          columns=[r[0] for r in raws8],
                                          index=index9)
        result = self.executor.execute_dataframe(df9, concat=True)[0]
        pdf_expected = pd.DataFrame(OrderedDict(raws8), index=index_raw9)
        pd.testing.assert_frame_equal(result, pdf_expected)

        # from 1d tensors and specify index
        df11 = dataframe_from_1d_tileables(tensors8,
                                           columns=[r[0] for r in raws8],
                                           index=md.date_range('2020-1-1',
                                                               periods=8))
        result = self.executor.execute_dataframe(df11, concat=True)[0]
        pdf_expected = pd.DataFrame(OrderedDict(raws8),
                                    index=pd.date_range('2020-1-1', periods=8))
        pd.testing.assert_frame_equal(result, pdf_expected)
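The dataframe_from_1d_tileables calls above mirror plain pandas construction from a column dict. A hedged sketch of that equivalence, assuming md.DataFrame routes a dict of tensors through the same 1-d tileables path and that to_pandas() executes on a default local session:

import numpy as np
import pandas as pd
import mars.tensor as mt
import mars.dataframe as md

# 'a' is a chunked 1-d tensor column; 'd' is a scalar broadcast to all rows
data = {'a': mt.tensor(np.arange(8.0), chunk_size=3), 'd': 1}
mdf = md.DataFrame(data)  # assumption: dict input dispatches as above
expected = pd.DataFrame({'a': np.arange(8.0), 'd': 1})
pd.testing.assert_frame_equal(mdf.to_pandas(), expected)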
Example #6
    def f():
        assert Session.default.session_id == session.session_id
        return mt.ones((2, 3)).sum().to_numpy()
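This fragment closes over names from its enclosing test; a hedged reconstruction of how it is typically driven, reusing only calls that appear in the other examples (session and _exec_timeout are assumed to come from the enclosing scope):

import mars.remote as mr

# assumption: `session` was created with new_session(cluster.endpoint) in the
# enclosing test, so Session.default resolves to it inside the spawned task
d = mr.spawn(f, retry_when_fail=False)
r = session.run(d, timeout=_exec_timeout)  # expected: 6.0, the sum of 2x3 ones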
Example #7
    def testReExecuteSame(self):
        data = np.random.random((5, 9))

        # test run the same tensor
        arr4 = mt.tensor(data.copy(), chunk_size=3) + 1
        result1 = arr4.to_numpy()
        expected = data + 1

        np.testing.assert_array_equal(result1, expected)

        result2 = arr4.to_numpy()

        np.testing.assert_array_equal(result1, result2)

        # test run the same tensor with single chunk
        arr4 = mt.tensor(data.copy())
        result1 = arr4.to_numpy()
        expected = data

        np.testing.assert_array_equal(result1, expected)

        result2 = arr4.to_numpy()
        np.testing.assert_array_equal(result1, result2)

        # modify result
        sess = Session.default_or_local()
        executor = sess._sess._executor
        executor.chunk_result[get_tiled(arr4).chunks[0].key] = data + 2

        result3 = arr4.to_numpy()
        np.testing.assert_array_equal(result3, data + 2)

        # test run same key tensor
        arr5 = mt.ones((10, 10), chunk_size=3)
        result1 = arr5.to_numpy()

        del arr5
        arr6 = mt.ones((10, 10), chunk_size=3)
        result2 = arr6.to_numpy()

        np.testing.assert_array_equal(result1, result2)

        # test copy; make sure it does not cause the execution cache to be missed
        df = md.DataFrame(mt.ones((10, 3), chunk_size=5))
        executed = [False]

        def add_one(x):
            if executed[0]:  # pragma: no cover
                raise ValueError('executed before')
            return x + 1

        df2 = df.apply(add_one)
        pd.testing.assert_frame_equal(df2.to_pandas(),
                                      pd.DataFrame(np.ones((10, 3)) + 1))

        executed[0] = True

        df3 = df2.copy()
        df4 = df3 * 2
        pd.testing.assert_frame_equal(df4.to_pandas(),
                                      pd.DataFrame(np.ones((10, 3)) * 4))
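A minimal standalone sketch of the caching behavior this example relies on, assuming a default local session: executing the same tileable twice returns the cached result rather than recomputing.

import numpy as np
import mars.tensor as mt

arr = mt.tensor(np.random.random((5, 9)), chunk_size=3) + 1
first = arr.to_numpy()   # triggers execution
second = arr.to_numpy()  # served from the execution cache, no recompute
np.testing.assert_array_equal(first, second)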
Example #8
    def testGraphWithSplit(self):
        session_id = str(uuid.uuid4())
        graph_key = str(uuid.uuid4())

        arr = mt.ones(12, chunks=4)
        arr_split = mt.split(arr, 2)
        arr_sum = arr_split[0] + arr_split[1]

        graph = arr_sum.build_graph(compose=False)
        serialized_graph = serialize_graph(graph)
        chunked_graph = arr_sum.build_graph(compose=False, tiled=True)

        with create_actor_pool(n_process=1, backend='gevent') as pool:
            pool.create_actor(ClusterInfoActor, [pool.cluster_info.address],
                              uid=ClusterInfoActor.default_name())
            resource_ref = pool.create_actor(ResourceActor,
                                             uid=ResourceActor.default_name())
            kv_store_ref = pool.create_actor(KVStoreActor,
                                             uid=KVStoreActor.default_name())
            pool.create_actor(AssignerActor,
                              uid=AssignerActor.gen_name(session_id))
            graph_ref = pool.create_actor(GraphActor,
                                          session_id,
                                          graph_key,
                                          serialized_graph,
                                          uid=GraphActor.gen_name(
                                              session_id, graph_key))

            graph_ref.prepare_graph(compose=False)
            graph_data = kv_store_ref.read(
                '/sessions/%s/graphs/%s/chunk_graph' %
                (session_id, graph_key)).value
            self.assertIsNotNone(graph_data)
            fetched_graph = deserialize_graph(graph_data)
            self.assertEqual(len(chunked_graph), len(fetched_graph))

            graph_ref.scan_node()
            op_infos = graph_ref.get_operand_info()
            for n in fetched_graph:
                depth = op_infos[n.op.key]['optimize']['depth']
                self.assertIsNotNone(depth)
                successor_size = op_infos[
                    n.op.key]['optimize']['successor_size']
                self.assertIsNotNone(successor_size)
                descendant_size = op_infos[
                    n.op.key]['optimize']['descendant_size']
                self.assertIsNotNone(descendant_size)

            def write_mock_meta():
                resource_ref.set_worker_meta('localhost:12345',
                                             dict(hardware=dict(cpu_total=4)))
                resource_ref.set_worker_meta('localhost:23456',
                                             dict(hardware=dict(cpu_total=4)))

            v = gevent.spawn(write_mock_meta)
            v.join()

            graph_ref.place_initial_chunks()
            op_infos = graph_ref.get_operand_info()

            for n in fetched_graph:
                if fetched_graph.count_predecessors(n) != 0:
                    continue
                target_worker = op_infos[n.op.key]['target_worker']
                self.assertIsNotNone(target_worker)

            graph_ref.create_operand_actors()
            op_infos = graph_ref.get_operand_info()

            for n in fetched_graph:
                self.assertEqual(op_infos[n.op.key]['op_name'],
                                 type(n.op).__name__)

                io_meta = op_infos[n.op.key]['io_meta']
                orig_io_meta = dict(
                    predecessors=list(
                        set(pn.op.key
                            for pn in fetched_graph.iter_predecessors(n))),
                    successors=list(
                        set(sn.op.key
                            for sn in fetched_graph.iter_successors(n))),
                    input_chunks=list(
                        set(pn.key
                            for pn in fetched_graph.iter_predecessors(n))),
                    chunks=list(c.key for c in n.op.outputs),
                )
                self.assertSetEqual(set(io_meta['predecessors']),
                                    set(orig_io_meta['predecessors']))
                self.assertSetEqual(set(io_meta['successors']),
                                    set(orig_io_meta['successors']))
                self.assertSetEqual(set(io_meta['input_chunks']),
                                    set(orig_io_meta['input_chunks']))
                self.assertSetEqual(set(io_meta['chunks']),
                                    set(orig_io_meta['chunks']))

                self.assertEqual(op_infos[n.op.key]['output_size'],
                                 sum(ch.nbytes for ch in n.op.outputs))
Example #9
    def testSameKey(self, *_):
        session_id = str(uuid.uuid4())
        graph_key = str(uuid.uuid4())

        arr = mt.ones((5, 5), chunk_size=3)
        arr2 = mt.concatenate((arr, arr))

        graph = arr2.build_graph(compose=False)
        serialized_graph = serialize_graph(graph)
        chunked_graph = arr2.build_graph(compose=False, tiled=True)

        addr = '127.0.0.1:%d' % get_next_port()
        with create_actor_pool(n_process=1, backend='gevent',
                               address=addr) as pool:
            pool.create_actor(ClusterInfoActor, [pool.cluster_info.address],
                              uid=ClusterInfoActor.default_name())
            resource_ref = pool.create_actor(ResourceActor,
                                             uid=ResourceActor.default_name())
            pool.create_actor(ChunkMetaActor,
                              uid=ChunkMetaActor.default_name())
            pool.create_actor(AssignerActor,
                              uid=AssignerActor.gen_name(session_id))
            graph_ref = pool.create_actor(GraphActor,
                                          session_id,
                                          graph_key,
                                          serialized_graph,
                                          uid=GraphActor.gen_name(
                                              session_id, graph_key))

            graph_ref.prepare_graph(compose=False)
            fetched_graph = graph_ref.get_chunk_graph()
            self.assertIsNotNone(fetched_graph)
            self.assertEqual(len(chunked_graph), len(fetched_graph))

            graph_ref.scan_node()
            op_infos = graph_ref.get_operand_info()
            for n in fetched_graph:
                depth = op_infos[n.op.key]['optimize']['depth']
                self.assertIsNotNone(depth)
                successor_size = op_infos[
                    n.op.key]['optimize']['successor_size']
                self.assertIsNotNone(successor_size)
                descendant_size = op_infos[
                    n.op.key]['optimize']['descendant_size']
                self.assertIsNotNone(descendant_size)

            resource_ref.set_worker_meta('localhost:12345',
                                         dict(hardware=dict(cpu_total=4)))
            resource_ref.set_worker_meta('localhost:23456',
                                         dict(hardware=dict(cpu_total=4)))

            graph_ref.place_initial_chunks()
            op_infos = graph_ref.get_operand_info()

            for n in fetched_graph:
                if fetched_graph.count_predecessors(n) != 0:
                    continue
                target_worker = op_infos[n.op.key]['target_worker']
                self.assertIsNotNone(target_worker)

            graph_ref.create_operand_actors(_clean_io_meta=False)
            op_infos = graph_ref.get_operand_info()

            for n in fetched_graph:
                self.assertEqual(op_infos[n.op.key]['op_name'],
                                 type(n.op).__name__)

                io_meta = op_infos[n.op.key]['io_meta']
                orig_io_meta = dict(
                    predecessors=list(
                        set(pn.op.key
                            for pn in fetched_graph.iter_predecessors(n))),
                    successors=list(
                        set(sn.op.key
                            for sn in fetched_graph.iter_successors(n))),
                    input_chunks=list(
                        set(pn.key
                            for pn in fetched_graph.iter_predecessors(n))),
                    chunks=list(c.key for c in n.op.outputs),
                )
                self.assertSetEqual(set(io_meta['predecessors']),
                                    set(orig_io_meta['predecessors']))
                self.assertSetEqual(set(io_meta['successors']),
                                    set(orig_io_meta['successors']))
                self.assertSetEqual(set(io_meta['input_chunks']),
                                    set(orig_io_meta['input_chunks']))
                self.assertSetEqual(set(io_meta['chunks']),
                                    set(orig_io_meta['chunks']))

                self.assertEqual(op_infos[n.op.key]['output_size'],
                                 sum(ch.nbytes for ch in n.op.outputs))
Example #10
    def testChunkSerialize(self):
        t = ones((10, 3), chunk_size=(5, 2)).tiles()

        # pb
        chunk = t.chunks[0]
        serials = self._pb_serial(chunk)
        op, pb = serials[chunk.op, chunk.data]

        self.assertEqual(tuple(pb.index), chunk.index)
        self.assertEqual(pb.key, chunk.key)
        self.assertEqual(tuple(pb.shape), chunk.shape)
        self.assertEqual(int(op.type.split('.', 1)[1]), opcodes.TENSOR_ONES)

        chunk2 = self._pb_deserial(serials)[chunk.data]

        self.assertEqual(chunk.index, chunk2.index)
        self.assertEqual(chunk.key, chunk2.key)
        self.assertEqual(chunk.shape, chunk2.shape)
        self.assertEqual(chunk.op.dtype, chunk2.op.dtype)

        # json
        chunk = t.chunks[0]
        serials = self._json_serial(chunk)

        chunk2 = self._json_deserial(serials)[chunk.data]

        self.assertEqual(chunk.index, chunk2.index)
        self.assertEqual(chunk.key, chunk2.key)
        self.assertEqual(chunk.shape, chunk2.shape)
        self.assertEqual(chunk.op.dtype, chunk2.op.dtype)

        t = tensor(np.random.random((10, 3)), chunk_size=(5, 2)).tiles()

        # pb
        chunk = t.chunks[0]
        serials = self._pb_serial(chunk)
        op, pb = serials[chunk.op, chunk.data]

        self.assertEqual(tuple(pb.index), chunk.index)
        self.assertEqual(pb.key, chunk.key)
        self.assertEqual(tuple(pb.shape), chunk.shape)
        self.assertEqual(int(op.type.split('.', 1)[1]),
                         opcodes.TENSOR_DATA_SOURCE)

        chunk2 = self._pb_deserial(serials)[chunk.data]

        self.assertEqual(chunk.index, chunk2.index)
        self.assertEqual(chunk.key, chunk2.key)
        self.assertEqual(chunk.shape, chunk2.shape)
        self.assertTrue(np.array_equal(chunk.op.data, chunk2.op.data))

        # json
        chunk = t.chunks[0]
        serials = self._json_serial(chunk)

        chunk2 = self._json_deserial(serials)[chunk.data]

        self.assertEqual(chunk.index, chunk2.index)
        self.assertEqual(chunk.key, chunk2.key)
        self.assertEqual(chunk.shape, chunk2.shape)
        self.assertTrue(np.array_equal(chunk.op.data, chunk2.op.data))

        t1 = tensor(np.random.random((10, 3)), chunk_size=(5, 2))
        t2 = (t1 + 1).tiles()

        # pb
        chunk1 = get_tiled(t1).chunks[0]
        chunk2 = t2.chunks[0]

        composed_chunk = build_fuse_chunk([chunk1.data, chunk2.data],
                                          TensorFuseChunk)
        serials = self._pb_serial(composed_chunk)
        op, pb = serials[composed_chunk.op, composed_chunk.data]

        self.assertEqual(pb.key, composed_chunk.key)
        self.assertEqual(int(op.type.split('.', 1)[1]), opcodes.FUSE)

        composed_chunk2 = self._pb_deserial(serials)[composed_chunk.data]

        self.assertEqual(composed_chunk.key, composed_chunk2.key)
        self.assertEqual(type(composed_chunk.op), type(composed_chunk2.op))
        self.assertEqual(composed_chunk.composed[0].key,
                         composed_chunk2.composed[0].key)
        self.assertEqual(composed_chunk.composed[-1].key,
                         composed_chunk2.composed[-1].key)

        # json
        chunk1 = get_tiled(t1).chunks[0]
        chunk2 = t2.chunks[0]

        composed_chunk = build_fuse_chunk([chunk1.data, chunk2.data],
                                          TensorFuseChunk)
        serials = self._json_serial(composed_chunk)

        composed_chunk2 = self._json_deserial(serials)[composed_chunk.data]

        self.assertEqual(composed_chunk.key, composed_chunk2.key)
        self.assertEqual(type(composed_chunk.op), type(composed_chunk2.op))
        self.assertEqual(composed_chunk.composed[0].key,
                         composed_chunk2.composed[0].key)
        self.assertEqual(composed_chunk.composed[-1].key,
                         composed_chunk2.composed[-1].key)

        t1 = ones((10, 3), chunk_size=2)
        t2 = ones((3, 5), chunk_size=2)
        c = dot(t1, t2).tiles().chunks[0].inputs[0]

        # pb
        serials = self._pb_serial(c)
        c2 = self._pb_deserial(serials)[c]
        self.assertEqual(c.key, c2.key)

        # json
        serials = self._json_serial(c)
        c2 = self._json_deserial(serials)[c]
        self.assertEqual(c.key, c2.key)
Example #11
    def testFromTensor(self):
        tensor = mt.random.rand(10, 10, chunk_size=5)
        df = dataframe_from_tensor(tensor)
        self.assertIsInstance(df.index_value._index_value,
                              IndexValue.RangeIndex)
        self.assertEqual(
            df.op.dtypes[0], tensor.dtype,
            'DataFrame converted from tensor has the wrong dtype')

        df.tiles()
        self.assertEqual(len(df.chunks), 4)
        self.assertIsInstance(df.chunks[0].index_value._index_value,
                              IndexValue.RangeIndex)
        self.assertIsInstance(df.chunks[0].index_value, IndexValue)

        # test converted from 1-d tensor
        tensor2 = mt.array([1, 2, 3])
        # in fact, tensor3 has shape (3, 1)
        tensor3 = mt.array([tensor2]).T

        df2 = dataframe_from_tensor(tensor2)
        df3 = dataframe_from_tensor(tensor3)
        df2.tiles()
        df3.tiles()
        np.testing.assert_equal(df2.chunks[0].index, (0, 0))
        np.testing.assert_equal(df3.chunks[0].index, (0, 0))

        # test converted from scalar
        scalar = mt.array(1)
        np.testing.assert_equal(scalar.ndim, 0)
        with self.assertRaises(TypeError):
            dataframe_from_tensor(scalar)

        # from tensor with given index
        df = dataframe_from_tensor(tensor, index=np.arange(0, 20, 2))
        df.tiles()
        pd.testing.assert_index_equal(df.chunks[0].index_value.to_pandas(),
                                      pd.Index(np.arange(0, 10, 2)))
        pd.testing.assert_index_equal(df.chunks[1].index_value.to_pandas(),
                                      pd.Index(np.arange(0, 10, 2)))
        pd.testing.assert_index_equal(df.chunks[2].index_value.to_pandas(),
                                      pd.Index(np.arange(10, 20, 2)))
        pd.testing.assert_index_equal(df.chunks[3].index_value.to_pandas(),
                                      pd.Index(np.arange(10, 20, 2)))

        # from tensor with given columns
        df = dataframe_from_tensor(tensor, columns=list('abcdefghij'))
        df.tiles()
        pd.testing.assert_index_equal(df.chunks[0].columns.to_pandas(),
                                      pd.Index(['a', 'b', 'c', 'd', 'e']))
        pd.testing.assert_index_equal(df.chunks[1].columns.to_pandas(),
                                      pd.Index(['f', 'g', 'h', 'i', 'j']))
        pd.testing.assert_index_equal(df.chunks[2].columns.to_pandas(),
                                      pd.Index(['a', 'b', 'c', 'd', 'e']))
        pd.testing.assert_index_equal(df.chunks[3].columns.to_pandas(),
                                      pd.Index(['f', 'g', 'h', 'i', 'j']))

        # test series from tensor
        tensor = mt.random.rand(10, chunk_size=4)
        series = series_from_tensor(tensor, name='a')

        self.assertEqual(series.dtype, tensor.dtype)
        self.assertEqual(series.name, 'a')
        pd.testing.assert_index_equal(series.index_value.to_pandas(),
                                      pd.RangeIndex(10))

        series.tiles()
        self.assertEqual(len(series.chunks), 3)
        pd.testing.assert_index_equal(series.chunks[0].index_value.to_pandas(),
                                      pd.RangeIndex(0, 4))
        self.assertEqual(series.chunks[0].name, 'a')
        pd.testing.assert_index_equal(series.chunks[1].index_value.to_pandas(),
                                      pd.RangeIndex(4, 8))
        self.assertEqual(series.chunks[1].name, 'a')
        pd.testing.assert_index_equal(series.chunks[2].index_value.to_pandas(),
                                      pd.RangeIndex(8, 10))
        self.assertEqual(series.chunks[2].name, 'a')

        with self.assertRaises(TypeError):
            series_from_tensor(mt.ones((10, 10)))
Example #12
    def testExecutorWithGeventProvider(self):
        executor = Executor(sync_provider_type=Executor.SyncProviderType.GEVENT)

        a = mt.ones((10, 10), chunk_size=2)
        res = executor.execute_tensor(a, concat=True)[0]
        np.testing.assert_array_equal(res, np.ones((10, 10)))
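The same round trip as a standalone script, sketched under the assumptions that Executor is importable from mars.executor and that gevent is installed:

import numpy as np
import mars.tensor as mt
from mars.executor import Executor  # import path is an assumption

executor = Executor(sync_provider_type=Executor.SyncProviderType.GEVENT)
a = mt.ones((10, 10), chunk_size=2)
res = executor.execute_tensor(a, concat=True)[0]
np.testing.assert_array_equal(res, np.ones((10, 10)))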
Example #13
    def testMain(self):
        session_id = uuid.uuid1()
        scheduler_address = '127.0.0.1:' + self.scheduler_port
        actor_client = new_client()
        session_ref = actor_client.create_actor(SessionActor,
                                                uid=SessionActor.gen_name(session_id),
                                                address=scheduler_address,
                                                session_id=session_id)
        a = mt.ones((100, 100), chunk_size=30) * 2 * 1 + 1
        b = mt.ones((100, 100), chunk_size=30) * 2 * 1 + 1
        c = (a * b * 2 + 1).sum()
        graph = c.build_graph()
        targets = [c.key]
        graph_key = uuid.uuid1()
        session_ref.submit_tensor_graph(json.dumps(graph.to_json()),
                                        graph_key, target_tensors=targets)

        state = self.wait_for_termination(session_ref, graph_key)
        self.assertEqual(state, GraphState.SUCCEEDED)

        result = session_ref.fetch_result(graph_key, c.key)
        expected = (np.ones(a.shape) * 2 * 1 + 1) ** 2 * 2 + 1
        assert_array_equal(loads(result), expected.sum())

        graph_key = uuid.uuid1()
        session_ref.submit_tensor_graph(json.dumps(graph.to_json()),
                                        graph_key, target_tensors=targets)

        # todo this behavior may change when eager mode is introduced
        state = self.wait_for_termination(session_ref, graph_key)
        self.assertEqual(state, GraphState.FAILED)

        a = mt.ones((100, 50), chunk_size=35) * 2 + 1
        b = mt.ones((50, 200), chunk_size=35) * 2 + 1
        c = a.dot(b)
        graph = c.build_graph()
        targets = [c.key]
        graph_key = uuid.uuid1()
        session_ref.submit_tensor_graph(json.dumps(graph.to_json()),
                                        graph_key, target_tensors=targets)

        state = self.wait_for_termination(session_ref, graph_key)
        self.assertEqual(state, GraphState.SUCCEEDED)
        result = session_ref.fetch_result(graph_key, c.key)
        assert_array_equal(loads(result), np.ones((100, 200)) * 450)

        base_arr = np.random.random((100, 100))
        a = mt.array(base_arr)
        sumv = reduce(operator.add, [a[:10, :10] for _ in range(10)])
        graph = sumv.build_graph()
        targets = [sumv.key]
        graph_key = uuid.uuid1()
        session_ref.submit_tensor_graph(json.dumps(graph.to_json()),
                                        graph_key, target_tensors=targets)

        state = self.wait_for_termination(session_ref, graph_key)
        self.assertEqual(state, GraphState.SUCCEEDED)

        expected = reduce(operator.add, [base_arr[:10, :10] for _ in range(10)])
        result = session_ref.fetch_result(graph_key, sumv.key)
        assert_array_equal(loads(result), expected)
Example #14
    def testTensordot(self):
        from mars.tensor.linalg import tensordot, dot, inner

        t1 = ones((3, 4, 6), chunk_size=2)
        t2 = ones((4, 3, 5), chunk_size=2)
        t3 = tensordot(t1, t2, axes=((0, 1), (1, 0)))

        self.assertEqual(t3.shape, (6, 5))

        t3.tiles()

        self.assertEqual(t3.shape, (6, 5))
        self.assertEqual(len(t3.chunks), 9)

        a = ones((10000, 20000), chunk_size=5000)
        b = ones((20000, 1000), chunk_size=5000)

        with self.assertRaises(ValueError):
            tensordot(a, b)

        a = ones(10, chunk_size=2)
        b = ones((10, 20), chunk_size=2)
        c = dot(a, b)
        self.assertEqual(c.shape, (20, ))
        c.tiles()
        self.assertEqual(c.shape, tuple(sum(s) for s in c.nsplits))

        a = ones((10, 20), chunk_size=2)
        b = ones(20, chunk_size=2)
        c = dot(a, b)
        self.assertEqual(c.shape, (10, ))
        c.tiles()
        self.assertEqual(c.shape, tuple(sum(s) for s in c.nsplits))

        v = ones((100, 100), chunk_size=10)
        tv = v.dot(v)
        self.assertEqual(tv.shape, (100, 100))
        tv.tiles()
        self.assertEqual(tv.shape, tuple(sum(s) for s in tv.nsplits))

        a = ones((10, 20), chunk_size=2)
        b = ones((30, 20), chunk_size=2)
        c = inner(a, b)
        self.assertEqual(c.shape, (10, 30))
        c.tiles()
        self.assertEqual(c.shape, tuple(sum(s) for s in c.nsplits))
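The chunk-count assertion for t3 follows from ceiling division of each output dimension by the chunk size; a quick arithmetic check:

import math

# t3 has shape (6, 5) and chunk_size=2, so tiling yields a
# ceil(6 / 2) x ceil(5 / 2) = 3 x 3 grid of chunks
assert math.ceil(6 / 2) * math.ceil(5 / 2) == 9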
Example #15
    def testPrepareQuota(self, *_):
        pinned = [True]

        def _mock_pin(graph_key, chunk_keys):
            from mars.errors import PinChunkFailed
            if pinned[0]:
                raise PinChunkFailed
            return chunk_keys

        ChunkHolderActor.pin_chunks.side_effect = _mock_pin

        pool_address = '127.0.0.1:%d' % get_next_port()
        session_id = str(uuid.uuid4())
        mock_data = np.array([1, 2, 3, 4])
        with create_actor_pool(n_process=1,
                               backend='gevent',
                               address=pool_address) as pool:
            self.create_standard_actors(pool,
                                        pool_address,
                                        with_daemon=False,
                                        with_status=False)
            pool.create_actor(MockSenderActor,
                              mock_data,
                              'in',
                              uid='w:mock_sender')
            chunk_meta_ref = pool.actor_ref(ChunkMetaActor.default_name())

            import mars.tensor as mt
            from mars.tensor.expressions.datasource import TensorFetch
            arr = mt.ones((4, ), chunk_size=4)
            arr_add = mt.array(mock_data)
            result_tensor = arr + arr_add
            graph = result_tensor.build_graph(compose=False, tiled=True)

            modified_chunk = arr_add.chunks[0]
            arr_add.chunks[0]._op = TensorFetch(
                dtype=modified_chunk.dtype,
                _outputs=[weakref.ref(o) for o in modified_chunk.op.outputs],
                _key=modified_chunk.op.key)
            chunk_meta_ref.set_chunk_meta(session_id,
                                          modified_chunk.key,
                                          size=mock_data.nbytes,
                                          shape=mock_data.shape,
                                          workers=('0.0.0.0:1234',
                                                   pool_address))
            with self.run_actor_test(pool) as test_actor:
                graph_key = str(uuid.uuid4())
                execution_ref = test_actor.promise_ref(
                    ExecutionActor.default_name())

                start_time = time.time()

                execution_ref.enqueue_graph(
                    session_id, graph_key, serialize_graph(graph),
                    dict(chunks=[result_tensor.chunks[0].key]), None, _promise=True) \
                    .then(lambda *_: test_actor.set_result(time.time())) \
                    .catch(lambda *exc: test_actor.set_result(exc, False))

                def _delay_fun():
                    time.sleep(1)
                    pinned[0] = False

                threading.Thread(target=_delay_fun).start()

            finish_time = self.get_result()
            self.assertGreaterEqual(finish_time, start_time + 1)
Example #16
    def testSimpleExecution(self):
        pool_address = f'127.0.0.1:{get_next_port()}'
        with create_actor_pool(n_process=1,
                               backend='gevent',
                               address=pool_address) as pool:
            self.create_standard_actors(pool, pool_address, with_daemon=False)
            pool.create_actor(CpuCalcActor, uid='w:1:calc-a')
            pool.create_actor(InProcHolderActor)

            import mars.tensor as mt
            from mars.tensor.datasource import TensorOnes
            from mars.tensor.fetch import TensorFetch
            arr = mt.ones((10, 8), chunk_size=10)
            arr_add = mt.ones((10, 8), chunk_size=10)
            arr2 = arr + arr_add
            graph = arr2.build_graph(fuse_enabled=False, tiled=True)

            arr = get_tiled(arr)
            arr2 = get_tiled(arr2)

            metas = dict()
            for chunk in graph:
                if isinstance(chunk.op, TensorOnes):
                    chunk._op = TensorFetch(
                        dtype=chunk.dtype,
                        _outputs=[weakref.ref(o) for o in chunk.op.outputs],
                        _key=chunk.op.key)
                    metas[chunk.key] = WorkerMeta(chunk.nbytes, chunk.shape,
                                                  pool_address)

            with self.run_actor_test(pool) as test_actor:
                session_id = str(uuid.uuid4())

                storage_client = test_actor.storage_client
                self.waitp(
                    storage_client.put_objects(
                        session_id, [arr.chunks[0].key],
                        [np.ones((10, 8), dtype=np.int16)],
                        [DataStorageDevice.SHARED_MEMORY]), )

                execution_ref = test_actor.promise_ref(
                    ExecutionActor.default_uid())

                def _validate(*_):
                    data = test_actor.shared_store.get(session_id,
                                                       arr2.chunks[0].key)
                    assert_array_equal(data, 2 * np.ones((10, 8)))

                graph_key = str(uuid.uuid4())
                execution_ref.execute_graph(session_id, graph_key, serialize_graph(graph),
                                            dict(chunks=[arr2.chunks[0].key]), metas, _promise=True) \
                    .then(_validate) \
                    .then(lambda *_: test_actor.set_result(None)) \
                    .catch(lambda *exc: test_actor.set_result(exc, False))

            self.get_result()

            with self.run_actor_test(pool) as test_actor:
                execution_ref = test_actor.promise_ref(
                    ExecutionActor.default_uid())

                def _validate(*_):
                    data = test_actor.shared_store.get(session_id,
                                                       arr2.chunks[0].key)
                    assert_array_equal(data, 2 * np.ones((10, 8)))

                execution_ref.add_finish_callback(session_id, graph_key, _promise=True) \
                    .then(_validate) \
                    .then(lambda *_: test_actor.set_result(None)) \
                    .catch(lambda *exc: test_actor.set_result(exc, False))

            self.get_result()
Example #17
    def testPrepareSpilled(self):
        from mars.worker.spill import write_spill_file

        pool_address = '127.0.0.1:%d' % get_next_port()
        session_id = str(uuid.uuid4())
        mock_data = np.array([1, 2, 3, 4])

        options.worker.spill_directory = tempfile.mkdtemp(
            prefix='mars_worker_prep_spilled-')

        with create_actor_pool(n_process=1,
                               backend='gevent',
                               address=pool_address) as pool:
            self.create_standard_actors(pool,
                                        pool_address,
                                        with_daemon=False,
                                        with_status=False)
            pool.create_actor(SpillActor)
            pool.create_actor(CpuCalcActor)
            chunk_meta_ref = pool.actor_ref(ChunkMetaActor.default_name())
            pool.actor_ref(ChunkHolderActor.default_name())

            import mars.tensor as mt
            from mars.tensor.expressions.datasource import TensorFetch
            arr = mt.ones((4, ), chunk_size=4)
            arr_add = mt.array(mock_data)
            result_tensor = arr + arr_add
            graph = result_tensor.build_graph(compose=False, tiled=True)

            modified_chunk = arr_add.chunks[0]
            arr_add.chunks[0]._op = TensorFetch(
                dtype=modified_chunk.dtype,
                _outputs=[weakref.ref(o) for o in modified_chunk.op.outputs],
                _key=modified_chunk.op.key)

            # test meta missing
            with self.run_actor_test(pool) as test_actor:
                graph_key = str(uuid.uuid4())
                execution_ref = test_actor.promise_ref(
                    ExecutionActor.default_name())
                execution_ref.enqueue_graph(session_id, graph_key, serialize_graph(graph),
                                            dict(chunks=[result_tensor.chunks[0].key]), None, _promise=True) \
                    .then(lambda *_: execution_ref.start_execution(session_id, graph_key, _promise=True)) \
                    .then(lambda *_: test_actor.set_result(None)) \
                    .catch(lambda *exc: test_actor.set_result(exc, False))

            with self.assertRaises(DependencyMissing):
                self.get_result()

            chunk_meta_ref.set_chunk_meta(session_id,
                                          modified_chunk.key,
                                          size=mock_data.nbytes,
                                          shape=mock_data.shape,
                                          workers=('0.0.0.0:1234',
                                                   pool_address))
            write_spill_file(modified_chunk.key, mock_data)

            # test read from spilled file
            with self.run_actor_test(pool) as test_actor:

                def _validate(_):
                    data = test_actor._chunk_store.get(
                        session_id, result_tensor.chunks[0].key)
                    assert_array_equal(data, mock_data + np.ones((4, )))

                graph_key = str(uuid.uuid4())
                execution_ref = test_actor.promise_ref(
                    ExecutionActor.default_name())
                execution_ref.enqueue_graph(session_id, graph_key, serialize_graph(graph),
                                            dict(chunks=[result_tensor.chunks[0].key]), None, _promise=True) \
                    .then(lambda *_: execution_ref.start_execution(session_id, graph_key, _promise=True)) \
                    .then(_validate) \
                    .then(lambda *_: test_actor.set_result(None)) \
                    .catch(lambda *exc: test_actor.set_result(exc, False))

            self.get_result()
Example #18
    def testPrepareQuota(self, *_):
        pinned = True

        orig_pin = SharedHolderActor.pin_data_keys

        def _mock_pin(self, session_id, chunk_keys, token):
            from mars.errors import PinDataKeyFailed
            if pinned:
                raise PinDataKeyFailed
            return orig_pin(self, session_id, chunk_keys, token)

        pool_address = f'127.0.0.1:{get_next_port()}'
        session_id = str(uuid.uuid4())
        mock_data = np.array([1, 2, 3, 4])
        with patch_method(SharedHolderActor.pin_data_keys, new=_mock_pin), \
                create_actor_pool(n_process=1, backend='gevent', address=pool_address) as pool:
            self.create_standard_actors(pool,
                                        pool_address,
                                        with_daemon=False,
                                        with_status=False)
            pool.create_actor(MockSenderActor, [mock_data],
                              'in',
                              uid='w:mock_sender')
            pool.create_actor(CpuCalcActor)
            pool.create_actor(InProcHolderActor)
            pool.actor_ref(WorkerClusterInfoActor.default_uid())

            import mars.tensor as mt
            from mars.tensor.fetch import TensorFetch
            arr = mt.ones((4, ), chunk_size=4)
            arr_add = mt.array(mock_data)
            result_tensor = arr + arr_add
            graph = result_tensor.build_graph(fuse_enabled=False, tiled=True)

            arr_add = get_tiled(arr_add)
            result_tensor = get_tiled(result_tensor)

            modified_chunk = arr_add.chunks[0]
            arr_add.chunks[0]._op = TensorFetch(
                dtype=modified_chunk.dtype,
                _outputs=[weakref.ref(o) for o in modified_chunk.op.outputs],
                _key=modified_chunk.op.key)
            metas = {
                modified_chunk.key:
                WorkerMeta(mock_data.nbytes, mock_data.shape,
                           ('0.0.0.0:1234',
                            pool_address.replace('127.0.0.1', 'localhost')))
            }
            with self.run_actor_test(pool) as test_actor:
                graph_key = str(uuid.uuid4())
                execution_ref = test_actor.promise_ref(
                    ExecutionActor.default_uid())

                start_time = time.time()

                execution_ref.execute_graph(
                    session_id,
                    graph_key,
                    serialize_graph(graph),
                    dict(chunks=[result_tensor.chunks[0].key]),
                    metas,
                    _tell=True)

                execution_ref.add_finish_callback(session_id, graph_key, _promise=True) \
                    .then(lambda *_: test_actor.set_result(time.time())) \
                    .catch(lambda *exc: test_actor.set_result(exc, False))

                def _delay_fun():
                    nonlocal pinned
                    time.sleep(0.5)
                    pinned = False

                threading.Thread(target=_delay_fun).start()

            finish_time = self.get_result()
            self.assertGreaterEqual(finish_time, start_time + 0.5)
Example #19
    def testFromTensor(self):
        tensor = mt.random.rand(10, 10, chunk_size=5)
        df = dataframe_from_tensor(tensor)
        self.assertIsInstance(df.index_value._index_value,
                              IndexValue.RangeIndex)
        self.assertEqual(
            df.op.dtypes[0], tensor.dtype,
            'DataFrame converted from tensor has the wrong dtype')

        df = df.tiles()
        self.assertEqual(len(df.chunks), 4)
        self.assertIsInstance(df.chunks[0].index_value._index_value,
                              IndexValue.RangeIndex)
        self.assertIsInstance(df.chunks[0].index_value, IndexValue)

        # test converted from 1-d tensor
        tensor2 = mt.array([1, 2, 3])
        # in fact, tensor3 has shape (3, 1)
        tensor3 = mt.array([tensor2]).T

        df2 = dataframe_from_tensor(tensor2)
        df3 = dataframe_from_tensor(tensor3)
        df2 = df2.tiles()
        df3 = df3.tiles()
        np.testing.assert_equal(df2.chunks[0].index, (0, 0))
        np.testing.assert_equal(df3.chunks[0].index, (0, 0))

        # test converted from scalar
        scalar = mt.array(1)
        np.testing.assert_equal(scalar.ndim, 0)
        with self.assertRaises(TypeError):
            dataframe_from_tensor(scalar)

        # from tensor with given index
        df = dataframe_from_tensor(tensor, index=np.arange(0, 20, 2))
        df = df.tiles()
        pd.testing.assert_index_equal(df.chunks[0].index_value.to_pandas(),
                                      pd.Index(np.arange(0, 10, 2)))
        pd.testing.assert_index_equal(df.chunks[1].index_value.to_pandas(),
                                      pd.Index(np.arange(0, 10, 2)))
        pd.testing.assert_index_equal(df.chunks[2].index_value.to_pandas(),
                                      pd.Index(np.arange(10, 20, 2)))
        pd.testing.assert_index_equal(df.chunks[3].index_value.to_pandas(),
                                      pd.Index(np.arange(10, 20, 2)))

        # from tensor with index that is a tensor as well
        df = dataframe_from_tensor(tensor, index=mt.arange(0, 20, 2))
        df = df.tiles()
        self.assertEqual(len(df.chunks[0].inputs), 2)
        self.assertFalse(df.chunks[0].index_value.has_value())

        # from tensor with given columns
        df = dataframe_from_tensor(tensor, columns=list('abcdefghij'))
        df = df.tiles()
        pd.testing.assert_index_equal(df.dtypes.index,
                                      pd.Index(list('abcdefghij')))
        pd.testing.assert_index_equal(df.chunks[0].columns_value.to_pandas(),
                                      pd.Index(['a', 'b', 'c', 'd', 'e']))
        pd.testing.assert_index_equal(df.chunks[0].dtypes.index,
                                      pd.Index(['a', 'b', 'c', 'd', 'e']))
        pd.testing.assert_index_equal(df.chunks[1].columns_value.to_pandas(),
                                      pd.Index(['f', 'g', 'h', 'i', 'j']))
        pd.testing.assert_index_equal(df.chunks[1].dtypes.index,
                                      pd.Index(['f', 'g', 'h', 'i', 'j']))
        pd.testing.assert_index_equal(df.chunks[2].columns_value.to_pandas(),
                                      pd.Index(['a', 'b', 'c', 'd', 'e']))
        pd.testing.assert_index_equal(df.chunks[2].dtypes.index,
                                      pd.Index(['a', 'b', 'c', 'd', 'e']))
        pd.testing.assert_index_equal(df.chunks[3].columns_value.to_pandas(),
                                      pd.Index(['f', 'g', 'h', 'i', 'j']))
        pd.testing.assert_index_equal(df.chunks[3].dtypes.index,
                                      pd.Index(['f', 'g', 'h', 'i', 'j']))

        # test series from tensor
        tensor = mt.random.rand(10, chunk_size=4)
        series = series_from_tensor(tensor, name='a')

        self.assertEqual(series.dtype, tensor.dtype)
        self.assertEqual(series.name, 'a')
        pd.testing.assert_index_equal(series.index_value.to_pandas(),
                                      pd.RangeIndex(10))

        series = series.tiles()
        self.assertEqual(len(series.chunks), 3)
        pd.testing.assert_index_equal(series.chunks[0].index_value.to_pandas(),
                                      pd.RangeIndex(0, 4))
        self.assertEqual(series.chunks[0].name, 'a')
        pd.testing.assert_index_equal(series.chunks[1].index_value.to_pandas(),
                                      pd.RangeIndex(4, 8))
        self.assertEqual(series.chunks[1].name, 'a')
        pd.testing.assert_index_equal(series.chunks[2].index_value.to_pandas(),
                                      pd.RangeIndex(8, 10))
        self.assertEqual(series.chunks[2].name, 'a')

        df = dataframe_from_1d_tensors(
            [mt.tensor(np.random.rand(4)),
             mt.tensor(np.random.rand(4))])
        pd.testing.assert_index_equal(df.columns_value.to_pandas(),
                                      pd.RangeIndex(2))

        df = df.tiles()

        pd.testing.assert_index_equal(df.chunks[0].index_value.to_pandas(),
                                      pd.RangeIndex(4))

        series = series_from_tensor(mt.random.rand(4))
        pd.testing.assert_index_equal(series.index_value.to_pandas(),
                                      pd.RangeIndex(4))

        series = series_from_tensor(mt.random.rand(4), index=[1, 2, 3])
        pd.testing.assert_index_equal(series.index_value.to_pandas(),
                                      pd.Index([1, 2, 3]))

        series = series_from_tensor(mt.random.rand(4),
                                    index=pd.Index([1, 2, 3], name='my_index'))
        pd.testing.assert_index_equal(series.index_value.to_pandas(),
                                      pd.Index([1, 2, 3], name='my_index'))
        self.assertEqual(series.index_value.name, 'my_index')

        with self.assertRaises(TypeError):
            series_from_tensor(mt.ones((10, 10)))

        # index has wrong shape
        with self.assertRaises(ValueError):
            dataframe_from_tensor(mt.random.rand(4, 3),
                                  index=mt.random.rand(5))

        # columns have wrong shape
        with self.assertRaises(ValueError):
            dataframe_from_tensor(mt.random.rand(4, 3), columns=['a', 'b'])

        # index should be 1-d
        with self.assertRaises(ValueError):
            dataframe_from_tensor(mt.tensor(np.random.rand(3, 2)),
                                  index=mt.tensor(np.random.rand(3, 2)))

        # 1-d tensors should have same shape
        with self.assertRaises(ValueError):
            dataframe_from_1d_tensors(
                [mt.tensor(np.random.rand(3)),
                 mt.tensor(np.random.rand(2))])

        # index has wrong shape
        with self.assertRaises(ValueError):
            dataframe_from_1d_tensors([mt.tensor(np.random.rand(3))],
                                      index=mt.tensor(np.random.rand(2)))

        # columns have wrong shape
        with self.assertRaises(ValueError):
            dataframe_from_1d_tensors([mt.tensor(np.random.rand(3))],
                                      columns=['a', 'b'])

        # index should be 1-d
        with self.assertRaises(ValueError):
            series_from_tensor(mt.random.rand(4), index=mt.random.rand(4, 3))
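
The ValueError/TypeError branches above come from eager shape validation in the from-tensor constructors. A minimal sketch of inputs that satisfy those checks (hypothetical standalone usage; `dataframe_from_tensor`, `dataframe_from_1d_tensors` and `series_from_tensor` are the same helpers imported by this test module):

    import numpy as np
    import mars.tensor as mt

    t2d = mt.tensor(np.random.rand(4, 3))
    # columns must match shape[1] of the source tensor
    df = dataframe_from_tensor(t2d, columns=['a', 'b', 'c'])
    # all 1-d tensors must share the same length
    df2 = dataframe_from_1d_tensors([mt.tensor(np.random.rand(3)),
                                     mt.tensor(np.random.rand(3))])
    # a series accepts a 1-d tensor only; 2-d input raises TypeError
    s = series_from_tensor(mt.random.rand(4), name='a')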
Example #20
    def testPrepareSpilled(self):
        pool_address = f'127.0.0.1:{get_next_port()}'
        session_id = str(uuid.uuid4())
        mock_data = np.array([1, 2, 3, 4])

        options.worker.spill_directory = tempfile.mkdtemp(
            prefix='mars_worker_prep_spilled-')

        with create_actor_pool(n_process=1,
                               backend='gevent',
                               address=pool_address) as pool:
            self.create_standard_actors(pool,
                                        pool_address,
                                        with_daemon=False,
                                        with_status=False)
            pool.create_actor(IORunnerActor)
            pool.create_actor(CpuCalcActor)
            pool.create_actor(InProcHolderActor)

            import mars.tensor as mt
            from mars.tensor.fetch import TensorFetch
            arr = mt.ones((4, ), chunk_size=4)
            arr_add = mt.array(mock_data)
            result_tensor = arr + arr_add
            graph = result_tensor.build_graph(fuse_enabled=False, tiled=True)

            arr_add = get_tiled(arr_add)
            result_tensor = get_tiled(result_tensor)

            modified_chunk = arr_add.chunks[0]
            arr_add.chunks[0]._op = TensorFetch(
                dtype=modified_chunk.dtype,
                _outputs=[weakref.ref(o) for o in modified_chunk.op.outputs],
                _key=modified_chunk.op.key)

            # test meta missing
            with self.run_actor_test(pool) as test_actor:
                graph_key = str(uuid.uuid4())
                execution_ref = test_actor.promise_ref(
                    ExecutionActor.default_uid())
                execution_ref.execute_graph(session_id, graph_key, serialize_graph(graph),
                                            dict(chunks=[result_tensor.chunks[0].key]), None, _promise=True) \
                    .then(lambda *_: test_actor.set_result(None)) \
                    .catch(lambda *exc: test_actor.set_result(exc, False))

            with self.assertRaises(DependencyMissing):
                self.get_result()

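            # meta lists a dead endpoint plus the local pool address;
            # the chunk itself will be served from the local spilled copy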
            metas = {
                modified_chunk.key:
                WorkerMeta(mock_data.nbytes, mock_data.shape,
                           ('0.0.0.0:1234', pool_address))
            }

            # test read from spilled file
            with self.run_actor_test(pool) as test_actor:
                self.waitp(
                    test_actor.storage_client.put_objects(
                        session_id, [modified_chunk.key], [mock_data],
                        [DataStorageDevice.PROC_MEMORY])
                    .then(lambda *_: test_actor.storage_client.copy_to(
                        session_id, [modified_chunk.key],
                        [DataStorageDevice.DISK])))
                test_actor.storage_client.delete(
                    session_id, [modified_chunk.key],
                    [DataStorageDevice.PROC_MEMORY])

                def _validate(*_):
                    data = test_actor.shared_store.get(
                        session_id, result_tensor.chunks[0].key)
                    assert_array_equal(data, mock_data + np.ones((4, )))

                graph_key = str(uuid.uuid4())
                execution_ref = test_actor.promise_ref(
                    ExecutionActor.default_uid())
                execution_ref.execute_graph(session_id, graph_key, serialize_graph(graph),
                                            dict(chunks=[result_tensor.chunks[0].key]), metas, _promise=True) \
                    .then(_validate) \
                    .then(lambda *_: test_actor.set_result(None)) \
                    .catch(lambda *exc: test_actor.set_result(exc, False))

            self.get_result()
Example #21
    def testFetch(self):
        sess = new_session()

        arr1 = mt.ones((10, 5), chunk_size=3)

        r1 = sess.run(arr1)
        r2 = sess.run(arr1)
        np.testing.assert_array_equal(r1, r2)

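        # overwrite one cached chunk result in place; later runs over arr1
        # must reuse this value instead of recomputing the chunk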
        executor = sess._sess._executor
        executor.chunk_result[get_tiled(arr1).chunks[0].key] = np.ones(
            (3, 3)) * 2
        r3 = sess.run(arr1 + 1)
        np.testing.assert_array_equal(r3[:3, :3], np.ones((3, 3)) * 3)

        # rerun to ensure arr1's chunk results still exist
        r4 = sess.run(arr1 + 1)
        np.testing.assert_array_equal(r4[:3, :3], np.ones((3, 3)) * 3)

        arr2 = mt.ones((10, 5), chunk_size=3)
        r5 = sess.run(arr2)
        np.testing.assert_array_equal(r5[:3, :3], np.ones((3, 3)) * 2)

        r6 = sess.run(arr2 + 1)
        np.testing.assert_array_equal(r6[:3, :3], np.ones((3, 3)) * 3)

        df = md.DataFrame(np.random.rand(10, 2), columns=list('ab'))
        s = df['a'].map(lambda x: np.ones((3, 3)), dtype='object').sum()

        np.testing.assert_array_equal(s.execute().fetch(),
                                      np.ones((3, 3)) * 10)

        # test fetch multiple tensors
        raw = np.random.rand(5, 10)
        arr1 = mt.ones((5, 10), chunk_size=5)
        arr2 = mt.tensor(raw, chunk_size=3)
        arr3 = mt.sum(arr2)

        sess.run(arr1, arr2, arr3)

        fetch1, fetch2, fetch3 = sess.fetch(arr1, arr2, arr3)
        np.testing.assert_array_equal(fetch1, np.ones((5, 10)))
        np.testing.assert_array_equal(fetch2, raw)
        np.testing.assert_almost_equal(fetch3, raw.sum())

        fetch1, fetch2, fetch3 = sess.fetch([arr1, arr2, arr3])
        np.testing.assert_array_equal(fetch1, np.ones((5, 10)))
        np.testing.assert_array_equal(fetch2, raw)
        np.testing.assert_almost_equal(fetch3, raw.sum())

        raw = np.random.rand(5, 10)
        arr = mt.tensor(raw, chunk_size=5)
        s = arr.sum()

        self.assertAlmostEqual(s.execute().fetch(), raw.sum())

        def _execute_ds(*_):  # pragma: no cover
            raise ValueError('cannot run random again')

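        # register a runner that always raises: fetch() must serve the
        # result from stored chunks without re-executing the data source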
        try:
            register(ArrayDataSource, _execute_ds)

            self.assertAlmostEqual(s.fetch(), raw.sum())
        finally:
            del Executor._op_runners[ArrayDataSource]
Example #22
    def testFetchRemoteData(self):
        pool_address = f'127.0.0.1:{get_next_port()}'
        session_id = str(uuid.uuid4())
        mock_data = np.array([1, 2, 3, 4])
        with create_actor_pool(n_process=1,
                               backend='gevent',
                               address=pool_address,
                               distributor=MarsDistributor(2, 'w:0:')) as pool:
            self.create_standard_actors(pool,
                                        pool_address,
                                        with_daemon=False,
                                        with_status=False,
                                        with_resource=True)
            pool.create_actor(CpuCalcActor)
            pool.create_actor(InProcHolderActor)
            pool.create_actor(MockSenderActor, [mock_data],
                              'in',
                              uid='w:mock_sender')

            import mars.tensor as mt
            from mars.tensor.fetch import TensorFetch
            arr = mt.ones((4, ), chunk_size=4)
            arr_add = mt.array(mock_data)
            result_tensor = arr + arr_add
            graph = result_tensor.build_graph(fuse_enabled=False, tiled=True)

            arr_add = get_tiled(arr_add)
            result_tensor = get_tiled(result_tensor)

            modified_chunk = arr_add.chunks[0]
            arr_add.chunks[0]._op = TensorFetch(
                dtype=modified_chunk.dtype,
                _outputs=[weakref.ref(o) for o in modified_chunk.op.outputs],
                _key=modified_chunk.op.key)

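            # no chunk meta supplied: the fetch chunk cannot be located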
            with self.run_actor_test(pool) as test_actor:
                graph_key = str(uuid.uuid4())
                execution_ref = test_actor.promise_ref(
                    ExecutionActor.default_uid())
                execution_ref.execute_graph(
                    session_id,
                    graph_key,
                    serialize_graph(graph),
                    dict(chunks=[result_tensor.chunks[0].key]),
                    None,
                    _tell=True)

                execution_ref.add_finish_callback(session_id, graph_key, _promise=True) \
                    .then(lambda *_: test_actor.set_result(None)) \
                    .catch(lambda *exc: test_actor.set_result(exc, False))

            with self.assertRaises(DependencyMissing):
                self.get_result()

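            # meta only lists an unreachable endpoint, so the dependency
            # is still reported as missing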
            metas = {
                modified_chunk.key:
                WorkerMeta(mock_data.nbytes, mock_data.shape,
                           ('0.0.0.0:1234', ))
            }
            with self.run_actor_test(pool) as test_actor:
                graph_key = str(uuid.uuid4())
                execution_ref = test_actor.promise_ref(
                    ExecutionActor.default_uid())
                execution_ref.execute_graph(
                    session_id,
                    graph_key,
                    serialize_graph(graph),
                    dict(chunks=[result_tensor.chunks[0].key]),
                    metas,
                    _tell=True)

                execution_ref.add_finish_callback(session_id, graph_key, _promise=True) \
                    .then(lambda *_: test_actor.set_result(None)) \
                    .catch(lambda *exc: test_actor.set_result(exc, False))

            with self.assertRaises(DependencyMissing):
                self.get_result()

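            # extend the meta with the local pool (aliased as localhost);
            # the chunk can now be transferred in and execution succeeds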
            metas[modified_chunk.key] = WorkerMeta(
                mock_data.nbytes, mock_data.shape,
                ('0.0.0.0:1234', pool_address.replace('127.0.0.1',
                                                      'localhost')))
            with self.run_actor_test(pool) as test_actor:

                def _validate(*_):
                    data = test_actor.shared_store.get(
                        session_id, result_tensor.chunks[0].key)
                    assert_array_equal(data, mock_data + np.ones((4, )))

                graph_key = str(uuid.uuid4())
                execution_ref = test_actor.promise_ref(
                    ExecutionActor.default_uid())
                execution_ref.execute_graph(
                    session_id,
                    graph_key,
                    serialize_graph(graph),
                    dict(chunks=[result_tensor.chunks[0].key]),
                    metas,
                    _tell=True)

                execution_ref.add_finish_callback(session_id, graph_key, _promise=True) \
                    .then(_validate) \
                    .then(lambda *_: test_actor.set_result(None)) \
                    .catch(lambda *exc: test_actor.set_result(exc, False))

            self.get_result()
Example #23
    def testWebApi(self):
        service_ep = 'http://127.0.0.1:' + self.web_port
        timeout = 120 if 'CI' in os.environ else -1
        with new_session(service_ep) as sess:
            self.assertEqual(sess.count_workers(), 1)
            a = mt.ones((100, 100), chunk_size=30)
            b = mt.ones((100, 100), chunk_size=30)
            c = a.dot(b)
            value = sess.run(c, timeout=timeout)
            assert_array_equal(value, np.ones((100, 100)) * 100)

            # check resubmission
            value2 = sess.run(c, timeout=timeout)
            assert_array_equal(value, value2)

            # check when local compression libs are missing
            from mars.serialize import dataserializer
            try:
                a = mt.ones((10, 10), chunk_size=30)
                b = mt.ones((10, 10), chunk_size=30)
                c = a.dot(b)
                value = sess.run(c, timeout=timeout)
                assert_array_equal(value, np.ones((10, 10)) * 10)

                dataserializer.decompressors[
                    dataserializer.CompressType.LZ4] = None
                dataserializer.decompressobjs[
                    dataserializer.CompressType.LZ4] = None
                dataserializer.compress_openers[
                    dataserializer.CompressType.LZ4] = None

                assert_array_equal(sess.fetch(c), np.ones((10, 10)) * 10)
            finally:
                lz4 = dataserializer.CompressType.LZ4
                dataserializer.decompressors[lz4] = \
                    dataserializer.lz4_decompress
                dataserializer.decompressobjs[lz4] = \
                    dataserializer.lz4_decompressobj
                dataserializer.compress_openers[lz4] = \
                    dataserializer.lz4_open

            va = np.random.randint(0, 10000, (100, 100))
            vb = np.random.randint(0, 10000, (100, 100))
            a = mt.array(va, chunk_size=30)
            b = mt.array(vb, chunk_size=30)
            c = a.dot(b)
            value = sess.run(c, timeout=timeout)
            assert_array_equal(value, va.dot(vb))

            graphs = sess.get_graph_states()

            # make sure status got uploaded
            time.sleep(1.5)

            # check web UI requests
            res = requests.get(service_ep)
            self.assertEqual(res.status_code, 200)

            res = requests.get('%s/scheduler' % (service_ep, ))
            self.assertEqual(res.status_code, 200)
            res = requests.get('%s/scheduler/127.0.0.1:%s' %
                               (service_ep, self.scheduler_port))
            self.assertEqual(res.status_code, 200)

            res = requests.get('%s/worker' % (service_ep, ))
            self.assertEqual(res.status_code, 200)
            res = requests.get('%s/worker/127.0.0.1:%s' %
                               (service_ep, self.worker_port))
            self.assertEqual(res.status_code, 200)
            res = requests.get('%s/worker/127.0.0.1:%s/timeline' %
                               (service_ep, self.worker_port))
            self.assertEqual(res.status_code, 200)

            res = requests.get('%s/session' % (service_ep, ))
            self.assertEqual(res.status_code, 200)
            task_id = next(iter(graphs.keys()))
            res = requests.get('%s/session/%s/graph/%s' %
                               (service_ep, sess._session_id, task_id))
            self.assertEqual(res.status_code, 200)
            res = requests.get('%s/session/%s/graph/%s/running_nodes' %
                               (service_ep, sess._session_id, task_id))
            self.assertEqual(res.status_code, 200)

            from mars.web.task_pages import PROGRESS_APP_NAME
            res = requests.get(
                '%s/%s?session_id=%s&task_id=%s' %
                (service_ep, PROGRESS_APP_NAME, sess._session_id, task_id))
            self.assertEqual(res.status_code, 200)

            from mars.web.worker_pages import TIMELINE_APP_NAME
            res = requests.get(
                '%s/%s?endpoint=127.0.0.1:%s' %
                (service_ep, TIMELINE_APP_NAME, self.worker_port))
            self.assertEqual(res.status_code, 200)

        # make sure all chunks freed when session quits
        from mars.worker.storage import StorageManagerActor
        actor_client = new_client()
        storage_manager_ref = actor_client.actor_ref(
            StorageManagerActor.default_uid(),
            address='127.0.0.1:' + str(self.worker_port))
        self.assertFalse(bool(storage_manager_ref.dump_keys()))
Example #24
    def testEagerMode(self, *_):
        with new_cluster(scheduler_n_process=2,
                         worker_n_process=2,
                         shared_memory='20M',
                         web=True) as cluster:

            self.assertIsInstance(Session.default_or_local()._sess,
                                  LocalClusterSession)

            with option_context({'eager_mode': True}):
                a_data = np.random.rand(10, 10)

                a = mt.tensor(a_data, chunk_size=3)
                np.testing.assert_array_equal(a, a_data)

                r1 = a + 1
                expected1 = a_data + 1
                np.testing.assert_array_equal(r1, expected1)

                r2 = r1.dot(r1)
                expected2 = expected1.dot(expected1)
                np.testing.assert_array_almost_equal(r2, expected2)

            a = mt.ones((10, 10), chunk_size=3)
            with self.assertRaises(ValueError):
                a.fetch()

            r = a.dot(a)
            np.testing.assert_array_equal(r.execute(), np.ones((10, 10)) * 10)

            with new_session('http://' + cluster._web_endpoint).as_default():
                self.assertIsInstance(Session.default_or_local()._sess,
                                      WebSession)

                with option_context({'eager_mode': True}):
                    a_data = np.random.rand(10, 10)

                    a = mt.tensor(a_data, chunk_size=3)
                    np.testing.assert_array_equal(a, a_data)

                    r1 = a + 1
                    expected1 = a_data + 1
                    np.testing.assert_array_equal(r1, expected1)

                    r2 = r1.dot(r1)
                    expected2 = expected1.dot(expected1)
                    np.testing.assert_array_almost_equal(r2, expected2)

                    web_session = Session.default_or_local()._sess
                    self.assertEqual(web_session.get_task_count(), 3)

                a = mt.ones((10, 10), chunk_size=3)
                with self.assertRaises(ValueError):
                    a.fetch()

                r = a.dot(a)
                np.testing.assert_array_equal(r.execute(),
                                              np.ones((10, 10)) * 10)

            with new_session('http://' + cluster._web_endpoint).as_default():
                from mars.dataframe.datasource.dataframe import from_pandas as from_pandas_df
                from mars.dataframe.datasource.series import from_pandas as from_pandas_series
                from mars.dataframe.arithmetic import add

                self.assertIsInstance(Session.default_or_local()._sess,
                                      WebSession)

                with option_context({'eager_mode': True}):
                    data1 = pd.DataFrame(
                        np.random.rand(10, 10),
                        index=[0, 10, 2, 3, 4, 5, 6, 7, 8, 9],
                        columns=[4, 1, 3, 2, 10, 5, 9, 8, 6, 7])
                    df1 = from_pandas_df(data1, chunk_size=5)
                    pd.testing.assert_frame_equal(df1.fetch(), data1)

                    data2 = pd.DataFrame(
                        np.random.rand(10, 10),
                        index=[11, 1, 2, 5, 7, 6, 8, 9, 10, 3],
                        columns=[5, 9, 12, 3, 11, 10, 6, 4, 1, 2])
                    df2 = from_pandas_df(data2, chunk_size=6)
                    pd.testing.assert_frame_equal(df2.fetch(), data2)

                    df3 = add(df1, df2)
                    pd.testing.assert_frame_equal(df3.fetch(), data1 + data2)

                    s1 = pd.Series(np.random.rand(10),
                                   index=[11, 1, 2, 5, 7, 6, 8, 9, 10, 3])
                    series1 = from_pandas_series(s1)
                    pd.testing.assert_series_equal(series1.fetch(), s1)

                web_session = Session.default_or_local()._sess
                self.assertEqual(web_session.get_task_count(), 4)
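
Eager mode, toggled above via `option_context`, executes each tileable as soon as it is created, which is why results can be compared against numpy arrays directly; outside eager mode, calling `fetch()` before `execute()` raises ValueError. A minimal standalone sketch, assuming `option_context` is importable from `mars.config` (the import is not shown in these snippets):

    import numpy as np
    import mars.tensor as mt
    from mars.config import option_context  # assumed import path

    with option_context({'eager_mode': True}):
        t = mt.ones((4, 4), chunk_size=2)  # executed on creation
        np.testing.assert_array_equal(t.fetch(), np.ones((4, 4)))

    t = mt.ones((4, 4), chunk_size=2)      # lazy again outside the context
    np.testing.assert_array_equal(t.execute(), np.ones((4, 4)))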
Example #25
    def testWebApi(self):
        service_ep = 'http://127.0.0.1:' + self.web_port
        timeout = 120 if 'CI' in os.environ else -1
        with new_session(service_ep) as sess:
            session_id = sess._session_id
            self.assertEqual(sess.count_workers(), 1)

            a = mt.ones((100, 100), chunk_size=30)
            b = mt.ones((100, 100), chunk_size=30)
            c = a.dot(b)
            value = sess.run(c, timeout=timeout)
            np.testing.assert_array_equal(value, np.ones((100, 100)) * 100)

            # check resubmission
            value2 = sess.run(c, timeout=timeout)
            np.testing.assert_array_equal(value, value2)

            # check when local compression libs are missing
            from mars.serialize import dataserializer
            try:
                a = mt.ones((10, 10), chunk_size=30)
                b = mt.ones((10, 10), chunk_size=30)
                c = a.dot(b)
                value = sess.run(c, timeout=timeout)
                np.testing.assert_array_equal(value, np.ones((10, 10)) * 10)

                dataserializer.decompressors[
                    dataserializer.CompressType.LZ4] = None
                dataserializer.decompressobjs[
                    dataserializer.CompressType.LZ4] = None
                dataserializer.compress_openers[
                    dataserializer.CompressType.LZ4] = None

                np.testing.assert_array_equal(sess.fetch(c),
                                              np.ones((10, 10)) * 10)
            finally:
                lz4 = dataserializer.CompressType.LZ4
                dataserializer.decompressors[lz4] = \
                    dataserializer.lz4_decompress
                dataserializer.decompressobjs[lz4] = \
                    dataserializer.lz4_decompressobj
                dataserializer.compress_openers[lz4] = \
                    dataserializer.lz4_open

            # check serialization by pickle
            try:
                sess._sess._serial_type = SerialType.PICKLE

                a = mt.ones((10, 10), chunk_size=30)
                b = mt.ones((10, 10), chunk_size=30)
                c = a.dot(b)
                value = sess.run(c, timeout=timeout)
                np.testing.assert_array_equal(value, np.ones((10, 10)) * 10)

                raw = pd.DataFrame(np.random.rand(10, 5),
                                   columns=list('ABCDE'),
                                   index=pd.RangeIndex(10, 0, -1))
                data = md.DataFrame(raw).astype({'E': 'arrow_string'})
                ret_data = data.execute(session=sess).fetch(session=sess)
                self.assertEqual(ret_data.dtypes['E'], np.dtype('O'))
                pd.testing.assert_frame_equal(ret_data.astype({'E': 'float'}),
                                              raw,
                                              check_less_precise=True)

                raw = pd.Series(np.random.rand(10),
                                index=pd.RangeIndex(10, 0, -1),
                                name='r')
                data = md.Series(raw).astype('Arrow[string]')
                ret_data = data.execute(session=sess).fetch(session=sess)
                self.assertEqual(ret_data.dtype, np.dtype('O'))
                pd.testing.assert_series_equal(ret_data.astype('float'), raw)
            finally:
                sess._sess._serial_type = SerialType.ARROW

            va = np.random.randint(0, 10000, (100, 100))
            vb = np.random.randint(0, 10000, (100, 100))
            a = mt.array(va, chunk_size=30)
            b = mt.array(vb, chunk_size=30)
            c = a.dot(b)
            value = sess.run(c, timeout=timeout)
            np.testing.assert_array_equal(value, va.dot(vb))

            # test fetch log
            def f():
                print('test')

            r = mr.spawn(f).execute(session=sess, timeout=timeout)
            self.assertEqual(str(r.fetch_log()).strip(), 'test')
            self.assertEqual(str(r.fetch_log(offsets=0)).strip(), 'test')
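            # the session tracks the read offset, so the next plain
            # fetch_log() returns nothing; negative offsets count back
            # from the end of the log ('-0.003k' is roughly 3 bytes)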
            self.assertEqual(str(r.fetch_log()).strip(), '')
            self.assertEqual(
                str(r.fetch_log(offsets='-0.003k', sizes=2)).strip(), 'st')

            graphs = sess.get_graph_states()

            # make sure status got uploaded
            time.sleep(1.5)

            # check web UI requests
            res = requests.get(service_ep)
            self.assertEqual(res.status_code, 200)

            res = requests.get(f'{service_ep}/scheduler')
            self.assertEqual(res.status_code, 200)
            res = requests.get(
                f'{service_ep}/scheduler/127.0.0.1:{self.scheduler_port}')
            self.assertEqual(res.status_code, 200)

            res = requests.get(f'{service_ep}/worker')
            self.assertEqual(res.status_code, 200)
            res = requests.get(
                f'{service_ep}/worker/127.0.0.1:{self.worker_port}')
            self.assertEqual(res.status_code, 200)
            res = requests.get(
                f'{service_ep}/worker/127.0.0.1:{self.worker_port}/timeline')
            self.assertEqual(res.status_code, 200)

            res = requests.get(f'{service_ep}/session')
            self.assertEqual(res.status_code, 200)
            task_id = next(iter(graphs.keys()))
            res = requests.get(
                f'{service_ep}/session/{session_id}/graph/{task_id}')
            self.assertEqual(res.status_code, 200)
            res = requests.get(
                f'{service_ep}/session/{session_id}/graph/{task_id}/running_nodes'
            )
            self.assertEqual(res.status_code, 200)

            from mars.web.task_pages import PROGRESS_APP_NAME
            res = requests.get(
                f'{service_ep}/{PROGRESS_APP_NAME}?session_id={session_id}&task_id={task_id}'
            )
            self.assertEqual(res.status_code, 200)

            from mars.web.worker_pages import TIMELINE_APP_NAME
            res = requests.get(
                f'{service_ep}/{TIMELINE_APP_NAME}?endpoint=127.0.0.1:{self.worker_port}'
            )
            self.assertEqual(res.status_code, 200)

        # make sure all chunks freed when session quits
        from mars.worker.storage import StorageManagerActor
        actor_client = new_client()
        storage_manager_ref = actor_client.actor_ref(
            StorageManagerActor.default_uid(),
            address='127.0.0.1:' + str(self.worker_port))
        self.assertSetEqual(set(storage_manager_ref.dump_keys()), set())
Example #26
    def f():
        assert get_default_session().session_id == session_id
        return mt.ones((2, 3)).sum().to_numpy()
Example #27
    def testSameKeyPreparation(self, *_):
        arr = mt.ones((5, 5), chunk_size=3)
        arr2 = mt.concatenate((arr, arr))
        with self.prepare_graph_in_pool(arr2, clean_io_meta=False):
            pass
Example #28
    def testSimpleExecution(self):
        pool_address = '127.0.0.1:%d' % get_next_port()
        with create_actor_pool(n_process=1,
                               backend='gevent',
                               address=pool_address) as pool:
            self.create_standard_actors(pool, pool_address, with_daemon=False)
            pool.create_actor(CpuCalcActor, uid='w:1:calc-a')

            import mars.tensor as mt
            from mars.tensor.expressions.datasource import TensorOnes, TensorFetch
            arr = mt.ones((10, 8), chunk_size=10)
            arr_add = mt.ones((10, 8), chunk_size=10)
            arr2 = arr + arr_add
            graph = arr2.build_graph(compose=False, tiled=True)

            for chunk in graph:
                if isinstance(chunk.op, TensorOnes):
                    chunk._op = TensorFetch(
                        dtype=chunk.dtype,
                        _outputs=[weakref.ref(o) for o in chunk.op.outputs],
                        _key=chunk.op.key)

            with self.run_actor_test(pool) as test_actor:

                session_id = str(uuid.uuid4())
                chunk_holder_ref = test_actor.promise_ref(
                    ChunkHolderActor.default_name())

                refs = test_actor._chunk_store.put(
                    session_id, arr.chunks[0].key,
                    np.ones((10, 8), dtype=np.int16))
                chunk_holder_ref.register_chunk(session_id, arr.chunks[0].key)
                del refs

                refs = test_actor._chunk_store.put(
                    session_id, arr_add.chunks[0].key,
                    np.ones((10, 8), dtype=np.int16))
                chunk_holder_ref.register_chunk(session_id,
                                                arr_add.chunks[0].key)
                del refs

                execution_ref = test_actor.promise_ref(
                    ExecutionActor.default_name())

                def _validate(_):
                    data = test_actor._chunk_store.get(session_id,
                                                       arr2.chunks[0].key)
                    assert_array_equal(data, 2 * np.ones((10, 8)))

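                # two-step submission: enqueue the graph first, then
                # explicitly kick off execution once the enqueue resolves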
                graph_key = str(uuid.uuid4())
                execution_ref.enqueue_graph(session_id, graph_key, serialize_graph(graph),
                                            dict(chunks=[arr2.chunks[0].key]), None, _promise=True) \
                    .then(lambda *_: execution_ref.start_execution(session_id, graph_key, _tell=True))

                execution_ref.add_finish_callback(session_id, graph_key, _promise=True) \
                    .then(_validate) \
                    .then(lambda *_: test_actor.set_result(None)) \
                    .catch(lambda *exc: test_actor.set_result(exc, False))

            self.get_result()

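            # a finish callback registered after the graph has already
            # completed must still fire with the stored result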
            with self.run_actor_test(pool) as test_actor:
                execution_ref = test_actor.promise_ref(
                    ExecutionActor.default_name())

                def _validate(_):
                    data = test_actor._chunk_store.get(session_id,
                                                       arr2.chunks[0].key)
                    assert_array_equal(data, 2 * np.ones((10, 8)))

                execution_ref.add_finish_callback(session_id, graph_key, _promise=True) \
                    .then(_validate) \
                    .then(lambda *_: test_actor.set_result(None)) \
                    .catch(lambda *exc: test_actor.set_result(exc, False))

            self.get_result()
Example #29
    def testChunkSerialize(self):
        t = ones((10, 3), chunk_size=(5, 2)).tiles()

        # pb
        chunk = t.chunks[0]
        serials = self._pb_serial(chunk)
        op, pb = serials[chunk.op, chunk.data]

        self.assertEqual(tuple(pb.index), chunk.index)
        self.assertEqual(pb.key, chunk.key)
        self.assertEqual(tuple(pb.shape), chunk.shape)
        self.assertEqual(int(op.type.split('.', 1)[1]), OperandDef.TENSOR_ONES)

        chunk2 = self._pb_deserial(serials)[chunk.data]

        self.assertEqual(chunk.index, chunk2.index)
        self.assertEqual(chunk.key, chunk2.key)
        self.assertEqual(chunk.shape, chunk2.shape)
        self.assertEqual(chunk.op.dtype, chunk2.op.dtype)

        # json
        chunk = t.chunks[0]
        serials = self._json_serial(chunk)

        chunk2 = self._json_deserial(serials)[chunk.data]

        self.assertEqual(chunk.index, chunk2.index)
        self.assertEqual(chunk.key, chunk2.key)
        self.assertEqual(chunk.shape, chunk2.shape)
        self.assertEqual(chunk.op.dtype, chunk2.op.dtype)

        t = tensor(np.random.random((10, 3)), chunk_size=(5, 2)).tiles()

        # pb
        chunk = t.chunks[0]
        serials = self._pb_serial(chunk)
        op, pb = serials[chunk.op, chunk.data]

        self.assertEqual(tuple(pb.index), chunk.index)
        self.assertEqual(pb.key, chunk.key)
        self.assertEqual(tuple(pb.shape), chunk.shape)
        self.assertEqual(int(op.type.split('.', 1)[1]),
                         OperandDef.TENSOR_DATA_SOURCE)

        chunk2 = self._pb_deserial(serials)[chunk.data]

        self.assertEqual(chunk.index, chunk2.index)
        self.assertEqual(chunk.key, chunk2.key)
        self.assertEqual(chunk.shape, chunk2.shape)
        self.assertTrue(np.array_equal(chunk.op.data, chunk2.op.data))

        # json
        chunk = t.chunks[0]
        serials = self._json_serial(chunk)

        chunk2 = self._json_deserial(serials)[chunk.data]

        self.assertEqual(chunk.index, chunk2.index)
        self.assertEqual(chunk.key, chunk2.key)
        self.assertEqual(chunk.shape, chunk2.shape)
        self.assertTrue(np.array_equal(chunk.op.data, chunk2.op.data))

        t = (tensor(np.random.random((10, 3)), chunk_size=(5, 2)) + 1).tiles()

        # pb
        chunk1 = t.chunks[0]
        chunk2 = t.chunks[1]
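        # hand-build a fused chunk: `_composed` records the chunk1 -> chunk2
        # pipeline, and the round-trip must preserve keys and composed list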
        fuse_op = TensorFuseChunk()
        composed_chunk = fuse_op.new_chunk(
            chunk1.inputs,
            chunk2.shape,
            _key=chunk2.key,
            _composed=[chunk1.data, chunk2.data])
        serials = self._pb_serial(composed_chunk)
        op, pb = serials[composed_chunk.op, composed_chunk.data]

        self.assertEqual(pb.key, composed_chunk.key)
        self.assertEqual(int(op.type.split('.', 1)[1]), OperandDef.FUSE)
        self.assertEqual(len(pb.composed), 2)

        composed_chunk2 = self._pb_deserial(serials)[composed_chunk.data]

        self.assertEqual(composed_chunk.key, composed_chunk2.key)
        self.assertEqual(type(composed_chunk.op), type(composed_chunk2.op))
        self.assertEqual(composed_chunk.composed[0].inputs[0].key,
                         composed_chunk2.composed[0].inputs[0].key)
        self.assertEqual(composed_chunk.inputs[-1].key,
                         composed_chunk2.inputs[-1].key)

        # json
        chunk1 = t.chunks[0]
        chunk2 = t.chunks[1]
        fuse_op = TensorFuseChunk()
        composed_chunk = fuse_op.new_chunk(
            chunk1.inputs,
            chunk2.shape,
            _key=chunk2.key,
            _composed=[chunk1.data, chunk2.data])
        serials = self._json_serial(composed_chunk)

        composed_chunk2 = self._json_deserial(serials)[composed_chunk.data]

        self.assertEqual(composed_chunk.key, composed_chunk2.key)
        self.assertEqual(type(composed_chunk.op), type(composed_chunk2.op))
        self.assertEqual(composed_chunk.composed[0].inputs[0].key,
                         composed_chunk2.composed[0].inputs[0].key)
        self.assertEqual(composed_chunk.inputs[-1].key,
                         composed_chunk2.inputs[-1].key)

        t1 = ones((10, 3), chunk_size=2)
        t2 = ones((3, 5), chunk_size=2)
        c = dot(t1, t2).tiles().chunks[0].inputs[0]

        # pb
        serials = self._pb_serial(c)
        c2 = self._pb_deserial(serials)[c]
        self.assertEqual(c.key, c2.key)

        # json
        serials = self._json_serial(c)
        c2 = self._json_deserial(serials)[c]
        self.assertEqual(c.key, c2.key)
Example #30
    def testWebApi(self):
        service_ep = 'http://127.0.0.1:' + self.web_port
        timeout = 120 if 'CI' in os.environ else -1
        with new_session(service_ep) as sess:
            self.assertEqual(sess.count_workers(), 1)
            a = mt.ones((100, 100), chunk_size=30)
            b = mt.ones((100, 100), chunk_size=30)
            c = a.dot(b)
            value = sess.run(c, timeout=timeout)
            assert_array_equal(value, np.ones((100, 100)) * 100)

            # check resubmission
            value2 = sess.run(c, timeout=timeout)
            assert_array_equal(value, value2)

            # check when local compression libs are missing
            from mars.serialize import dataserializer
            try:
                a = mt.ones((10, 10), chunk_size=30)
                b = mt.ones((10, 10), chunk_size=30)
                c = a.dot(b)
                value = sess.run(c, timeout=timeout)
                assert_array_equal(value, np.ones((10, 10)) * 10)

                dataserializer.decompressors[
                    dataserializer.CompressType.LZ4] = None
                dataserializer.decompressobjs[
                    dataserializer.CompressType.LZ4] = None
                dataserializer.compress_openers[
                    dataserializer.CompressType.LZ4] = None

                assert_array_equal(sess.fetch(c), np.ones((10, 10)) * 10)
            finally:
                lz4 = dataserializer.CompressType.LZ4
                dataserializer.decompressors[lz4] = \
                    dataserializer.lz4_decompress
                dataserializer.decompressobjs[lz4] = \
                    dataserializer.lz4_decompressobj
                dataserializer.compress_openers[lz4] = \
                    dataserializer.lz4_open

            va = np.random.randint(0, 10000, (100, 100))
            vb = np.random.randint(0, 10000, (100, 100))
            a = mt.array(va, chunk_size=30)
            b = mt.array(vb, chunk_size=30)
            c = a.dot(b)
            value = sess.run(c, timeout=timeout)
            assert_array_equal(value, va.dot(vb))

            graphs = sess.get_graph_states()

            # check web UI requests
            res = requests.get(service_ep)
            self.assertEqual(res.status_code, 200)

            res = requests.get('%s/task' % (service_ep, ))
            self.assertEqual(res.status_code, 200)

            res = requests.get('%s/scheduler' % (service_ep, ))
            self.assertEqual(res.status_code, 200)
            res = requests.get('%s/scheduler?endpoint=127.0.0.1:%s' %
                               (service_ep, self.scheduler_port))
            self.assertEqual(res.status_code, 200)

            res = requests.get('%s/worker' % (service_ep, ))
            self.assertEqual(res.status_code, 200)
            res = requests.get('%s/worker?endpoint=127.0.0.1:%s' %
                               (service_ep, self.worker_port))
            self.assertEqual(res.status_code, 200)

            res = requests.get('%s/task' % (service_ep, ))
            self.assertEqual(res.status_code, 200)
            task_id = next(iter(graphs.keys()))
            res = requests.get('%s/task?session_id=%s&task_id=%s' %
                               (service_ep, sess._session_id, task_id))
            self.assertEqual(res.status_code, 200)