Exemple #1
0
    def testPrepareSpilled(self):
        from mars.worker.spill import write_spill_file

        pool_address = '127.0.0.1:%d' % get_next_port()
        session_id = str(uuid.uuid4())
        mock_data = np.array([1, 2, 3, 4])

        options.worker.spill_directory = tempfile.mkdtemp(prefix='mars_worker_prep_spilled-')

        with create_actor_pool(n_process=1, backend='gevent', address=pool_address) as pool:
            self.create_standard_actors(pool, pool_address, with_daemon=False, with_status=False)
            pool.create_actor(SpillActor)
            pool.create_actor(CpuCalcActor)
            cluster_info_ref = pool.actor_ref(WorkerClusterInfoActor.default_uid())
            chunk_meta_client = ChunkMetaClient(pool, cluster_info_ref)
            pool.actor_ref(ChunkHolderActor.default_uid())

            import mars.tensor as mt
            from mars.tensor.fetch import TensorFetch
            arr = mt.ones((4,), chunk_size=4)
            arr_add = mt.array(mock_data)
            result_tensor = arr + arr_add
            graph = result_tensor.build_graph(compose=False, tiled=True)

            modified_chunk = arr_add.chunks[0]
            arr_add.chunks[0]._op = TensorFetch(
                dtype=modified_chunk.dtype, _outputs=[weakref.ref(o) for o in modified_chunk.op.outputs],
                _key=modified_chunk.op.key)

            # test meta missing
            with self.run_actor_test(pool) as test_actor:
                graph_key = str(uuid.uuid4())
                execution_ref = test_actor.promise_ref(ExecutionActor.default_uid())
                execution_ref.execute_graph(session_id, graph_key, serialize_graph(graph),
                                            dict(chunks=[result_tensor.chunks[0].key]), None, _promise=True) \
                    .then(lambda *_: test_actor.set_result(None)) \
                    .catch(lambda *exc: test_actor.set_result(exc, False))

            with self.assertRaises(DependencyMissing):
                self.get_result()

            chunk_meta_client.set_chunk_meta(session_id, modified_chunk.key, size=mock_data.nbytes,
                                             shape=mock_data.shape, workers=('0.0.0.0:1234', pool_address))
            write_spill_file(modified_chunk.key, mock_data)

            # test read from spilled file
            with self.run_actor_test(pool) as test_actor:
                def _validate(_):
                    data = test_actor._chunk_store.get(session_id, result_tensor.chunks[0].key)
                    assert_array_equal(data, mock_data + np.ones((4,)))

                graph_key = str(uuid.uuid4())
                execution_ref = test_actor.promise_ref(ExecutionActor.default_uid())
                execution_ref.execute_graph(session_id, graph_key, serialize_graph(graph),
                                            dict(chunks=[result_tensor.chunks[0].key]), None, _promise=True) \
                    .then(_validate) \
                    .then(lambda *_: test_actor.set_result(None)) \
                    .catch(lambda *exc: test_actor.set_result(exc, False))

            self.get_result()
Exemple #2
0
    def testFailoverMessage(self):
        mock_session_id = str(uuid.uuid4())
        mock_graph_key = str(uuid.uuid4())
        mock_chunk_key = str(uuid.uuid4())
        addr = '127.0.0.1:%d' % get_next_port()
        mock_worker_addr = '127.0.0.1:54132'

        options.scheduler.worker_blacklist_time = 0.5

        with create_actor_pool(n_process=1, backend='gevent',
                               address=addr) as pool:
            cluster_info_ref = pool.create_actor(
                SchedulerClusterInfoActor, [pool.cluster_info.address],
                uid=SchedulerClusterInfoActor.default_uid())
            session_manager_ref = pool.create_actor(
                SessionManagerActor, uid=SessionManagerActor.default_uid())
            resource_ref = pool.create_actor(ResourceActor,
                                             uid=ResourceActor.default_uid())
            pool.create_actor(ChunkMetaActor, uid=ChunkMetaActor.default_uid())

            session_ref = pool.actor_ref(
                session_manager_ref.create_session(mock_session_id))
            chunk_meta_client = ChunkMetaClient(pool, cluster_info_ref)
            chunk_meta_client.set_chunk_meta(mock_session_id,
                                             mock_chunk_key,
                                             size=80,
                                             shape=(10, ),
                                             workers=(mock_worker_addr, ))

            with mock.patch(GraphActor.__module__ + '.' + GraphActor.__name__,
                            new=MockGraphActor):
                session_ref.submit_tileable_graph(None, mock_graph_key)
                graph_ref = pool.actor_ref(
                    GraphActor.gen_uid(mock_session_id, mock_graph_key))

                expire_time = time.time(
                ) - options.scheduler.status_timeout - 1
                resource_ref.set_worker_meta(mock_worker_addr,
                                             dict(update_time=expire_time))

                resource_ref.detect_dead_workers(_tell=True)
                pool.sleep(0.2)

                _, removes, lost_chunks = graph_ref.get_worker_change_args()
                self.assertListEqual(removes, [mock_worker_addr])
                self.assertListEqual(lost_chunks, [mock_chunk_key])

                self.assertNotIn(mock_worker_addr,
                                 resource_ref.get_workers_meta())
                resource_ref.set_worker_meta(mock_worker_addr,
                                             dict(update_time=time.time()))
                self.assertNotIn(mock_worker_addr,
                                 resource_ref.get_workers_meta())

                pool.sleep(0.4)
                resource_ref.set_worker_meta(mock_worker_addr,
                                             dict(update_time=time.time()))
                self.assertIn(mock_worker_addr,
                              resource_ref.get_workers_meta())
Exemple #3
0
    def testPrepareQuota(self, *_):
        pinned = [True]

        def _mock_pin(_graph_key, chunk_keys):
            from mars.errors import PinChunkFailed
            if pinned[0]:
                raise PinChunkFailed
            return chunk_keys

        ChunkHolderActor.pin_chunks.side_effect = _mock_pin

        pool_address = '127.0.0.1:%d' % get_next_port()
        session_id = str(uuid.uuid4())
        mock_data = np.array([1, 2, 3, 4])
        with create_actor_pool(n_process=1, backend='gevent', address=pool_address) as pool:
            self.create_standard_actors(pool, pool_address, with_daemon=False, with_status=False)
            pool.create_actor(MockSenderActor, mock_data, 'in', uid='w:mock_sender')
            pool.create_actor(CpuCalcActor)
            cluster_info_ref = pool.actor_ref(WorkerClusterInfoActor.default_uid())
            chunk_meta_client = ChunkMetaClient(pool, cluster_info_ref)

            import mars.tensor as mt
            from mars.tensor.fetch import TensorFetch
            arr = mt.ones((4,), chunk_size=4)
            arr_add = mt.array(mock_data)
            result_tensor = arr + arr_add
            graph = result_tensor.build_graph(compose=False, tiled=True)

            modified_chunk = arr_add.chunks[0]
            arr_add.chunks[0]._op = TensorFetch(
                dtype=modified_chunk.dtype, _outputs=[weakref.ref(o) for o in modified_chunk.op.outputs],
                _key=modified_chunk.op.key)
            chunk_meta_client.set_chunk_meta(session_id, modified_chunk.key, size=mock_data.nbytes,
                                             shape=mock_data.shape, workers=('0.0.0.0:1234', pool_address))
            with self.run_actor_test(pool) as test_actor:
                graph_key = str(uuid.uuid4())
                execution_ref = test_actor.promise_ref(ExecutionActor.default_uid())

                start_time = time.time()

                execution_ref.execute_graph(
                    session_id, graph_key, serialize_graph(graph),
                    dict(chunks=[result_tensor.chunks[0].key]), None, _tell=True)

                execution_ref.add_finish_callback(session_id, graph_key, _promise=True) \
                    .then(lambda *_: test_actor.set_result(time.time())) \
                    .catch(lambda *exc: test_actor.set_result(exc, False))

                def _delay_fun():
                    time.sleep(1)
                    pinned[0] = False

                threading.Thread(target=_delay_fun).start()

            finish_time = self.get_result()
            self.assertGreaterEqual(finish_time, start_time + 1)
Exemple #4
0
    def testReadyState(self, *_):
        session_id = str(uuid.uuid4())
        graph_key = str(uuid.uuid4())
        mock_workers = ['localhost:12345', 'localhost:23456']

        def _mock_get_workers_meta(*_, **__):
            return dict((w, dict(hardware=dict(cpu_total=1, memory=1024**3)))
                        for w in mock_workers)

        with patch_method(ResourceActor.get_workers_meta, new=_mock_get_workers_meta) as _, \
                self._prepare_test_graph(session_id, graph_key, mock_workers) as (pool, graph_ref):
            input_op_keys, mid_op_key, output_op_keys = self._filter_graph_level_op_keys(
                graph_ref)
            meta_client = ChunkMetaClient(
                pool, pool.actor_ref(SchedulerClusterInfoActor.default_uid()))
            op_ref = pool.actor_ref(
                OperandActor.gen_uid(session_id, mid_op_key))
            resource_ref = pool.actor_ref(ResourceActor.default_uid())

            input_refs = [
                pool.actor_ref(OperandActor.gen_uid(session_id, k))
                for k in input_op_keys
            ]

            def test_entering_state(target):
                for key in input_op_keys:
                    op_ref.remove_finished_predecessor(key)

                op_ref.start_operand(OperandState.UNSCHEDULED)
                for ref in input_refs:
                    ref.start_operand(OperandState.UNSCHEDULED)

                for ref in input_refs:
                    self.assertEqual(op_ref.get_state(),
                                     OperandState.UNSCHEDULED)
                    ref.start_operand(OperandState.FINISHED)
                pool.sleep(1)
                self.assertEqual(target, op_ref.get_state())
                for w in mock_workers:
                    resource_ref.deallocate_resource(session_id, mid_op_key, w)

            # test entering state with no input meta
            test_entering_state(OperandState.UNSCHEDULED)

            # fill meta
            input_chunk_keys, _, _ = self._filter_graph_level_chunk_keys(
                graph_ref)
            for ck in input_chunk_keys:
                meta_client.set_chunk_meta(session_id,
                                           ck,
                                           workers=('localhost:12345', ),
                                           size=800)

            # test successful entering state
            test_entering_state(OperandState.READY)
Exemple #5
0
    def testReadyState(self, *_):
        session_id = str(uuid.uuid4())
        graph_key = str(uuid.uuid4())
        mock_workers = ['localhost:12345', 'localhost:23456']

        with self._prepare_test_graph(session_id, graph_key,
                                      mock_workers) as (pool, graph_ref):
            input_op_keys, mid_op_key, output_op_keys = self._filter_graph_level_op_keys(
                graph_ref)
            meta_client = ChunkMetaClient(
                pool, pool.actor_ref(SchedulerClusterInfoActor.default_name()))
            op_ref = pool.actor_ref(
                OperandActor.gen_uid(session_id, mid_op_key))

            input_refs = [
                pool.actor_ref(OperandActor.gen_uid(session_id, k))
                for k in input_op_keys
            ]

            def test_entering_state(target):
                for key in input_op_keys:
                    op_ref.remove_finished_predecessor(key)

                op_ref.start_operand(OperandState.UNSCHEDULED)
                for ref in input_refs:
                    ref.start_operand(OperandState.UNSCHEDULED)

                for ref in input_refs:
                    self.assertEqual(op_ref.get_state(),
                                     OperandState.UNSCHEDULED)
                    ref.start_operand(OperandState.FINISHED)
                pool.sleep(0.5)
                self.assertEqual(target, op_ref.get_state())

            # test entering state with no input meta
            test_entering_state(OperandState.UNSCHEDULED)

            # fill meta
            input_chunk_keys, _, _ = self._filter_graph_level_chunk_keys(
                graph_ref)
            for ck in input_chunk_keys:
                meta_client.set_chunk_meta(session_id,
                                           ck,
                                           workers=('localhost:12345', ),
                                           size=800)

            # test entering state with failure in fetching sizes
            with patch_method(ChunkMetaClient.batch_get_chunk_size,
                              new=lambda *_: [None, None]):
                test_entering_state(OperandState.UNSCHEDULED)

            # test successful entering state
            test_entering_state(OperandState.READY)
Exemple #6
0
    def testAssignerActor(self):
        mock_scheduler_addr = '127.0.0.1:%d' % get_next_port()
        with create_actor_pool(n_process=1, backend='gevent', address=mock_scheduler_addr) as pool:
            cluster_info_ref = pool.create_actor(SchedulerClusterInfoActor, [pool.cluster_info.address],
                                                 uid=SchedulerClusterInfoActor.default_uid())
            resource_ref = pool.create_actor(ResourceActor, uid=ResourceActor.default_uid())
            pool.create_actor(ChunkMetaActor, uid=ChunkMetaActor.default_uid())

            endpoint1 = 'localhost:12345'
            endpoint2 = 'localhost:23456'
            res = dict(hardware=dict(cpu=4, memory=4096))

            def write_mock_meta():
                resource_ref.set_worker_meta(endpoint1, res)
                resource_ref.set_worker_meta(endpoint2, res)

            g = gevent.spawn(write_mock_meta)
            g.join()

            assigner_ref = pool.create_actor(AssignerActor, uid=AssignerActor.default_uid())

            session_id = str(uuid.uuid4())
            op_key = str(uuid.uuid4())
            chunk_key1 = str(uuid.uuid4())
            chunk_key2 = str(uuid.uuid4())
            chunk_key3 = str(uuid.uuid4())

            op_info = {
                'op_name': 'test_op',
                'io_meta': dict(input_chunks=[chunk_key1, chunk_key2, chunk_key3]),
                'retries': 0,
                'optimize': {
                    'depth': 0,
                    'demand_depths': (),
                    'successor_size': 1,
                    'descendant_size': 0
                }
            }

            chunk_meta_client = ChunkMetaClient(pool, cluster_info_ref)
            chunk_meta_client.set_chunk_meta(session_id, chunk_key1, size=512, workers=(endpoint1,))
            chunk_meta_client.set_chunk_meta(session_id, chunk_key2, size=512, workers=(endpoint1,))
            chunk_meta_client.set_chunk_meta(session_id, chunk_key3, size=512, workers=(endpoint2,))

            reply_ref = pool.create_actor(PromiseReplyTestActor)
            reply_callback = ((reply_ref.uid, reply_ref.address), 'reply')
            assigner_ref.apply_for_resource(session_id, op_key, op_info, callback=reply_callback)

            while not reply_ref.get_reply():
                gevent.sleep(0.1)
            _, ret_value = reply_ref.get_reply()
            self.assertEqual(ret_value[0], endpoint1)
Exemple #7
0
    def testFetchRemoteData(self):
        pool_address = '127.0.0.1:%d' % get_next_port()
        session_id = str(uuid.uuid4())
        mock_data = np.array([1, 2, 3, 4])
        with create_actor_pool(n_process=1, backend='gevent',
                               address=pool_address, distributor=MarsDistributor(2, 'w:0:')) as pool:
            self.create_standard_actors(pool, pool_address, with_daemon=False, with_status=False,
                                        with_resource=True)
            pool.create_actor(CpuCalcActor)
            pool.create_actor(MockSenderActor, mock_data, 'in', uid='w:mock_sender')
            cluster_info_ref = pool.actor_ref(WorkerClusterInfoActor.default_uid())
            chunk_meta_client = ChunkMetaClient(pool, cluster_info_ref)

            import mars.tensor as mt
            from mars.tensor.fetch import TensorFetch
            arr = mt.ones((4,), chunk_size=4)
            arr_add = mt.array(mock_data)
            result_tensor = arr + arr_add
            graph = result_tensor.build_graph(compose=False, tiled=True)

            modified_chunk = arr_add.chunks[0]
            arr_add.chunks[0]._op = TensorFetch(
                dtype=modified_chunk.dtype, _outputs=[weakref.ref(o) for o in modified_chunk.op.outputs],
                _key=modified_chunk.op.key)

            with self.run_actor_test(pool) as test_actor:
                graph_key = str(uuid.uuid4())
                execution_ref = test_actor.promise_ref(ExecutionActor.default_uid())
                execution_ref.execute_graph(session_id, graph_key, serialize_graph(graph),
                                            dict(chunks=[result_tensor.chunks[0].key]), None, _tell=True)

                execution_ref.add_finish_callback(session_id, graph_key, _promise=True) \
                    .then(lambda *_: test_actor.set_result(None)) \
                    .catch(lambda *exc: test_actor.set_result(exc, False))

            with self.assertRaises(DependencyMissing):
                self.get_result()

            chunk_meta_client.set_chunk_meta(session_id, modified_chunk.key, size=mock_data.nbytes,
                                             shape=mock_data.shape, workers=('0.0.0.0:1234',))
            with self.run_actor_test(pool) as test_actor:
                graph_key = str(uuid.uuid4())
                execution_ref = test_actor.promise_ref(ExecutionActor.default_uid())
                execution_ref.execute_graph(session_id, graph_key, serialize_graph(graph),
                                            dict(chunks=[result_tensor.chunks[0].key]), None, _tell=True)

                execution_ref.add_finish_callback(session_id, graph_key, _promise=True) \
                    .then(lambda *_: test_actor.set_result(None)) \
                    .catch(lambda *exc: test_actor.set_result(exc, False))

            with self.assertRaises(DependencyMissing):
                self.get_result()

            chunk_meta_client.set_chunk_meta(session_id, modified_chunk.key, size=mock_data.nbytes,
                                             shape=mock_data.shape, workers=('0.0.0.0:1234', pool_address))
            with self.run_actor_test(pool) as test_actor:
                def _validate(_):
                    data = test_actor._chunk_store.get(session_id, result_tensor.chunks[0].key)
                    assert_array_equal(data, mock_data + np.ones((4,)))

                graph_key = str(uuid.uuid4())
                execution_ref = test_actor.promise_ref(ExecutionActor.default_uid())
                execution_ref.execute_graph(session_id, graph_key, serialize_graph(graph),
                                            dict(chunks=[result_tensor.chunks[0].key]), None, _tell=True)

                execution_ref.add_finish_callback(session_id, graph_key, _promise=True) \
                    .then(_validate) \
                    .then(lambda *_: test_actor.set_result(None)) \
                    .catch(lambda *exc: test_actor.set_result(exc, False))

            self.get_result()
Exemple #8
0
    def testAssignerActor(self, *_):
        mock_scheduler_addr = f'127.0.0.1:{get_next_port()}'
        with create_actor_pool(n_process=1,
                               backend='gevent',
                               address=mock_scheduler_addr) as pool:
            cluster_info_ref = pool.create_actor(
                SchedulerClusterInfoActor, [pool.cluster_info.address],
                uid=SchedulerClusterInfoActor.default_uid())
            resource_ref = pool.create_actor(ResourceActor,
                                             uid=ResourceActor.default_uid())
            pool.create_actor(ChunkMetaActor, uid=ChunkMetaActor.default_uid())

            endpoint1 = 'localhost:12345'
            endpoint2 = 'localhost:23456'
            res = dict(hardware=dict(cpu=4, mem_quota=4096))

            resource_ref.set_worker_meta(endpoint1, res)
            resource_ref.set_worker_meta(endpoint2, res)

            assigner_ref = pool.create_actor(AssignerActor,
                                             uid=AssignerActor.default_uid())

            session_id = str(uuid.uuid4())
            op_key = str(uuid.uuid4())
            chunk_key1 = str(uuid.uuid4())
            chunk_key2 = str(uuid.uuid4())
            chunk_key3 = str(uuid.uuid4())

            op_info = {
                'op_name': 'test_op',
                'io_meta':
                dict(input_chunks=[chunk_key1, chunk_key2, chunk_key3]),
                'retries': 0,
                'optimize': {
                    'depth': 0,
                    'demand_depths': (),
                    'successor_size': 1,
                    'descendant_size': 0
                }
            }

            chunk_meta_client = ChunkMetaClient(pool, cluster_info_ref)
            chunk_meta_client.set_chunk_meta(session_id,
                                             chunk_key1,
                                             size=512,
                                             workers=(endpoint1, ))
            chunk_meta_client.set_chunk_meta(session_id,
                                             chunk_key2,
                                             size=512,
                                             workers=(endpoint1, ))
            chunk_meta_client.set_chunk_meta(session_id,
                                             chunk_key3,
                                             size=512,
                                             workers=(endpoint2, ))

            uid = OperandActor.gen_uid(session_id, op_key)
            reply_ref = pool.create_actor(MockOperandActor, uid=uid)
            assigner_ref.apply_for_resource(session_id, op_key, op_info)

            while not reply_ref.get_worker_ep():
                gevent.sleep(0.1)
            self.assertEqual(reply_ref.get_worker_ep(), endpoint1)

            with self.run_actor_test(pool) as test_actor, self.assertRaises(
                    TimeoutError):
                assigner_p_ref = test_actor.promise_ref(assigner_ref)

                try:
                    options.scheduler.assign_timeout = 1
                    res = dict(hardware=dict(cpu=4, mem_quota=0))
                    resource_ref.set_worker_meta(endpoint1, res)
                    resource_ref.set_worker_meta(endpoint2, res)

                    self.waitp(
                        assigner_p_ref.apply_for_resource(session_id,
                                                          op_key,
                                                          op_info,
                                                          _promise=True))
                finally:
                    options.scheduler.assign_timeout = 600