예제 #1
0
    def testPrepareQuota(self, *_):
        """Verify that execution waits while chunk pinning keeps failing.

        The side effect installed on ``ChunkHolderActor.pin_chunks`` raises
        ``PinChunkFailed`` until a helper thread clears a flag roughly one
        second after the graph is enqueued; the recorded finish time must then
        be at least one second later than the start time.
        """
        # single-element list so the nested functions can flip the flag in place
        pin_blocked = [True]

        def _mock_pin(graph_key, chunk_keys):
            from mars.errors import PinChunkFailed
            if not pin_blocked[0]:
                return chunk_keys
            raise PinChunkFailed

        def _unblock_later():
            time.sleep(1)
            pin_blocked[0] = False

        ChunkHolderActor.pin_chunks.side_effect = _mock_pin

        pool_address = '127.0.0.1:%d' % get_next_port()
        session_id = str(uuid.uuid4())
        mock_data = np.array([1, 2, 3, 4])
        with create_actor_pool(n_process=1, backend='gevent', address=pool_address) as pool:
            self.create_standard_actors(pool, pool_address, with_daemon=False, with_status=False)
            pool.create_actor(MockSenderActor, mock_data, 'in', uid='w:mock_sender')
            cluster_info_ref = pool.actor_ref(WorkerClusterInfoActor.default_name())
            chunk_meta_client = ChunkMetaClient(pool, cluster_info_ref)

            import mars.tensor as mt
            from mars.tensor.expressions.fetch import TensorFetch
            arr = mt.ones((4,), chunk_size=4)
            arr_add = mt.array(mock_data)
            result_tensor = arr + arr_add
            graph = result_tensor.build_graph(compose=False, tiled=True)

            # replace the op of the data chunk with a fetch op that keeps the
            # original op key, dtype and outputs
            modified_chunk = arr_add.chunks[0]
            fetch_op = TensorFetch(
                dtype=modified_chunk.dtype,
                _outputs=[weakref.ref(o) for o in modified_chunk.op.outputs],
                _key=modified_chunk.op.key)
            arr_add.chunks[0]._op = fetch_op
            chunk_meta_client.set_chunk_meta(
                session_id, modified_chunk.key, size=mock_data.nbytes,
                shape=mock_data.shape, workers=('0.0.0.0:1234', pool_address))

            with self.run_actor_test(pool) as test_actor:
                graph_key = str(uuid.uuid4())
                execution_ref = test_actor.promise_ref(ExecutionActor.default_name())

                start_time = time.time()

                enqueue_promise = execution_ref.enqueue_graph(
                    session_id, graph_key, serialize_graph(graph),
                    dict(chunks=[result_tensor.chunks[0].key]), None, _promise=True)
                # success stores the completion timestamp, failure stores the exception
                enqueue_promise \
                    .then(lambda *_: test_actor.set_result(time.time())) \
                    .catch(lambda *exc: test_actor.set_result(exc, False))

                unblocker = threading.Thread(target=_unblock_later)
                unblocker.start()

            finish_time = self.get_result()
            self.assertGreaterEqual(finish_time, start_time + 1)
예제 #2
0
    def testReExecuteExisting(self):
        """Execute a graph twice under the same graph key and validate both runs.

        After each ``execute_graph`` call the result chunk is read from the test
        actor's shared store and compared with ``mock_data + 1``.
        """
        pool_address = '127.0.0.1:%d' % get_next_port()
        session_id = str(uuid.uuid4())
        mock_data = np.array([1, 2, 3, 4])
        pool_options = dict(n_process=1, backend='gevent', address=pool_address,
                            distributor=MarsDistributor(2, 'w:0:'))
        with create_actor_pool(**pool_options) as pool:
            self.create_standard_actors(pool, pool_address,
                                        with_daemon=False, with_status=False)
            pool.create_actor(CpuCalcActor, uid='w:1:cpu-calc')
            pool.create_actor(InProcHolderActor, uid='w:1:inproc-holder')

            import mars.tensor as mt
            arr = mt.ones((4, ), chunk_size=4)
            arr_add = mt.array(mock_data)
            result_tensor = arr + arr_add
            graph = result_tensor.build_graph(compose=False, tiled=True)

            pool.create_actor(MockSenderActor, mock_data + np.ones((4, )),
                              'out', uid='w:mock_sender')

            def _validate(_):
                # ``test_actor`` is resolved when the callback runs, i.e. it is
                # whichever actor the enclosing ``with`` block bound last
                stored = test_actor.shared_store.get(
                    session_id, result_tensor.chunks[0].key)
                assert_array_equal(stored, mock_data + np.ones((4, )))

            # first execution
            with self.run_actor_test(pool) as test_actor:
                graph_key = str(uuid.uuid4())
                execution_ref = test_actor.promise_ref(ExecutionActor.default_uid())
                first_run = execution_ref.execute_graph(
                    session_id, graph_key, serialize_graph(graph),
                    dict(chunks=[result_tensor.chunks[0].key]), None, _promise=True)
                first_run.then(_validate) \
                    .then(lambda *_: test_actor.set_result(None)) \
                    .catch(lambda *exc: test_actor.set_result(exc, False))

            self.get_result()

            # second execution with the very same graph key
            with self.run_actor_test(pool) as test_actor:
                execution_ref = test_actor.promise_ref(ExecutionActor.default_uid())
                second_run = execution_ref.execute_graph(
                    session_id, graph_key, serialize_graph(graph),
                    dict(chunks=[result_tensor.chunks[0].key]), None, _promise=True)
                second_run.then(_validate) \
                    .then(lambda *_: test_actor.set_result(None)) \
                    .catch(lambda *exc: test_actor.set_result(exc, False))

            self.get_result()
예제 #3
0
    def testAddrReject(self):
        """Run ``test_addr_reject`` and check the results recorded by ``ServeActor``.

        Whatever happens inside the pool, the module-level promise pool must be
        empty afterwards.
        """
        expected_results = [0, 'WorkerDead']
        try:
            with create_actor_pool(n_process=1) as pool:
                serve_ref = pool.create_actor(ServeActor, uid='ServeActor')
                test_ref = pool.create_actor(PromiseTestActor)

                test_ref.test_addr_reject()
                gc.collect()
                wait_test_actor_result(test_ref, 30)

                recorded = serve_ref.get_result()
                self.assertListEqual(recorded, expected_results)
        finally:
            self.assertDictEqual(promise._promise_pool, {})
예제 #4
0
    def testDispatch(self, *_):
        """Exercise slot registration and free-slot dispatching of ``DispatchActor``.

        ``group_size`` task actors are created for each of the groups ``g1`` and
        ``g2``; ``group_size + 1`` rounds of calls are then chained through
        ``get_free_slot`` and the timestamps collected in ``call_records`` are
        compared.
        """
        call_records = dict()
        group_size = 4

        mock_scheduler_addr = '127.0.0.1:%d' % get_next_port()
        with create_actor_pool(n_process=1, backend='gevent',
                               address=mock_scheduler_addr) as pool:
            dispatch_ref = pool.create_actor(DispatchActor, uid=DispatchActor.default_name())
            # all actors of g1 are created first, then those of g2
            for group_name in ('g1', 'g2'):
                for _unused in range(group_size):
                    pool.create_actor(TaskActor, group_name, call_records)

            slot_expectations = (('g1', group_size), ('g2', group_size), ('g3', 0))
            for group_name, expected_count in slot_expectations:
                self.assertEqual(len(dispatch_ref.get_slots(group_name)), expected_count)

            # the same hash string must map to the same slot on repeated calls
            first_hash_slot = dispatch_ref.get_hash_slot('g1', 'hash_str')
            second_hash_slot = dispatch_ref.get_hash_slot('g1', 'hash_str')
            self.assertEqual(first_hash_slot, second_hash_slot)

            # tasks within [0, group_size - 1] will run almost simultaneously,
            # while the last one will be delayed (presumably until a slot of
            # the group becomes free again)

            with self.run_actor_test(pool) as test_actor:
                from mars.promise import Promise
                p = Promise(done=True)
                _dispatch_ref = test_actor.promise_ref(DispatchActor.default_name())

                def _call_on_dispatched(uid, key=None):
                    if uid is not None:
                        test_actor.promise_ref(uid).queued_call(key, 2, _tell=True)
                    else:
                        call_records[key] = 'NoneUID'

                for idx in range(group_size + 1):
                    on_g1 = partial(_call_on_dispatched, key='%d_1' % idx)
                    on_g2 = partial(_call_on_dispatched, key='%d_2' % idx)
                    p = p.then(lambda *_: _dispatch_ref.get_free_slot('g1', _promise=True)) \
                        .then(on_g1) \
                        .then(lambda *_: _dispatch_ref.get_free_slot('g2', _promise=True)) \
                        .then(on_g2)

                # g3 has no slots, so the callback is expected to see a None uid
                p.then(lambda *_: _dispatch_ref.get_free_slot('g3', _promise=True)) \
                    .then(partial(_call_on_dispatched, key='N_1')) \
                    .then(lambda *_: test_actor.set_result(None))

            self.get_result(20)

            self.assertEqual(call_records['N_1'], 'NoneUID')
            total_spread = sum(abs(call_records['%d_1' % idx] - call_records['0_1'])
                               for idx in range(group_size))
            self.assertLess(total_spread, 1)
            last_delay = call_records['%d_1' % group_size] - call_records['0_1']
            self.assertGreater(last_delay, 1)
            self.assertLess(last_delay, 3)

            dispatch_ref.destroy()
예제 #5
0
파일: test_assigner.py 프로젝트: zvrr/mars
    def testAssignerActor(self):
        """Apply for resources for an op and check which endpoint is returned.

        Two of the three input chunks are registered on ``endpoint1`` and one on
        ``endpoint2``; the first element of the reply value must be ``endpoint1``.
        """
        mock_scheduler_addr = '127.0.0.1:%d' % get_next_port()
        with create_actor_pool(n_process=1, backend='gevent', address=mock_scheduler_addr) as pool:
            cluster_info_ref = pool.create_actor(
                SchedulerClusterInfoActor, [pool.cluster_info.address],
                uid=SchedulerClusterInfoActor.default_uid())
            resource_ref = pool.create_actor(ResourceActor, uid=ResourceActor.default_uid())
            pool.create_actor(ChunkMetaActor, uid=ChunkMetaActor.default_uid())

            endpoint1 = 'localhost:12345'
            endpoint2 = 'localhost:23456'
            res = dict(hardware=dict(cpu=4, memory=4096))

            def write_mock_meta():
                for endpoint in (endpoint1, endpoint2):
                    resource_ref.set_worker_meta(endpoint, res)

            gevent.spawn(write_mock_meta).join()

            assigner_ref = pool.create_actor(AssignerActor, uid=AssignerActor.default_uid())

            session_id = str(uuid.uuid4())
            op_key = str(uuid.uuid4())
            chunk_keys = [str(uuid.uuid4()) for _ in range(3)]

            optimize_info = dict(depth=0, demand_depths=(), successor_size=1, descendant_size=0)
            op_info = dict(
                op_name='test_op',
                io_meta=dict(input_chunks=list(chunk_keys)),
                retries=0,
                optimize=optimize_info,
            )

            chunk_meta_client = ChunkMetaClient(pool, cluster_info_ref)
            chunk_locations = zip(chunk_keys, (endpoint1, endpoint1, endpoint2))
            for chunk_key, endpoint in chunk_locations:
                chunk_meta_client.set_chunk_meta(session_id, chunk_key, size=512, workers=(endpoint,))

            reply_ref = pool.create_actor(PromiseReplyTestActor)
            reply_callback = ((reply_ref.uid, reply_ref.address), 'reply')
            assigner_ref.apply_for_resource(session_id, op_key, op_info, callback=reply_callback)

            # poll until the reply actor holds a (truthy) reply
            while not reply_ref.get_reply():
                gevent.sleep(0.1)
            reply = reply_ref.get_reply()
            ret_value = reply[1] if len(reply) == 2 else tuple(reply)[1:]
            _, ret_value = reply
            self.assertEqual(ret_value[0], endpoint1)
예제 #6
0
    def setUp(self):
        """Create a local gevent actor pool with scheduler-side actors and a ``MarsAPI``.

        The pool is stored on ``self.pool`` and is not closed here.
        """
        endpoint = '127.0.0.1:%d' % get_next_port()
        self.endpoint = endpoint

        pool = create_actor_pool(n_process=1, backend='gevent', address=endpoint)
        self.pool = pool

        pool.create_actor(SchedulerClusterInfoActor, [endpoint],
                          uid=SchedulerClusterInfoActor.default_uid())
        pool.create_actor(SessionManagerActor, uid=SessionManagerActor.default_uid())
        pool.create_actor(ResourceActor, uid=ResourceActor.default_uid())

        self.api = MarsAPI(endpoint)
예제 #7
0
def create_bearer_token_actor():
    """Create a one-process actor pool hosting ``BearerTokenActor`` and join it.

    The pool listens on ``ACTOR_ADDRESS`` and the actor is registered under
    ``ACTOR_UID``.
    """
    from mars.actors import create_actor_pool, FunctionActor

    class BearerTokenActor(FunctionActor):
        def get_bearer_token(self):
            # imported lazily, at call time
            from cupid import context

            return context().get_bearer_token()

    actor_pool = create_actor_pool(address=ACTOR_ADDRESS, n_process=1)
    actor_pool.create_actor(BearerTokenActor, uid=ACTOR_UID)
    actor_pool.join()
예제 #8
0
파일: test_promise.py 프로젝트: zvrr/mars
    def testNoTimeoutActor(self):
        """Run ``test_no_timeout`` and expect ``ServeActor`` to have recorded ``[0]``.

        No promise may remain active afterwards.
        """
        expected_results = [0]
        try:
            with create_actor_pool(n_process=1) as pool:
                serve_ref = pool.create_actor(ServeActor, uid='ServeActor')
                test_ref = pool.create_actor(PromiseTestActor)

                test_ref.test_no_timeout()
                wait_test_actor_result(test_ref, 30)

                recorded = serve_ref.get_result()
                self.assertListEqual(recorded, expected_results)
        finally:
            self.assertEqual(promise.get_active_promise_count(), 0)
예제 #9
0
파일: test_promise.py 프로젝트: zvrr/mars
    def testRefReject(self):
        """Run ``test_ref_reject`` and check the results recorded by ``ServeActor``.

        No promise may remain active afterwards.
        """
        expected_results = [0, 'WorkerProcessStopped']
        try:
            with create_actor_pool(n_process=1) as pool:
                serve_ref = pool.create_actor(ServeActor, uid='ServeActor')
                test_ref = pool.create_actor(PromiseTestActor)

                test_ref.test_ref_reject()
                wait_test_actor_result(test_ref, 30)

                recorded = serve_ref.get_result()
                self.assertListEqual(recorded, expected_results)
        finally:
            self.assertEqual(promise.get_active_promise_count(), 0)
예제 #10
0
    def testDispatch(self, *_):
        """Exercise slot registration and task dispatching of ``DispatchActor``.

        ``group_size`` task actors are created for each of ``g1`` and ``g2``; a
        ``RunTaskTestActor`` then runs the tasks and the timestamps it leaves in
        ``call_records`` are compared.
        """
        call_records = dict()
        group_size = 4

        mock_scheduler_addr = '127.0.0.1:%d' % get_next_port()
        with create_actor_pool(n_process=1, backend='gevent',
                               address=mock_scheduler_addr) as pool:
            dispatch_ref = pool.create_actor(DispatchActor, uid='DispatchActor')
            # all actors of g1 are created first, then those of g2
            for group_name in ('g1', 'g2'):
                for _unused in range(group_size):
                    pool.create_actor(TaskActor, group_name, call_records)

            slot_expectations = (('g1', group_size), ('g2', group_size), ('g3', 0))
            for group_name, expected_count in slot_expectations:
                self.assertEqual(len(dispatch_ref.get_slots(group_name)), expected_count)

            # the same hash string must map to the same slot on repeated calls
            first_hash_slot = dispatch_ref.get_hash_slot('g1', 'hash_str')
            second_hash_slot = dispatch_ref.get_hash_slot('g1', 'hash_str')
            self.assertEqual(first_hash_slot, second_hash_slot)

            # tasks within [0, group_size - 1] will run almost simultaneously,
            # while the last one will be delayed (presumably until a slot of
            # the group becomes free again)

            def run_tasks():
                test_ref = pool.create_actor(RunTaskTestActor, call_records)
                test_ref.run_tasks(group_size)
                # poll once per second until the actor reports completion
                while not test_ref.get_finished():
                    gevent.sleep(1)
                test_ref.destroy()

            gevent.spawn(run_tasks).join()

            self.assertEqual(call_records['N_1'], 'NoneUID')
            total_spread = sum(abs(call_records['%d_1' % idx] - call_records['0_1'])
                               for idx in range(group_size))
            self.assertLess(total_spread, 1)
            last_delay = call_records['%d_1' % group_size] - call_records['0_1']
            self.assertGreater(last_delay, 1)
            self.assertLess(last_delay, 3)

            dispatch_ref.destroy()
예제 #11
0
파일: test_transfer.py 프로젝트: lmatz/mars
def run_transfer_worker(pool_address, session_id, plasma_socket, chunk_keys,
                        spill_dir, msg_queue):
    """Run a worker-side actor pool used as the peer in transfer tests.

    A plasma store is started, a two-process gevent pool is populated with the
    worker actors, the given chunks are registered and ``1`` is put on
    ``msg_queue`` once registration has finished. The function then polls the
    ``HolderActor`` until ``obtain()`` returns a truthy value.

    :param pool_address: address the actor pool listens on; also passed as the
        only scheduler address
    :param session_id: session id handed to the registration actor
    :param plasma_socket: value stored into ``options.worker.plasma_socket``
    :param chunk_keys: chunk keys handed to the registration actor
    :param spill_dir: value stored into ``options.worker.spill_directory``
    :param msg_queue: queue that receives ``1`` after registration finished
    :raises SystemError: when either wait exceeds 60 seconds
    """
    from mars.config import options
    from mars.utils import PlasmaProcessHelper

    options.worker.plasma_socket = plasma_socket
    options.worker.spill_directory = spill_dir

    def _wait_until(condition, interval, timeout=60):
        # Poll ``condition`` every ``interval`` seconds; give up after ``timeout``.
        started = time.time()
        while not condition():
            gevent.sleep(interval)
            if time.time() - started > timeout:
                raise SystemError('Wait result timeout')

    plasma_helper = PlasmaProcessHelper(size=1024 * 1024 * 10, socket=options.worker.plasma_socket)
    try:
        plasma_helper.run()

        with create_actor_pool(n_process=2, backend='gevent', distributor=WorkerDistributor(2),
                               address=pool_address) as pool:
            # Bound up front: if any create_actor call below fails before the
            # chunk holder exists, the cleanup must not raise a NameError that
            # would hide the original exception.
            chunk_holder_ref = None
            try:
                pool.create_actor(ClusterInfoActor, schedulers=[pool_address],
                                  uid=ClusterInfoActor.default_name())
                pool.create_actor(KVStoreActor, uid=KVStoreActor.default_name())
                pool.create_actor(DispatchActor, uid=DispatchActor.default_name())
                pool.create_actor(QuotaActor, 1024 * 1024 * 20, uid=MemQuotaActor.default_name())
                holder_ref = pool.create_actor(HolderActor, uid='HolderActor')
                chunk_holder_ref = pool.create_actor(ChunkHolderActor, plasma_helper._size,
                                                     uid=ChunkHolderActor.default_name())
                pool.create_actor(SpillActor)

                pool.create_actor(SenderActor, uid='%s' % str(uuid.uuid4()))
                pool.create_actor(SenderActor, uid='%s' % str(uuid.uuid4()))

                pool.create_actor(ReceiverActor, uid='%s' % str(uuid.uuid4()))
                pool.create_actor(ReceiverActor, uid='%s' % str(uuid.uuid4()))

                register_actor = pool.create_actor(WorkerRegistrationTestActor)
                register_actor.register(session_id, chunk_keys)

                _wait_until(register_actor.get_finished, 0.5)
                register_actor.destroy()

                # tell the parent process that registration is done
                msg_queue.put(1)
                _wait_until(holder_ref.obtain, 1)
            finally:
                if chunk_holder_ref is not None:
                    pool.destroy_actor(chunk_holder_ref)
    finally:
        plasma_helper.stop()
예제 #12
0
파일: test_main.py 프로젝트: ai-driven/mars
    def testWorkerProcessRestart(self):
        """Kill a worker's calc process through its daemon and wait for it to come back.

        A ``mars.worker`` subprocess is launched against a mock scheduler pool.
        The process hosting the first ``cpu`` slot actor is killed via
        ``WorkerDaemonActor.kill_actor_process`` and the test polls
        ``is_actor_process_alive`` for up to 10 seconds.

        :raises TimeoutError: when the process is not reported alive in time
        """
        mock_scheduler_addr = '127.0.0.1:%d' % get_next_port()
        # Bound before ``try``: if pool or actor creation fails before the
        # worker is launched, the cleanup below must not raise a NameError that
        # would hide the original exception.
        proc = None
        try:
            with create_actor_pool(n_process=1,
                                   backend='gevent',
                                   address=mock_scheduler_addr) as pool:
                pool.create_actor(SchedulerClusterInfoActor,
                                  schedulers=[mock_scheduler_addr],
                                  uid=SchedulerClusterInfoActor.default_name())

                pool.create_actor(ChunkMetaActor,
                                  uid=ChunkMetaActor.default_name())
                resource_ref = pool.create_actor(
                    ResourceActor, uid=ResourceActor.default_name())

                proc = subprocess.Popen([
                    sys.executable, '-m', 'mars.worker', '-a', '127.0.0.1',
                    '--schedulers', mock_scheduler_addr, '--cpu-procs', '1',
                    '--cache-mem', '10m', '--spill-dir', self._spill_dir,
                    '--ignore-avail-mem'
                ])
                worker_endpoint = self._wait_worker_ready(proc, resource_ref)

                daemon_ref = pool.actor_ref(WorkerDaemonActor.default_name(),
                                            address=worker_endpoint)
                dispatch_ref = pool.actor_ref(DispatchActor.default_name(),
                                              address=worker_endpoint)
                cpu_slots = dispatch_ref.get_slots('cpu')
                calc_ref = pool.actor_ref(cpu_slots[0],
                                          address=worker_endpoint)
                daemon_ref.kill_actor_process(calc_ref)

                check_start = time.time()
                while not daemon_ref.is_actor_process_alive(calc_ref):
                    gevent.sleep(0.1)
                    if time.time() - check_start > 10:
                        raise TimeoutError('Check process restart timeout')
        finally:
            if proc is not None and proc.poll() is None:
                # ask the worker to stop, wait up to 5 seconds, then force-kill
                proc.send_signal(signal.SIGINT)
                check_time = time.time()
                while True:
                    time.sleep(0.1)
                    if proc.poll() is not None or time.time() - check_time >= 5:
                        break
                if proc.poll() is None:
                    proc.kill()
            if os.path.exists(options.worker.plasma_socket):
                os.unlink(options.worker.plasma_socket)
예제 #13
0
파일: test_promise.py 프로젝트: zvrr/mars
    def testAllActor(self):
        """Run ``test_all_promise`` and check the order of results in ``ServeActor``.

        The expected list is -128, the even numbers below 20, the odd numbers
        below 20, then 127. No promise may remain active afterwards.
        """
        evens = list(range(0, 20, 2))
        odds = list(range(1, 20, 2))
        expected_results = [-128] + evens + odds + [127]
        try:
            with create_actor_pool(n_process=1) as pool:
                serve_ref = pool.create_actor(ServeActor, uid='ServeActor')
                test_ref = pool.create_actor(PromiseTestActor)

                test_ref.test_all_promise()
                wait_test_actor_result(test_ref, 30)

                recorded = serve_ref.get_result()
                self.assertListEqual(recorded, expected_results)
        finally:
            self.assertEqual(promise.get_active_promise_count(), 0)
예제 #14
0
    def testPromiseActor(self):
        """Run ``test_normal`` in a greenlet and check the results in ``ServeActor``.

        After a two-second pause the serve actor must hold ``0..10``. The
        module-level promise pool must be empty afterwards.
        """
        try:
            with create_actor_pool() as pool:
                serve_ref = pool.create_actor(ServeActor, uid='ServeActor')
                test_ref = pool.create_actor(PromiseTestActor)

                def test_proc():
                    test_ref.test_normal()
                    gevent.sleep(2)
                    self.assertListEqual(serve_ref.get_result(), list(range(11)))

                gl = gevent.spawn(test_proc)
                # ``join()`` would drop an exception raised inside the greenlet,
                # so a failing assertion could never fail the test; ``get()``
                # waits for completion and re-raises it here.
                gl.get()
        finally:
            self.assertDictEqual(promise._promise_pool, {})
예제 #15
0
    def _run_operand_case(session_id, graph_key, tensor, execution_creator):
        """Drive ``tensor``'s graph through a ``GraphActor`` until it reaches a final state.

        ``execution_creator(pool)`` is called once per distinct address to build
        the object returned by the mocked ``OperandActor._get_raw_execution_ref``.
        NOTE(review): takes no ``self``; presumably a staticmethod whose
        decorator lies outside this view — confirm.

        :raises SystemError: when no final state is reached within 30 seconds
        """
        graph = tensor.build_graph(compose=False)

        with create_actor_pool(n_process=1, backend='gevent') as pool:
            pool.create_actor(ClusterInfoActor, [pool.cluster_info.address],
                              uid=ClusterInfoActor.default_name())
            resource_ref = pool.create_actor(ResourceActor, uid=ResourceActor.default_name())
            pool.create_actor(ChunkMetaActor, uid=ChunkMetaActor.default_name())
            pool.create_actor(AssignerActor, uid=AssignerActor.default_name())
            graph_ref = pool.create_actor(GraphActor, session_id, graph_key, serialize_graph(graph),
                                          uid=GraphActor.gen_name(session_id, graph_key))
            addr_dict = dict()

            def _build_mock_ref(uid=None, address=None):
                # one mock execution ref per address, created on first request
                if address not in addr_dict:
                    addr_dict[address] = execution_creator(pool)
                return addr_dict[address]

            # handle mock objects
            OperandActor._get_raw_execution_ref.side_effect = _build_mock_ref

            mock_resource = dict(hardware=dict(cpu=4, cpu_total=4, memory=512))

            for worker_endpoint in ('localhost:12345', 'localhost:23456'):
                resource_ref.set_worker_meta(worker_endpoint, mock_resource)

            graph_ref.prepare_graph()
            fetched_graph = graph_ref.get_chunk_graph()

            graph_ref.analyze_graph()

            # op keys of chunks without successors (not used further in this block)
            final_keys = set(c.op.key for c in fetched_graph
                             if fetched_graph.count_successors(c) == 0)

            graph_ref.create_operand_actors()

            graph_meta_ref = pool.actor_ref(GraphMetaActor.gen_name(session_id, graph_key))
            start_time = time.time()
            while True:
                pool.sleep(0.1)
                elapsed = time.time() - start_time
                if elapsed > 30:
                    raise SystemError('Wait for execution finish timeout')
                current_state = graph_meta_ref.get_state()
                if current_state in (GraphState.SUCCEEDED, GraphState.FAILED, GraphState.CANCELLED):
                    break
예제 #16
0
    def testRefReject(self):
        """Run ``test_ref_reject`` in a greenlet and check the results in ``ServeActor``.

        After a three-second pause the serve actor must hold
        ``[0, 'WorkerProcessStopped']``. The module-level promise pool must be
        empty afterwards.
        """
        try:
            with create_actor_pool() as pool:
                serve_ref = pool.create_actor(ServeActor, uid='ServeActor')
                test_ref = pool.create_actor(PromiseTestActor)

                def run_proc_test():
                    test_ref.test_ref_reject()
                    gc.collect()
                    gevent.sleep(3)
                    self.assertListEqual(serve_ref.get_result(), [0, 'WorkerProcessStopped'])

                gl = gevent.spawn(run_proc_test)
                # ``join()`` would drop an exception raised inside the greenlet,
                # so a failing assertion could never fail the test; ``get()``
                # waits for completion and re-raises it here.
                gl.get()
        finally:
            self.assertDictEqual(promise._promise_pool, {})
예제 #17
0
    def testNoTimeoutActor(self):
        """Run ``test_no_timeout`` in a greenlet and check the results in ``ServeActor``.

        After a three-second pause the serve actor must hold ``[0]``. The
        module-level promise pool must be empty afterwards.
        """
        try:
            with create_actor_pool() as pool:
                serve_ref = pool.create_actor(ServeActor, uid='ServeActor')
                test_ref = pool.create_actor(PromiseTestActor)

                def run_proc_test():
                    test_ref.test_no_timeout()
                    gc.collect()
                    gevent.sleep(3)
                    self.assertListEqual(serve_ref.get_result(), [0])

                gl = gevent.spawn(run_proc_test)
                # ``join()`` would drop an exception raised inside the greenlet,
                # so a failing assertion could never fail the test; ``get()``
                # waits for completion and re-raises it here.
                gl.get()
        finally:
            self.assertDictEqual(promise._promise_pool, {})
예제 #18
0
    def testPromiseActor(self):
        """Run the normal and the error-raising promise scenarios back to back.

        ``test_normal`` must leave ``0..10`` in ``ServeActor``; after clearing,
        ``test_error_raise`` must leave ``[-1]``. The module-level promise pool
        must be empty afterwards.
        """
        try:
            with create_actor_pool(n_process=1) as pool:
                serve_ref = pool.create_actor(ServeActor, uid='ServeActor')
                test_ref = pool.create_actor(PromiseTestActor)

                # scenario 1: normal promise chain
                test_ref.test_normal()
                wait_test_actor_result(test_ref, 10)
                normal_results = serve_ref.get_result()
                self.assertListEqual(normal_results, list(range(11)))

                serve_ref.clear_result()

                # scenario 2: chain that raises an error
                test_ref.test_error_raise()
                wait_test_actor_result(test_ref, 10)
                error_results = serve_ref.get_result()
                self.assertListEqual(error_results, [-1])
        finally:
            self.assertDictEqual(promise._promise_pool, {})
예제 #19
0
    def testResourceActor(self):
        """Check worker meta bookkeeping and allocate/deallocate on ``ResourceActor``.

        Both endpoints are registered with ``cpu=4, memory=512``; a sequence of
        allocation requests against the first endpoint is then asserted to be
        accepted or refused, with two deallocations in between.
        """
        session_id = str(uuid.uuid4())
        with create_actor_pool(n_process=1, backend='gevent') as pool:
            resource_ref = pool.create_actor(ResourceActor,
                                             uid=ResourceActor.default_name())
            mock_resource = dict(hardware=dict(cpu=4, memory=512))

            ep1 = 'localhost:12345'
            ep2 = 'localhost:23456'

            def write_mock_meta():
                for endpoint in (ep1, ep2):
                    resource_ref.set_worker_meta(endpoint, mock_resource)
                return resource_ref.get_workers_meta()

            meta_greenlet = gevent.spawn(write_mock_meta)
            meta_greenlet.join()
            self.assertEqual({ep1: mock_resource, ep2: mock_resource}, meta_greenlet.value)

            def _request_on_ep1(cpu, memory):
                # fresh key per request; returns (key, allocation result)
                alloc_key = str(uuid.uuid4())
                outcome = resource_ref.allocate_resource(
                    session_id, alloc_key, ep1, dict(cpu=cpu, memory=memory))
                return alloc_key, outcome

            key1, outcome = _request_on_ep1(5, 256)
            self.assertFalse(outcome)
            key2, outcome = _request_on_ep1(2, 256)
            self.assertTrue(outcome)
            key3, outcome = _request_on_ep1(2, 260)
            self.assertFalse(outcome)
            key4, outcome = _request_on_ep1(2, 256)
            self.assertTrue(outcome)
            key5, outcome = _request_on_ep1(2, 256)
            self.assertFalse(outcome)

            resource_ref.deallocate_resource(session_id, key4, ep1)
            key6, outcome = _request_on_ep1(2, 256)
            self.assertTrue(outcome)
            resource_ref.deallocate_resource(session_id, key6, ep1)
예제 #20
0
def start_transfer_test_pool(**kwargs):
    """Yield an actor pool populated with the actors used by transfer tests.

    This is a generator with a single ``yield``; it is presumably wrapped by
    a context-manager decorator that is outside this view -- TODO confirm.

    Required keyword arguments:
        address: address the pool is created with; also passed as the only
            scheduler to ``ClusterInfoActor`` and as the argument of
            ``StatusActor``.
        plasma_size: argument forwarded to ``ChunkHolderActor``.

    All remaining keyword arguments are forwarded to ``create_actor_pool``.

    Raises:
        KeyError: if ``address`` or ``plasma_size`` is not supplied.
    """
    address = kwargs.pop('address')
    plasma_size = kwargs.pop('plasma_size')
    with create_actor_pool(n_process=1, backend='gevent', address=address, **kwargs) as pool:
        pool.create_actor(PlasmaKeyMapActor, uid=PlasmaKeyMapActor.default_name())
        pool.create_actor(ClusterInfoActor, schedulers=[address],
                          uid=ClusterInfoActor.default_name())
        pool.create_actor(ChunkMetaActor, uid=ChunkMetaActor.default_name())
        pool.create_actor(DispatchActor, uid=DispatchActor.default_name())
        pool.create_actor(QuotaActor, 1024 * 1024 * 20, uid=MemQuotaActor.default_name())
        chunk_holder_ref = pool.create_actor(ChunkHolderActor,
                                             plasma_size, uid=ChunkHolderActor.default_name())
        pool.create_actor(SpillActor)
        pool.create_actor(StatusActor, address, uid=StatusActor.default_name())

        try:
            yield pool
        finally:
            # Destroy the chunk holder even when the consumer's body raised;
            # without the ``finally`` the call was skipped on any error.
            chunk_holder_ref.destroy()
예제 #21
0
    def testKVStoreActor(self):
        """Write keys singly and in a batch to ``KVStoreActor``, then read them back.

        The helper process from ``EtcdProcessHelper`` and the actor pool are
        both active for the whole test body.
        """
        etcd_helper = EtcdProcessHelper(port_range_start=54131)
        with etcd_helper.run(), create_actor_pool(n_process=1, backend='gevent') as pool:
            kv_ref = pool.create_actor(KVStoreActor, uid=KVStoreActor.default_name())

            # two single writes, followed by a batch that rewrites v2 and adds v3
            for path, value in (('/node/v1', 'value1'), ('/node/v2', 'value2')):
                kv_ref.write(path, value)
            kv_ref.write_batch([('/node/v2', 'value2'), ('/node/v3', 'value3')])

            self.assertEqual(kv_ref.read('/node/v1').value, 'value1')

            batch_values = [item.value
                            for item in kv_ref.read_batch(['/node/v2', '/node/v3'])]
            self.assertListEqual(batch_values, ['value2', 'value3'])
예제 #22
0
    def testAllActor(self):
        """Run ``PromiseTestActor.test_all_promise`` and verify the served results.

        The check executes inside a spawned greenlet.  Its outcome is fetched
        with ``Greenlet.get()`` so that an assertion failure (or any other
        error) raised inside the greenlet fails the test; ``join()`` alone
        only waits and never re-raises.  Whatever the outcome,
        ``promise._promise_pool`` is asserted to be empty afterwards.
        """
        try:
            with create_actor_pool() as pool:
                serve_ref = pool.create_actor(ServeActor, uid='ServeActor')
                test_ref = pool.create_actor(PromiseTestActor)

                def run_proc_test():
                    test_ref.test_all_promise()
                    gc.collect()
                    # presumably leaves time for pending promise callbacks to
                    # deliver their values to ServeActor -- TODO confirm
                    gevent.sleep(3)
                    # expected: -128, the even numbers below 20, the odd
                    # numbers below 20, then 127
                    self.assertListEqual(serve_ref.get_result(),
                                         [-128] + list(range(0, 20, 2)) +
                                         list(range(1, 20, 2)) + [127])

                gl = gevent.spawn(run_proc_test)
                gl.join()
                # re-raise anything the greenlet raised, including failed assertions
                gl.get()
        finally:
            self.assertDictEqual(promise._promise_pool, {})
예제 #23
0
    def testTaskQueueActor(self):
        """Enqueue six tasks and check allocation count and completion order.

        Tasks are enqueued with ``depth`` decreasing from 6 to 1 and each
        records the time its promise resolved.  After one second the queue
        must report 4 allocated tasks.  The last key's priority is then
        raised to ``depth=6`` and the first two keys are released; half a
        second later the last key must have resolved, and the first three
        keys must have resolved at least 0.5 seconds before it.
        """
        with create_actor_pool(n_process=1, backend='gevent') as pool:
            pool.create_actor(MockExecutionActor,
                              10,
                              uid=ExecutionActor.default_name())
            quota_ref = pool.create_actor(QuotaActor,
                                          30,
                                          uid=MemQuotaActor.default_name())
            pool.create_actor(TaskQueueActor,
                              4,
                              uid=TaskQueueActor.default_name())

            session_id = str(uuid.uuid4())
            chunk_keys = [str(uuid.uuid4()).replace('-', '') for _ in range(6)]

            with self.run_actor_test(pool) as test_actor:
                queue_ref = test_actor.promise_ref(
                    TaskQueueActor.default_name())
                # chunk key -> wall-clock time at which its enqueue promise resolved
                res_times = dict()

                def callback_fun(key):
                    res_times[key] = time.time()

                for idx, k in enumerate(chunk_keys):
                    depth = len(chunk_keys) - idx
                    queue_ref.enqueue_task(session_id, k, dict(depth=depth), _promise=True) \
                        .then(functools.partial(callback_fun, k))

                gevent.sleep(1)
                self.assertEqual(queue_ref.get_allocated_count(), 4)

                queue_ref.update_priority(session_id, chunk_keys[-1],
                                          dict(depth=len(chunk_keys)))
                quota_ref.release_quota(chunk_keys[0])
                queue_ref.release_task(session_id, chunk_keys[0])
                quota_ref.release_quota(chunk_keys[1])
                queue_ref.release_task(session_id, chunk_keys[1])
                gevent.sleep(0.5)

                self.assertIn(chunk_keys[-1], res_times)
                for k in chunk_keys[:3]:
                    # membership first: a missing callback must fail as an
                    # assertion rather than as a KeyError on the lookup below
                    self.assertIn(k, res_times)
                    self.assertLessEqual(res_times[k],
                                         res_times[chunk_keys[-1]] - 0.5)
예제 #24
0
    def testFailoverMessage(self):
        """Check dead-worker detection, graph notification and worker blacklisting.

        A worker whose ``update_time`` is older than
        ``options.scheduler.status_timeout`` is registered, dead-worker
        detection is triggered, and the mocked graph actor must report that
        worker as removed together with the chunk it held.  The worker must
        stay out of the workers meta when it reports again immediately, and
        be accepted again once it reports after the blacklist time of 0.5
        seconds configured by this test.

        The global ``worker_blacklist_time`` option is restored when the test
        ends so that the override does not leak into other tests.
        """
        mock_session_id = str(uuid.uuid4())
        mock_graph_key = str(uuid.uuid4())
        mock_chunk_key = str(uuid.uuid4())
        addr = '127.0.0.1:%d' % get_next_port()
        mock_worker_addr = '127.0.0.1:54132'

        original_blacklist_time = options.scheduler.worker_blacklist_time

        def _restore_blacklist_time():
            options.scheduler.worker_blacklist_time = original_blacklist_time

        # registered before the override so the option is reset on any outcome
        self.addCleanup(_restore_blacklist_time)
        options.scheduler.worker_blacklist_time = 0.5

        with create_actor_pool(n_process=1, backend='gevent', address=addr) as pool:
            pool.create_actor(ClusterInfoActor, [pool.cluster_info.address],
                              uid=ClusterInfoActor.default_name())
            pool.create_actor(AssignerActor, uid=AssignerActor.default_name())
            session_manager_ref = pool.create_actor(
                SessionManagerActor, uid=SessionManagerActor.default_name())
            resource_ref = pool.create_actor(ResourceActor, uid=ResourceActor.default_name())
            chunk_meta_ref = pool.create_actor(
                ChunkMetaActor, uid=ChunkMetaActor.default_name())

            session_ref = pool.actor_ref(session_manager_ref.create_session(mock_session_id))
            chunk_meta_ref.set_chunk_meta(mock_session_id, mock_chunk_key,
                                          size=80, shape=(10,), workers=(mock_worker_addr,))

            with mock.patch(GraphActor.__module__ + '.' + GraphActor.__name__, new=MockGraphActor):
                session_ref.submit_tensor_graph(None, mock_graph_key)
                graph_ref = pool.actor_ref(GraphActor.gen_name(mock_session_id, mock_graph_key))

                # register the worker with an update time that is already expired
                expire_time = time.time() - options.scheduler.status_timeout - 1
                resource_ref.set_worker_meta(mock_worker_addr, dict(update_time=expire_time))

                resource_ref.detect_dead_workers(_tell=True)
                pool.sleep(0.2)

                _, removes, lost_chunks = graph_ref.get_worker_change_args()
                self.assertListEqual(removes, [mock_worker_addr])
                self.assertListEqual(lost_chunks, [mock_chunk_key])

                # still inside the blacklist window: a fresh report is ignored
                self.assertNotIn(mock_worker_addr, resource_ref.get_workers_meta())
                resource_ref.set_worker_meta(mock_worker_addr, dict(update_time=time.time()))
                self.assertNotIn(mock_worker_addr, resource_ref.get_workers_meta())

                # 0.2 + 0.4 seconds exceeds the 0.5 second blacklist time
                pool.sleep(0.4)
                resource_ref.set_worker_meta(mock_worker_addr, dict(update_time=time.time()))
                self.assertIn(mock_worker_addr, resource_ref.get_workers_meta())
예제 #25
0
    def testMemQuotaAllocation(self):
        """Check that a quota request resolves only after the mocked memory grows.

        ``resource.virtual_memory`` is patched to return a mutable stats
        object.  A request for 100 is issued while the stats report 50
        available; after 0.5 seconds the stats are raised to 150.  The time
        between issuing the request and its callback must exceed 0.4 seconds.
        """
        from mars import resource
        from mars.utils import AttributeDict

        mock_mem_stat = AttributeDict({'total': 300, 'available': 50, 'used': 0, 'free': 50})
        local_pool_addr = 'localhost:%d' % get_next_port()

        with create_actor_pool(n_process=1, backend='gevent', address=local_pool_addr) as pool, \
                patch_method(resource.virtual_memory, new=lambda: mock_mem_stat):
            pool.create_actor(WorkerClusterInfoActor, schedulers=[local_pool_addr],
                              uid=WorkerClusterInfoActor.default_name())
            pool.create_actor(StatusActor, local_pool_addr, uid=StatusActor.default_name())

            pool.create_actor(DispatchActor, uid=DispatchActor.default_name())
            pool.create_actor(ProcessHelperActor, uid=ProcessHelperActor.default_name())
            quota_ref = pool.create_actor(MemQuotaActor, 300, refresh_time=0.1,
                                          uid=MemQuotaActor.default_name())

            # [time the request was issued, time its callback ran]
            time_recs = []
            with self.run_actor_test(pool) as test_actor:
                ref = test_actor.promise_ref(quota_ref)
                time_recs.append(time.time())

                def actual_exec(x):
                    # release the quota, stamp the time, then finish the actor test
                    ref.release_quota(x)
                    time_recs.append(time.time())
                    test_actor.set_result(None)

                on_granted = functools.partial(actual_exec, 'req')
                ref.request_quota('req', 100, _promise=True).then(on_granted)

                pool.sleep(0.5)
                # raise the mocked memory figures above the requested size
                for field in ('available', 'free'):
                    mock_mem_stat[field] = 150

                self.get_result(2)

            elapsed = abs(time_recs[0] - time_recs[1])
            self.assertGreater(elapsed, 0.4)
예제 #26
0
파일: test_main.py 프로젝트: ai-driven/mars
    def testExecuteWorker(self):
        """Start a ``mars.worker`` subprocess and run the worker test actor against it.

        A mock scheduler pool is created in-process, the worker is launched
        with that pool as its only scheduler, and the test polls
        ``WorkerProcessTestActor.get_reply`` for up to 20 seconds.

        Cleanup always runs: a still-running worker receives SIGINT, is given
        up to 5 seconds to exit and is killed otherwise, and the plasma
        socket file is removed if present.

        Raises:
            TimeoutError: if no reply arrives within 20 seconds.
        """
        mock_scheduler_addr = '127.0.0.1:%d' % get_next_port()
        # Bound before ``try`` so the ``finally`` clause can tell whether the
        # worker was ever started; otherwise a failure during pool setup
        # surfaced as a NameError that hid the real error.
        proc = None
        try:
            with create_actor_pool(n_process=1,
                                   backend='gevent',
                                   address=mock_scheduler_addr) as pool:
                pool.create_actor(SchedulerClusterInfoActor,
                                  schedulers=[mock_scheduler_addr],
                                  uid=SchedulerClusterInfoActor.default_name())

                pool.create_actor(ChunkMetaActor,
                                  uid=ChunkMetaActor.default_name())
                resource_ref = pool.create_actor(
                    ResourceActor, uid=ResourceActor.default_name())

                proc = subprocess.Popen([
                    sys.executable, '-m', 'mars.worker', '-a', '127.0.0.1',
                    '--schedulers', mock_scheduler_addr, '--cpu-procs', '1',
                    '--cache-mem', '10m', '--spill-dir', self._spill_dir,
                    '--ignore-avail-mem'
                ])
                worker_endpoint = self._wait_worker_ready(proc, resource_ref)

                test_ref = pool.create_actor(WorkerProcessTestActor)
                test_ref.run_test(worker_endpoint, _tell=True)

                check_time = time.time()
                while not test_ref.get_reply():
                    gevent.sleep(0.1)
                    if time.time() - check_time > 20:
                        raise TimeoutError('Check reply timeout')
        finally:
            if proc is not None and proc.poll() is None:
                # ask the worker to stop, wait up to 5 seconds, then force it
                proc.send_signal(signal.SIGINT)
                check_time = time.time()
                while True:
                    time.sleep(0.1)
                    if proc.poll(
                    ) is not None or time.time() - check_time >= 5:
                        break
                if proc.poll() is None:
                    proc.kill()
            if os.path.exists(options.worker.plasma_socket):
                os.unlink(options.worker.plasma_socket)
예제 #27
0
    def testSendTargets(self):
        """Execute ``ones + array`` with a send address and validate the stored chunk.

        The graph is enqueued with ``send_addresses`` mapping the result
        chunk key to the pool's own address.  Once execution finishes, the
        chunk read from the test actor's chunk store must equal
        ``mock_data + 1``.
        """
        pool_address = '127.0.0.1:%d' % get_next_port()
        session_id = str(uuid.uuid4())
        mock_data = np.array([1, 2, 3, 4])

        with create_actor_pool(n_process=1, backend='gevent', address=pool_address,
                               distributor=WorkerDistributor(2)) as pool:
            self.create_standard_actors(pool, pool_address,
                                        with_daemon=False, with_status=False)
            pool.create_actor(CpuCalcActor)

            import mars.tensor as mt
            arr = mt.ones((4, ), chunk_size=4)
            arr_add = mt.array(mock_data)
            result_tensor = arr + arr_add
            graph = result_tensor.build_graph(compose=False, tiled=True)
            result_key = result_tensor.chunks[0].key

            pool.create_actor(MockSenderActor, mock_data + np.ones((4, )), 'out',
                              uid='w:mock_sender')

            with self.run_actor_test(pool) as test_actor:

                def _validate(_):
                    # compare the stored result chunk with the expected sum
                    stored = test_actor._chunk_store.get(session_id, result_key)
                    assert_array_equal(stored, mock_data + np.ones((4, )))

                graph_key = str(uuid.uuid4())
                execution_ref = test_actor.promise_ref(ExecutionActor.default_name())

                def _start(*_):
                    return execution_ref.start_execution(session_id, graph_key, _promise=True)

                def _succeed(*_):
                    return test_actor.set_result(None)

                def _fail(*exc):
                    return test_actor.set_result(exc, False)

                execution_ref.enqueue_graph(
                    session_id, graph_key, serialize_graph(graph),
                    dict(chunks=[result_key]), None,
                    send_addresses={result_key: (pool_address,)}, _promise=True) \
                    .then(_start) \
                    .then(_validate) \
                    .then(_succeed) \
                    .catch(_fail)

            self.get_result()
예제 #28
0
파일: test_main.py 프로젝트: Haxine/mars-1
    def _start_worker_process(self, no_cuda=True, cuda_device=None):
        """Start a ``mars.worker`` subprocess and yield ``(pool, worker_endpoint)``.

        This is a generator with a single ``yield``; it is presumably wrapped
        by a context-manager decorator outside this view -- TODO confirm.

        Args:
            no_cuda: when true, ``--no-cuda`` is appended to the worker
                command line.
            cuda_device: value assigned to ``CUDA_VISIBLE_DEVICES`` in the
                worker's environment when ``no_cuda`` is false.

        Yields:
            The in-process mock scheduler pool and the endpoint returned by
            ``_wait_worker_ready``.

        Cleanup always runs: a still-running worker receives SIGINT, is given
        up to 5 seconds to exit and is killed otherwise, and the plasma
        socket file is removed if present.
        """
        mock_scheduler_addr = '127.0.0.1:%d' % get_next_port()
        # Bound before ``try`` so the ``finally`` clause can tell whether the
        # worker was ever started; otherwise a failure during pool setup
        # surfaced as a NameError that hid the real error.
        proc = None
        try:
            with create_actor_pool(n_process=1,
                                   backend='gevent',
                                   address=mock_scheduler_addr) as pool:
                pool.create_actor(SchedulerClusterInfoActor,
                                  [mock_scheduler_addr],
                                  uid=SchedulerClusterInfoActor.default_uid())

                pool.create_actor(ChunkMetaActor,
                                  uid=ChunkMetaActor.default_uid())
                resource_ref = pool.create_actor(
                    ResourceActor, uid=ResourceActor.default_uid())

                args = [
                    sys.executable, '-m', 'mars.worker', '-a', '127.0.0.1',
                    '--schedulers', mock_scheduler_addr, '--cpu-procs', '1',
                    '--cache-mem', '10m', '--spill-dir', self._spill_dir,
                    '--ignore-avail-mem'
                ]
                env = os.environ.copy()
                if no_cuda:
                    args.append('--no-cuda')
                else:
                    env['CUDA_VISIBLE_DEVICES'] = cuda_device
                proc = subprocess.Popen(args, env=env)
                worker_endpoint = self._wait_worker_ready(proc, resource_ref)

                yield pool, worker_endpoint
        finally:
            if proc is not None and proc.poll() is None:
                # ask the worker to stop, wait up to 5 seconds, then force it
                proc.send_signal(signal.SIGINT)
                check_time = time.time()
                while True:
                    time.sleep(0.1)
                    if proc.poll(
                    ) is not None or time.time() - check_time >= 5:
                        break
                if proc.poll() is None:
                    proc.kill()
            if os.path.exists(options.worker.plasma_socket):
                os.unlink(options.worker.plasma_socket)
예제 #29
0
    def testEvents(self, *_):
        """Exercise single, open and context-managed events on ``EventsActor``.

        Covers adding a single event and an open event, querying by category
        after one second, closing an event (twice), pickling a queried event,
        and the ``EventContext`` helper.
        """
        mock_scheduler_addr = '127.0.0.1:%d' % get_next_port()
        with create_actor_pool(n_process=1, backend='gevent',
                               address=mock_scheduler_addr) as pool:
            events_ref = pool.create_actor(EventsActor)

            single_event = events_ref.add_single_event(
                EventCategory.RESOURCE, EventLevel.WARNING,
                ResourceEventType.MEM_HIGH, 'test_owner')
            self.assertIsNotNone(single_event)

            open_event = events_ref.add_open_event(
                EventCategory.PROCEDURE, EventLevel.NORMAL,
                ProcedureEventType.CPU_CALC, 'test_owner2')
            self.assertIsNotNone(open_event)

            time.sleep(1)

            # after the pause: no RESOURCE events are returned, one PROCEDURE event is
            resource_events = events_ref.query_by_time(EventCategory.RESOURCE)
            self.assertEqual(len(resource_events), 0)
            proc_events = events_ref.query_by_time(EventCategory.PROCEDURE)
            self.assertEqual(len(proc_events), 1)

            events_ref.close_event(open_event)
            proc_events = events_ref.query_by_time(EventCategory.PROCEDURE)
            first_event = proc_events[0]
            self.assertGreater(first_event.time_end, first_event.time_start)

            # repeated closing shall not cause any problems
            events_ref.close_event(open_event)

            # events must survive a pickle round trip
            reloaded = pickle.loads(pickle.dumps(proc_events[0]))
            self.assertEqual(reloaded.event_id, proc_events[0].event_id)

            event_ctx = EventContext(events_ref, EventCategory.PROCEDURE,
                                     EventLevel.NORMAL, ProcedureEventType.CPU_CALC,
                                     'test_owner3')
            with event_ctx:
                proc_events = events_ref.query_by_time(EventCategory.PROCEDURE)
                self.assertIsNone(proc_events[-1].time_end)
            # checked on the list fetched inside the context, not on a fresh query
            self.assertIsNotNone(proc_events[-1].time_end)
예제 #30
0
    def testBatchQuota(self):
        """Run two ``BatchQuotaTestActor`` instances against a ``QuotaActor`` of 300.

        Each actor is asked to step through two keys derived from its
        reference's ``id``.  Their end times must differ by more than 0.9
        seconds, and the quota actor must report an allocated size of 0
        afterwards.
        """
        with create_actor_pool() as pool:
            quota_ref = pool.create_actor(QuotaActor, 300, uid='QuotaActor')
            test_refs = []
            for _ in range(2):
                test_refs.append(pool.create_actor(BatchQuotaTestActor, 100))

            def collect_end_times():
                for actor_ref in test_refs:
                    prefix = str(id(actor_ref))
                    actor_ref.mock_step([prefix + '_0', prefix + '_1'])
                gevent.sleep(3)
                return [actor_ref.get_end_time() for actor_ref in test_refs]

            runner = gevent.spawn(collect_end_times)
            runner.join()
            first_end, second_end = runner.value[0], runner.value[1]
            self.assertGreater(abs(first_end - second_end), 0.9)

            self.assertEqual(quota_ref.get_allocated_size(), 0)