Ejemplo n.º 1
0
    def setUp(self):
        """Spawn a scheduler and a worker subprocess for an integration test
        and block until the worker has registered with the scheduler.
        """
        scheduler_port = str(get_next_port())
        proc_worker = subprocess.Popen([
            sys.executable, '-m', 'mars.worker', '-a', '127.0.0.1',
            '--cpu-procs', '2', '--level', 'debug', '--cache-mem', '16m',
            '--schedulers', '127.0.0.1:' + scheduler_port, '--ignore-avail-mem'
        ])
        proc_scheduler = subprocess.Popen([
            sys.executable, '-m', 'mars.scheduler', '-H', '127.0.0.1',
            '--level', 'debug', '-p', scheduler_port, '--format',
            '%(asctime)-15s %(message)s'
        ])

        self.scheduler_port = scheduler_port
        self.proc_worker = proc_worker
        self.proc_scheduler = proc_scheduler

        # wait until the scheduler's ResourceActor is reachable (10s budget)
        time.sleep(2)
        actor_client = new_client()
        check_time = time.time()
        while True:
            try:
                resource_ref = actor_client.actor_ref(
                    ResourceActor.default_name(),
                    address='127.0.0.1:' + scheduler_port)
                if actor_client.has_actor(resource_ref):
                    break
                else:
                    raise SystemError('Check ResourceActor timeout')
            except Exception:  # was bare `except:`, which also swallowed KeyboardInterrupt
                if time.time() - check_time > 10:
                    raise
                time.sleep(1)

        # wait until at least one worker has registered (20s budget)
        check_time = time.time()
        while True:
            if not resource_ref.get_worker_count():
                time.sleep(0.5)
                self.check_process_statuses()
                if time.time() - check_time > 20:
                    raise SystemError('Check worker count timeout')
            else:
                break

        # make gevent's hub swallow Exception instead of printing tracebacks;
        # the original tuple is saved so it can be restored later
        # (presumably in tearDown — confirm)
        self.exceptions = gevent.hub.Hub.NOT_ERROR
        gevent.hub.Hub.NOT_ERROR = (Exception, )
Ejemplo n.º 2
0
    def testLoadStoreInOtherProcess(self):
        """Data stored from one pool process must be loadable from another:
        run the global->proc and proc->global copy tests on the test actor
        and wait for each of them to report a result.
        """
        test_addr = '127.0.0.1:%d' % get_next_port()
        with self.create_pool(n_process=2,
                              address=test_addr,
                              distributor=MarsDistributor(2)) as pool:
            pool.create_actor(WorkerDaemonActor,
                              uid=WorkerDaemonActor.default_uid())
            pool.create_actor(StorageManagerActor,
                              uid=StorageManagerActor.default_uid())

            pool.create_actor(DispatchActor, uid=DispatchActor.default_uid())

            pool.create_actor(QuotaActor,
                              1024**2,
                              uid=MemQuotaActor.default_uid())

            pool.create_actor(PlasmaKeyMapActor,
                              uid=PlasmaKeyMapActor.default_uid())
            pool.create_actor(SharedHolderActor,
                              self.plasma_storage_size,
                              uid=SharedHolderActor.default_uid())

            # in-proc holder and IO runner live in process 1, the test actor
            # in process 0, so copies really cross a process boundary
            pool.create_actor(InProcHolderActor, uid='w:1:InProcHolderActor')
            pool.create_actor(IORunnerActor,
                              lock_free=True,
                              dispatched=False,
                              uid=IORunnerActor.gen_uid(1))

            test_ref = pool.create_actor(OtherProcessTestActor,
                                         uid='w:0:OtherProcTest')

            def _wait_test_result(timeout=10):
                # poll the test actor until it publishes a result; the test
                # methods are fired with _tell so they run asynchronously
                start_time = time.time()
                while test_ref.get_result() is None:
                    pool.sleep(0.5)
                    if time.time() - start_time > timeout:
                        raise TimeoutError

            test_ref.run_copy_global_to_proc_test(_tell=True)
            _wait_test_result()

            test_ref.run_copy_proc_to_global_test(_tell=True)
            _wait_test_result()
Ejemplo n.º 3
0
async def actor_pool():
    """Fixture: start a local actor pool hosting a WebActor that serves
    TestAPIHandler, and yield ``(pool, web_port)``.
    """
    if sys.platform == 'win32':
        start_method = None
    else:
        start_method = os.environ.get('POOL_START_METHOD', 'forkserver')
    pool = await mo.create_actor_pool(
        '127.0.0.1', n_process=0, subprocess_start_method=start_method)
    async with pool:
        web_config = {
            'host': '127.0.0.1',
            'port': get_next_port(),
            'web_handlers': {
                TestAPIHandler.get_root_pattern(): TestAPIHandler
            },
        }
        await mo.create_actor(WebActor, web_config,
                              address=pool.external_address)
        yield pool, web_config['port']
Ejemplo n.º 4
0
    def testMemQuotaAllocation(self):
        """A quota request exceeding currently-free system memory must stay
        pending until the MemQuotaActor's periodic refresh observes enough
        free memory, then be granted.
        """
        from mars import resource
        from mars.utils import AttributeDict

        # mocked psutil-style memory stats: only 50 of 300 free at first
        mock_mem_stat = AttributeDict(
            dict(total=300, available=50, used=0, free=50))
        local_pool_addr = 'localhost:%d' % get_next_port()
        with create_actor_pool(n_process=1, backend='gevent', address=local_pool_addr) as pool, \
                patch_method(resource.virtual_memory, new=lambda: mock_mem_stat):
            pool.create_actor(WorkerClusterInfoActor,
                              schedulers=[local_pool_addr],
                              uid=WorkerClusterInfoActor.default_name())
            pool.create_actor(StatusActor,
                              local_pool_addr,
                              uid=StatusActor.default_name())

            pool.create_actor(DispatchActor, uid=DispatchActor.default_name())
            pool.create_actor(ProcessHelperActor,
                              uid=ProcessHelperActor.default_name())
            # total quota 300, re-checks system memory every 0.1s
            quota_ref = pool.create_actor(MemQuotaActor,
                                          300,
                                          refresh_time=0.1,
                                          uid=MemQuotaActor.default_name())

            time_recs = []
            with self.run_actor_test(pool) as test_actor:
                ref = test_actor.promise_ref(quota_ref)
                time_recs.append(time.time())

                def actual_exec(x):
                    # runs only once the quota request has been granted
                    ref.release_quota(x)
                    time_recs.append(time.time())
                    test_actor.set_result(None)

                ref.request_quota('req', 100, _promise=True) \
                    .then(functools.partial(actual_exec, 'req'))

                # keep the request blocked for a while, then raise the
                # mocked free memory so the refresh can grant it
                pool.sleep(0.5)
                mock_mem_stat['available'] = 150
                mock_mem_stat['free'] = 150

                self.get_result(2)

            # the grant happened only after the 0.5s low-memory window
            self.assertGreater(abs(time_recs[0] - time_recs[1]), 0.4)
Ejemplo n.º 5
0
    def testCudaMemPutAndGet(self):
        """Objects put into CUDA storage must come back as device-side types
        (cupy/cudf), round-trip equal to the originals, for raw and
        pre-serialized puts alike, and deletion must clear the location.
        """
        test_addr = '127.0.0.1:%d' % get_next_port()
        with self.create_pool(n_process=1, address=test_addr) as pool, \
                self.run_actor_test(pool) as test_actor:
            pool.create_actor(WorkerDaemonActor, uid=WorkerDaemonActor.default_uid())
            storage_manager_ref = pool.create_actor(
                StorageManagerActor, uid=StorageManagerActor.default_uid())
            pool.create_actor(QuotaActor, 1024 ** 2, uid=MemQuotaActor.default_uid())
            pool.create_actor(CudaHolderActor)

            raw = np.random.random((10, 10))
            # (host object, expected device type, device->host converter, equality check)
            suites = [
                (raw, cp.ndarray, cp.asnumpy, assert_allclose),
                (pd.Series(raw.flatten()), cudf.Series,
                 lambda o: o.to_pandas(), pd.testing.assert_series_equal),
                (pd.DataFrame(dict(col=raw.flatten())), cudf.DataFrame,
                 lambda o: o.to_pandas(), pd.testing.assert_frame_equal),
            ]

            for obj, device_type, to_host, check_equal in suites:
                serialized = dataserializer.serialize(obj)

                session_id = str(uuid.uuid4())
                key_raw = str(uuid.uuid4())
                key_ser = str(uuid.uuid4())

                storage_client = test_actor.storage_client
                handler = storage_client.get_storage_handler(
                    (0, DataStorageDevice.CUDA))

                # raw put: location is recorded, object returns as device type
                handler.put_objects(session_id, [key_raw], [obj])
                locations = storage_manager_ref.get_data_locations(
                    session_id, [key_raw])[0]
                self.assertEqual(sorted(locations),
                                 [(0, DataStorageDevice.CUDA)])
                fetched = handler.get_objects(session_id, [key_raw])[0]
                self.assertIsInstance(fetched, device_type)
                check_equal(obj, to_host(fetched))

                # delete clears both the location record and the object
                handler.delete(session_id, [key_raw])
                locations = storage_manager_ref.get_data_locations(
                    session_id, [key_raw])[0]
                self.assertEqual(sorted(locations), [])
                with self.assertRaises(KeyError):
                    handler.get_objects(session_id, [key_raw])

                # pre-serialized put behaves identically
                handler.put_objects(session_id, [key_ser], [serialized],
                                    serialize=True)
                self.assertIsInstance(
                    handler.get_objects(session_id, [key_ser])[0], device_type)
                check_equal(obj, to_host(
                    handler.get_objects(session_id, [key_ser])[0]))
                handler.delete(session_id, [key_ser])
Ejemplo n.º 6
0
    def testSendTargets(self):
        """After executing a graph, the execution actor must push the result
        chunk to the addresses passed via ``send_addresses``.
        """
        pool_address = '127.0.0.1:%d' % get_next_port()
        session_id = str(uuid.uuid4())
        mock_data = np.array([1, 2, 3, 4])
        with create_actor_pool(n_process=1,
                               backend='gevent',
                               address=pool_address,
                               distributor=WorkerDistributor(2)) as pool:
            self.create_standard_actors(pool,
                                        pool_address,
                                        with_daemon=False,
                                        with_status=False)
            pool.create_actor(CpuCalcActor)

            # build a tiny tiled graph: ones((4,)) + mock_data
            import mars.tensor as mt
            arr = mt.ones((4, ), chunk_size=4)
            arr_add = mt.array(mock_data)
            result_tensor = arr + arr_add
            graph = result_tensor.build_graph(compose=False, tiled=True)
            result_key = result_tensor.chunks[0].key

            # mock sender primed with the expected outgoing payload
            # (presumably verifies what gets pushed out — see MockSenderActor)
            pool.create_actor(MockSenderActor,
                              mock_data + np.ones((4, )),
                              'out',
                              uid='w:mock_sender')
            with self.run_actor_test(pool) as test_actor:

                def _validate(_):
                    # the executed chunk must equal ones((4,)) + mock_data
                    data = test_actor._chunk_store.get(
                        session_id, result_tensor.chunks[0].key)
                    assert_array_equal(data, mock_data + np.ones((4, )))

                graph_key = str(uuid.uuid4())
                execution_ref = test_actor.promise_ref(
                    ExecutionActor.default_name())
                # enqueue with send_addresses so the finished result is
                # pushed to pool_address, then start, validate, and report
                execution_ref.enqueue_graph(session_id, graph_key, serialize_graph(graph),
                                            dict(chunks=[result_tensor.chunks[0].key]), None,
                                            send_addresses={result_key: (pool_address,)}, _promise=True) \
                    .then(lambda *_: execution_ref.start_execution(session_id, graph_key, _promise=True)) \
                    .then(_validate) \
                    .then(lambda *_: test_actor.set_result(None)) \
                    .catch(lambda *exc: test_actor.set_result(exc, False))

            self.get_result()
Ejemplo n.º 7
0
    def testExecuteWorker(self):
        """End-to-end check: a real worker subprocess registers with a mock
        scheduler pool and executes the test graph submitted through
        WorkerProcessTestActor.
        """
        mock_scheduler_addr = '127.0.0.1:%d' % get_next_port()
        # initialize early: if pool/actor setup raises before Popen, the
        # original `finally` block hit NameError on `proc`, masking the error
        proc = None
        try:
            with create_actor_pool(n_process=1,
                                   backend='gevent',
                                   address=mock_scheduler_addr) as pool:
                pool.create_actor(SchedulerClusterInfoActor,
                                  schedulers=[mock_scheduler_addr],
                                  uid=SchedulerClusterInfoActor.default_name())

                pool.create_actor(ChunkMetaActor,
                                  uid=ChunkMetaActor.default_name())
                resource_ref = pool.create_actor(
                    ResourceActor, uid=ResourceActor.default_name())

                proc = subprocess.Popen([
                    sys.executable, '-m', 'mars.worker', '-a', '127.0.0.1',
                    '--schedulers', mock_scheduler_addr, '--cpu-procs', '1',
                    '--cache-mem', '10m', '--spill-dir', self._spill_dir,
                    '--ignore-avail-mem'
                ])
                worker_endpoint = self._wait_worker_ready(proc, resource_ref)

                test_ref = pool.create_actor(WorkerProcessTestActor)
                test_ref.run_test(worker_endpoint, _tell=True)

                # poll for the asynchronous reply (20s budget)
                check_time = time.time()
                while not test_ref.get_reply():
                    gevent.sleep(0.1)
                    if time.time() - check_time > 20:
                        raise TimeoutError('Check reply timeout')
        finally:
            if proc is not None and proc.poll() is None:
                # graceful SIGINT first, force kill after 5 seconds
                proc.send_signal(signal.SIGINT)
                check_time = time.time()
                while True:
                    time.sleep(0.1)
                    if proc.poll() is not None \
                            or time.time() - check_time >= 5:
                        break
                if proc.poll() is None:
                    proc.kill()
            if os.path.exists(options.worker.plasma_socket):
                os.unlink(options.worker.plasma_socket)
Ejemplo n.º 8
0
    def testFailoverMessage(self):
        """When a worker is detected dead, graphs must be notified of the
        removed worker and the chunks lost with it, and the worker must stay
        blacklisted for ``worker_blacklist_time`` before it can re-register.
        """
        mock_session_id = str(uuid.uuid4())
        mock_graph_key = str(uuid.uuid4())
        mock_chunk_key = str(uuid.uuid4())
        addr = '127.0.0.1:%d' % get_next_port()
        mock_worker_addr = '127.0.0.1:54132'

        # shorten the blacklist window so the test can wait it out
        options.scheduler.worker_blacklist_time = 0.5

        with create_actor_pool(n_process=1, backend='gevent', address=addr) as pool:
            pool.create_actor(ClusterInfoActor, [pool.cluster_info.address],
                              uid=ClusterInfoActor.default_name())
            pool.create_actor(AssignerActor, uid=AssignerActor.default_name())
            session_manager_ref = pool.create_actor(
                SessionManagerActor, uid=SessionManagerActor.default_name())
            resource_ref = pool.create_actor(ResourceActor, uid=ResourceActor.default_name())
            chunk_meta_ref = pool.create_actor(
                ChunkMetaActor, uid=ChunkMetaActor.default_name())

            session_ref = pool.actor_ref(session_manager_ref.create_session(mock_session_id))
            # the only replica of this chunk lives on the soon-to-die worker
            chunk_meta_ref.set_chunk_meta(mock_session_id, mock_chunk_key,
                                          size=80, shape=(10,), workers=(mock_worker_addr,))

            with mock.patch(GraphActor.__module__ + '.' + GraphActor.__name__, new=MockGraphActor):
                session_ref.submit_tensor_graph(None, mock_graph_key)
                graph_ref = pool.actor_ref(GraphActor.gen_name(mock_session_id, mock_graph_key))

                # back-date the worker's heartbeat so it looks expired
                expire_time = time.time() - options.scheduler.status_timeout - 1
                resource_ref.set_worker_meta(mock_worker_addr, dict(update_time=expire_time))

                resource_ref.detect_dead_workers(_tell=True)
                pool.sleep(0.2)

                # the mock graph actor recorded the worker-change callback
                _, removes, lost_chunks = graph_ref.get_worker_change_args()
                self.assertListEqual(removes, [mock_worker_addr])
                self.assertListEqual(lost_chunks, [mock_chunk_key])

                # while blacklisted, attempts to re-register are ignored
                self.assertNotIn(mock_worker_addr, resource_ref.get_workers_meta())
                resource_ref.set_worker_meta(mock_worker_addr, dict(update_time=time.time()))
                self.assertNotIn(mock_worker_addr, resource_ref.get_workers_meta())

                # after the 0.5s blacklist period, registration works again
                pool.sleep(0.4)
                resource_ref.set_worker_meta(mock_worker_addr, dict(update_time=time.time()))
                self.assertIn(mock_worker_addr, resource_ref.get_workers_meta())
Ejemplo n.º 9
0
    def _start_worker_process(self, no_cuda=True, cuda_device=None):
        """Start a mock scheduler pool plus a real worker subprocess and
        yield ``(pool, worker_endpoint)``; terminate the worker on exit.

        Args:
            no_cuda: when True the worker is started with ``--no-cuda``;
                otherwise ``cuda_device`` is exported via CUDA_VISIBLE_DEVICES.
            cuda_device: CUDA device id string, used only when no_cuda is False.
        """
        mock_scheduler_addr = '127.0.0.1:%d' % get_next_port()
        # initialize early: if pool/actor setup raises before Popen, the
        # original `finally` block hit NameError on `proc`, masking the error
        proc = None
        try:
            with create_actor_pool(n_process=1,
                                   backend='gevent',
                                   address=mock_scheduler_addr) as pool:
                pool.create_actor(SchedulerClusterInfoActor,
                                  [mock_scheduler_addr],
                                  uid=SchedulerClusterInfoActor.default_uid())

                pool.create_actor(ChunkMetaActor,
                                  uid=ChunkMetaActor.default_uid())
                resource_ref = pool.create_actor(
                    ResourceActor, uid=ResourceActor.default_uid())

                args = [
                    sys.executable, '-m', 'mars.worker', '-a', '127.0.0.1',
                    '--schedulers', mock_scheduler_addr, '--cpu-procs', '1',
                    '--cache-mem', '10m', '--spill-dir', self._spill_dir,
                    '--ignore-avail-mem'
                ]
                env = os.environ.copy()
                if no_cuda:
                    args.append('--no-cuda')
                else:
                    env['CUDA_VISIBLE_DEVICES'] = cuda_device
                proc = subprocess.Popen(args, env=env)
                worker_endpoint = self._wait_worker_ready(proc, resource_ref)

                yield pool, worker_endpoint
        finally:
            if proc is not None and proc.poll() is None:
                # graceful SIGINT first, force kill after 5 seconds
                proc.send_signal(signal.SIGINT)
                check_time = time.time()
                while True:
                    time.sleep(0.1)
                    if proc.poll() is not None \
                            or time.time() - check_time >= 5:
                        break
                if proc.poll() is None:
                    proc.kill()
            if os.path.exists(options.worker.plasma_socket):
                os.unlink(options.worker.plasma_socket)
Ejemplo n.º 10
0
    def testReExecuteExisting(self):
        """Submitting ``execute_graph`` twice with the SAME graph key must
        succeed both times and yield the same result chunk.
        """
        pool_address = '127.0.0.1:%d' % get_next_port()
        session_id = str(uuid.uuid4())
        mock_data = np.array([1, 2, 3, 4])
        with create_actor_pool(n_process=1, backend='gevent',
                               address=pool_address, distributor=MarsDistributor(2, 'w:0:')) as pool:
            self.create_standard_actors(pool, pool_address, with_daemon=False, with_status=False)
            pool.create_actor(CpuCalcActor, uid='w:1:cpu-calc')
            pool.create_actor(InProcHolderActor, uid='w:1:inproc-holder')

            # build a tiny tiled graph: ones((4,)) + mock_data
            import mars.tensor as mt
            arr = mt.ones((4,), chunk_size=4)
            arr_add = mt.array(mock_data)
            result_tensor = arr + arr_add
            graph = result_tensor.build_graph(compose=False, tiled=True)

            result_tensor = get_tiled(result_tensor)

            def _validate(_):
                # the computed chunk must equal ones((4,)) + mock_data;
                # `test_actor` is rebound by each `with` block below
                data = test_actor.shared_store.get(session_id, result_tensor.chunks[0].key)
                assert_array_equal(data, mock_data + np.ones((4,)))

            # first execution with a fresh graph key
            with self.run_actor_test(pool) as test_actor:
                graph_key = str(uuid.uuid4())
                execution_ref = test_actor.promise_ref(ExecutionActor.default_uid())
                execution_ref.execute_graph(session_id, graph_key, serialize_graph(graph),
                                            dict(chunks=[result_tensor.chunks[0].key]), None, _promise=True) \
                    .then(_validate) \
                    .then(lambda *_: test_actor.set_result(None)) \
                    .catch(lambda *exc: test_actor.set_result(exc, False))

            self.get_result()

            # second execution reuses the SAME graph key on purpose
            with self.run_actor_test(pool) as test_actor:
                execution_ref = test_actor.promise_ref(ExecutionActor.default_uid())
                execution_ref.execute_graph(session_id, graph_key, serialize_graph(graph),
                                            dict(chunks=[result_tensor.chunks[0].key]), None, _promise=True) \
                    .then(_validate) \
                    .then(lambda *_: test_actor.set_result(None)) \
                    .catch(lambda *exc: test_actor.set_result(exc, False))

            self.get_result()
Ejemplo n.º 11
0
    def testWorkerProcessRestart(self):
        """Killing a worker's CPU calc process through the daemon must lead
        to that process being restarted within a few seconds.
        """
        mock_scheduler_addr = '127.0.0.1:%d' % get_next_port()
        # initialize early: if pool/actor setup raises before Popen, the
        # original `finally` block hit NameError on `proc`, masking the error
        proc = None
        try:
            with create_actor_pool(n_process=1, backend='gevent',
                                   address=mock_scheduler_addr) as pool:
                pool.create_actor(ClusterInfoActor, schedulers=[mock_scheduler_addr],
                                  uid=ClusterInfoActor.default_name())
                pool.create_actor(ChunkMetaActor, uid=ChunkMetaActor.default_name())
                resource_ref = pool.create_actor(ResourceActor, uid=ResourceActor.default_name())

                proc = subprocess.Popen([sys.executable, '-m', 'mars.worker',
                                         '-a', '127.0.0.1',
                                         '--schedulers', mock_scheduler_addr,
                                         '--cpu-procs', '1',
                                         '--cache-mem', '10m',
                                         '--spill-dir', self._spill_dir,
                                         '--ignore-avail-mem'])
                worker_endpoint = self._wait_worker_ready(proc, resource_ref)

                # locate the worker's calc actor and kill its host process
                daemon_ref = pool.actor_ref(WorkerDaemonActor.default_name(), address=worker_endpoint)
                dispatch_ref = pool.actor_ref(DispatchActor.default_name(), address=worker_endpoint)
                cpu_slots = dispatch_ref.get_slots('cpu')
                calc_ref = pool.actor_ref(cpu_slots[0], address=worker_endpoint)
                daemon_ref.kill_actor_process(calc_ref)

                # the daemon should bring the process back within 10 seconds
                check_start = time.time()
                while not daemon_ref.is_actor_process_alive(calc_ref):
                    gevent.sleep(0.1)
                    if time.time() - check_start > 10:
                        raise TimeoutError('Check process restart timeout')
        finally:
            if proc is not None and proc.poll() is None:
                # graceful SIGINT first, force kill after 5 seconds
                proc.send_signal(signal.SIGINT)
                check_time = time.time()
                while True:
                    time.sleep(0.1)
                    if proc.poll() is not None or time.time() - check_time >= 5:
                        break
                if proc.poll() is None:
                    proc.kill()
            if os.path.exists(options.worker.plasma_socket):
                os.unlink(options.worker.plasma_socket)
Ejemplo n.º 12
0
    def testEvents(self, *_):
        """Single events, open/close events, time-based queries, repeated
        closing, pickling, and the EventContext manager all behave sanely.
        """
        mock_scheduler_addr = '127.0.0.1:%d' % get_next_port()
        with create_actor_pool(n_process=1,
                               backend='gevent',
                               address=mock_scheduler_addr) as pool:
            events_ref = pool.create_actor(EventsActor)

            # an instantaneous resource event
            single_ev = events_ref.add_single_event(
                EventCategory.RESOURCE, EventLevel.WARNING,
                ResourceEventType.MEM_HIGH, 'test_owner')
            self.assertIsNotNone(single_ev)

            # an open-ended procedure event, closed further below
            open_ev = events_ref.add_open_event(
                EventCategory.PROCEDURE, EventLevel.NORMAL,
                ProcedureEventType.CPU_CALC, 'test_owner2')
            self.assertIsNotNone(open_ev)

            time.sleep(1)

            # after 1s the finished single event no longer shows up in the
            # time query, while the still-open procedure event does
            queried = events_ref.query_by_time(EventCategory.RESOURCE)
            self.assertEqual(len(queried), 0)
            queried = events_ref.query_by_time(EventCategory.PROCEDURE)
            self.assertEqual(len(queried), 1)

            events_ref.close_event(open_ev)
            queried = events_ref.query_by_time(EventCategory.PROCEDURE)
            self.assertGreater(queried[0].time_end, queried[0].time_start)

            # repeated closing shall not cause any problems
            events_ref.close_event(open_ev)

            # events must survive a pickle round-trip with identity intact
            restored = pickle.loads(pickle.dumps(queried[0]))
            self.assertEqual(restored.event_id, queried[0].event_id)

            # EventContext opens an event on entry and closes it on exit
            with EventContext(events_ref, EventCategory.PROCEDURE,
                              EventLevel.NORMAL, ProcedureEventType.CPU_CALC,
                              'test_owner3'):
                queried = events_ref.query_by_time(EventCategory.PROCEDURE)
                self.assertIsNone(queried[-1].time_end)
            self.assertIsNotNone(queried[-1].time_end)
Ejemplo n.º 13
0
async def actor_pool():
    """Fixture: actor pool hosting a WebActor with the API entry handler,
    the test handler, and extra handlers discovered from a module; yields
    ``(pool, web_port)``.
    """
    if sys.platform == 'win32':
        start_method = None
    else:
        start_method = os.environ.get('POOL_START_METHOD', 'forkserver')
    pool = await mo.create_actor_pool(
        '127.0.0.1', n_process=0, subprocess_start_method=start_method)
    async with pool:
        web_config = {
            'host': '127.0.0.1',
            'port': get_next_port(),
            'web_handlers': {
                '/api': MarsApiEntryHandler,
                TestAPIHandler.get_root_pattern(): TestAPIHandler,
            },
            'extra_discovery_modules': [
                'mars.services.web.tests.extra_handler'
            ]
        }
        await mo.create_actor(WebActor, web_config,
                              address=pool.external_address)
        yield pool, web_config['port']
Ejemplo n.º 14
0
    def testKVStoreActor(self):
        """Single and batched writes through KVStoreActor must be readable
        back, individually and in batch, from the etcd-backed store.
        """
        etcd_port = get_next_port()
        proc_helper = EtcdProcessHelper(port_range_start=etcd_port)
        options.kv_store = 'etcd://127.0.0.1:%s' % etcd_port
        with proc_helper.run(), create_actor_pool(n_process=1,
                                                  backend='gevent') as pool:
            kv_ref = pool.create_actor(KVStoreActor,
                                       uid=KVStoreActor.default_name())

            # two single writes, then a batch that overwrites v2 and adds v3
            kv_ref.write('/node/v1', 'value1')
            kv_ref.write('/node/v2', 'value2')
            kv_ref.write_batch([
                ('/node/v2', 'value2'),
                ('/node/v3', 'value3'),
            ])

            self.assertEqual(kv_ref.read('/node/v1').value, 'value1')
            batch_values = [
                item.value for item in kv_ref.read_batch(['/node/v2', '/node/v3'])
            ]
            self.assertListEqual(batch_values, ['value2', 'value3'])
Ejemplo n.º 15
0
async def test_meta_web_api():
    """Set chunk meta through the raw API and read it back via the web API.

    Also checks that a missing key surfaces as ``KeyError`` through the
    web layer.
    """
    pool = await mo.create_actor_pool('127.0.0.1', n_process=0)
    web_port = get_next_port()

    async with pool:
        supervisor_address = pool.external_address
        config = dict(
            services=["cluster", "session", "meta", "web"],
            cluster=dict(
                backend="fixed",
                lookup_address=supervisor_address,
            ),
            meta=dict(store="dict"),
            web=dict(port=web_port),
        )
        await start_services(
            NodeRole.SUPERVISOR, config, address=supervisor_address)

        session_id = 'test_session'
        session_api = await SessionAPI.create(supervisor_address)
        await session_api.create_session(session_id)

        # tile so concrete chunks (and their keys) exist
        t = tile(mt.random.rand(10, 10))

        meta_api = await MetaAPI.create(session_id, supervisor_address)
        web_api = WebMetaAPI(session_id, f'http://localhost:{web_port}')

        await meta_api.set_chunk_meta(
            t.chunks[0], bands=[(supervisor_address, 'numa-0')])
        meta = await web_api.get_chunk_meta(
            t.chunks[0].key, fields=['shape', 'bands'])
        assert set(meta.keys()) == {'shape', 'bands'}

        with pytest.raises(KeyError):
            await web_api.get_chunk_meta('non-exist-key')
Ejemplo n.º 16
0
    def testEmptyGraph(self, *_):
        """An empty DAG should execute and reach SUCCEEDED immediately."""
        session_id = str(uuid.uuid4())
        graph_key = str(uuid.uuid4())

        addr = '127.0.0.1:%d' % get_next_port()
        with create_actor_pool(n_process=1, backend='gevent',
                               address=addr) as pool:
            pool.create_actor(
                SchedulerClusterInfoActor, [pool.cluster_info.address],
                uid=SchedulerClusterInfoActor.default_uid())
            resource_ref = pool.create_actor(
                ResourceActor, uid=ResourceActor.default_uid())
            pool.create_actor(ChunkMetaActor, uid=ChunkMetaActor.default_uid())
            pool.create_actor(AssignerActor, uid=AssignerActor.default_uid())

            # register two mock workers so the scheduler has resources
            for worker_endpoint in ('localhost:12345', 'localhost:23456'):
                resource_ref.set_worker_meta(
                    worker_endpoint, dict(hardware=dict(cpu_total=4)))

            serialized_graph = serialize_graph(DAG())
            graph_ref = pool.create_actor(
                GraphActor, session_id, graph_key, serialized_graph,
                uid=GraphActor.gen_uid(session_id, graph_key))
            graph_ref.execute_graph()
            self.assertEqual(graph_ref.get_state(), GraphState.SUCCEEDED)
Ejemplo n.º 17
0
    def _start_shared_holder_pool(self):
        """Start a pool with the shared-memory holder actor stack and yield
        ``(pool, test_actor)`` for individual tests to use."""
        test_addr = '127.0.0.1:%d' % get_next_port()
        with self.create_pool(n_process=1, address=test_addr) as pool, \
                self.run_actor_test(pool) as test_actor:
            # cluster bookkeeping actors
            pool.create_actor(WorkerClusterInfoActor, [test_addr],
                              uid=WorkerClusterInfoActor.default_uid())
            pool.create_actor(StatusActor, test_addr,
                              uid=StatusActor.default_uid())

            # storage-side actors backing the shared holder
            pool.create_actor(WorkerDaemonActor,
                              uid=WorkerDaemonActor.default_uid())
            pool.create_actor(StorageManagerActor,
                              uid=StorageManagerActor.default_uid())
            pool.create_actor(PlasmaKeyMapActor,
                              uid=PlasmaKeyMapActor.default_uid())
            pool.create_actor(SharedHolderActor, self.plasma_storage_size,
                              uid=SharedHolderActor.default_uid())

            yield pool, test_actor
Ejemplo n.º 18
0
    def testStatus(self):
        """Status uploads should make worker metadata visible through the
        resource actor after at least one upload cycle."""
        pool_address = '127.0.0.1:%d' % get_next_port()
        old_spill_dir = options.worker.spill_directory
        dir_name = options.worker.spill_directory = tempfile.mkdtemp(
            prefix='temp-mars-spill-')
        try:
            with create_actor_pool(n_process=1, backend='gevent',
                                   address=pool_address) as pool:
                pool.create_actor(SchedulerClusterInfoActor, [pool_address],
                                  uid=SchedulerClusterInfoActor.default_uid())
                pool.create_actor(WorkerClusterInfoActor, [pool_address],
                                  uid=WorkerClusterInfoActor.default_uid())

                resource_ref = pool.create_actor(
                    ResourceActor, uid=ResourceActor.default_uid())
                pool.create_actor(SharedHolderActor, self.plasma_storage_size,
                                  uid=SharedHolderActor.default_uid())
                status_ref = pool.create_actor(
                    StatusActor, pool_address, uid=StatusActor.default_uid())
                status_ref.enable_status_upload()

                status_ref.update_slots(dict(cpu=4))
                status_ref.update_stats(dict(min_est_finish_time=10))

                # wait long enough for at least one upload before reading
                def delay_read():
                    gevent.sleep(1.5)
                    return resource_ref.get_workers_meta()

                greenlet = gevent.spawn(delay_read)
                greenlet.join()
                self.assertIsNotNone(greenlet.value)

                pool.destroy_actor(status_ref)
        finally:
            # restore global options and clean up the temp spill directory
            options.worker.spill_directory = old_spill_dir
            shutil.rmtree(dir_name)
Ejemplo n.º 19
0
    def testSharedPutAndGet(self, *_):
        """Round-trip objects through the shared-memory handler, covering
        raw values, pre-serialized objects and raw buffers, plus deletion."""
        test_addr = '127.0.0.1:%d' % get_next_port()
        with self.create_pool(n_process=1, address=test_addr) as pool, \
                self.run_actor_test(pool) as test_actor:
            pool.create_actor(WorkerDaemonActor,
                              uid=WorkerDaemonActor.default_uid())
            storage_manager_ref = pool.create_actor(
                StorageManagerActor, uid=StorageManagerActor.default_uid())

            pool.create_actor(PlasmaKeyMapActor,
                              uid=PlasmaKeyMapActor.default_uid())
            pool.create_actor(SharedHolderActor,
                              uid=SharedHolderActor.default_uid())

            data1 = np.random.random((10, 10))
            data2 = np.random.random((10, 10))
            ser_data2 = dataserializer.serialize(data2)
            bytes_data2 = ser_data2.to_buffer()

            session_id = str(uuid.uuid4())
            data_key1 = str(uuid.uuid4())
            data_key2 = str(uuid.uuid4())

            storage_client = test_actor.storage_client
            shared_dev = (0, DataStorageDevice.SHARED_MEMORY)
            handler = storage_client.get_storage_handler(shared_dev)

            # plain object round-trip
            handler.put_objects(session_id, [data_key1], [data1])
            locations = storage_manager_ref.get_data_locations(
                session_id, [data_key1])[0]
            self.assertEqual(sorted(locations), [shared_dev])
            assert_allclose(
                data1, handler.get_objects(session_id, [data_key1])[0])

            # deletion removes the location record and the data itself
            handler.delete(session_id, [data_key1])
            locations = storage_manager_ref.get_data_locations(
                session_id, [data_key1])[0]
            self.assertEqual(list(locations), [])
            with self.assertRaises(KeyError):
                handler.get_objects(session_id, [data_key1])

            # pre-serialized object
            handler.put_objects(
                session_id, [data_key2], [ser_data2], serialize=True)
            assert_allclose(
                data2, handler.get_objects(session_id, [data_key2])[0])
            handler.delete(session_id, [data_key2])

            # raw serialized buffer
            handler.put_objects(
                session_id, [data_key2], [bytes_data2], serialize=True)
            assert_allclose(
                data2, handler.get_objects(session_id, [data_key2])[0])
            handler.delete(session_id, [data_key2])
Ejemplo n.º 20
0
    def testKVStoreActor(self):
        """Check read/write/batch/delete semantics of the etcd-backed KV
        store actor, including silent recursive deletes."""
        etcd_port = get_next_port()
        proc_helper = EtcdProcessHelper(port_range_start=etcd_port)
        options.kv_store = f'etcd://127.0.0.1:{etcd_port}'
        with proc_helper.run(), \
                create_actor_pool(n_process=1, backend='gevent') as pool:
            store_ref = pool.create_actor(
                KVStoreActor, uid=KVStoreActor.default_uid())

            # single writes; /node/v2 is rewritten by the batch below
            store_ref.write('/node/v1', 'value1')
            store_ref.write('/node/v2', 'value2')
            store_ref.write_batch([
                ('/node/v2', 'value2'),
                ('/node/v3', 'value3'),
            ])

            self.assertEqual(store_ref.read('/node/v1').value, 'value1')
            batch_values = [
                v.value
                for v in store_ref.read_batch(['/node/v2', '/node/v3'])
            ]
            self.assertListEqual(batch_values, ['value2', 'value3'])

            # recursive dir delete works once, then raises unless silenced
            store_ref.delete('/node', dir=True, recursive=True)
            with self.assertRaises(KeyError):
                store_ref.delete('/node', dir=True, recursive=True)
            store_ref.delete('/node', dir=True, recursive=True, silent=True)
Ejemplo n.º 21
0
    def testProcMemPutAndGet(self):
        """Round-trip single objects through the process-memory handler,
        covering raw values, pre-serialized objects and raw buffers."""
        test_addr = '127.0.0.1:%d' % get_next_port()
        with self.create_pool(n_process=1, address=test_addr) as pool, \
                self.run_actor_test(pool) as test_actor:
            pool.create_actor(WorkerDaemonActor,
                              uid=WorkerDaemonActor.default_uid())
            storage_manager_ref = pool.create_actor(
                StorageManagerActor, uid=StorageManagerActor.default_uid())
            pool.create_actor(QuotaActor, 1024 ** 2,
                              uid=MemQuotaActor.default_uid())
            pool.create_actor(InProcHolderActor)

            data1 = np.random.random((10, 10))
            data2 = np.random.random((10, 10))
            ser_data2 = dataserializer.serialize(data2)
            bytes_data2 = ser_data2.to_buffer()

            session_id = str(uuid.uuid4())
            data_key1 = str(uuid.uuid4())
            data_key2 = str(uuid.uuid4())

            storage_client = test_actor.storage_client
            proc_dev = (0, DataStorageDevice.PROC_MEMORY)
            handler = storage_client.get_storage_handler(proc_dev)

            # plain object round-trip
            handler.put_object(session_id, data_key1, data1)
            locations = storage_manager_ref.get_data_locations(
                session_id, data_key1)
            self.assertEqual(sorted(locations), [proc_dev])
            assert_allclose(data1, handler.get_object(session_id, data_key1))

            # deletion drops both the location record and the value
            handler.delete(session_id, data_key1)
            self.assertIsNone(
                storage_manager_ref.get_data_locations(session_id, data_key1))
            with self.assertRaises(KeyError):
                handler.get_object(session_id, data_key1)

            # pre-serialized object
            handler.put_object(session_id, data_key2, ser_data2,
                               serialized=True)
            assert_allclose(data2, handler.get_object(session_id, data_key2))
            handler.delete(session_id, data_key2)

            # raw serialized buffer
            handler.put_object(session_id, data_key2, bytes_data2,
                               serialized=True)
            assert_allclose(data2, handler.get_object(session_id, data_key2))
            handler.delete(session_id, data_key2)
Ejemplo n.º 22
0
    def testStatus(self):
        """Status actor should upload worker metadata into the KV store.

        Starts the cluster-info / KV-store / chunk-holder actors plus a
        ``StatusActor`` and verifies that worker metadata eventually shows
        up under ``/workers/meta``.
        """
        pool_address = '127.0.0.1:%d' % get_next_port()
        with create_actor_pool(n_process=1,
                               backend='gevent',
                               address=pool_address) as pool:
            pool.create_actor(ClusterInfoActor,
                              schedulers=[pool_address],
                              uid=ClusterInfoActor.default_name())
            pool.create_actor(KVStoreActor, uid='KVStoreActor')
            pool.create_actor(ChunkHolderActor,
                              self._plasma_helper._size,
                              uid='ChunkHolderActor')
            pool.create_actor(StatusActor, '127.0.0.1:1234', uid='StatusActor')

            def delay_read():
                # give the status actor time to push at least one update
                gevent.sleep(2)
                return self._kv_store.read('/workers/meta', recursive=True)

            gl = gevent.spawn(delay_read)
            gl.join()
            v = gl.value
            # was a bare debug ``print(v)``: assert instead, so the test
            # actually verifies metadata was uploaded (mirrors the sibling
            # testStatus which asserts on the read-back value)
            self.assertIsNotNone(v)
Ejemplo n.º 23
0
    def testSharedLoadFromObjects(self, *_):
        """Load a value into shared memory from another handler's object IO.

        Puts an array into process memory, asks the shared-memory handler to
        load it via ``load_from_object_io`` and checks that both device
        locations are recorded; a weakref confirms the original array is
        released once the proc-memory copy is deleted.
        """
        test_addr = '127.0.0.1:%d' % get_next_port()
        with self.create_pool(n_process=1, address=test_addr) as pool, \
                self.run_actor_test(pool) as test_actor:
            pool.create_actor(WorkerDaemonActor, uid=WorkerDaemonActor.default_uid())
            storage_manager_ref = pool.create_actor(
                StorageManagerActor, uid=StorageManagerActor.default_uid())

            pool.create_actor(QuotaActor, 1024 ** 2, uid=MemQuotaActor.default_uid())
            pool.create_actor(InProcHolderActor)

            pool.create_actor(PlasmaKeyMapActor, uid=PlasmaKeyMapActor.default_uid())
            pool.create_actor(SharedHolderActor, uid=SharedHolderActor.default_uid())

            data1 = np.random.random((10, 10))

            session_id = str(uuid.uuid4())
            data_key1 = str(uuid.uuid4())

            storage_client = test_actor.storage_client
            handler = storage_client.get_storage_handler((0, DataStorageDevice.SHARED_MEMORY))

            # load from object io
            # track the array's lifetime without keeping it alive ourselves
            ref_data1 = weakref.ref(data1)

            proc_handler = storage_client.get_storage_handler((0, DataStorageDevice.PROC_MEMORY))
            proc_handler.put_objects(session_id, [data_key1], [data1])
            # drop our strong reference: only proc memory now holds the array
            del data1

            # async load; success/failure is forwarded through the test
            # actor and collected by get_result below
            handler.load_from_object_io(session_id, [data_key1], proc_handler) \
                .then(lambda *_: test_actor.set_result(None),
                      lambda *exc: test_actor.set_result(exc, accept=False))
            self.get_result(5)
            # after loading, the key must be registered on both devices
            self.assertEqual(sorted(storage_manager_ref.get_data_locations(session_id, [data_key1])[0]),
                             [(0, DataStorageDevice.PROC_MEMORY), (0, DataStorageDevice.SHARED_MEMORY)])

            proc_handler.delete(session_id, [data_key1])
            # deleting the proc-memory copy must release the last reference
            self.assertIsNone(ref_data1())
            handler.delete(session_id, [data_key1])
Ejemplo n.º 24
0
async def test_session_service(test_web):
    """Create, list, look up and delete a session.

    ``test_web`` toggles whether the web service is started and the
    web-based session API is exercised instead of the direct one.
    """
    pool = await mo.create_actor_pool('127.0.0.1', n_process=0)

    async with pool:
        supervisor_address = pool.external_address
        config = {
            "services": ["cluster", "session", "meta"],
            "cluster": {
                "backend": "fixed",
                "lookup_address": supervisor_address,
            },
            "meta": {"store": "dict"},
        }
        if test_web:
            config['services'] += ['web']
            config['web'] = {'port': get_next_port()}

        await start_services(
            NodeRole.SUPERVISOR, config, address=supervisor_address)

        if test_web:
            session_api = WebSessionAPI(
                f'http://127.0.0.1:{config["web"]["port"]}')
        else:
            session_api = await SessionAPI.create(supervisor_address)

        session_id = 'test_session'
        session_address = await session_api.create_session(session_id)
        assert session_address == supervisor_address
        assert await session_api.has_session(session_id) is True
        sessions = await session_api.get_sessions()
        assert sessions[0].session_id == session_id
        if not test_web:
            # address lookup is only exposed on the non-web API
            assert (await session_api.get_session_address(session_id)
                    == session_address)
        await session_api.delete_session(session_id)
        assert await session_api.has_session(session_id) is False
        assert await session_api.get_sessions() == []
Ejemplo n.º 25
0
    def testHolder(self):
        """Run the cache test actor to completion and re-raise any failure
        it recorded, destroying the chunk holder afterwards."""
        pool_address = '127.0.0.1:%d' % get_next_port()
        with create_actor_pool(n_process=1, backend='gevent',
                               address=pool_address) as pool:
            pool.create_actor(PlasmaKeyMapActor,
                              uid=PlasmaKeyMapActor.default_name())
            pool.create_actor(ClusterInfoActor, schedulers=[pool_address],
                              uid=ClusterInfoActor.default_name())
            pool.create_actor(KVStoreActor, uid=KVStoreActor.default_name())
            pool.create_actor(DispatchActor, uid=DispatchActor.default_name())
            pool.create_actor(QuotaActor, 1024 * 1024 * 10,
                              uid=MemQuotaActor.default_name())
            cache_ref = pool.create_actor(
                ChunkHolderActor, self.plasma_storage_size,
                uid=ChunkHolderActor.default_name())
            pool.create_actor(SpillActor)

            try:
                test_ref = pool.create_actor(CacheTestActor)
                test_ref.run_test_cache()
                # poll until the test actor flags completion
                while not test_ref.get_exc_info()[0]:
                    pool.sleep(0.1)
                exc_info = test_ref.get_exc_info()[1]
                if exc_info:
                    # re-raise with the original traceback; replaces the
                    # Python-2-era ``six.reraise(*exc_info)`` call
                    raise exc_info[1].with_traceback(exc_info[2])
            finally:
                pool.destroy_actor(cache_ref)
Ejemplo n.º 26
0
    def testDaemon(self):
        """Exercise the worker daemon's process supervision.

        Kills the process hosting a sleeper actor, checks that repeated
        kills are harmless, restarts the process, and verifies that the
        registered callback surfaces ``WorkerProcessStopped`` to the waiting
        test actor while the recreated actor works normally afterwards.
        """
        mock_scheduler_addr = '127.0.0.1:%d' % get_next_port()
        with create_actor_pool(n_process=2,
                               backend='gevent',
                               distributor=MarsDistributor(2, 'w:0:'),
                               address=mock_scheduler_addr) as pool:
            daemon_ref = pool.create_actor(WorkerDaemonActor,
                                           uid=WorkerDaemonActor.default_uid())
            pool.create_actor(DispatchActor, uid=DispatchActor.default_uid())
            # the sleeper lives in process 1 so it can be killed on its own
            sleeper_ref = daemon_ref.create_actor(DaemonSleeperActor,
                                                  uid='w:1:DaemonSleeperActor')
            daemon_ref.create_actor(ProcessHelperActor, uid='w:1:ProcHelper')
            test_actor = pool.create_actor(DaemonTestActor)
            # notify the test actor whenever a daemon-hosted process dies
            daemon_ref.register_actor_callback(
                test_actor,
                DaemonTestActor.handle_process_down_for_actors.__name__)

            # start a long sleep asynchronously, then confirm liveness
            test_actor.run_test_sleep(sleeper_ref, 10, _tell=True)
            self.assertTrue(daemon_ref.is_actor_process_alive(sleeper_ref))

            pool.sleep(0.5)

            daemon_ref.kill_actor_process(sleeper_ref)
            # repeated kill shall not produce errors
            daemon_ref.kill_actor_process(sleeper_ref)
            self.assertFalse(daemon_ref.is_actor_process_alive(sleeper_ref))

            pool.restart_process(1)
            daemon_ref.handle_process_down([1])
            pool.sleep(1)
            # the actor should be recreated in the restarted process
            self.assertTrue(pool.has_actor(sleeper_ref))
            # the interrupted sleep surfaces as WorkerProcessStopped
            with self.assertRaises(WorkerProcessStopped):
                test_actor.get_result()

            # a fresh sleep on the recreated actor completes normally
            test_actor.run_test_sleep(sleeper_ref, 1)
            pool.sleep(1.5)
            test_actor.get_result()
Ejemplo n.º 27
0
    def testStopGraphCalc(self):
        """Stopping an in-flight graph must kill the calc process and raise
        ``ExecutionInterrupted`` for the waiting caller."""
        pool_address = '127.0.0.1:%d' % get_next_port()
        session_id = str(uuid.uuid4())
        mock_data = np.array([1, 2, 3, 4])
        with create_actor_pool(n_process=2,
                               backend='gevent',
                               address=pool_address,
                               distributor=MarsDistributor(2, 'w:0:')) as pool:
            self.create_standard_actors(pool, pool_address, with_status=False)

            daemon_ref = pool.actor_ref(WorkerDaemonActor.default_uid())
            execution_ref = pool.actor_ref(ExecutionActor.default_uid())

            # calc actor is deliberately slow (10s) so it can be interrupted
            calc_ref = daemon_ref.create_actor(MockCpuCalcActor,
                                               session_id,
                                               mock_data,
                                               10,
                                               uid='w:1:cpu-calc-a')
            daemon_ref.create_actor(ProcessHelperActor,
                                    uid='w:1:proc-helper-a')

            test_actor = pool.create_actor(ExecutionTestActor,
                                           uid='w:0:test_actor')
            # kick off the calculation asynchronously
            test_actor.run_simple_calc(session_id, _tell=True)

            pool.sleep(2)
            proc_id = pool.distributor.distribute(calc_ref.uid)
            execution_ref.stop_execution(session_id,
                                         test_actor.get_graph_key(),
                                         _tell=True)
            # stopping is implemented by killing the calc process; wait
            # until the daemon observes it as dead before restarting
            while daemon_ref.is_actor_process_alive(calc_ref):
                pool.sleep(0.1)
            pool.restart_process(proc_id)
            daemon_ref.handle_process_down([proc_id])

            with self.assertRaises(ExecutionInterrupted):
                self.wait_for_result(pool, test_actor)
Ejemplo n.º 28
0
    def _prepare_test_graph(self, session_id, graph_key, mock_workers):
        """Build a small split graph, prepare it in a fresh pool, and yield
        ``(pool, graph_ref)`` with operand actors created but not started."""
        addr = '127.0.0.1:%d' % get_next_port()
        a1 = mt.random.random((100,))
        a2 = mt.random.random((100,))
        v1, v2 = mt.split(a1 + a2, 2)

        graph = DAG()
        v1.build_graph(graph=graph, compose=False)
        v2.build_graph(graph=graph, compose=False)

        with create_actor_pool(n_process=1, backend='gevent',
                               address=addr) as pool:
            pool.create_actor(ClusterInfoActor, [pool.cluster_info.address],
                              uid=ClusterInfoActor.default_name())
            resource_ref = pool.create_actor(
                ResourceActor, uid=ResourceActor.default_name())
            pool.create_actor(ChunkMetaActor,
                              uid=ChunkMetaActor.default_name())
            pool.create_actor(AssignerActor, uid=AssignerActor.default_name())
            graph_ref = pool.create_actor(
                GraphActor, session_id, graph_key, serialize_graph(graph),
                uid=GraphActor.gen_name(session_id, graph_key))

            # register the mocked workers as schedulable resources
            for worker in mock_workers:
                resource_ref.set_worker_meta(
                    worker, dict(hardware=dict(cpu_total=4)))

            # run preparation stages without actually starting operands
            graph_ref.prepare_graph()
            graph_ref.scan_node()
            graph_ref.place_initial_chunks()
            graph_ref.create_operand_actors(_start=False)

            yield pool, graph_ref
Ejemplo n.º 29
0
async def test_start_service(actor_pool_context):
    """Start two test services plus the web service and check the wiring.

    Also verifies that an unknown service name raises ``ImportError`` and
    that the web handler of ``test_svc1`` responds over HTTP.
    """
    pool = actor_pool_context
    web_port = get_next_port()
    config = {
        'services': [['test_svc1'], 'test_svc2', 'web'],
        'test_svc1': {'uid': 'TestActor1', 'arg1': 'val1'},
        'test_svc2': {'uid': 'TestActor2', 'arg2': 'val2',
                      'ref': 'TestActor1'},
        'web': {'port': web_port},
    }
    await start_services(NodeRole.SUPERVISOR, config,
                         'mars.services.tests.test_svcs',
                         address=pool.external_address)

    ref1 = await mo.actor_ref('TestActor1', address=pool.external_address)
    ref2 = await mo.actor_ref('TestActor2', address=pool.external_address)
    assert await ref1.get_arg() == 'val1'
    # svc2 composes its own arg with the one pulled from svc1's actor
    assert await ref2.get_arg() == 'val1:val2'

    with pytest.raises(ImportError):
        await start_services(NodeRole.SUPERVISOR,
                             {'services': ['non-exist-svc']},
                             address=pool.external_address)

    http_client = AsyncHTTPClient()
    resp = await http_client.fetch(
        f'http://127.0.0.1:{web_port}/test_actor1/test_api')
    assert resp.body.decode() == 'val1'
Ejemplo n.º 30
0
    def testEstimateGraphFinishTime(self):
        """After seeding enough speed statistics, executing a graph should
        populate min/max finish-time estimates in the status actor."""
        pool_address = '127.0.0.1:%d' % get_next_port()
        session_id = str(uuid.uuid4())
        with create_actor_pool(n_process=1, backend='gevent',
                               address=pool_address) as pool:
            self.create_standard_actors(pool, pool_address, with_daemon=False)

            status_ref = pool.actor_ref(StatusActor.default_uid())
            execution_ref = pool.actor_ref(ExecutionActor.default_uid())
            pool.create_actor(CpuCalcActor)

            import mars.tensor as mt
            arr = mt.ones((10, 8), chunk_size=10)
            graph = arr.build_graph(compose=False, tiled=True)
            arr = get_tiled(arr)

            graph_key = str(uuid.uuid4())

            # seed enough samples for the estimator to trust the stats
            calc_stat_key = 'calc_speed.' + type(arr.chunks[0].op).__name__
            for _ in range(options.optimize.min_stats_count + 1):
                status_ref.update_mean_stats(calc_stat_key, 10)
                for stat_key in ('disk_read_speed', 'disk_write_speed',
                                 'net_transfer_speed'):
                    status_ref.update_mean_stats(stat_key, 10)

            execution_ref.execute_graph(
                session_id, graph_key, serialize_graph(graph),
                dict(chunks=[arr.chunks[0].key]), None)
            execution_ref.estimate_graph_finish_time(session_id, graph_key)

            stats_dict = status_ref.get_stats(
                ['min_est_finish_time', 'max_est_finish_time'])
            self.assertIsNotNone(stats_dict.get('min_est_finish_time'))
            self.assertIsNotNone(stats_dict.get('max_est_finish_time'))