Ejemplo n.º 1
0
    def testDaemon(self):
        mock_scheduler_addr = '127.0.0.1:%d' % get_next_port()
        with create_actor_pool(n_process=2,
                               backend='gevent',
                               distributor=MarsDistributor(2, 'w:0:'),
                               address=mock_scheduler_addr) as pool:
            daemon_ref = pool.create_actor(
                WorkerDaemonActor, uid=WorkerDaemonActor.default_name())
            pool.create_actor(DispatchActor, uid=DispatchActor.default_name())
            sleeper_ref = daemon_ref.create_actor(DaemonSleeperActor,
                                                  uid='w:1:DaemonSleeperActor')
            daemon_ref.create_actor(ProcessHelperActor, uid='w:1:ProcHelper')
            test_actor = pool.create_actor(DaemonTestActor)
            daemon_ref.register_callback(test_actor, 'handle_process_down')

            test_actor.run_test_sleep(sleeper_ref, 10, _tell=True)
            self.assertTrue(daemon_ref.is_actor_process_alive(sleeper_ref))

            pool.sleep(0.5)

            daemon_ref.kill_actor_process(sleeper_ref)
            # repeated kill shall not produce errors
            daemon_ref.kill_actor_process(sleeper_ref)
            self.assertFalse(daemon_ref.is_actor_process_alive(sleeper_ref))

            pool.restart_process(1)
            daemon_ref.handle_process_down([1])
            pool.sleep(1)
            self.assertTrue(pool.has_actor(sleeper_ref))
            with self.assertRaises(WorkerProcessStopped):
                test_actor.get_result()

            test_actor.run_test_sleep(sleeper_ref, 1)
            pool.sleep(1.5)
            test_actor.get_result()
Ejemplo n.º 2
0
def start_transfer_test_pool(**kwargs):
    address = kwargs.pop('address')
    plasma_size = kwargs.pop('plasma_size')
    with create_actor_pool(n_process=1,
                           backend='gevent',
                           address=address,
                           **kwargs) as pool:
        pool.create_actor(SchedulerClusterInfoActor,
                          schedulers=[address],
                          uid=SchedulerClusterInfoActor.default_name())
        pool.create_actor(WorkerClusterInfoActor,
                          schedulers=[address],
                          uid=WorkerClusterInfoActor.default_name())

        pool.create_actor(PlasmaKeyMapActor,
                          uid=PlasmaKeyMapActor.default_name())
        pool.create_actor(ChunkMetaActor, uid=ChunkMetaActor.default_name())
        pool.create_actor(DispatchActor, uid=DispatchActor.default_name())
        pool.create_actor(QuotaActor,
                          1024 * 1024 * 20,
                          uid=MemQuotaActor.default_name())
        chunk_holder_ref = pool.create_actor(
            ChunkHolderActor, plasma_size, uid=ChunkHolderActor.default_name())
        pool.create_actor(SpillActor)
        pool.create_actor(StatusActor, address, uid=StatusActor.default_name())

        yield pool

        chunk_holder_ref.destroy()
Ejemplo n.º 3
0
    def testWorkerProcessRestart(self):
        mock_scheduler_addr = '127.0.0.1:%d' % get_next_port()
        try:
            with create_actor_pool(n_process=1,
                                   backend='gevent',
                                   address=mock_scheduler_addr) as pool:
                pool.create_actor(SchedulerClusterInfoActor,
                                  schedulers=[mock_scheduler_addr],
                                  uid=SchedulerClusterInfoActor.default_name())

                pool.create_actor(ChunkMetaActor,
                                  uid=ChunkMetaActor.default_name())
                resource_ref = pool.create_actor(
                    ResourceActor, uid=ResourceActor.default_name())

                proc = subprocess.Popen([
                    sys.executable, '-m', 'mars.worker', '-a', '127.0.0.1',
                    '--schedulers', mock_scheduler_addr, '--cpu-procs', '1',
                    '--cache-mem', '10m', '--spill-dir', self._spill_dir,
                    '--ignore-avail-mem'
                ])
                worker_endpoint = self._wait_worker_ready(proc, resource_ref)

                daemon_ref = pool.actor_ref(WorkerDaemonActor.default_name(),
                                            address=worker_endpoint)
                dispatch_ref = pool.actor_ref(DispatchActor.default_name(),
                                              address=worker_endpoint)
                cpu_slots = dispatch_ref.get_slots('cpu')
                calc_ref = pool.actor_ref(cpu_slots[0],
                                          address=worker_endpoint)
                daemon_ref.kill_actor_process(calc_ref)

                check_start = time.time()
                while not daemon_ref.is_actor_process_alive(calc_ref):
                    gevent.sleep(0.1)
                    if time.time() - check_start > 10:
                        raise TimeoutError('Check process restart timeout')
        finally:
            if proc.poll() is None:
                proc.send_signal(signal.SIGINT)
                check_time = time.time()
                while True:
                    time.sleep(0.1)
                    if proc.poll(
                    ) is not None or time.time() - check_time >= 5:
                        break
                if proc.poll() is None:
                    proc.kill()
            if os.path.exists(options.worker.plasma_socket):
                os.unlink(options.worker.plasma_socket)
Ejemplo n.º 4
0
    def testMemQuotaAllocation(self):
        from mars import resource
        from mars.utils import AttributeDict

        mock_mem_stat = AttributeDict(
            dict(total=300, available=50, used=0, free=50))
        local_pool_addr = 'localhost:%d' % get_next_port()
        with create_actor_pool(n_process=1, backend='gevent', address=local_pool_addr) as pool, \
                patch_method(resource.virtual_memory, new=lambda: mock_mem_stat):
            pool.create_actor(WorkerClusterInfoActor,
                              schedulers=[local_pool_addr],
                              uid=WorkerClusterInfoActor.default_name())
            pool.create_actor(StatusActor,
                              local_pool_addr,
                              uid=StatusActor.default_name())

            pool.create_actor(DispatchActor, uid=DispatchActor.default_name())
            pool.create_actor(ProcessHelperActor,
                              uid=ProcessHelperActor.default_name())
            quota_ref = pool.create_actor(MemQuotaActor,
                                          300,
                                          refresh_time=0.1,
                                          uid=MemQuotaActor.default_name())

            time_recs = []
            with self.run_actor_test(pool) as test_actor:
                ref = test_actor.promise_ref(quota_ref)
                time_recs.append(time.time())

                def actual_exec(x):
                    ref.release_quota(x)
                    time_recs.append(time.time())
                    test_actor.set_result(None)

                ref.request_quota('req', 100, _promise=True) \
                    .then(functools.partial(actual_exec, 'req'))

                pool.sleep(0.5)
                mock_mem_stat['available'] = 150
                mock_mem_stat['free'] = 150

                self.get_result(2)

            self.assertGreater(abs(time_recs[0] - time_recs[1]), 0.4)
Ejemplo n.º 5
0
 def post_create(self):
     super(MockReceiverActor, self).post_create()
     self._dispatch_ref = self.ctx.actor_ref(DispatchActor.default_name())
     self._dispatch_ref.register_free_slot(self.uid, 'receiver')
Ejemplo n.º 6
0
    def testSimpleTransfer(self):
        session_id = str(uuid.uuid4())

        local_pool_addr = 'localhost:%d' % get_next_port()
        remote_pool_addr = 'localhost:%d' % get_next_port()
        remote_chunk_keys = [str(uuid.uuid4()) for _ in range(9)]
        msg_queue = multiprocessing.Queue()

        remote_spill_dir = os.path.join(tempfile.gettempdir(),
                                        'mars_spill_%d_%d' % (os.getpid(), id(run_transfer_worker)))

        proc = multiprocessing.Process(
            target=run_transfer_worker,
            args=(remote_pool_addr, session_id, remote_chunk_keys, remote_spill_dir, msg_queue)
        )
        proc.start()
        try:
            remote_plasma_socket = msg_queue.get(timeout=30)
        except Empty:
            if proc.is_alive():
                proc.terminate()
            raise

        with start_transfer_test_pool(address=local_pool_addr, plasma_size=self.plasma_storage_size) as pool:
            sender_refs, receiver_refs = [], []
            for _ in range(2):
                sender_refs.append(pool.create_actor(SenderActor, uid=str(uuid.uuid4())))
                receiver_refs.append(pool.create_actor(ReceiverActor, uid=str(uuid.uuid4())))

            try:
                for data_id in (-1, 0):
                    chunk_key = remote_chunk_keys[data_id]

                    with self.run_actor_test(pool) as test_actor:
                        remote_dispatch_ref = test_actor.promise_ref(
                            DispatchActor.default_name(), address=remote_pool_addr)
                        remote_mapper_ref = pool.actor_ref(
                            PlasmaKeyMapActor.default_name(), address=remote_pool_addr)
                        remote_plasma_client = plasma.connect(remote_plasma_socket, '', 0)
                        remote_store = PlasmaChunkStore(remote_plasma_client, remote_mapper_ref)

                        def _call_send_data(sender_uid):
                            sender_ref = test_actor.promise_ref(sender_uid, address=remote_pool_addr)
                            return sender_ref.send_data(session_id, chunk_key, local_pool_addr, _promise=True)

                        def _test_data_exist(*_):
                            try:
                                local_data = test_actor._chunk_store.get(session_id, chunk_key)
                            except KeyError:
                                with open(build_spill_file_name(chunk_key), 'rb') as spill_file:
                                    local_data = dataserializer.load(spill_file)

                            try:
                                remote_data = remote_store.get(session_id, chunk_key)
                            except KeyError:
                                with open(build_spill_file_name(chunk_key, remote_spill_dir), 'rb') as spill_file:
                                    remote_data = dataserializer.load(spill_file)
                            assert_array_equal(local_data, remote_data)

                            del local_data, remote_data

                        remote_dispatch_ref.get_free_slot('sender', _promise=True) \
                            .then(_call_send_data) \
                            .then(_test_data_exist) \
                            .then(
                            lambda *_: test_actor.set_result(chunk_key),
                            lambda *exc: test_actor.set_result(exc, False),
                        )
                    self.assertEqual(self.get_result(60), chunk_key)

                msg_queue.put(1)
            finally:
                [pool.destroy_actor(ref) for ref in sender_refs + receiver_refs]

                os.unlink(remote_plasma_socket)
                os.kill(proc.pid, signal.SIGINT)

                t = time.time()
                while proc.is_alive() and time.time() < t + 2:
                    time.sleep(1)
                if proc.is_alive():
                    proc.terminate()