def testDaemon(self): mock_scheduler_addr = '127.0.0.1:%d' % get_next_port() with create_actor_pool(n_process=2, backend='gevent', distributor=MarsDistributor(2, 'w:0:'), address=mock_scheduler_addr) as pool: daemon_ref = pool.create_actor( WorkerDaemonActor, uid=WorkerDaemonActor.default_name()) pool.create_actor(DispatchActor, uid=DispatchActor.default_name()) sleeper_ref = daemon_ref.create_actor(DaemonSleeperActor, uid='w:1:DaemonSleeperActor') daemon_ref.create_actor(ProcessHelperActor, uid='w:1:ProcHelper') test_actor = pool.create_actor(DaemonTestActor) daemon_ref.register_callback(test_actor, 'handle_process_down') test_actor.run_test_sleep(sleeper_ref, 10, _tell=True) self.assertTrue(daemon_ref.is_actor_process_alive(sleeper_ref)) pool.sleep(0.5) daemon_ref.kill_actor_process(sleeper_ref) # repeated kill shall not produce errors daemon_ref.kill_actor_process(sleeper_ref) self.assertFalse(daemon_ref.is_actor_process_alive(sleeper_ref)) pool.restart_process(1) daemon_ref.handle_process_down([1]) pool.sleep(1) self.assertTrue(pool.has_actor(sleeper_ref)) with self.assertRaises(WorkerProcessStopped): test_actor.get_result() test_actor.run_test_sleep(sleeper_ref, 1) pool.sleep(1.5) test_actor.get_result()
def start_transfer_test_pool(**kwargs): address = kwargs.pop('address') plasma_size = kwargs.pop('plasma_size') with create_actor_pool(n_process=1, backend='gevent', address=address, **kwargs) as pool: pool.create_actor(SchedulerClusterInfoActor, schedulers=[address], uid=SchedulerClusterInfoActor.default_name()) pool.create_actor(WorkerClusterInfoActor, schedulers=[address], uid=WorkerClusterInfoActor.default_name()) pool.create_actor(PlasmaKeyMapActor, uid=PlasmaKeyMapActor.default_name()) pool.create_actor(ChunkMetaActor, uid=ChunkMetaActor.default_name()) pool.create_actor(DispatchActor, uid=DispatchActor.default_name()) pool.create_actor(QuotaActor, 1024 * 1024 * 20, uid=MemQuotaActor.default_name()) chunk_holder_ref = pool.create_actor( ChunkHolderActor, plasma_size, uid=ChunkHolderActor.default_name()) pool.create_actor(SpillActor) pool.create_actor(StatusActor, address, uid=StatusActor.default_name()) yield pool chunk_holder_ref.destroy()
def testWorkerProcessRestart(self): mock_scheduler_addr = '127.0.0.1:%d' % get_next_port() try: with create_actor_pool(n_process=1, backend='gevent', address=mock_scheduler_addr) as pool: pool.create_actor(SchedulerClusterInfoActor, schedulers=[mock_scheduler_addr], uid=SchedulerClusterInfoActor.default_name()) pool.create_actor(ChunkMetaActor, uid=ChunkMetaActor.default_name()) resource_ref = pool.create_actor( ResourceActor, uid=ResourceActor.default_name()) proc = subprocess.Popen([ sys.executable, '-m', 'mars.worker', '-a', '127.0.0.1', '--schedulers', mock_scheduler_addr, '--cpu-procs', '1', '--cache-mem', '10m', '--spill-dir', self._spill_dir, '--ignore-avail-mem' ]) worker_endpoint = self._wait_worker_ready(proc, resource_ref) daemon_ref = pool.actor_ref(WorkerDaemonActor.default_name(), address=worker_endpoint) dispatch_ref = pool.actor_ref(DispatchActor.default_name(), address=worker_endpoint) cpu_slots = dispatch_ref.get_slots('cpu') calc_ref = pool.actor_ref(cpu_slots[0], address=worker_endpoint) daemon_ref.kill_actor_process(calc_ref) check_start = time.time() while not daemon_ref.is_actor_process_alive(calc_ref): gevent.sleep(0.1) if time.time() - check_start > 10: raise TimeoutError('Check process restart timeout') finally: if proc.poll() is None: proc.send_signal(signal.SIGINT) check_time = time.time() while True: time.sleep(0.1) if proc.poll( ) is not None or time.time() - check_time >= 5: break if proc.poll() is None: proc.kill() if os.path.exists(options.worker.plasma_socket): os.unlink(options.worker.plasma_socket)
def testMemQuotaAllocation(self): from mars import resource from mars.utils import AttributeDict mock_mem_stat = AttributeDict( dict(total=300, available=50, used=0, free=50)) local_pool_addr = 'localhost:%d' % get_next_port() with create_actor_pool(n_process=1, backend='gevent', address=local_pool_addr) as pool, \ patch_method(resource.virtual_memory, new=lambda: mock_mem_stat): pool.create_actor(WorkerClusterInfoActor, schedulers=[local_pool_addr], uid=WorkerClusterInfoActor.default_name()) pool.create_actor(StatusActor, local_pool_addr, uid=StatusActor.default_name()) pool.create_actor(DispatchActor, uid=DispatchActor.default_name()) pool.create_actor(ProcessHelperActor, uid=ProcessHelperActor.default_name()) quota_ref = pool.create_actor(MemQuotaActor, 300, refresh_time=0.1, uid=MemQuotaActor.default_name()) time_recs = [] with self.run_actor_test(pool) as test_actor: ref = test_actor.promise_ref(quota_ref) time_recs.append(time.time()) def actual_exec(x): ref.release_quota(x) time_recs.append(time.time()) test_actor.set_result(None) ref.request_quota('req', 100, _promise=True) \ .then(functools.partial(actual_exec, 'req')) pool.sleep(0.5) mock_mem_stat['available'] = 150 mock_mem_stat['free'] = 150 self.get_result(2) self.assertGreater(abs(time_recs[0] - time_recs[1]), 0.4)
def post_create(self): super(MockReceiverActor, self).post_create() self._dispatch_ref = self.ctx.actor_ref(DispatchActor.default_name()) self._dispatch_ref.register_free_slot(self.uid, 'receiver')
def testSimpleTransfer(self): session_id = str(uuid.uuid4()) local_pool_addr = 'localhost:%d' % get_next_port() remote_pool_addr = 'localhost:%d' % get_next_port() remote_chunk_keys = [str(uuid.uuid4()) for _ in range(9)] msg_queue = multiprocessing.Queue() remote_spill_dir = os.path.join(tempfile.gettempdir(), 'mars_spill_%d_%d' % (os.getpid(), id(run_transfer_worker))) proc = multiprocessing.Process( target=run_transfer_worker, args=(remote_pool_addr, session_id, remote_chunk_keys, remote_spill_dir, msg_queue) ) proc.start() try: remote_plasma_socket = msg_queue.get(timeout=30) except Empty: if proc.is_alive(): proc.terminate() raise with start_transfer_test_pool(address=local_pool_addr, plasma_size=self.plasma_storage_size) as pool: sender_refs, receiver_refs = [], [] for _ in range(2): sender_refs.append(pool.create_actor(SenderActor, uid=str(uuid.uuid4()))) receiver_refs.append(pool.create_actor(ReceiverActor, uid=str(uuid.uuid4()))) try: for data_id in (-1, 0): chunk_key = remote_chunk_keys[data_id] with self.run_actor_test(pool) as test_actor: remote_dispatch_ref = test_actor.promise_ref( DispatchActor.default_name(), address=remote_pool_addr) remote_mapper_ref = pool.actor_ref( PlasmaKeyMapActor.default_name(), address=remote_pool_addr) remote_plasma_client = plasma.connect(remote_plasma_socket, '', 0) remote_store = PlasmaChunkStore(remote_plasma_client, remote_mapper_ref) def _call_send_data(sender_uid): sender_ref = test_actor.promise_ref(sender_uid, address=remote_pool_addr) return sender_ref.send_data(session_id, chunk_key, local_pool_addr, _promise=True) def _test_data_exist(*_): try: local_data = test_actor._chunk_store.get(session_id, chunk_key) except KeyError: with open(build_spill_file_name(chunk_key), 'rb') as spill_file: local_data = dataserializer.load(spill_file) try: remote_data = remote_store.get(session_id, chunk_key) except KeyError: with open(build_spill_file_name(chunk_key, remote_spill_dir), 'rb') as spill_file: remote_data = dataserializer.load(spill_file) assert_array_equal(local_data, remote_data) del local_data, remote_data remote_dispatch_ref.get_free_slot('sender', _promise=True) \ .then(_call_send_data) \ .then(_test_data_exist) \ .then( lambda *_: test_actor.set_result(chunk_key), lambda *exc: test_actor.set_result(exc, False), ) self.assertEqual(self.get_result(60), chunk_key) msg_queue.put(1) finally: [pool.destroy_actor(ref) for ref in sender_refs + receiver_refs] os.unlink(remote_plasma_socket) os.kill(proc.pid, signal.SIGINT) t = time.time() while proc.is_alive() and time.time() < t + 2: time.sleep(1) if proc.is_alive(): proc.terminate()