async def test_quota(actor_pool):
    """Exercise hold / release / request / alter operations of a ``QuotaActor``.

    Covers requests that return immediately, requests that stay pending while
    earlier allocations are held, cancellation of a pending request, and
    pending requests being served after a release or an allocation change.
    """
    quota_ref = await mo.create_actor(
        QuotaActor, (actor_pool.external_address, 'numa-0'), 300,
        uid=QuotaActor.gen_uid('cpu-0'),
        address=actor_pool.external_address)  # type: QuotaActorRef

    # test quota options with non-existing keys
    await quota_ref.hold_quotas(['non_exist'])
    await quota_ref.release_quotas(['non_exist'])

    with pytest.raises(ValueError):
        await quota_ref.request_batch_quota({'ERROR': 1000})

    # test quota request with immediate return
    await quota_ref.request_batch_quota({'0': 100})
    await quota_ref.request_batch_quota({'0': 50})
    await quota_ref.request_batch_quota({'0': 200})

    # test request with process_quota=True
    await quota_ref.request_batch_quota({'0': 200})
    await quota_ref.alter_allocations(['0'], [190])
    assert (await quota_ref.dump_data()).allocations['0'] == 190

    await quota_ref.hold_quotas(['0'])
    assert '0' in (await quota_ref.dump_data()).hold_sizes

    req_task1 = asyncio.create_task(quota_ref.request_batch_quota({'1': 150}))
    req_task2 = asyncio.create_task(quota_ref.request_batch_quota({'2': 50}))
    # The event loop only keeps weak references to tasks, so hold strong
    # references to the fire-and-forget requests until the test finishes.
    background_tasks = set()
    background_tasks.add(
        asyncio.create_task(quota_ref.request_batch_quota({'3': 200})))
    background_tasks.add(
        asyncio.create_task(quota_ref.request_batch_quota({'3': 180})))

    # give the queued requests a chance to reach the actor
    await asyncio.sleep(0.1)
    assert '2' not in (await quota_ref.dump_data()).allocations

    # cancelling the first pending request must let the next one through
    req_task1.cancel()
    with pytest.raises(asyncio.CancelledError):
        await req_task1

    await asyncio.wait_for(req_task2, timeout=1)
    allocations = (await quota_ref.dump_data()).allocations
    assert '1' not in allocations
    assert '2' in allocations
    assert '3' not in allocations

    await quota_ref.release_quotas(['0'])
    assert '3' in (await quota_ref.dump_data()).allocations

    req_task4 = asyncio.create_task(quota_ref.request_batch_quota({'4': 180}))
    await asyncio.sleep(0)
    assert '4' not in (await quota_ref.dump_data()).allocations

    # shrinking the allocation of '3' is expected to unblock the request for '4'
    await quota_ref.alter_allocations(['3'], [50])
    await req_task4
    assert '4' in (await quota_ref.dump_data()).allocations
async def test_execute_tensor(actor_pool):
    """Run a two-input tensor add as a subtask and check result, quota and meta."""
    pool, session_id, meta_api, storage_api, execution_ref = actor_pool
    band = (pool.external_address, 'numa-0')

    data1 = np.random.rand(10, 10)
    data2 = np.random.rand(10, 10)

    # NOTE(review): input1 is declared with source_key='input2'; this looks like
    # a copy-paste slip but is kept as-is because the effect of source_key is
    # not visible from this test -- confirm before changing.
    input1 = TensorFetch(key='input1', source_key='input2',
                         dtype=np.dtype(int)).new_chunk([])
    input2 = TensorFetch(key='input2', source_key='input2',
                         dtype=np.dtype(int)).new_chunk([])
    result_chunk = TensorTreeAdd(args=[input1, input2]) \
        .new_chunk([input1, input2], shape=data1.shape, dtype=data1.dtype)

    await meta_api.set_chunk_meta(input1, memory_size=data1.nbytes,
                                  store_size=data1.nbytes, bands=[band])
    # sizes of input2 are taken from data2 (same shape and dtype as data1)
    await meta_api.set_chunk_meta(input2, memory_size=data2.nbytes,
                                  store_size=data2.nbytes, bands=[band])
    # todo use different storage level when storage ready
    await storage_api.put(input1.key, data1)
    await storage_api.put(input2.key, data2)

    chunk_graph = ChunkGraph([result_chunk])
    chunk_graph.add_node(input1)
    chunk_graph.add_node(input2)
    chunk_graph.add_node(result_chunk)
    chunk_graph.add_edge(input1, result_chunk)
    chunk_graph.add_edge(input2, result_chunk)

    subtask = Subtask('test_task', session_id=session_id, chunk_graph=chunk_graph)
    await execution_ref.run_subtask(subtask, 'numa-0', pool.external_address)

    # check if results are correct
    result = await storage_api.get(result_chunk.key)
    np.testing.assert_array_equal(data1 + data2, result)

    # check if quota computations are correct
    quota_ref = await mo.actor_ref(QuotaActor.gen_uid('numa-0'),
                                   address=pool.external_address)
    [quota] = await quota_ref.get_batch_quota_reqs()
    assert quota[(subtask.subtask_id, subtask.subtask_id)] == data1.nbytes

    # check if metas are correct
    result_meta = await meta_api.get_chunk_meta(result_chunk.key)
    assert result_meta['object_id'] == result_chunk.key
    assert result_meta['shape'] == result.shape
async def actor_pool(request):
    """Build a mock worker environment inside a local actor pool.

    ``request.param`` is the number of slots; it is used both as the number of
    sub-processes and as the slot count of band ``'numa-0'``.

    Yields ``(pool, session_id, meta_api, storage_api, execution_ref)`` while
    the pool context is still open.
    """
    n_slots = request.param
    pool = await mo.create_actor_pool(
        '127.0.0.1',
        labels=[None] + ['numa-0'] * n_slots,
        n_process=n_slots)

    async with pool:
        address = pool.external_address
        session_id = 'test_session'

        # mock service APIs that the execution actor talks to
        await MockClusterAPI.create(address, band_to_slots={'numa-0': n_slots})
        await MockSessionAPI.create(address, session_id=session_id)
        meta_api = await MockMetaAPI.create(session_id, address)
        await MockLifecycleAPI.create(session_id, address)
        await MockSubtaskAPI.create(address)
        storage_api = await MockStorageAPI.create(
            session_id, address,
            storage_manger_cls=MockStorageManagerActor)

        # create subtask execution actor
        execution_ref = await mo.create_actor(
            SubtaskExecutionActor,
            uid=SubtaskExecutionActor.default_uid(),
            address=address)
        # create quota actor
        await mo.create_actor(
            MockQuotaActor, 102400,
            uid=QuotaActor.gen_uid('numa-0'),
            address=address)
        # create band slot manager actor
        await mo.create_actor(
            MockBandSlotManagerActor, 'numa-0', n_slots,
            uid=BandSlotManagerActor.gen_uid('numa-0'),
            address=address)
        # create mock task manager actor
        await mo.create_actor(
            MockTaskManager,
            uid=TaskManagerActor.gen_uid(session_id),
            address=address)

        yield pool, session_id, meta_api, storage_api, execution_ref
async def test_batch_quota_allocation(actor_pool):
    """Check that two batch requests that cannot fit together run one after another.

    Each batch asks for two keys of size 100 against an actor created with 300,
    holds them for 0.5 seconds and releases them; the two completion times are
    then required to be more than 0.4 seconds apart.
    """
    quota_ref = await mo.create_actor(
        QuotaActor, 300, uid=QuotaActor.gen_uid('cpu-0'),
        address=actor_pool.external_address)  # type: QuotaActorRef

    end_time = []

    async def task_fun(b):
        await quota_ref.request_batch_quota(b)
        await asyncio.sleep(0.5)
        # while this batch holds its quota, no other batch may be allocated
        assert set(b) == set((await quota_ref.dump_data()).allocations.keys())
        await quota_ref.release_quotas(list(b))
        # monotonic clock: the interval check below must not be affected by
        # wall-clock adjustments
        end_time.append(time.monotonic())

    tasks = []
    for idx in (0, 1):
        batch = {key: 100 for key in (f'{idx}_0', f'{idx}_1')}
        tasks.append(asyncio.create_task(task_fun(batch)))

    await asyncio.wait_for(asyncio.gather(*tasks), timeout=10)
    assert abs(end_time[0] - end_time[1]) > 0.4
    assert await quota_ref.get_allocated_size() == 0
async def test_execute_with_cancel(actor_pool, cancel_phase):
    """Cancel a running subtask and check that a later subtask can still run.

    ``cancel_phase`` selects which mock actor (``'prepare'``, ``'quota'`` or
    ``'slot'``) gets a delay configured before the subtask starts; any other
    value configures no delay.
    """
    pool, session_id, meta_api, storage_api, execution_ref = actor_pool

    # config for different phases
    ref_to_delay = None
    if cancel_phase == 'prepare':
        ref_to_delay = await mo.actor_ref(
            StorageManagerActor.default_uid(), address=pool.external_address)
    elif cancel_phase == 'quota':
        ref_to_delay = await mo.actor_ref(
            QuotaActor.gen_uid('numa-0'), address=pool.external_address)
    elif cancel_phase == 'slot':
        ref_to_delay = await mo.actor_ref(
            BandSlotManagerActor.gen_uid('numa-0'), address=pool.external_address)
    # explicit None check, matching the check made after cancellation below
    if ref_to_delay is not None:
        await ref_to_delay.set_delay_fetch_time(100)

    # blocking on purpose: stands in for a long-running user function
    def delay_fun(delay, _inp1):
        time.sleep(delay)
        return delay

    input1 = TensorFetch(key='input1', source_key='input1',
                         dtype=np.dtype(int)).new_chunk([])
    remote_result = RemoteFunction(function=delay_fun, function_args=[100, input1],
                                   function_kwargs={}, n_output=1) \
        .new_chunk([input1])

    data1 = np.random.rand(10, 10)
    await meta_api.set_chunk_meta(input1, memory_size=data1.nbytes,
                                  store_size=data1.nbytes,
                                  bands=[(pool.external_address, 'numa-0')])
    await storage_api.put(input1.key, data1)

    chunk_graph = ChunkGraph([remote_result])
    chunk_graph.add_node(input1)
    chunk_graph.add_node(remote_result)
    chunk_graph.add_edge(input1, remote_result)

    subtask = Subtask(f'test_task_{uuid.uuid4()}', session_id=session_id,
                      chunk_graph=chunk_graph)
    aiotask = asyncio.create_task(
        execution_ref.run_subtask(subtask, 'numa-0', pool.external_address))
    # let the subtask get under way before cancelling it
    await asyncio.sleep(1)

    with Timer() as timer:
        await execution_ref.cancel_subtask(subtask.subtask_id, kill_timeout=1)
        with pytest.raises(asyncio.CancelledError):
            await asyncio.wait_for(aiotask, timeout=30)
    assert timer.duration < 6

    # check for different phases
    if ref_to_delay is not None:
        assert await ref_to_delay.get_is_cancelled()
        await ref_to_delay.set_delay_fetch_time(0)

    # test if slot is restored
    remote_tileable = mr.spawn(delay_fun, args=(0.5, None))
    graph = TileableGraph([remote_tileable.data])
    next(TileableGraphBuilder(graph).build())

    chunk_graph = next(ChunkGraphBuilder(graph, fuse_enabled=False).build())

    subtask = Subtask(f'test_task2_{uuid.uuid4()}', session_id=session_id,
                      chunk_graph=chunk_graph)
    await asyncio.wait_for(
        execution_ref.run_subtask(subtask, 'numa-0', pool.external_address),
        timeout=30)