def test_task_default_resources(self):
    """Check required_resources() with and without an explicit resource list."""
    # No resource argument supplied: the task is expected to report
    # [1.0, 0.0].
    default_task = local_scheduler.Task(
        random_driver_id(),
        random_function_id(),
        [random_object_id()],
        0,
        random_task_id(),
        0)
    self.assertEqual(default_task.required_resources(), [1.0, 0.0])

    # Resource list supplied as the final positional argument: it is expected
    # to be reported back unchanged.
    explicit_task = local_scheduler.Task(
        random_driver_id(),
        random_function_id(),
        [random_object_id()],
        0,
        random_task_id(),
        0,
        local_scheduler.ObjectID(NIL_ACTOR_ID),
        0,
        [1.0, 2.0])
    self.assertEqual(explicit_task.required_resources(), [1.0, 2.0])
def test_submit_and_get_task(self):
    """Round-trip tasks through the local scheduler client.

    Each (argument list, return count) combination is first submitted and
    fetched one at a time, comparing the function ID, arguments, and number
    of return values. Afterwards every combination is submitted up front and
    the same number of tasks is fetched back.
    """
    function_id = random_function_id()
    object_ids = [random_object_id() for _ in range(256)]

    # Create and seal the objects in the object store so that all of the
    # tasks submitted below can be scheduled.
    for object_id in object_ids:
        self.plasma_client.create(pa.plasma.ObjectID(object_id.id()), 0)
        self.plasma_client.seal(pa.plasma.ObjectID(object_id.id()))

    # Argument lists made only of plain Python values.
    value_args = [[], [{}], [()]]
    value_args += [count * [1] for count in (1, 10, 100, 1000)]
    value_args += [count * ["a"] for count in (1, 10, 100, 1000)]
    value_args.append([1, 1.3, 1 << 100, "hi", u"hi", [1, 2]])
    # Argument lists made only of object IDs.
    id_args = [object_ids[:k] for k in (1, 2, 3, 4, 5, 10, 100, 256)]
    # Argument lists mixing object IDs with plain values.
    mixed_args = [
        [1, object_ids[0]],
        [object_ids[0], "a"],
        [1, object_ids[0], "a"],
        [object_ids[0], 1, object_ids[1], "a"],
        object_ids[:3] + [1, "hi", 2.3] + object_ids[:5],
        object_ids + 100 * ["a"] + object_ids,
    ]
    args_list = value_args + id_args + mixed_args

    return_counts = [0, 1, 2, 3, 5, 10, 100]
    cases = [(args, num_return_vals)
             for args in args_list
             for num_return_vals in return_counts]

    # Phase 1: submit one task, fetch it straight back, and compare.
    for args, num_return_vals in cases:
        task = local_scheduler.Task(random_driver_id(), function_id, args,
                                    num_return_vals, random_task_id(), 0)
        self.local_scheduler_client.submit(task)
        new_task = self.local_scheduler_client.get_task()

        self.assertEqual(task.function_id().id(),
                         new_task.function_id().id())
        retrieved_args = new_task.arguments()
        returns = new_task.returns()
        self.assertEqual(len(args), len(retrieved_args))
        self.assertEqual(num_return_vals, len(returns))
        for expected, actual in zip(args, retrieved_args):
            if isinstance(expected, local_scheduler.ObjectID):
                # Object IDs are compared through their id() values.
                self.assertEqual(expected.id(), actual.id())
            else:
                self.assertEqual(expected, actual)

    # Phase 2: submit all of the tasks before fetching any of them.
    for args, num_return_vals in cases:
        task = local_scheduler.Task(random_driver_id(), function_id, args,
                                    num_return_vals, random_task_id(), 0)
        self.local_scheduler_client.submit(task)

    # Get all of the tasks back; only the number of fetches matters here.
    for _ in cases:
        self.local_scheduler_client.get_task()
def test_task_default_resources(self):
    """Check required_resources() with and without an explicit resource dict."""
    # No resource argument supplied: the task is expected to report a single
    # CPU.
    default_task = local_scheduler.Task(
        random_driver_id(),
        random_function_id(),
        [random_object_id()],
        0,
        random_task_id(),
        0)
    self.assertEqual(default_task.required_resources(), {"CPU": 1})

    # Resource dict supplied as the final positional argument: it is expected
    # to be reported back unchanged.
    explicit_task = local_scheduler.Task(
        random_driver_id(),
        random_function_id(),
        [random_object_id()],
        0,
        random_task_id(),
        0,
        local_scheduler.ObjectID(NIL_ACTOR_ID),
        local_scheduler.ObjectID(NIL_OBJECT_ID),
        local_scheduler.ObjectID(NIL_ACTOR_ID),
        local_scheduler.ObjectID(NIL_ACTOR_ID),
        0,
        0,
        [],
        {"CPU": 1, "GPU": 2})
    self.assertEqual(explicit_task.required_resources(),
                     {"CPU": 1, "GPU": 2})
def test_create_and_serialize_task(self):
    """Build tasks for many argument shapes and round-trip them as strings.

    Every task is validated with self.check_task, serialized with
    task_to_string, rebuilt with task_from_string, and validated again.
    """
    # TODO(rkn): The function ID should be a FunctionID object, not an
    # ObjectID.
    driver_id = random_driver_id()
    parent_id = random_task_id()
    function_id = random_function_id()
    object_ids = [random_object_id() for _ in range(256)]

    # Argument lists made only of plain Python values.
    value_args = [[]]
    value_args += [count * [1] for count in (1, 10, 100, 1000)]
    value_args += [count * ["a"] for count in (1, 10, 100, 1000)]
    value_args.append([1, 1.3, 2, 1 << 100, "hi", u"hi", [1, 2]])
    # Argument lists made only of object IDs.
    id_args = [object_ids[:k] for k in (1, 2, 3, 4, 5, 10, 100, 256)]
    # Argument lists mixing object IDs with plain values.
    mixed_args = [
        [1, object_ids[0]],
        [object_ids[0], "a"],
        [1, object_ids[0], "a"],
        [object_ids[0], 1, object_ids[1], "a"],
        object_ids[:3] + [1, "hi", 2.3] + object_ids[:5],
        object_ids + 100 * ["a"] + object_ids,
    ]
    args_list = value_args + id_args + mixed_args

    return_counts = [0, 1, 2, 3, 5, 10, 100]
    for args in args_list:
        for num_return_vals in return_counts:
            original = local_scheduler.Task(driver_id, function_id, args,
                                            num_return_vals, parent_id, 0)
            self.check_task(original, function_id, num_return_vals, args)

            # The deserialized copy must pass the same checks.
            serialized = local_scheduler.task_to_string(original)
            restored = local_scheduler.task_from_string(serialized)
            self.check_task(restored, function_id, num_return_vals, args)
def integration_many_tasks_helper(self, timesync=True):
    """Submit many single-dependency tasks and check that all get queued.

    Creates 1000 sealed objects, submits one task per object to the first
    local scheduler client, then polls the task keys in Redis (up to 10
    times, 0.1s apart) until every task has state TASK_STATUS_QUEUED.

    Args:
        timesync: If true, sleep 10ms after creating each object and before
            submitting the task that depends on it.

    Raises:
        AssertionError: If the number of db clients is unexpected, a task is
            in an unexpected state, or not all tasks were queued once the
            retries are exhausted.
    """
    # There should be three db clients, the global scheduler, the local
    # scheduler, and the plasma manager.
    self.assertEqual(
        len(self.redis_client.keys("{}*".format(DB_CLIENT_PREFIX))),
        2 * NUM_CLUSTER_NODES + 1)

    num_return_vals = [0, 1, 2, 3, 5, 10]

    # Submit a bunch of tasks to Redis.
    num_tasks = 1000
    for _ in range(num_tasks):
        # Create a new object for each task.
        data_size = np.random.randint(1 << 20)
        metadata_size = np.random.randint(1 << 10)
        plasma_client = self.plasma_clients[0]
        object_dep, _, _ = create_object(
            plasma_client, data_size, metadata_size, seal=True)
        if timesync:
            # Give 10ms for object info handler to fire (long enough to
            # yield CPU).
            time.sleep(0.010)
        task = local_scheduler.Task(
            random_driver_id(), random_function_id(),
            [local_scheduler.ObjectID(object_dep)],
            num_return_vals[0], random_task_id(), 0)
        self.local_scheduler_clients[0].submit(task)

    # Check that there are the correct number of tasks in Redis and that
    # they all get assigned to the local scheduler.
    allowed_statuses = [TASK_STATUS_WAITING, TASK_STATUS_SCHEDULED,
                        TASK_STATUS_QUEUED]
    num_retries = 10
    num_tasks_done = 0
    while num_retries > 0:
        task_entries = self.redis_client.keys("{}*".format(TASK_PREFIX))
        self.assertLessEqual(len(task_entries), num_tasks)
        # First, check if all tasks made it to Redis.
        if len(task_entries) == num_tasks:
            task_statuses = [
                int(self.redis_client.hgetall(task_key)[b"state"])
                for task_key in task_entries
            ]
            self.assertTrue(
                all(status in allowed_statuses for status in task_statuses))
            num_tasks_done = task_statuses.count(TASK_STATUS_QUEUED)
            num_tasks_scheduled = task_statuses.count(TASK_STATUS_SCHEDULED)
            num_tasks_waiting = task_statuses.count(TASK_STATUS_WAITING)
            print("tasks in Redis = {}, tasks waiting = {}, "
                  "tasks scheduled = {}, "
                  "tasks queued = {}, retries left = {}".format(
                      len(task_entries), num_tasks_waiting,
                      num_tasks_scheduled, num_tasks_done, num_retries))
            if all(status == TASK_STATUS_QUEUED
                   for status in task_statuses):
                # We're done, so pass.
                break
        num_retries -= 1
        time.sleep(0.1)

    # Report an unscheduled task as an ordinary test failure. Calling
    # tearDown() and sys.exit() by hand would run the teardown twice and
    # bypass the test runner's normal failure reporting.
    self.assertEqual(num_tasks_done, num_tasks)
def test_scheduling_when_objects_evicted(self):
    """Check that a task waits until both dependencies are available.

    A background thread blocks in get_task() while the test makes the first
    dependency available, forces its eviction, makes the second dependency
    available, and finally recreates the first. The thread must stay blocked
    until that last step.
    """

    def put_object(obj_id, size):
        # Create and immediately seal an object of the given size.
        self.plasma_client.create(obj_id.id(), size)
        self.plasma_client.seal(obj_id.id())

    def assert_still_waiting():
        # Give the scheduler a moment, then confirm get_task() has not
        # returned.
        time.sleep(0.1)
        self.assertTrue(waiter.is_alive())

    # Create a task with two dependencies and submit it.
    first_dep = random_object_id()
    second_dep = random_object_id()
    task = local_scheduler.Task(random_driver_id(), random_function_id(),
                                [first_dep, second_dep], 0,
                                random_task_id(), 0)
    self.local_scheduler_client.submit(task)

    # Launch a thread to get the task.
    def get_task():
        self.local_scheduler_client.get_task()

    waiter = threading.Thread(target=get_task)
    waiter.start()

    # Make one of the dependencies available.
    put_object(first_dep, 1)
    assert_still_waiting()

    # Force eviction of the first dependency by creating more objects of this
    # size than divide into the default store memory.
    num_objects = 4
    object_size = plasma.DEFAULT_PLASMA_STORE_MEMORY // num_objects
    for _ in range(num_objects + 1):
        put_object(random_object_id(), object_size)
    assert_still_waiting()

    # Check that the first object dependency was evicted.
    fetched = self.plasma_client.get([first_dep.id()], timeout_ms=0)
    self.assertEqual(fetched, [None])
    assert_still_waiting()

    # Create the second dependency; the first one is still missing.
    put_object(second_dep, 1)
    assert_still_waiting()

    # Create the first dependency again. Both dependencies are now available.
    put_object(first_dep, 1)

    # Wait until the thread finishes so that we know the task was scheduled.
    waiter.join()
def test_integration_single_task(self):
    """Submit one task with a sealed dependency and wait for it to be queued.

    The task table is polled up to 10 times, one second apart. The test
    fails if the task never reaches state.TASK_STATUS_QUEUED.

    Raises:
        AssertionError: If the number of db clients is unexpected, more than
            one task appears, the task is in an unexpected state, or the
            task is not queued once the retries are exhausted.
    """
    # There should be three db clients, the global scheduler, the local
    # scheduler, and the plasma manager.
    self.assertEqual(len(self.state.client_table()[self.node_ip_address]),
                     2 * NUM_CLUSTER_NODES + 1)

    num_return_vals = [0, 1, 2, 3, 5, 10]

    # Insert the object into Redis.
    data_size = 0xf1f0
    metadata_size = 0x40
    plasma_client = self.plasma_clients[0]
    object_dep, _, _ = create_object(
        plasma_client, data_size, metadata_size, seal=True)

    # Sleep before submitting task to local scheduler.
    time.sleep(0.1)

    # Submit a task to Redis.
    task = local_scheduler.Task(
        random_driver_id(), random_function_id(),
        [local_scheduler.ObjectID(object_dep.binary())],
        num_return_vals[0], random_task_id(), 0)
    self.local_scheduler_clients[0].submit(task)
    time.sleep(0.1)

    # There should now be a task in Redis, and it should get assigned to
    # the local scheduler.
    allowed_statuses = [
        state.TASK_STATUS_WAITING, state.TASK_STATUS_SCHEDULED,
        state.TASK_STATUS_QUEUED
    ]
    # Start with no status so the final check is well defined even when the
    # task never shows up in the task table.
    task_status = None
    num_retries = 10
    while num_retries > 0:
        task_entries = self.state.task_table()
        self.assertLessEqual(len(task_entries), 1)
        if len(task_entries) == 1:
            _, task_entry = task_entries.popitem()
            task_status = task_entry["State"]
            self.assertIn(task_status, allowed_statuses)
            if task_status == state.TASK_STATUS_QUEUED:
                break
            print(task_status)
        print("The task has not been scheduled yet, trying again.")
        num_retries -= 1
        time.sleep(1)

    # Report a scheduling failure as an ordinary test failure. Calling
    # tearDown() and sys.exit() by hand would run the teardown twice and
    # bypass the test runner's normal failure reporting.
    self.assertEqual(
        task_status, state.TASK_STATUS_QUEUED,
        "Failed to submit and schedule a single task.")
def test_integration_single_task(self):
    """Submit one task with a sealed dependency and wait for it to be queued.

    The task keys in Redis are polled up to 10 times, one second apart. The
    test fails if the task never reaches TASK_STATUS_QUEUED.

    Raises:
        AssertionError: If the number of db clients is unexpected, more than
            one task appears, the task is in an unexpected state, or the
            task is not queued once the retries are exhausted.
    """
    # There should be three db clients, the global scheduler, the local
    # scheduler, and the plasma manager.
    self.assertEqual(
        len(self.redis_client.keys("{}*".format(DB_CLIENT_PREFIX))),
        2 * NUM_CLUSTER_NODES + 1)

    num_return_vals = [0, 1, 2, 3, 5, 10]

    # Insert the object into Redis.
    data_size = 0xf1f0
    metadata_size = 0x40
    plasma_client = self.plasma_clients[0]
    object_dep, _, _ = create_object(
        plasma_client, data_size, metadata_size, seal=True)

    # Sleep before submitting task to local scheduler.
    time.sleep(0.1)

    # Submit a task to Redis.
    task = local_scheduler.Task(
        random_driver_id(), random_function_id(),
        [local_scheduler.ObjectID(object_dep)],
        num_return_vals[0], random_task_id(), 0)
    self.local_scheduler_clients[0].submit(task)
    time.sleep(0.1)

    # There should now be a task in Redis, and it should get assigned to the
    # local scheduler.
    allowed_statuses = [
        TASK_STATUS_WAITING, TASK_STATUS_SCHEDULED, TASK_STATUS_QUEUED
    ]
    # Start with no status so the final check is well defined even when the
    # task never shows up in Redis.
    task_status = None
    num_retries = 10
    while num_retries > 0:
        task_entries = self.redis_client.keys("{}*".format(TASK_PREFIX))
        self.assertLessEqual(len(task_entries), 1)
        if len(task_entries) == 1:
            task_contents = self.redis_client.hgetall(task_entries[0])
            task_status = int(task_contents[b"state"])
            self.assertIn(task_status, allowed_statuses)
            if task_status == TASK_STATUS_QUEUED:
                break
            print(task_status)
        print("The task has not been scheduled yet, trying again.")
        num_retries -= 1
        time.sleep(1)

    # Report a scheduling failure as an ordinary test failure. Calling
    # tearDown() and sys.exit() by hand would run the teardown twice and
    # bypass the test runner's normal failure reporting.
    self.assertEqual(
        task_status, TASK_STATUS_QUEUED,
        "Failed to submit and schedule a single task.")
def test_scheduling_when_objects_ready(self):
    """Check that a blocked get_task() returns once its dependency exists.

    The task is submitted before its only dependency is in the object store.
    A background thread calls get_task(); the test then creates and seals the
    dependency and joins the thread.
    """
    # Create a task and submit it.
    dependency = random_object_id()
    self.local_scheduler_client.submit(
        local_scheduler.Task(random_driver_id(), random_function_id(),
                             [dependency], 0, random_task_id(), 0))

    # Launch a thread to get the task.
    def get_task():
        self.local_scheduler_client.get_task()

    waiter = threading.Thread(target=get_task)
    waiter.start()

    # Sleep to give the thread time to call get_task.
    time.sleep(0.1)

    # Create and seal the object ID in the object store. This should trigger
    # a scheduling event.
    self.plasma_client.create(dependency.id(), 0)
    self.plasma_client.seal(dependency.id())

    # Wait until the thread finishes so that we know the task was scheduled.
    waiter.join()
def integration_many_tasks_helper(self, timesync=True):
    """Submit many single-dependency tasks and check that all get queued.

    Creates 1000 sealed objects, submits one task per object to the first
    local scheduler client, then polls the task table (up to 10 times, 0.1s
    apart) until every task has state.TASK_STATUS_QUEUED.

    Args:
        timesync: If true, sleep 10ms after creating each object and before
            submitting the task that depends on it.
    """
    # There should be three db clients, the global scheduler, the local
    # scheduler, and the plasma manager.
    client_count = len(self.state.client_table()[self.node_ip_address])
    self.assertEqual(client_count, 2 * NUM_CLUSTER_NODES + 1)

    num_return_vals = [0, 1, 2, 3, 5, 10]

    # Submit a bunch of tasks to Redis.
    num_tasks = 1000
    for _ in range(num_tasks):
        # Create a new object for each task.
        data_size = np.random.randint(1 << 12)
        metadata_size = np.random.randint(1 << 9)
        plasma_client = self.plasma_clients[0]
        object_dep, memory_buffer, metadata = create_object(
            plasma_client, data_size, metadata_size, seal=True)
        if timesync:
            # Give 10ms for object info handler to fire (long enough to
            # yield CPU).
            time.sleep(0.010)
        dependency = local_scheduler.ObjectID(object_dep.binary())
        self.local_scheduler_clients[0].submit(
            local_scheduler.Task(random_driver_id(), random_function_id(),
                                 [dependency], num_return_vals[0],
                                 random_task_id(), 0))

    # Check that there are the correct number of tasks in Redis and that
    # they all get assigned to the local scheduler.
    num_tasks_done = 0
    num_retries = 10
    while num_retries > 0:
        task_entries = self.state.task_table()
        self.assertLessEqual(len(task_entries), num_tasks)
        # Statuses are only inspected once all tasks made it to Redis.
        if len(task_entries) == num_tasks:
            task_statuses = []
            for task_entry in task_entries.values():
                task_statuses.append(task_entry["State"])

            allowed_statuses = [
                state.TASK_STATUS_WAITING,
                state.TASK_STATUS_SCHEDULED,
                state.TASK_STATUS_QUEUED,
            ]
            every_status_allowed = all(
                status in allowed_statuses for status in task_statuses)
            self.assertTrue(every_status_allowed)

            num_tasks_waiting = task_statuses.count(
                state.TASK_STATUS_WAITING)
            num_tasks_scheduled = task_statuses.count(
                state.TASK_STATUS_SCHEDULED)
            num_tasks_done = task_statuses.count(state.TASK_STATUS_QUEUED)
            summary = ("tasks in Redis = {}, tasks waiting = {}, "
                       "tasks scheduled = {}, "
                       "tasks queued = {}, retries left = {}")
            print(summary.format(len(task_entries), num_tasks_waiting,
                                 num_tasks_scheduled, num_tasks_done,
                                 num_retries))

            # Every status is QUEUED exactly when the QUEUED count covers
            # the whole list; in that case we're done.
            if num_tasks_done == len(task_statuses):
                break
        num_retries -= 1
        time.sleep(0.1)

    self.assertEqual(num_tasks_done, num_tasks)