Beispiel #1
0
 def test_task_default_resources(self):
   """Check the resource list reported for default and explicit requests."""
   # No resource list is supplied here, so [1.0, 0.0] is expected back.
   default_task = local_scheduler.Task(
       random_driver_id(), random_function_id(), [random_object_id()], 0,
       random_task_id(), 0)
   self.assertEqual(default_task.required_resources(), [1.0, 0.0])

   # A resource list passed to the constructor must be reported unchanged.
   custom_task = local_scheduler.Task(
       random_driver_id(), random_function_id(), [random_object_id()], 0,
       random_task_id(), 0, local_scheduler.ObjectID(NIL_ACTOR_ID), 0,
       [1.0, 2.0])
   self.assertEqual(custom_task.required_resources(), [1.0, 2.0])
Beispiel #2
0
    def test_submit_and_get_task(self):
        """Submit tasks with many argument shapes and fetch them back.

        In the first phase each task is submitted and retrieved right away,
        and the function ID, the arguments and the number of return values
        are compared. In the second phase all tasks are submitted up front
        and then fetched one by one without further checks.
        """
        function_id = random_function_id()
        object_ids = [random_object_id() for _ in range(256)]
        # Create and seal the objects in the object store so that all of the
        # tasks below can be scheduled.
        for oid in object_ids:
            self.plasma_client.create(pa.plasma.ObjectID(oid.id()), 0)
            self.plasma_client.seal(pa.plasma.ObjectID(oid.id()))

        # Argument lists to exercise: plain values, object IDs and mixtures.
        args_list = [
            [],
            [{}],
            [()],
            1 * [1],
            10 * [1],
            100 * [1],
            1000 * [1],
            1 * ["a"],
            10 * ["a"],
            100 * ["a"],
            1000 * ["a"],
            [1, 1.3, 1 << 100, "hi", u"hi", [1, 2]],
            object_ids[:1],
            object_ids[:2],
            object_ids[:3],
            object_ids[:4],
            object_ids[:5],
            object_ids[:10],
            object_ids[:100],
            object_ids[:256],
            [1, object_ids[0]],
            [object_ids[0], "a"],
            [1, object_ids[0], "a"],
            [object_ids[0], 1, object_ids[1], "a"],
            object_ids[:3] + [1, "hi", 2.3] + object_ids[:5],
            object_ids + 100 * ["a"] + object_ids,
        ]
        return_counts = [0, 1, 2, 3, 5, 10, 100]

        # Phase 1: submit a task, get it back and compare it field by field.
        for args in args_list:
            for num_return_vals in return_counts:
                submitted = local_scheduler.Task(
                    random_driver_id(), function_id, args, num_return_vals,
                    random_task_id(), 0)
                self.local_scheduler_client.submit(submitted)
                fetched = self.local_scheduler_client.get_task()
                self.assertEqual(submitted.function_id().id(),
                                 fetched.function_id().id())
                retrieved_args = fetched.arguments()
                returns = fetched.returns()
                self.assertEqual(len(args), len(retrieved_args))
                self.assertEqual(num_return_vals, len(returns))
                # The lengths were just asserted equal, so pairing is safe.
                for expected, actual in zip(args, retrieved_args):
                    if isinstance(expected, local_scheduler.ObjectID):
                        # Object IDs are compared through their id() values.
                        self.assertEqual(expected.id(), actual.id())
                    else:
                        self.assertEqual(expected, actual)

        # Phase 2: submit all of the tasks first ...
        for args in args_list:
            for num_return_vals in return_counts:
                queued = local_scheduler.Task(
                    random_driver_id(), function_id, args, num_return_vals,
                    random_task_id(), 0)
                self.local_scheduler_client.submit(queued)
        # ... and then get one task back per submission.
        for _ in range(len(args_list) * len(return_counts)):
            fetched = self.local_scheduler_client.get_task()
Beispiel #3
0
 def test_task_default_resources(self):
     """Check required_resources() for default and explicit resource maps."""
     # Without a resource mapping the task is expected to report {"CPU": 1}.
     default_task = local_scheduler.Task(random_driver_id(),
                                         random_function_id(),
                                         [random_object_id()], 0,
                                         random_task_id(), 0)
     self.assertEqual(default_task.required_resources(), {"CPU": 1})

     # With the full positional argument list, the trailing resource mapping
     # must be reported back unchanged.
     custom_task = local_scheduler.Task(
         random_driver_id(),
         random_function_id(),
         [random_object_id()],
         0,
         random_task_id(),
         0,
         local_scheduler.ObjectID(NIL_ACTOR_ID),
         local_scheduler.ObjectID(NIL_OBJECT_ID),
         local_scheduler.ObjectID(NIL_ACTOR_ID),
         local_scheduler.ObjectID(NIL_ACTOR_ID),
         0,
         0,
         [],
         {"CPU": 1, "GPU": 2})
     self.assertEqual(custom_task.required_resources(), {"CPU": 1, "GPU": 2})
Beispiel #4
0
 def test_create_and_serialize_task(self):
     """Build tasks with many argument shapes and round-trip them as strings.

     Every task is checked with self.check_task() both directly after
     construction and after a task_to_string() / task_from_string() cycle.
     """
     # TODO(rkn): The function ID should be a FunctionID object, not an
     # ObjectID.
     driver_id = random_driver_id()
     parent_id = random_task_id()
     function_id = random_function_id()
     object_ids = [random_object_id() for _ in range(256)]
     # Argument lists to exercise: plain values, object IDs and mixtures.
     args_list = [
         [],
         1 * [1],
         10 * [1],
         100 * [1],
         1000 * [1],
         1 * ["a"],
         10 * ["a"],
         100 * ["a"],
         1000 * ["a"],
         [1, 1.3, 2, 1 << 100, "hi", u"hi", [1, 2]],
         object_ids[:1],
         object_ids[:2],
         object_ids[:3],
         object_ids[:4],
         object_ids[:5],
         object_ids[:10],
         object_ids[:100],
         object_ids[:256],
         [1, object_ids[0]],
         [object_ids[0], "a"],
         [1, object_ids[0], "a"],
         [object_ids[0], 1, object_ids[1], "a"],
         object_ids[:3] + [1, "hi", 2.3] + object_ids[:5],
         object_ids + 100 * ["a"] + object_ids,
     ]
     return_counts = [0, 1, 2, 3, 5, 10, 100]
     for args in args_list:
         for num_return_vals in return_counts:
             built = local_scheduler.Task(driver_id, function_id, args,
                                          num_return_vals, parent_id, 0)
             self.check_task(built, function_id, num_return_vals, args)
             # Serialize and deserialize, then run the same checks again.
             restored = local_scheduler.task_from_string(
                 local_scheduler.task_to_string(built))
             self.check_task(restored, function_id, num_return_vals, args)
Beispiel #5
0
  def integration_many_tasks_helper(self, timesync=True):
    """Submit 1000 single-dependency tasks and check that all get queued.

    Args:
      timesync: If true, sleep for 10ms after creating each object and before
        submitting the task that depends on it.

    The Redis task entries are polled up to 10 times, 0.1s apart. If not
    every task has reached TASK_STATUS_QUEUED by then, the test fails through
    a regular assertion, so the test framework performs the usual cleanup.
    """
    # There should be three db clients, the global scheduler, the local
    # scheduler, and the plasma manager.
    self.assertEqual(
        len(self.redis_client.keys("{}*".format(DB_CLIENT_PREFIX))),
        2 * NUM_CLUSTER_NODES + 1)
    num_return_vals = [0, 1, 2, 3, 5, 10]

    # Submit a bunch of tasks to Redis.
    num_tasks = 1000
    for _ in range(num_tasks):
      # Create a new object for each task.
      data_size = np.random.randint(1 << 20)
      metadata_size = np.random.randint(1 << 10)
      plasma_client = self.plasma_clients[0]
      object_dep, memory_buffer, metadata = create_object(plasma_client,
                                                          data_size,
                                                          metadata_size,
                                                          seal=True)
      if timesync:
        # Give 10ms for object info handler to fire (long enough to yield CPU).
        time.sleep(0.010)
      task = local_scheduler.Task(random_driver_id(), random_function_id(),
                                  [local_scheduler.ObjectID(object_dep)],
                                  num_return_vals[0], random_task_id(), 0)
      self.local_scheduler_clients[0].submit(task)

    # Check that there are the correct number of tasks in Redis and that they
    # all get assigned to the local scheduler.
    allowed_statuses = [TASK_STATUS_WAITING, TASK_STATUS_SCHEDULED,
                        TASK_STATUS_QUEUED]
    num_retries = 10
    num_tasks_done = 0
    while num_retries > 0:
      task_entries = self.redis_client.keys("{}*".format(TASK_PREFIX))
      self.assertLessEqual(len(task_entries), num_tasks)
      # First, check if all tasks made it to Redis.
      if len(task_entries) == num_tasks:
        task_contents = [self.redis_client.hgetall(entry)
                         for entry in task_entries]
        task_statuses = [int(contents[b"state"]) for contents in task_contents]
        for status in task_statuses:
          self.assertIn(status, allowed_statuses)
        num_tasks_done = task_statuses.count(TASK_STATUS_QUEUED)
        num_tasks_scheduled = task_statuses.count(TASK_STATUS_SCHEDULED)
        num_tasks_waiting = task_statuses.count(TASK_STATUS_WAITING)
        print("tasks in Redis = {}, tasks waiting = {}, tasks scheduled = {}, "
              "tasks queued = {}, retries left = {}"
              .format(len(task_entries), num_tasks_waiting,
                      num_tasks_scheduled, num_tasks_done, num_retries))
        if num_tasks_done == num_tasks:
          # We're done, so pass.
          break
      num_retries -= 1
      time.sleep(0.1)

    # Fail with an assertion instead of calling tearDown() and sys.exit():
    # the framework runs tearDown() itself after a failure, so the manual call
    # caused a second teardown and the exit was reported as an error.
    self.assertEqual(num_tasks_done, num_tasks,
                     "not all tasks reached the queued state")
Beispiel #6
0
    def test_scheduling_when_objects_evicted(self):
        """Check that a task waits until both of its dependencies are present.

        A task with two object dependencies is submitted and a background
        thread blocks in get_task(). The test then creates the first object,
        fills the store with large objects, verifies that the first object
        can no longer be fetched, creates the second object and finally
        recreates the first one. The background thread must stay alive
        through every intermediate step and is joined at the end.
        """
        # Create a task with two dependencies and submit it.
        object_id1 = random_object_id()
        object_id2 = random_object_id()
        task = local_scheduler.Task(random_driver_id(), random_function_id(),
                                    [object_id1, object_id2], 0,
                                    random_task_id(), 0)
        self.local_scheduler_client.submit(task)

        # Launch a thread to get the task.
        def get_task():
            self.local_scheduler_client.get_task()

        t = threading.Thread(target=get_task)
        t.start()

        # Make one of the dependencies available.
        self.plasma_client.create(object_id1.id(), 1)
        self.plasma_client.seal(object_id1.id())
        # Check that the thread is still waiting for a task.
        time.sleep(0.1)
        self.assertTrue(t.is_alive())

        # Force eviction of the first dependency. Five objects, each a quarter
        # of plasma.DEFAULT_PLASMA_STORE_MEMORY, are created and sealed
        # (presumably more than the store can hold at once -- confirm against
        # the configured store size).
        num_objects = 4
        object_size = plasma.DEFAULT_PLASMA_STORE_MEMORY // num_objects
        for i in range(num_objects + 1):
            object_id = random_object_id()
            self.plasma_client.create(object_id.id(), object_size)
            self.plasma_client.seal(object_id.id())
        # Check that the thread is still waiting for a task.
        time.sleep(0.1)
        self.assertTrue(t.is_alive())
        # Check that the first object dependency was evicted: a get with a
        # zero timeout is expected to return [None] for it.
        object1 = self.plasma_client.get([object_id1.id()], timeout_ms=0)
        self.assertEqual(object1, [None])
        # Check that the thread is still waiting for a task.
        time.sleep(0.1)
        self.assertTrue(t.is_alive())

        # Create the second dependency.
        self.plasma_client.create(object_id2.id(), 1)
        self.plasma_client.seal(object_id2.id())
        # Check that the thread is still waiting for a task.
        time.sleep(0.1)
        self.assertTrue(t.is_alive())

        # Create the first dependency again. Both dependencies are now available.
        self.plasma_client.create(object_id1.id(), 1)
        self.plasma_client.seal(object_id1.id())

        # Wait until the thread finishes so that we know the task was scheduled.
        # Note that join() is called without a timeout here.
        t.join()
Beispiel #7
0
    def test_integration_single_task(self):
        """Submit one task with one object dependency and wait until queued.

        The task table is polled up to 10 times, one second apart. The test
        fails through a regular assertion unless the single task reaches
        ``state.TASK_STATUS_QUEUED``.
        """
        # There should be three db clients, the global scheduler, the local
        # scheduler, and the plasma manager.
        self.assertEqual(len(self.state.client_table()[self.node_ip_address]),
                         2 * NUM_CLUSTER_NODES + 1)

        num_return_vals = [0, 1, 2, 3, 5, 10]
        # Create and seal the dependency through the first plasma client.
        data_size = 0xf1f0
        metadata_size = 0x40
        plasma_client = self.plasma_clients[0]
        object_dep, memory_buffer, metadata = create_object(plasma_client,
                                                            data_size,
                                                            metadata_size,
                                                            seal=True)

        # Sleep before submitting task to local scheduler.
        time.sleep(0.1)
        # Submit a task to Redis.
        task = local_scheduler.Task(
            random_driver_id(), random_function_id(),
            [local_scheduler.ObjectID(object_dep.binary())],
            num_return_vals[0], random_task_id(), 0)
        self.local_scheduler_clients[0].submit(task)
        time.sleep(0.1)
        # There should now be a task in Redis, and it should get assigned to
        # the local scheduler.
        allowed_statuses = [
            state.TASK_STATUS_WAITING, state.TASK_STATUS_SCHEDULED,
            state.TASK_STATUS_QUEUED
        ]
        # Bound up front so that the final check is well defined even when
        # the task never shows up in the task table.
        task_status = None
        num_retries = 10
        while num_retries > 0:
            task_entries = self.state.task_table()
            self.assertLessEqual(len(task_entries), 1)
            if len(task_entries) == 1:
                # A separate name keeps the submitted Task object intact.
                _, task_entry = task_entries.popitem()
                task_status = task_entry["State"]
                self.assertIn(task_status, allowed_statuses)
                if task_status == state.TASK_STATUS_QUEUED:
                    break
                print(task_status)
            print("The task has not been scheduled yet, trying again.")
            num_retries -= 1
            time.sleep(1)

        # Fail with an assertion instead of calling tearDown() and
        # sys.exit(): the framework runs tearDown() itself after a failure,
        # so the manual call caused a second teardown.
        self.assertEqual(task_status, state.TASK_STATUS_QUEUED,
                         "failed to submit and schedule a single task")
Beispiel #8
0
    def test_integration_single_task(self):
        """Submit one task with one object dependency and wait until queued.

        The Redis task keys are polled up to 10 times, one second apart. The
        test fails through a regular assertion unless the single task reaches
        ``TASK_STATUS_QUEUED``.
        """
        # There should be three db clients, the global scheduler, the local
        # scheduler, and the plasma manager.
        self.assertEqual(
            len(self.redis_client.keys("{}*".format(DB_CLIENT_PREFIX))),
            2 * NUM_CLUSTER_NODES + 1)

        num_return_vals = [0, 1, 2, 3, 5, 10]
        # Create and seal the dependency through the first plasma client.
        data_size = 0xf1f0
        metadata_size = 0x40
        plasma_client = self.plasma_clients[0]
        object_dep, memory_buffer, metadata = create_object(plasma_client,
                                                            data_size,
                                                            metadata_size,
                                                            seal=True)

        # Sleep before submitting task to local scheduler.
        time.sleep(0.1)
        # Submit a task to Redis.
        task = local_scheduler.Task(random_driver_id(), random_function_id(),
                                    [local_scheduler.ObjectID(object_dep)],
                                    num_return_vals[0], random_task_id(), 0)
        self.local_scheduler_clients[0].submit(task)
        time.sleep(0.1)
        # There should now be a task in Redis, and it should get assigned to
        # the local scheduler.
        allowed_statuses = [
            TASK_STATUS_WAITING, TASK_STATUS_SCHEDULED, TASK_STATUS_QUEUED
        ]
        # Bound up front so that the final check is well defined even when
        # no task key ever shows up in Redis.
        task_status = None
        num_retries = 10
        while num_retries > 0:
            task_entries = self.redis_client.keys("{}*".format(TASK_PREFIX))
            self.assertLessEqual(len(task_entries), 1)
            if len(task_entries) == 1:
                task_contents = self.redis_client.hgetall(task_entries[0])
                task_status = int(task_contents[b"state"])
                self.assertIn(task_status, allowed_statuses)
                if task_status == TASK_STATUS_QUEUED:
                    break
                print(task_status)
            print("The task has not been scheduled yet, trying again.")
            num_retries -= 1
            time.sleep(1)

        # Fail with an assertion instead of calling tearDown() and
        # sys.exit(): the framework runs tearDown() itself after a failure,
        # so the manual call caused a second teardown.
        self.assertEqual(task_status, TASK_STATUS_QUEUED,
                         "failed to submit and schedule a single task")
Beispiel #9
0
  def test_scheduling_when_objects_ready(self):
    """Check that a blocked get_task() returns once the dependency is sealed."""
    # Submit a task whose single argument is an object that is only created
    # in the store further below.
    dependency = random_object_id()
    pending_task = local_scheduler.Task(
        random_driver_id(), random_function_id(), [dependency], 0,
        random_task_id(), 0)
    self.local_scheduler_client.submit(pending_task)

    # Ask for the task from a background thread.
    waiter = threading.Thread(
        target=lambda: self.local_scheduler_client.get_task())
    waiter.start()
    # Sleep to give the thread time to call get_task.
    time.sleep(0.1)

    # Create and seal the object in the object store. This should trigger a
    # scheduling event.
    self.plasma_client.create(dependency.id(), 0)
    self.plasma_client.seal(dependency.id())

    # The thread only finishes after get_task() has returned, so joining it
    # tells us the task was scheduled.
    waiter.join()
Beispiel #10
0
    def integration_many_tasks_helper(self, timesync=True):
        """Submit 1000 single-dependency tasks and check that all get queued.

        Args:
            timesync: If true, sleep for 10ms after creating each object and
                before submitting the task that depends on it.

        The task table is polled up to 10 times, 0.1s apart, and the test
        asserts at the end that every task was seen in the queued state.
        """
        # There should be three db clients, the global scheduler, the local
        # scheduler, and the plasma manager.
        local_clients = self.state.client_table()[self.node_ip_address]
        self.assertEqual(len(local_clients), 2 * NUM_CLUSTER_NODES + 1)
        return_value_counts = [0, 1, 2, 3, 5, 10]

        # Submit a bunch of tasks to Redis, each with its own fresh object.
        num_tasks = 1000
        for _ in range(num_tasks):
            data_size = np.random.randint(1 << 12)
            metadata_size = np.random.randint(1 << 9)
            store_client = self.plasma_clients[0]
            object_dep, data_buffer, object_metadata = create_object(
                store_client, data_size, metadata_size, seal=True)
            if timesync:
                # Give 10ms for object info handler to fire (long enough to
                # yield CPU).
                time.sleep(0.010)
            dependency = local_scheduler.ObjectID(object_dep.binary())
            new_task = local_scheduler.Task(
                random_driver_id(), random_function_id(), [dependency],
                return_value_counts[0], random_task_id(), 0)
            self.local_scheduler_clients[0].submit(new_task)

        # Check that there are the correct number of tasks in Redis and that
        # they all get assigned to the local scheduler.
        num_tasks_done = 0
        for retries_left in range(10, 0, -1):
            task_entries = self.state.task_table()
            self.assertLessEqual(len(task_entries), num_tasks)
            # Statuses are only inspected once all tasks made it to Redis.
            if len(task_entries) == num_tasks:
                task_statuses = [
                    entry["State"] for entry in task_entries.values()
                ]
                known_statuses = (state.TASK_STATUS_WAITING,
                                  state.TASK_STATUS_SCHEDULED,
                                  state.TASK_STATUS_QUEUED)
                self.assertTrue(
                    all(status in known_statuses
                        for status in task_statuses))
                num_tasks_done = task_statuses.count(state.TASK_STATUS_QUEUED)
                num_tasks_scheduled = task_statuses.count(
                    state.TASK_STATUS_SCHEDULED)
                num_tasks_waiting = task_statuses.count(
                    state.TASK_STATUS_WAITING)
                summary = ("tasks in Redis = {}, tasks waiting = {}, "
                           "tasks scheduled = {}, "
                           "tasks queued = {}, retries left = {}")
                print(summary.format(len(task_entries), num_tasks_waiting,
                                     num_tasks_scheduled, num_tasks_done,
                                     retries_left))
                if num_tasks_done == len(task_statuses):
                    # Every status is queued: we're done, so pass.
                    break
            time.sleep(0.1)

        self.assertEqual(num_tasks_done, num_tasks)