Ejemplo n.º 1
0
    def test_cron_tidy_stale(self):
        # cron_tidy_stale() must leave entities alone until they are strictly
        # past their computed expiration, then evict everything.
        start = datetime.datetime(2010, 1, 2, 3, 4, 5)
        self.mock_now(start)
        self.assertEqual(0, _assert_bot())
        req = self._assert_task()
        lifespan = (req.expiration_ts - req.created_ts) + task_queues._ADVANCE

        def check(expected_count, expected_queues):
            # Both dimension kinds and the queue list move in lockstep here.
            self.assert_count(expected_count, task_queues.BotTaskDimensions)
            self.assert_count(expected_count, task_queues.TaskDimensions)
            self.assertEqual(expected_queues, task_queues.get_queues(u'bot1'))

        check(1, [2980491642])

        # Nothing is stale yet: the cron is a no-op.
        task_queues.cron_tidy_stale()
        check(1, [2980491642])

        # One second before expiration: still kept.
        self.mock_now(start, lifespan.total_seconds())
        task_queues.cron_tidy_stale()
        check(1, [2980491642])

        # One second past expiration: everything is tidied away.
        self.mock_now(start, lifespan.total_seconds() + 1)
        task_queues.cron_tidy_stale()
        check(0, [])
Ejemplo n.º 2
0
    def test_assert_task_async_call_rebuld_task_cache_async(self):
        # NOTE: "rebuld" is a pre-existing typo in the name, kept so external
        # references to this test stay valid.
        self.assertEqual(0, _assert_bot())
        dims = {
            u'id': [u'bot1'],
        }
        self.mock_now(datetime.datetime(2020, 1, 2, 3, 4, 5))
        first = _gen_request(properties=_gen_properties(dimensions=dims))
        task_queues.assert_task_async(first).get_result()
        for model in (task_queues.BotDimensions,
                      task_queues.BotTaskDimensions,
                      task_queues.TaskDimensions):
            self.assert_count(1, model)
        bot_root_key = bot_management.get_root_key('bot1')
        self.assertEqual(1, len(task_queues.get_queues(bot_root_key)))

        # Advance the clock one second past valid_until_ts so the cached
        # BotTaskDimensions entry counts as expired.
        memcache.flush_all()
        stale = task_queues.BotTaskDimensions.query(
            ancestor=bot_root_key).fetch()[0]
        self.mock_now(stale.valid_until_ts + datetime.timedelta(seconds=1))
        self.assertEqual(0, len(task_queues.get_queues(bot_root_key)))

        # A fresh request with identical dimensions repopulates the cache.
        memcache.flush_all()
        second = _gen_request(properties=_gen_properties(dimensions=dims))
        task_queues.assert_task_async(second).get_result()
        for model in (task_queues.BotDimensions,
                      task_queues.BotTaskDimensions,
                      task_queues.TaskDimensions):
            self.assert_count(1, model)
        self.assertEqual(1, len(task_queues.get_queues(bot_root_key)))
Ejemplo n.º 3
0
    def test_assert_bot_dimensions_changed(self):
        # A bot changing its dimensions must not extend the life of its stale
        # BotTaskDimensions; they expire on the original schedule.
        start = datetime.datetime(2010, 1, 2, 3, 4, 5)
        self.mock_now(start)
        req = self._assert_task()
        lifespan = (req.expiration_ts - req.created_ts) + task_queues._ADVANCE
        self.assertEqual(1, _assert_bot())

        def check(expected_queues):
            self.assert_count(1, task_queues.BotTaskDimensions)
            self.assert_count(1, task_queues.TaskDimensions)
            self.assertEqual(expected_queues, task_queues.get_queues(u'bot1'))

        # One hour later, the bot changes dimensions.
        self.mock_now(start, task_queues._ADVANCE.total_seconds())
        self.assertEqual(1, _assert_bot({u'gpu': u'Matrox'}))
        check([2980491642])

        # One second before expiration: still present.
        self.mock_now(start, lifespan.total_seconds())
        self.assertEqual(None, _assert_bot({u'gpu': u'Matrox'}))
        check([2980491642])

        # TaskDimension expired. The dimension change an hour in did not
        # impact BotTaskDimensions expiration.
        self.mock_now(start, lifespan.total_seconds() + 1)
        self.assertEqual(0, _assert_bot())
        check([])
Ejemplo n.º 4
0
    def test_or_dimensions_new_tasks(self):
        # Bots register first; tasks with OR-ed dimensions show up afterwards.
        self.mock_now(datetime.datetime(2020, 1, 2, 3, 4, 5))
        self.assertEqual(
            0,
            _assert_bot(bot_id=u'bot1',
                        dimensions={
                            u'os': [u'v1', u'v2'],
                            u'gpu': [u'nv'],
                        }))
        self.assertEqual(
            0,
            _assert_bot(bot_id=u'bot2',
                        dimensions={
                            u'os': [u'v2'],
                            u'gpu': [u'amd'],
                        }))

        payloads = self._mock_enqueue_task_async_for_rebuild_task_cache()

        def flush_single_payload():
            # Exactly one rebuild task must have been enqueued; run it.
            self.assertEqual(1, len(payloads))
            fut = task_queues.rebuild_task_cache_async(payloads.pop())
            self.assertEqual(True, fut.get_result())

        request1 = _gen_request(properties=_gen_properties(
            dimensions={
                u'pool': [u'default'],
                u'os': [u'v1|v2'],
                u'gpu': [u'nv|amd'],
            }))
        task_queues.assert_task_async(request1).get_result()
        flush_single_payload()

        # |request1| is compatible with both bots.
        self.assert_count(2, task_queues.BotDimensions)
        self.assert_count(2, task_queues.BotTaskDimensions)
        self.assert_count(1, task_queues.TaskDimensions)
        self.assertEqual(4, len(task_queues.TaskDimensions.query().get().sets))
        bot1_root_key = bot_management.get_root_key(u'bot1')
        bot2_root_key = bot_management.get_root_key(u'bot2')
        self.assertEqual(1, len(task_queues.get_queues(bot1_root_key)))
        self.assertEqual(1, len(task_queues.get_queues(bot2_root_key)))

        request2 = _gen_request(properties=_gen_properties(
            dimensions={
                u'pool': [u'default'],
                u'os': [u'v1'],
                u'gpu': [u'nv|amd'],
            }))
        task_queues.assert_task_async(request2).get_result()
        flush_single_payload()

        # Only bot1 matches |request2| since bot2 lacks os:v1.
        self.assert_count(3, task_queues.BotTaskDimensions)
        self.assert_count(2, task_queues.TaskDimensions)
        self.assertEqual(2, len(task_queues.get_queues(bot1_root_key)))
        self.assertEqual(1, len(task_queues.get_queues(bot2_root_key)))
Ejemplo n.º 5
0
 def test_assert_task_then_bot(self):
     # Registering the task first, then the bot, still links them together.
     self._assert_task()
     self.assertEqual(1, _assert_bot())
     for model in (task_queues.BotDimensions, task_queues.BotTaskDimensions,
                   task_queues.TaskDimensions):
         self.assert_count(1, model)
     self.assertEqual([2980491642], task_queues.get_queues(u'bot1'))
Ejemplo n.º 6
0
    def test_probably_has_capacity_get_queues(self):
        dims = {u'pool': [u'default'], u'os': [u'Ubuntu-16.04']}
        # Capacity is only registered once a request has been enqueued AND
        # get_queues() has been called for the bot.
        _assert_bot()
        req = _gen_request(properties=_gen_properties(dimensions=dims))
        task_queues.assert_task_async(req).get_result()
        self.assertEqual(1, self.execute_tasks())
        # Not registered yet: get_queues() hasn't run.
        self.assertEqual(None, task_queues.probably_has_capacity(dims))

        # Calling get_queues() primes the capacity entry in memcache.
        bot_root_key = bot_management.get_root_key(u'bot1')
        task_queues.get_queues(bot_root_key)
        self.assertEqual(True, task_queues.probably_has_capacity(dims))
        self.assertEqual([1843498234],
                         memcache.get('bot1', namespace='task_queues'))
Ejemplo n.º 7
0
 def test_assert_task_async_then_bot(self):
     # The task arrives first; the bot registering later still picks up the
     # matching queue.
     self._assert_task()
     self.assertEqual(1, _assert_bot())
     for model in (task_queues.BotDimensions, task_queues.BotTaskDimensions,
                   task_queues.TaskDimensions):
         self.assert_count(1, model)
     root_key = bot_management.get_root_key(u'bot1')
     self.assertEqual([2980491642], task_queues.get_queues(root_key))
Ejemplo n.º 8
0
    def test_cron_tidy_stale(self):
        # BotTaskDimensions is evicted as soon as it expires, while
        # TaskDimensions lingers for _KEEP_DEAD before being deleted.
        start = datetime.datetime(2010, 1, 2, 3, 4, 5)
        self.mock_now(start)
        self.assertEqual(0, _assert_bot())
        req = self._assert_task()
        lifespan = (req.expiration_ts - req.created_ts +
                    task_queues._EXTEND_VALIDITY)
        bot_root_key = bot_management.get_root_key(u'bot1')

        def check(bot_dims, task_dims, queues):
            self.assert_count(bot_dims, task_queues.BotTaskDimensions)
            self.assert_count(task_dims, task_queues.TaskDimensions)
            self.assertEqual(queues, task_queues.get_queues(bot_root_key))

        check(1, 1, [2980491642])

        # Nothing expired yet: the cron is a no-op.
        task_queues.cron_tidy_stale()
        check(1, 1, [2980491642])

        # TaskDimension expired but is still kept; get_queues() no longer
        # returns it even though it remains in the DB. BotTaskDimensions was
        # evicted.
        self.mock_now(start, lifespan.total_seconds() + 1)
        task_queues.cron_tidy_stale()
        check(0, 1, [])

        # Exactly at the end of the _KEEP_DEAD grace period: still kept.
        self.mock_now(start, (lifespan + task_queues._KEEP_DEAD).total_seconds())
        task_queues.cron_tidy_stale()
        check(0, 1, [])

        # One second past _KEEP_DEAD: TaskDimensions is finally deleted.
        self.mock_now(start,
                      (lifespan + task_queues._KEEP_DEAD).total_seconds() + 1)
        task_queues.cron_tidy_stale()
        check(0, 0, [])
Ejemplo n.º 9
0
    def test_or_dimensions_same_hash(self):
        # Two requests whose OR-ed dimension values differ only in order must
        # hash identically and collapse into one TaskDimensions entity.
        self.mock_now(datetime.datetime(2020, 1, 2, 3, 4, 5))
        for bot_id, os_value in ((u'bot1', u'v1'), (u'bot2', u'v2'),
                                 (u'bot3', u'v3')):
            self.assertEqual(
                0, _assert_bot(bot_id=bot_id, dimensions={u'os': [os_value]}))

        payloads = self._mock_enqueue_task_async_for_rebuild_task_cache()
        # Both requests should have the same dimension_hash.
        request1 = _gen_request(properties=_gen_properties(dimensions={
            u'pool': [u'default'],
            u'os': [u'v1|v2|v3'],
        }))
        request2 = _gen_request(properties=_gen_properties(dimensions={
            u'pool': [u'default'],
            u'os': [u'v3|v2|v1'],
        }))
        task_queues.assert_task_async(request1).get_result()
        task_queues.assert_task_async(request2).get_result()
        self.assertEqual(2, len(payloads))
        while payloads:
            fut = task_queues.rebuild_task_cache_async(payloads.pop())
            self.assertEqual(True, fut.get_result())

        self.assert_count(3, task_queues.BotDimensions)
        self.assert_count(3, task_queues.BotTaskDimensions)
        self.assert_count(1, task_queues.TaskDimensions)
        self.assertEqual(3, len(task_queues.TaskDimensions.query().get().sets))
        for bot_id in (u'bot1', u'bot2', u'bot3'):
            root_key = bot_management.get_root_key(bot_id)
            self.assertEqual(1, len(task_queues.get_queues(root_key)))
Ejemplo n.º 10
0
    def post(self, task_id=None):
        """Handles a task update reported by a bot.

        Parses and validates the JSON body, reconstructs performance stats,
        CIPD info and task output from the payload, refreshes the bot's task
        queues, and forwards the update to task_scheduler.bot_update_task().
        Responds with {'must_stop': bool, 'ok': True}.

        Aborts with HTTP 400 on malformed input and HTTP 500 on transient
        update failure (so the bot retries).
        """
        # Unlike handshake and poll, we do not accept invalid keys here. This code
        # path is much more strict.
        request = self.parse_body()
        msg = log_unexpected_subset_keys(self.ACCEPTED_KEYS,
                                         self.REQUIRED_KEYS, request,
                                         self.request, 'bot', 'keys')
        if msg:
            self.abort_with_error(400, error=msg)

        # NOTE(review): the task_id argument from the route is overwritten by
        # the value in the request body; the URL value is effectively ignored.
        bot_id = request['id']
        task_id = request['task_id']

        # Make sure bot self-reported ID matches the authentication token. Raises
        # auth.AuthorizationError if not.
        bot_auth.validate_bot_id_and_fetch_config(bot_id)

        # All of these payload fields are optional; absent keys become None
        # (cost_usd defaults to 0).
        bot_overhead = request.get('bot_overhead')
        cipd_pins = request.get('cipd_pins')
        cipd_stats = request.get('cipd_stats')
        cost_usd = request.get('cost_usd', 0)
        duration = request.get('duration')
        exit_code = request.get('exit_code')
        hard_timeout = request.get('hard_timeout')
        io_timeout = request.get('io_timeout')
        isolated_stats = request.get('isolated_stats')
        output = request.get('output')
        output_chunk_start = request.get('output_chunk_start')
        outputs_ref = request.get('outputs_ref')

        # Stats are meaningless without the bot_overhead they belong to.
        if (isolated_stats or cipd_stats) and bot_overhead is None:
            ereporter2.log_request(request=self.request,
                                   source='server',
                                   category='task_failure',
                                   message='Failed to update task: %s' %
                                   task_id)
            self.abort_with_error(
                400,
                error=
                'isolated_stats and cipd_stats require bot_overhead to be set'
                '\nbot_overhead: %s\nisolate_stats: %s' %
                (bot_overhead, isolated_stats))

        run_result_key = task_pack.unpack_run_result_key(task_id)
        # Convert the raw stats dicts into a task_result.PerformanceStats.
        performance_stats = None
        if bot_overhead is not None:
            performance_stats = task_result.PerformanceStats(
                bot_overhead=bot_overhead)
            if isolated_stats:
                download = isolated_stats.get('download') or {}
                upload = isolated_stats.get('upload') or {}

                def unpack_base64(d, k):
                    # Returns the decoded bytes, or None when the key is
                    # absent or empty.
                    x = d.get(k)
                    if x:
                        return base64.b64decode(x)

                performance_stats.isolated_download = task_result.OperationStats(
                    duration=download.get('duration'),
                    initial_number_items=download.get('initial_number_items'),
                    initial_size=download.get('initial_size'),
                    items_cold=unpack_base64(download, 'items_cold'),
                    items_hot=unpack_base64(download, 'items_hot'))
                performance_stats.isolated_upload = task_result.OperationStats(
                    duration=upload.get('duration'),
                    items_cold=unpack_base64(upload, 'items_cold'),
                    items_hot=unpack_base64(upload, 'items_hot'))
            if cipd_stats:
                performance_stats.package_installation = task_result.OperationStats(
                    duration=cipd_stats.get('duration'))

        # Task output is transported as base64; decode it best-effort.
        if output is not None:
            try:
                output = base64.b64decode(output)
            except UnicodeEncodeError as e:
                logging.error('Failed to decode output\n%s\n%r', e, output)
                output = output.encode('ascii', 'replace')
            except TypeError as e:
                # Save the output as-is instead. The error will be logged in ereporter2
                # and returning a HTTP 500 would only force the bot to stay in a retry
                # loop.
                logging.error('Failed to decode output\n%s\n%r', e, output)
        if outputs_ref:
            outputs_ref = task_request.FilesRef(**outputs_ref)

        # Rehydrate the reported CIPD packages into datastore entities.
        if cipd_pins:
            cipd_pins = task_result.CipdPins(
                client_package=task_request.CipdPackage(
                    **cipd_pins['client_package']),
                packages=[
                    task_request.CipdPackage(**args)
                    for args in cipd_pins['packages']
                ])

        # Tell the task queues management engine that the bot is still alive, and
        # it shall refresh the task queues.
        bot_root_key = bot_management.get_root_key(bot_id)
        task_queues.get_queues(bot_root_key)

        try:
            state = task_scheduler.bot_update_task(
                run_result_key=run_result_key,
                bot_id=bot_id,
                output=output,
                output_chunk_start=output_chunk_start,
                exit_code=exit_code,
                duration=duration,
                hard_timeout=hard_timeout,
                io_timeout=io_timeout,
                cost_usd=cost_usd,
                outputs_ref=outputs_ref,
                cipd_pins=cipd_pins,
                performance_stats=performance_stats)
            if not state:
                # Transient failure: 500 makes the bot retry the update.
                logging.info('Failed to update, please retry')
                self.abort_with_error(500,
                                      error='Failed to update, please retry')

            # Map the resulting task state to a bot event type.
            if state in (task_result.State.COMPLETED,
                         task_result.State.TIMED_OUT):
                action = 'task_completed'
            elif state == task_result.State.KILLED:
                action = 'task_killed'
            else:
                assert state in (task_result.State.BOT_DIED,
                                 task_result.State.RUNNING), state
                action = 'task_update'
            bot_management.bot_event(
                event_type=action,
                bot_id=bot_id,
                external_ip=self.request.remote_addr,
                authenticated_as=auth.get_peer_identity().to_bytes(),
                dimensions=None,
                state=None,
                version=None,
                quarantined=None,
                maintenance_msg=None,
                task_id=task_id,
                task_name=None)
        except ValueError as e:
            # Bad input detected deep in the update path: report and 400.
            ereporter2.log_request(request=self.request,
                                   source='server',
                                   category='task_failure',
                                   message='Failed to update task: %s' % e)
            self.abort_with_error(400, error=str(e))
        except webob.exc.HTTPException:
            # Re-raise aborts (e.g. the 500 above) untouched.
            raise
        except Exception as e:
            logging.exception('Internal error: %s', e)
            self.abort_with_error(500, error=str(e))
        # - BOT_DIED will occur when the following conditions are true:
        #   - The bot polled correctly, but then stopped updating for at least
        #     task_result.BOT_PING_TOLERANCE. (It can occur if the host went to
        #     sleep, or the OS was overwhelmed).
        #   - /internal/cron/abort_bot_died runs, detects the bot is MIA, kills the
        #     task.
        #   - Bot wakes up, starts sending updates again.
        # - KILLED is when the client uses the kill API to forcibly stop a running
        #   task.
        must_stop = state in (task_result.State.BOT_DIED,
                              task_result.State.KILLED)
        if must_stop:
            logging.info('asking bot to kill the task')
        self.send_response({'must_stop': must_stop, 'ok': True})
Ejemplo n.º 11
0
    def test_rebuild_task_cache_async(self):
        # Exercises expiration handling in rebuild_task_cache_async().
        start = datetime.datetime(2010, 1, 2, 3, 4, 5)
        self.mock_now(start)

        # Several bots are needed so _yield_BotTaskDimensions_keys() returns
        # multiple BotTaskDimensions ndb.Key, confirming the inner loops work.
        for bot_id in (u'bot1', u'bot2', u'bot3'):
            _assert_bot(bot_id=bot_id)
        bot_root_key = bot_management.get_root_key(u'bot1')
        self.assertEqual(0, task_queues.BotTaskDimensions.query().count())
        self.assertEqual(0, task_queues.TaskDimensions.query().count())

        # Intentionally throttle the number of concurrent RPCs; otherwise the
        # inner loops wouldn't be reached with fewer than 50 bots, and running
        # with 50 bots would make the unit test slow.
        self.mock(task_queues, '_CAP_FUTURES_LIMIT', 1)

        payloads = self._mock_enqueue_task_async_for_rebuild_task_cache()

        def rebuild(payload):
            fut = task_queues.rebuild_task_cache_async(payload)
            self.assertEqual(True, fut.get_result())

        # Equivalent of self._assert_task(tasks=1) except that the payload is
        # snapshotted. Multiple task queues are triggered to go deeper in the
        # code.
        request_1 = _gen_request(properties=_gen_properties(dimensions={
            u'cpu': [u'x86-64'],
            u'pool': [u'default'],
        }))
        task_queues.assert_task_async(request_1).get_result()
        self.assertEqual(1, len(payloads))
        rebuild(payloads[-1])
        self.assertEqual(3, task_queues.BotTaskDimensions.query().count())
        self.assertEqual(1, task_queues.TaskDimensions.query().count())
        self.assertEqual(60, request_1.expiration_secs)
        expected = start + task_queues._EXTEND_VALIDITY + datetime.timedelta(
            seconds=request_1.expiration_secs)
        self.assertEqual(
            expected,
            task_queues.TaskDimensions.query().get().valid_until_ts)

        request_2 = _gen_request(properties=_gen_properties(
            dimensions={
                u'os': [u'Ubuntu-16.04'],
                u'pool': [u'default'],
            }))
        task_queues.assert_task_async(request_2).get_result()
        self.assertEqual(2, len(payloads))
        rebuild(payloads[-1])
        self.assertEqual(6, task_queues.BotTaskDimensions.query().count())
        self.assertEqual(2, task_queues.TaskDimensions.query().count())
        self.assertEqual([227177418, 1843498234],
                         task_queues.get_queues(bot_root_key))
        memcache.flush_all()
        self.assertEqual([227177418, 1843498234],
                         task_queues.get_queues(bot_root_key))

        # Expire the first TaskDimensions and rebuild: the entities survive
        # (still within _KEEP_DEAD) but get_queues() stops returning the hash
        # once memcache is flushed.
        offset = (task_queues._EXTEND_VALIDITY + datetime.timedelta(
            seconds=request_1.expiration_secs)).total_seconds() + 1
        self.mock_now(start, offset)
        rebuild(payloads[0])
        self.assertEqual(6, task_queues.BotTaskDimensions.query().count())
        self.assertEqual(2, task_queues.TaskDimensions.query().count())
        self.assertEqual([227177418, 1843498234],
                         task_queues.get_queues(bot_root_key))
        # Observe the effect of memcache. See comment in get_queues().
        memcache.flush_all()
        self.assertEqual([], task_queues.get_queues(bot_root_key))

        # Re-running still does not delete TaskDimensions: they are kept until
        # _KEEP_DEAD.
        rebuild(payloads[1])
        self.assertEqual(6, task_queues.BotTaskDimensions.query().count())
        self.assertEqual(2, task_queues.TaskDimensions.query().count())
        self.assertEqual([], task_queues.get_queues(bot_root_key))

        # Move past _KEEP_DEAD: the stale TaskDimensions is finally dropped.
        offset = (task_queues._EXTEND_VALIDITY + task_queues._KEEP_DEAD +
                  datetime.timedelta(seconds=request_1.expiration_secs)
                  ).total_seconds() + 1
        self.mock_now(start, offset)
        self.assertEqual([], task_queues.get_queues(bot_root_key))
        rebuild(payloads[0])
        self.assertEqual(6, task_queues.BotTaskDimensions.query().count())
        self.assertEqual(1, task_queues.TaskDimensions.query().count())
        self.assertEqual([], task_queues.get_queues(bot_root_key))
Ejemplo n.º 12
0
def _yield_potential_tasks(bot_id):
    """Queries all the known task queues in parallel and yields the task in order
  of priority.

  The ordering is opportunistic, not strict. There's a risk of not returning
  exactly in the priority order depending on index staleness and query execution
  latency. The number of queries is unbounded.

  Yields:
    TaskToRun entities, trying to yield the highest priority one first. To have
    finite execution time, starts yielding results once one of these conditions
    are met:
    - 1 second elapsed; in this case, continue iterating in the background
    - First page of every query returned
    - All queries exhausted
  """
    potential_dimensions_hashes = task_queues.get_queues(bot_id)
    # Note that the default ndb.EVENTUAL_CONSISTENCY is used so stale items may be
    # returned. It's handled specifically by consumers of this function.
    start = time.time()
    # One query and one page-yielder (pages of up to 10 entities) per queue.
    queries = [_get_task_to_run_query(d) for d in potential_dimensions_hashes]
    yielders = [_yield_pages_async(q, 10) for q in queries]
    # We do care about the first page of each query so we cannot merge all the
    # results of every query insensibly.
    futures = []

    try:
        # Kick off the first page of every query; an exhausted yielder maps to
        # None.
        for y in yielders:
            futures.append(next(y, None))

        # Drive the ndb event loop by hand for up to 1 second total, or until
        # the first page of every query has arrived.
        while (time.time() - start) < 1 and not all(f.done()
                                                    for f in futures if f):
            r = ndb.eventloop.run0()
            if r is None:
                # Nothing left to run: all pending RPCs completed.
                break
            time.sleep(r)
        logging.debug(
            '_yield_potential_tasks(%s): waited %.3fs for %d items from %d Futures',
            bot_id,
            time.time() - start,
            sum(len(f.get_result()) for f in futures if f.done()),
            len(futures))
        # items is a list of TaskToRun. The entities are needed because property
        # queue_number is used to sort according to each task's priority.
        items = []
        for i, f in enumerate(futures):
            if f and f.done():
                # The ndb.Future returns a list of up to 10 TaskToRun entities.
                r = f.get_result()
                if r:
                    # The ndb.Query ask for a valid queue_number but under load, it
                    # happens the value is not valid anymore.
                    # (The generator expression's 'i' has its own scope, so the
                    # enumerate index used right below is not clobbered.)
                    items.extend(i for i in r if i.queue_number)
                    # Prime the next page, in case.
                    futures[i] = next(yielders[i], None)

        # That's going to be our search space for now.
        items.sort(key=_queue_number_fifo_priority)

        # It is possible that there is no items yet, in case all futures are taking
        # more than 1 second.
        # It is possible that all futures are done if every queue has less than 10
        # task pending.
        while any(futures) or items:
            if items:
                # Yield the current best candidate, then fold in any pages
                # that completed in the meantime.
                yield items[0]
                items = items[1:]
            else:
                # Let activity happen.
                ndb.eventloop.run1()
            changed = False
            for i, f in enumerate(futures):
                if f and f.done():
                    # See loop above for explanation.
                    items.extend(i for i in f.get_result() if i.queue_number)
                    futures[i] = next(yielders[i], None)
                    changed = True
            if changed:
                items.sort(key=_queue_number_fifo_priority)
    except apiproxy_errors.DeadlineExceededError as e:
        # This is normally due to: "The API call datastore_v3.RunQuery() took too
        # long to respond and was cancelled."
        # At that point, the Cloud DB index is not able to sustain the load. So the
        # best thing to do is to back off a bit and not return any task to the bot
        # for this poll.
        # The generator simply stops here: the bot gets no task this poll.
        logging.error(
            'Failed to yield a task due to an RPC timeout. Returning no '
            'task to the bot: %s', e)