def _yield_next_available_task_to_dispatch(bot_dimensions, deadline):
  return [
    _task_to_run_to_dict(to_run)
    for _request, to_run in
        task_to_run.yield_next_available_task_to_dispatch(
            bot_dimensions, deadline)
  ]
Example #2
def bot_reap_task(dimensions, bot_id, bot_version, deadline):
    """Reaps a TaskToRun if one is available.

  The process is to find a TaskToRun where its .queue_number is set, then
  create a TaskRunResult for it.

  Returns:
    tuple of (TaskRequest, TaskRunResult) for the task that was reaped.
    The TaskToRun involved is not returned.
  """
    assert bot_id
    q = task_to_run.yield_next_available_task_to_dispatch(dimensions, deadline)
    # When a large number of bots try to reap hundreds of tasks simultaneously,
    # they'll constantly fail to call reap_task_to_run() as they'll get preempted
    # by other bots. So randomly jump farther in the queue when the number of
    # failures is too large.
    failures = 0
    to_skip = 0
    total_skipped = 0
    for request, to_run in q:
        if to_skip:
            to_skip -= 1
            total_skipped += 1
            continue

        run_result = _reap_task(to_run.key, request, bot_id, bot_version,
                                dimensions)
        if not run_result:
            failures += 1
            # Every 3 failures, starting on the very first one, jump randomly
            # ahead of the pack. This reduces the contention where hundreds of
            # bots fight for exactly the same task while there are many ready
            # to be run waiting in the queue.
            if (failures % 3) == 1:
                # TODO(maruel): Choose the curve that makes the most sense.
                # The tricky part is finding a good heuristic to guess the
                # load without much information available in this context.
                # When 'failures' is high, this means a lot of bots are
                # reaping tasks like crazy, which means there is a good flow
                # of tasks going on. On the other hand, skipping too much is
                # useless, so it should have an initial bump but then slow
                # down on skipping.
                to_skip = min(int(round(random.gammavariate(3, 1))), 30)
            continue

        # Try to optimize these values but do not add as formal stats (yet).
        logging.info('failed %d, skipped %d', failures, total_skipped)

        pending_time = run_result.started_ts - request.created_ts
        stats.add_run_entry('run_started',
                            run_result.key,
                            bot_id=bot_id,
                            dimensions=request.properties.dimensions,
                            pending_ms=_secs_to_ms(
                                pending_time.total_seconds()),
                            user=request.user)
        return request, run_result
    if failures:
        logging.info('Chose nothing (failed %d, skipped %d)', failures,
                     total_skipped)
    return None, None
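The random skip-ahead above is easiest to get a feel for by sampling it directly. This short standalone sketch (not part of the Swarming code) tallies the skip values produced by random.gammavariate(3, 1), rounded and capped at 30 exactly as in bot_reap_task(); the draws cluster around the distribution's mean of 3 with a long tail.

import collections
import random


def sample_skips(n=10000):
  # Draw the same capped gammavariate used in bot_reap_task() and tally how
  # far ahead a bot would jump on each backoff.
  counts = collections.Counter(
      min(int(round(random.gammavariate(3, 1))), 30) for _ in range(n))
  for skip, count in sorted(counts.items()):
    print('skip %2d: %5d' % (skip, count))


sample_skips()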
Example #3
def bot_reap_task(dimensions, bot_id, bot_version):
    """Reaps a TaskToRun if one is available.

  The process is to find a TaskToRun where its .queue_number is set, then
  create a TaskRunResult for it.

  Returns:
    tuple of (TaskRequest, TaskRunResult) for the task that was reaped.
    The TaskToRun involved is not returned.
  """
    assert bot_id
    q = task_to_run.yield_next_available_task_to_dispatch(dimensions)
    # When a large number of bots try to reap hundreds of tasks simultaneously,
    # they'll constantly fail to call reap_task_to_run() as they'll get preempted
    # by other bots. So randomly jump farther in the queue when the number of
    # failures is too large.
    failures = 0
    to_skip = 0
    total_skipped = 0
    for request, to_run in q:
        if to_skip:
            to_skip -= 1
            total_skipped += 1
            continue

        run_result = _reap_task(to_run.key, request, bot_id, bot_version, dimensions)
        if not run_result:
            failures += 1
            # Every 3 failures, starting on the very first one, jump randomly
            # ahead of the pack. This reduces the contention where hundreds of
            # bots fight for exactly the same task while there are many ready
            # to be run waiting in the queue.
            if (failures % 3) == 1:
                # TODO(maruel): Choose the curve that makes the most sense.
                # The tricky part is finding a good heuristic to guess the
                # load without much information available in this context.
                # When 'failures' is high, this means a lot of bots are
                # reaping tasks like crazy, which means there is a good flow
                # of tasks going on. On the other hand, skipping too much is
                # useless, so it should have an initial bump but then slow
                # down on skipping.
                to_skip = min(int(round(random.gammavariate(3, 1))), 30)
            continue

        # Try to optimize these values but do not add as formal stats (yet).
        logging.info("failed %d, skipped %d", failures, total_skipped)

        pending_time = run_result.started_ts - request.created_ts
        stats.add_run_entry(
            "run_started",
            run_result.key,
            bot_id=bot_id,
            dimensions=request.properties.dimensions,
            pending_ms=_secs_to_ms(pending_time.total_seconds()),
            user=request.user,
        )
        return request, run_result
    if failures:
        logging.info("Chose nothing (failed %d, skipped %d)", failures, total_skipped)
    return None, None
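The pending_ms value recorded by stats.add_run_entry() above is the queueing latency: the delta between the request's creation and the moment the run started. A minimal sketch with hypothetical timestamps, assuming _secs_to_ms() is a plain seconds-to-milliseconds conversion:

import datetime

created_ts = datetime.datetime(2014, 1, 2, 3, 4, 5)
started_ts = datetime.datetime(2014, 1, 2, 3, 4, 7, 500000)
pending_time = started_ts - created_ts
# Assumed equivalent of _secs_to_ms(pending_time.total_seconds()).
pending_ms = int(round(pending_time.total_seconds() * 1000))
print(pending_ms)  # 2500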
Example #4
def _yield_next_available_task_to_dispatch(bot_dimensions):
  bot_id = bot_dimensions[u'id'][0]
  bot_management.bot_event(
      'bot_connected', bot_id, '1.2.3.4', 'joe@localhost',
      bot_dimensions, {'state': 'real'}, '1234', False, None, None, None)
  bot_root_key = bot_management.get_root_key(bot_id)
  task_queues.assert_bot_async(bot_root_key, bot_dimensions).get_result()
  return [
    to_run.to_dict()
    for _request, to_run in
    task_to_run.yield_next_available_task_to_dispatch(bot_dimensions)
  ]
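The helper above pulls the bot id out of bot_dimensions[u'id'][0] and passes the full dictionary to bot_event(), task_queues.assert_bot_async() and yield_next_available_task_to_dispatch(). A hypothetical dimensions value, for illustration only (every key other than u'id' is made up):

bot_dimensions = {
  u'id': [u'bot1'],
  u'os': [u'Ubuntu-16.04'],
  u'pool': [u'default'],
}
print(bot_dimensions[u'id'][0])  # 'bot1', the value used as bot_id above.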
Example #5
def bot_reap_task(bot_dimensions, bot_version, deadline):
  """Reaps a TaskToRun if one is available.

  The process is to find a TaskToRun where its .queue_number is set, then
  create a TaskRunResult for it.

  Returns:
    tuple of (TaskRequest, SecretBytes, TaskRunResult) for the task that was
    reaped. The TaskToRun involved is not returned.
  """
  start = time.time()
  bot_id = bot_dimensions[u'id'][0]
  iterated = 0
  reenqueued = 0
  expired = 0
  failures = 0
  stale_index = 0
  try:
    q = task_to_run.yield_next_available_task_to_dispatch(
        bot_dimensions, deadline)
    for request, to_run in q:
      iterated += 1
      if request.expiration_ts < utils.utcnow():
        s, r = _expire_task(to_run.key, request)
        if r:
          # Expiring a TaskToRun for a TaskSlice may reenqueue a new TaskToRun.
          # It'll be processed accordingly but not handled here.
          reenqueued += 1
        elif s:
          expired += 1
        else:
          stale_index += 1
        continue
      run_result, secret_bytes = _reap_task(
          bot_dimensions, bot_version, to_run.key, request)
      if not run_result:
        failures += 1
        # The sad thing is that there is no way here to know the try number.
        logging.info(
            'failed to reap: %s0',
            task_pack.pack_request_key(to_run.request_key))
        continue

      logging.info('Reaped: %s', run_result.task_id)
      return request, secret_bytes, run_result
    return None, None, None
  finally:
    logging.debug(
        'bot_reap_task(%s) in %.3fs: %d iterated, %d reenqueued, %d expired, '
        '%d stale_index, %d failures',
        bot_id, time.time()-start, iterated, reenqueued, expired, stale_index,
        failures)
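This variant wraps the whole scan in try/finally so the per-call counters are logged even when the function returns from inside the loop. A stripped-down sketch of the same pattern (hypothetical, not Swarming code):

import logging
import time


def scan(items):
  start = time.time()
  iterated = 0
  try:
    for item in items:
      iterated += 1
      if item:
        # Returning early still runs the finally block below.
        return item
    return None
  finally:
    logging.debug('scan() in %.3fs: %d iterated', time.time() - start, iterated)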
Example #6
def _yield_next_available_task_to_dispatch(bot_dimensions):
    return [
        _task_to_run_to_dict(to_run) for _request, to_run in
        task_to_run.yield_next_available_task_to_dispatch(bot_dimensions)
    ]