def _yield_next_available_task_to_dispatch(bot_dimensions, deadline):
  """Returns the pending TaskToRun items matching |bot_dimensions| as dicts.

  Thin helper over task_to_run.yield_next_available_task_to_dispatch(); the
  paired TaskRequest yielded alongside each TaskToRun is discarded.
  """
  out = []
  pending = task_to_run.yield_next_available_task_to_dispatch(
      bot_dimensions, deadline)
  for _request, to_run in pending:
    out.append(_task_to_run_to_dict(to_run))
  return out
def bot_reap_task(dimensions, bot_id, bot_version, deadline):
  """Reaps a TaskToRun if one is available.

  The process is to find a TaskToRun where its .queue_number is set, then
  create a TaskRunResult for it.

  Returns:
    tuple of (TaskRequest, TaskRunResult) for the task that was reaped.
    The TaskToRun involved is not returned.
  """
  assert bot_id
  pending = task_to_run.yield_next_available_task_to_dispatch(
      dimensions, deadline)
  # When a large number of bots try to reap hundreds of tasks simultaneously,
  # they'll constantly fail to call reap_task_to_run() as they'll get preempted
  # by other bots. So randomly jump farther in the queue when the number of
  # failures is too large.
  failures = 0
  skip_budget = 0
  skipped = 0
  for request, to_run in pending:
    # Burn through the randomly chosen skip budget before attempting a reap.
    if skip_budget:
      skip_budget -= 1
      skipped += 1
      continue

    run_result = _reap_task(
        to_run.key, request, bot_id, bot_version, dimensions)
    if run_result:
      # Success path: record latency stats and hand the task to the bot.
      # Try to optimize these values but do not add as formal stats (yet).
      logging.info('failed %d, skipped %d', failures, skipped)
      pending_time = run_result.started_ts - request.created_ts
      stats.add_run_entry(
          'run_started', run_result.key,
          bot_id=bot_id,
          dimensions=request.properties.dimensions,
          pending_ms=_secs_to_ms(pending_time.total_seconds()),
          user=request.user)
      return request, run_result

    failures += 1
    # Every 3 failures starting on the very first one, jump randomly ahead of
    # the pack. This reduces the contention where hundreds of bots fight for
    # exactly the same task while there's many ready to be run waiting in the
    # queue.
    if (failures % 3) == 1:
      # TODO(maruel): Choose curve that makes the most sense. The tricky part
      # is finding a good heuristic to guess the load without much information
      # available in this content. When 'failures' is high, this means a lot
      # of bots are reaping tasks like crazy, which means there is a good flow
      # of tasks going on. On the other hand, skipping too much is useless. So
      # it should have an initial bump but then slow down on skipping.
      skip_budget = min(int(round(random.gammavariate(3, 1))), 30)

  if failures:
    logging.info(
        'Chose nothing (failed %d, skipped %d)', failures, skipped)
  return None, None
def bot_reap_task(dimensions, bot_id, bot_version):
  """Reaps a TaskToRun if one is available.

  The process is to find a TaskToRun where its .queue_number is set, then
  create a TaskRunResult for it.

  Returns:
    tuple of (TaskRequest, TaskRunResult) for the task that was reaped.
    The TaskToRun involved is not returned.
  """
  assert bot_id
  candidates = task_to_run.yield_next_available_task_to_dispatch(dimensions)
  # When a large number of bots try to reap hundreds of tasks simultaneously,
  # they'll constantly fail to call reap_task_to_run() as they'll get preempted
  # by other bots. So randomly jump farther in the queue when the number of
  # failures is too large.
  failed_reaps = 0
  remaining_skips = 0
  total_skipped = 0
  for request, to_run in candidates:
    if remaining_skips > 0:
      # Still burning through the random jump chosen below.
      remaining_skips -= 1
      total_skipped += 1
      continue

    run_result = _reap_task(
        to_run.key, request, bot_id, bot_version, dimensions)
    if not run_result:
      failed_reaps += 1
      # Every 3 failures starting on the very first one, jump randomly ahead
      # of the pack. This reduces the contention where hundreds of bots fight
      # for exactly the same task while there's many ready to be run waiting
      # in the queue.
      if failed_reaps % 3 == 1:
        # TODO(maruel): Choose curve that makes the most sense. The tricky
        # part is finding a good heuristic to guess the load without much
        # information available in this content. When 'failures' is high, this
        # means a lot of bots are reaping tasks like crazy, which means there
        # is a good flow of tasks going on. On the other hand, skipping too
        # much is useless. So it should have an initial bump but then slow
        # down on skipping.
        remaining_skips = min(int(round(random.gammavariate(3, 1))), 30)
      continue

    # Try to optimize these values but do not add as formal stats (yet).
    logging.info("failed %d, skipped %d", failed_reaps, total_skipped)
    pending_time = run_result.started_ts - request.created_ts
    stats.add_run_entry(
        "run_started",
        run_result.key,
        bot_id=bot_id,
        dimensions=request.properties.dimensions,
        pending_ms=_secs_to_ms(pending_time.total_seconds()),
        user=request.user,
    )
    return request, run_result

  if failed_reaps:
    logging.info(
        "Chose nothing (failed %d, skipped %d)", failed_reaps, total_skipped)
  return None, None
def _yield_next_available_task_to_dispatch(bot_dimensions):
  """Registers a fake bot matching |bot_dimensions|, then returns the pending
  TaskToRun entities it could dispatch, each converted via to_dict().
  """
  fake_bot_id = bot_dimensions[u'id'][0]
  # Register the bot so the task queues are set up for its dimensions.
  bot_management.bot_event(
      'bot_connected', fake_bot_id, '1.2.3.4', 'joe@localhost', bot_dimensions,
      {'state': 'real'}, '1234', False, None, None, None)
  root_key = bot_management.get_root_key(fake_bot_id)
  task_queues.assert_bot_async(root_key, bot_dimensions).get_result()
  dicts = []
  for _request, to_run in task_to_run.yield_next_available_task_to_dispatch(
      bot_dimensions):
    dicts.append(to_run.to_dict())
  return dicts
def bot_reap_task(bot_dimensions, bot_version, deadline):
  """Reaps a TaskToRun if one is available.

  The process is to find a TaskToRun where its .queue_number is set, then
  create a TaskRunResult for it.

  Expired tasks encountered while scanning are expired (or reenqueued, when
  expiring a TaskSlice enqueues the next one) as a side effect.

  Returns:
    tuple of (TaskRequest, SecretBytes, TaskRunResult) for the task that was
    reaped. The TaskToRun involved is not returned.
  """
  start = time.time()
  bot_id = bot_dimensions[u'id'][0]
  # Counters reported in the finally clause for observability.
  iterated = 0
  reenqueued = 0
  expired = 0
  failures = 0
  stale_index = 0
  try:
    q = task_to_run.yield_next_available_task_to_dispatch(
        bot_dimensions, deadline)
    for request, to_run in q:
      iterated += 1
      if request.expiration_ts < utils.utcnow():
        # The task expired while sitting in the queue; clean it up instead of
        # reaping it.
        s, r = _expire_task(to_run.key, request)
        if r:
          # Expiring a TaskToRun for TaskSlice may reenqueue a new TaskToRun.
          # It'll be processed accordingly but not handled here.
          reenqueued += 1
        elif s:
          expired += 1
        else:
          stale_index += 1
        continue
      run_result, secret_bytes = _reap_task(
          bot_dimensions, bot_version, to_run.key, request)
      if not run_result:
        # Another bot won the race for this task; keep scanning.
        failures += 1
        # Sad thing is that there is no way here to know the try number.
        logging.info(
            'failed to reap: %s0',
            task_pack.pack_request_key(to_run.request_key))
        continue
      logging.info('Reaped: %s', run_result.task_id)
      return request, secret_bytes, run_result
    return None, None, None
  finally:
    # Fixed typo: the last counter is 'failures', not 'failured'.
    logging.debug(
        'bot_reap_task(%s) in %.3fs: %d iterated, %d reenqueued, %d expired, '
        '%d stale_index, %d failures',
        bot_id, time.time()-start, iterated, reenqueued, expired, stale_index,
        failures)
def _yield_next_available_task_to_dispatch(bot_dimensions):
  """Returns the pending TaskToRun items matching |bot_dimensions| as dicts.

  The TaskRequest paired with each TaskToRun is ignored.
  """
  converted = []
  for _request, to_run in task_to_run.yield_next_available_task_to_dispatch(
      bot_dimensions):
    converted.append(_task_to_run_to_dict(to_run))
  return converted