def cron_handle_bot_died(host):
  """Processes every TaskRunResult whose bot stopped sending updates.

  Each key yielded by task_result.yield_run_result_keys_with_dead_bot() is
  passed to _handle_dead_bot(). Per the original contract, a task on its first
  try is retried and any other task is canceled.

  The outcome is tallied from the value returned by _handle_dead_bot():
  - True: counted as retried.
  - False: counted as killed; the packed run result key is recorded.
  - anything else: counted as ignored.

  The summary is logged from a `finally` clause, so it is emitted even when the
  iteration raises part way through.

  Args:
    host: value interpolated in front of '/user/task/<id>' when logging the
        killed tasks.

  Returns:
    Tuple (list of packed killed task ids, retried count, ignored count).
  """
  killed_ids = []
  retry_count = 0
  ignore_count = 0
  try:
    dead_keys = task_result.yield_run_result_keys_with_dead_bot()
    for key in dead_keys:
      outcome = _handle_dead_bot(key)
      if outcome is True:
        retry_count += 1
        continue
      if outcome is False:
        killed_ids.append(task_pack.pack_run_result_key(key))
        continue
      # Neither True nor False: nothing was done for this run result.
      ignore_count += 1
  finally:
    if killed_ids:
      task_lines = '\n'.join(
          ' %s/user/task/%s' % (host, task_id) for task_id in killed_ids)
      logging.error('BOT_DIED!\n%d tasks:\n%s', len(killed_ids), task_lines)
    # TODO(maruel): Use stats_framework.
    logging.info(
        'Killed %d; retried %d; ignored: %d',
        len(killed_ids), retry_count, ignore_count)
  return killed_ids, retry_count, ignore_count
def cron_handle_bot_died(host):
    """Processes every TaskRunResult whose bot stopped sending updates.

    Each key yielded by task_result.yield_run_result_keys_with_dead_bot() is
    passed to _handle_dead_bot(). Per the original contract, a task on its
    first try is retried and any other task is canceled.

    The outcome is tallied from the value returned by _handle_dead_bot():
    - True: counted as retried.
    - False: counted as killed; the packed run result key is recorded.
    - anything else: counted as ignored.

    The summary is logged from a `finally` clause, so it is emitted even when
    the iteration raises part way through.

    Args:
      host: hostname used to build the https task URLs listed in the error log.

    Returns:
      Tuple (list of packed killed task ids, retried count, ignored count).
    """
    killed_ids = []
    retry_count = 0
    ignore_count = 0
    try:
        dead_keys = task_result.yield_run_result_keys_with_dead_bot()
        for key in dead_keys:
            outcome = _handle_dead_bot(key)
            if outcome is True:
                retry_count += 1
                continue
            if outcome is False:
                killed_ids.append(task_pack.pack_run_result_key(key))
                continue
            # Neither True nor False: nothing was done for this run result.
            ignore_count += 1
    finally:
        if killed_ids:
            task_urls = "\n".join(
                " https://%s/user/task/%s" % (host, task_id)
                for task_id in killed_ids
            )
            logging.error("BOT_DIED!\n%d tasks:\n%s", len(killed_ids), task_urls)
        # TODO(maruel): Use stats_framework.
        logging.info(
            "Killed %d; retried %d; ignored: %d",
            len(killed_ids),
            retry_count,
            ignore_count,
        )
    return killed_ids, retry_count, ignore_count
def test_yield_run_result_keys_with_dead_bot(self):
    """Checks when a stored run result starts being yielded as a dead bot.

    A result summary and a run result are stored, then the clock is mocked:
    at self.now + BOT_PING_TOLERANCE nothing is yielded, while the second
    mock_now() call (extra argument 1) makes the run result key appear.
    """
    req = task_request.make_request(_gen_request(), True)

    # Store the summary on its own first.
    summary = task_result.new_result_summary(req)
    summary.modified_ts = utils.utcnow()
    ndb.transaction(summary.put)

    # Create the run result and mirror it into the summary.
    run = task_result.new_run_result(req, 1, "localhost", "abc", {})
    run.completed_ts = utils.utcnow()
    run.modified_ts = utils.utcnow()
    summary.set_from_run_result(run, req)

    def store_both():
        # Persist both entities within a single transaction.
        return ndb.put_multi((run, summary))

    ndb.transaction(store_both)

    # At the tolerance boundary, no key is expected.
    self.mock_now(self.now + task_result.BOT_PING_TOLERANCE)
    expected = []
    found = list(task_result.yield_run_result_keys_with_dead_bot())
    self.assertEqual(expected, found)

    # With the additional offset argument, the run result key is expected.
    self.mock_now(self.now + task_result.BOT_PING_TOLERANCE, 1)
    expected = [run.key]
    found = list(task_result.yield_run_result_keys_with_dead_bot())
    self.assertEqual(expected, found)
def cron_handle_bot_died(host):
  """Processes every TaskRunResult whose bot stopped sending updates.

  Each key yielded by task_result.yield_run_result_keys_with_dead_bot() is
  passed to _handle_dead_bot(). Per the original contract, a task on its first
  try is retried and any other task is canceled.

  The outcome is tallied from the value returned by _handle_dead_bot(): True
  counts as retried, False counts as killed, anything else counts as ignored.
  The summary is logged from a `finally` clause, so it is emitted even when the
  iteration raises part way through.

  Args:
    host: value interpolated in front of '/user/task/<id>' when logging the
        killed tasks.

  Returns:
    - task IDs killed
    - number of task retried
    - number of task ignored
  """
  killed_ids = []
  retry_count = 0
  ignore_count = 0
  try:
    dead_keys = task_result.yield_run_result_keys_with_dead_bot()
    for key in dead_keys:
      outcome = _handle_dead_bot(key)
      if outcome is True:
        retry_count += 1
        continue
      if outcome is False:
        killed_ids.append(task_pack.pack_run_result_key(key))
        continue
      # Neither True nor False: nothing was done for this run result.
      ignore_count += 1
  finally:
    if killed_ids:
      task_lines = '\n'.join(
          ' %s/user/task/%s' % (host, task_id) for task_id in killed_ids)
      logging.error('BOT_DIED!\n%d tasks:\n%s', len(killed_ids), task_lines)
    logging.info(
        'Killed %d; retried %d; ignored: %d',
        len(killed_ids), retry_count, ignore_count)
  # These are returned primarily for unit testing verification.
  return killed_ids, retry_count, ignore_count
def test_yield_run_result_keys_with_dead_bot(self):
  """Checks when a stored run result starts being yielded as a dead bot.

  A result summary and a run result are stored, then the clock is mocked:
  at self.now + BOT_PING_TOLERANCE nothing is yielded, while the second
  mock_now() call (extra argument 1) makes the run result key appear.
  """
  req = mkreq(_gen_request())

  # Store the summary on its own first.
  summary = task_result.new_result_summary(req)
  summary.modified_ts = utils.utcnow()
  ndb.transaction(summary.put)

  # Create the run result and mirror it into the summary.
  run = task_result.new_run_result(req, 1, 'localhost', 'abc', {})
  run.completed_ts = utils.utcnow()
  run.modified_ts = utils.utcnow()
  summary.set_from_run_result(run, req)

  def store_both():
    # Persist both entities within a single transaction.
    return ndb.put_multi((run, summary))

  ndb.transaction(store_both)

  # At the tolerance boundary, no key is expected.
  self.mock_now(self.now + task_result.BOT_PING_TOLERANCE)
  expected = []
  found = list(task_result.yield_run_result_keys_with_dead_bot())
  self.assertEqual(expected, found)

  # With the additional offset argument, the run result key is expected.
  self.mock_now(self.now + task_result.BOT_PING_TOLERANCE, 1)
  expected = [run.key]
  found = list(task_result.yield_run_result_keys_with_dead_bot())
  self.assertEqual(expected, found)
def cron_handle_bot_died():
  """Processes every TaskRunResult whose bot stopped sending updates.

  Each key yielded by task_result.yield_run_result_keys_with_dead_bot() is
  passed to _handle_dead_bot(). Per the original contract, a task on its first
  try is retried and any other task is canceled.

  The outcome is tallied from the value returned by _handle_dead_bot(): True
  counts as retried, False counts as killed, anything else counts as ignored.
  The tally is logged from a `finally` clause, so it is emitted even when the
  iteration raises part way through.

  Returns:
    Tuple (killed count, retried count, ignored count).
  """
  kill_count = 0
  retry_count = 0
  ignore_count = 0
  try:
    dead_keys = task_result.yield_run_result_keys_with_dead_bot()
    for key in dead_keys:
      outcome = _handle_dead_bot(key)
      if outcome is True:
        retry_count += 1
        continue
      if outcome is False:
        kill_count += 1
        continue
      # Neither True nor False: nothing was done for this run result.
      ignore_count += 1
  finally:
    # TODO(maruel): Use stats_framework.
    logging.info(
        'Killed %d; retried %d; ignored: %d',
        kill_count, retry_count, ignore_count)
  return kill_count, retry_count, ignore_count