Esempio n. 1
0
def cron_handle_bot_died(host):
    """Retries or aborts stale TaskRunResult whose bot stopped sending updates.

    A task that was at its first try is retried; any other task is canceled.

    Each key yielded by task_result.yield_run_result_keys_with_dead_bot() is
    passed to _handle_dead_bot(). An outcome that is exactly True counts as a
    retry, exactly False counts as a kill, and anything else counts as ignored.

    Args:
      host: prefix used when building the task links written to the error log.

    Returns:
      Tuple (killed, retried, ignored): the list of packed run result keys that
      were killed, the number of retries and the number of ignored keys.
    """
    killed = []
    retried = 0
    ignored = 0
    try:
        dead_keys = task_result.yield_run_result_keys_with_dead_bot()
        for key in dead_keys:
            outcome = _handle_dead_bot(key)
            if outcome is True:
                retried += 1
                continue
            if outcome is False:
                killed.append(task_pack.pack_run_result_key(key))
                continue
            ignored += 1
    finally:
        # Runs even when the loop raised, so partial progress is still logged.
        if killed:
            task_lines = [
                '  %s/user/task/%s' % (host, task_id) for task_id in killed
            ]
            logging.error('BOT_DIED!\n%d tasks:\n%s', len(killed),
                          '\n'.join(task_lines))
        # TODO(maruel): Use stats_framework.
        logging.info('Killed %d; retried %d; ignored: %d', len(killed),
                     retried, ignored)
    return killed, retried, ignored
Esempio n. 2
0
def cron_handle_bot_died(host):
    """Retries or aborts stale TaskRunResult whose bot stopped sending updates.

    A task that was at its first try is retried; any other task is canceled.

    The outcome of _handle_dead_bot() for each dead-bot run result key decides
    the bucket: exactly True is a retry, exactly False is a kill, and any other
    value is ignored.

    Args:
      host: host name inserted into the https task links written to the error
        log.

    Returns:
      Tuple (killed, retried, ignored): the list of packed run result keys that
      were killed, the number of retries and the number of ignored keys.
    """
    killed_ids = []
    retry_count = 0
    ignore_count = 0
    try:
        for dead_key in task_result.yield_run_result_keys_with_dead_bot():
            outcome = _handle_dead_bot(dead_key)
            if outcome is False:
                killed_ids.append(task_pack.pack_run_result_key(dead_key))
            elif outcome is True:
                retry_count += 1
            else:
                ignore_count += 1
    finally:
        # Executed on both the normal and the exceptional path so the tallies
        # gathered so far are always reported.
        kill_count = len(killed_ids)
        if killed_ids:
            links = "\n".join(
                "  https://%s/user/task/%s" % (host, task_id)
                for task_id in killed_ids
            )
            logging.error("BOT_DIED!\n%d tasks:\n%s", kill_count, links)
        # TODO(maruel): Use stats_framework.
        logging.info(
            "Killed %d; retried %d; ignored: %d",
            kill_count,
            retry_count,
            ignore_count,
        )
    return killed_ids, retry_count, ignore_count
Esempio n. 3
0
    def test_yield_run_result_keys_with_dead_bot(self):
        # Store a result summary for a fresh request.
        req = task_request.make_request(_gen_request(), True)
        summary = task_result.new_result_summary(req)
        summary.modified_ts = utils.utcnow()
        ndb.transaction(summary.put)

        # Create a run result with its timestamps set, mirror it into the
        # summary, then store both entities in a single transaction.
        run = task_result.new_run_result(req, 1, "localhost", "abc", {})
        run.completed_ts = utils.utcnow()
        run.modified_ts = utils.utcnow()
        summary.set_from_run_result(run, req)

        def _store_both():
            return ndb.put_multi((run, summary))

        ndb.transaction(_store_both)

        # With the clock mocked at now + BOT_PING_TOLERANCE nothing is yielded.
        self.mock_now(self.now + task_result.BOT_PING_TOLERANCE)
        found = list(task_result.yield_run_result_keys_with_dead_bot())
        self.assertEqual([], found)

        # With the extra argument 1 passed to mock_now the run result key is
        # yielded. NOTE(review): presumably a one-second offset past the
        # tolerance; confirm against mock_now.
        self.mock_now(self.now + task_result.BOT_PING_TOLERANCE, 1)
        found = list(task_result.yield_run_result_keys_with_dead_bot())
        self.assertEqual([run.key], found)
Esempio n. 4
0
def cron_handle_bot_died(host):
  """Retries or aborts stale TaskRunResult whose bot stopped sending updates.

  A task that was at its first try is retried; any other task is canceled.

  Args:
    host: prefix used when building the task links written to the error log.

  Returns:
    Tuple of three items:
    - task IDs killed (packed run result keys)
    - number of tasks retried
    - number of tasks ignored
  """
  killed_ids = []
  retry_count = 0
  skip_count = 0
  try:
    for key in task_result.yield_run_result_keys_with_dead_bot():
      outcome = _handle_dead_bot(key)
      # Identity checks on purpose: only the exact True/False singletons count
      # as a retry or a kill; every other value is tallied as ignored.
      if outcome is False:
        killed_ids.append(task_pack.pack_run_result_key(key))
      elif outcome is True:
        retry_count += 1
      else:
        skip_count += 1
  finally:
    # Log whatever was tallied, even if the loop above raised.
    kill_count = len(killed_ids)
    if killed_ids:
      task_lines = ['  %s/user/task/%s' % (host, i) for i in killed_ids]
      logging.error(
          'BOT_DIED!\n%d tasks:\n%s', kill_count, '\n'.join(task_lines))
    logging.info(
        'Killed %d; retried %d; ignored: %d',
        kill_count, retry_count, skip_count)
  # These are returned primarily for unit testing verification.
  return killed_ids, retry_count, skip_count
Esempio n. 5
0
  def test_yield_run_result_keys_with_dead_bot(self):
    # Store a result summary for a fresh request.
    req = mkreq(_gen_request())
    summary = task_result.new_result_summary(req)
    summary.modified_ts = utils.utcnow()
    ndb.transaction(summary.put)

    # Create a run result with its timestamps set, mirror it into the summary,
    # then store both entities in a single transaction.
    run = task_result.new_run_result(req, 1, 'localhost', 'abc', {})
    run.completed_ts = utils.utcnow()
    run.modified_ts = utils.utcnow()
    summary.set_from_run_result(run, req)

    def _store_both():
      return ndb.put_multi((run, summary))

    ndb.transaction(_store_both)

    # With the clock mocked at now + BOT_PING_TOLERANCE nothing is yielded.
    self.mock_now(self.now + task_result.BOT_PING_TOLERANCE)
    dead_keys = list(task_result.yield_run_result_keys_with_dead_bot())
    self.assertEqual([], dead_keys)

    # With the extra argument 1 passed to mock_now the run result key is
    # yielded. NOTE(review): presumably a one-second offset past the
    # tolerance; confirm against mock_now.
    self.mock_now(self.now + task_result.BOT_PING_TOLERANCE, 1)
    dead_keys = list(task_result.yield_run_result_keys_with_dead_bot())
    self.assertEqual([run.key], dead_keys)
Esempio n. 6
0
def cron_handle_bot_died():
  """Retries or aborts stale TaskRunResult whose bot stopped sending updates.

  A task that was at its first try is retried; any other task is canceled.

  Returns:
    Tuple (killed, retried, ignored) of counters. A key is counted as retried
    when _handle_dead_bot() returns exactly True, as killed when it returns
    exactly False, and as ignored for any other value.
  """
  killed = retried = ignored = 0
  try:
    for dead_key in task_result.yield_run_result_keys_with_dead_bot():
      outcome = _handle_dead_bot(dead_key)
      if outcome is True:
        retried += 1
        continue
      if outcome is False:
        killed += 1
        continue
      ignored += 1
  finally:
    # Emitted even when the loop raised, so partial tallies are not lost.
    # TODO(maruel): Use stats_framework.
    logging.info('Killed %d; retried %d; ignored: %d', killed, retried, ignored)
  return killed, retried, ignored
Esempio n. 7
0
def cron_handle_bot_died():
  """Retries or aborts stale TaskRunResult whose bot stopped sending updates.

  A task that was at its first try is retried; any other task is canceled.

  Returns:
    Tuple (killed, retried, ignored): how many dead-bot run result keys ended
    in each bucket, based on whether _handle_dead_bot() returned exactly
    False, exactly True, or something else.
  """
  kill_count = 0
  retry_count = 0
  skip_count = 0
  try:
    for key in task_result.yield_run_result_keys_with_dead_bot():
      verdict = _handle_dead_bot(key)
      if verdict is False:
        kill_count += 1
      elif verdict is True:
        retry_count += 1
      else:
        skip_count += 1
  finally:
    # The summary line is logged on both the normal and the exceptional path.
    # TODO(maruel): Use stats_framework.
    logging.info(
        'Killed %d; retried %d; ignored: %d',
        kill_count, retry_count, skip_count)
  return kill_count, retry_count, skip_count