Example #1
0
def cancel_task(result_summary_key):
    """Tries to cancel the task identified by `result_summary_key`.

    The TaskRequest is fetched from the key derived from `result_summary_key`,
    then the TaskToRun and TaskResultSummary are updated in one transaction.

    Returns:
      tuple(ok, was_running) as produced by the transaction: `ok` is True when
      the entities were updated, `was_running` is True when the summary was in
      state RUNNING when read. If the transaction raises
      datastore_utils.CommitError, an error message string is returned instead.
    """
    # NOTE(review): the CommitError path returns a str while the normal path
    # returns a 2-tuple; callers have to cope with both shapes.
    request_key = task_pack.result_summary_key_to_request_key(result_summary_key)
    request = request_key.get()
    to_run_key = task_to_run.request_to_task_to_run_key(request)
    now = utils.utcnow()

    def run():
        """Marks both entities as canceled when the summary allows it."""
        to_run, summary = ndb.get_multi((to_run_key, result_summary_key))
        was_running = summary.state == task_result.State.RUNNING
        if summary.can_be_canceled:
            to_run.queue_number = None
            summary.state = task_result.State.CANCELED
            summary.abandoned_ts = now
            summary.modified_ts = now

            # Start the writes, enqueue the notification, then wait for the
            # writes to complete.
            pending = ndb.put_multi_async((to_run, summary))
            _maybe_pubsub_notify_via_tq(summary, request)
            for future in pending:
                future.check_success()
            return True, was_running
        return False, was_running

    try:
        ok, was_running = datastore_utils.transaction(run)
    except datastore_utils.CommitError as e:
        return "Failed killing task %s: %s" % (
            task_pack.pack_result_summary_key(result_summary_key),
            e,
        )
    # Record a negative entry in the lookup cache for this TaskToRun.
    task_to_run.set_lookup_cache(to_run_key, False)
    # TODO(maruel): Add stats.
    return ok, was_running
Example #2
0
 def test_request_to_task_to_run_key(self):
     """Checks the TaskToRun key computed for a freshly made request."""
     # Replace random.getrandbits with a stub that always returns 0x88.
     self.mock(random, "getrandbits", lambda _: 0x88)
     req = task_request.make_request(_gen_request_data())
     want = ndb.Key("TaskRequest", 0x7E296460F77FF77E, "TaskToRun", 2471203225)
     got = task_to_run.request_to_task_to_run_key(req)
     self.assertEqual(want, got)
 def test_request_to_task_to_run_key(self):
   """Checks the TaskToRun key computed for a freshly made request."""
   # Replace random.getrandbits with a stub that always returns 0x88.
   self.mock(random, 'getrandbits', lambda _: 0x88)
   req = task_request.make_request(_gen_request(), True)
   # The hash value must be constant for the same input.
   want = ndb.Key('TaskRequest', 0x7e296460f77ff77e, 'TaskToRun', 3420117132)
   got = task_to_run.request_to_task_to_run_key(req)
   self.assertEqual(want, got)
Example #4
0
 def test_request_to_task_to_run_key(self):
   """Checks the TaskToRun key computed for a request made through mkreq."""
   # Replace random.getrandbits with a stub that always returns 0x88.
   self.mock(random, 'getrandbits', lambda _: 0x88)
   req = self.mkreq(1, _gen_request())
   # The hash value must be constant for the same input.
   want = ndb.Key('TaskRequest', 0x7e296460f77ff77e, 'TaskToRun', 1)
   got = task_to_run.request_to_task_to_run_key(req, 1, 0)
   self.assertEqual(want, got)
Example #5
0
def cancel_task(result_summary_key):
  """Tries to cancel the task identified by `result_summary_key`.

  Returns:
    tuple(ok, was_running) as produced by the transaction: `ok` is True when
    the entities were updated, `was_running` is True when the summary was in
    state RUNNING when read. If the transaction raises
    datastore_utils.CommitError, an error message string is returned instead.
  """
  # NOTE(review): the CommitError path returns a str while the normal path
  # returns a 2-tuple; callers have to cope with both shapes.
  request = task_pack.result_summary_key_to_request_key(
      result_summary_key).get()
  to_run_key = task_to_run.request_to_task_to_run_key(request)
  now = utils.utcnow()

  def run():
    """Marks both entities as canceled when the summary allows it."""
    to_run, summary = ndb.get_multi((to_run_key, result_summary_key))
    was_running = summary.state == task_result.State.RUNNING
    if summary.can_be_canceled:
      to_run.queue_number = None
      summary.state = task_result.State.CANCELED
      summary.abandoned_ts = now
      summary.modified_ts = now
      ndb.put_multi((to_run, summary))
      return True, was_running
    return False, was_running

  try:
    ok, was_running = datastore_utils.transaction(run)
  except datastore_utils.CommitError as e:
    return 'Failed killing task %s: %s' % (
        task_pack.pack_result_summary_key(result_summary_key), e)
  # Record a negative entry in the lookup cache for this TaskToRun.
  task_to_run.set_lookup_cache(to_run_key, False)
  # TODO(maruel): Add stats.
  return ok, was_running
Example #6
0
 def test_validate_to_run_key(self):
     """A key derived from a request validates; a hand-built one raises."""
     req = task_request.make_request(_gen_request_data())
     good_key = task_to_run.request_to_task_to_run_key(req)
     # Must not raise.
     task_to_run.validate_to_run_key(good_key)
     with self.assertRaises(ValueError):
         bad_key = ndb.Key('TaskRequest', 1, 'TaskToRun', 1)
         task_to_run.validate_to_run_key(bad_key)
Example #7
0
 def test_request_to_task_to_run_key(self):
     """Checks the TaskToRun key computed for a freshly made request."""
     # Replace random.getrandbits with a stub that always returns 0x88.
     self.mock(random, 'getrandbits', lambda _: 0x88)
     req = task_request.make_request(_gen_request_data())
     want = ndb.Key(
         'TaskRequest', 0x7e296460f77ff77e, 'TaskToRun', 2471203225)
     got = task_to_run.request_to_task_to_run_key(req)
     self.assertEqual(want, got)
Example #8
0
 def test_request_to_task_to_run_key(self):
     """Checks the TaskToRun key computed for a freshly made request."""
     # Replace random.getrandbits with a stub that always returns 0x88.
     self.mock(random, 'getrandbits', lambda _: 0x88)
     req = task_request.make_request(_gen_request(), True)
     # The hash value must be constant for the same input.
     want = ndb.Key(
         'TaskRequest', 0x7e296460f77ff77e, 'TaskToRun', 3420117132)
     got = task_to_run.request_to_task_to_run_key(req)
     self.assertEqual(want, got)
Example #9
0
  def run():
    """Cancels the task, or flags a running task for killing.

    Does up to 2 DB GETs (the result summary, then either the TaskToRun or the
    TaskRunResult), one set_lookup_cache() call in the pending case, 2 DB PUTs
    and one _maybe_pubsub_notify_via_tq() call.

    Returns:
      tuple(ok, was_running). `ok` is False when the summary cannot be canceled
      or when the task is running and `kill_running` is false. `was_running` is
      True when the summary was in state RUNNING when read.
    """
    # Need to get the current try number to know which TaskToRun to fetch.
    result_summary = result_key.get()
    was_running = result_summary.state == task_result.State.RUNNING
    if not result_summary.can_be_canceled:
      return False, was_running

    entities = [result_summary]
    if not was_running:
      # PENDING.
      result_summary.state = task_result.State.CANCELED
      to_run_key = task_to_run.request_to_task_to_run_key(
          request,
          result_summary.try_number or 1,
          result_summary.current_task_slice or 0)
      to_run_future = to_run_key.get_async()

      # Add it to the negative cache while the GET is in flight.
      task_to_run.set_lookup_cache(to_run_key, False)

      to_run = to_run_future.get_result()
      entities.append(to_run)
      to_run.queue_number = None
    else:
      if not kill_running:
        # Deny canceling a task that started.
        return False, was_running
      # RUNNING.
      run_result = result_summary.run_result_key.get()
      # Do not change state to KILLED yet. Instead, use a 2 phase commit:
      # - set killing to True
      # - on next bot report, tell it to kill the task
      # - once the bot reports the task as terminated, set state to KILLED
      run_result.killing = True
      run_result.abandoned_ts = now
      run_result.modified_ts = now
      # Queue run_result exactly once; it used to be appended twice, which
      # handed the same entity to put_multi_async() two times.
      entities.append(run_result)
    result_summary.abandoned_ts = now
    result_summary.modified_ts = now

    # Start the writes, enqueue the notification, then wait for the writes.
    futures = ndb.put_multi_async(entities)
    _maybe_pubsub_notify_via_tq(result_summary, request)
    for f in futures:
      f.check_success()
    return True, was_running
Example #10
0
def cancel_task(request, result_key):
    """Tries to cancel a task.

    Ensures that the associated TaskToRun is canceled and updates the
    TaskResultSummary accordingly. `result_key` may be a TaskRunResult key, in
    which case it is first converted to its TaskResultSummary key.

    Warning: ACL check must have been done before.

    Returns:
      tuple(ok, was_running) as produced by the transaction, or an error
      message string if the transaction raises datastore_utils.CommitError.
    """
    # NOTE(review): the CommitError path returns a str while the normal path
    # returns a 2-tuple; callers have to cope with both shapes.
    to_run_key = task_to_run.request_to_task_to_run_key(request)
    summary_key = result_key
    if summary_key.kind() == 'TaskRunResult':
        summary_key = task_pack.run_result_key_to_result_summary_key(
            summary_key)
    now = utils.utcnow()

    def run():
        """Marks both entities as canceled when the summary allows it."""
        to_run, summary = ndb.get_multi((to_run_key, summary_key))
        was_running = summary.state == task_result.State.RUNNING
        if summary.can_be_canceled:
            to_run.queue_number = None
            summary.state = task_result.State.CANCELED
            summary.abandoned_ts = now
            summary.modified_ts = now

            # Start the writes, enqueue the notification, then wait for the
            # writes to complete.
            pending = ndb.put_multi_async((to_run, summary))
            _maybe_pubsub_notify_via_tq(summary, request)
            for future in pending:
                future.check_success()
            return True, was_running
        return False, was_running

    try:
        ok, was_running = datastore_utils.transaction(run)
    except datastore_utils.CommitError as e:
        return 'Failed killing task %s: %s' % (
            task_pack.pack_result_summary_key(summary_key), e)
    # Record a negative entry in the lookup cache for this TaskToRun.
    task_to_run.set_lookup_cache(to_run_key, False)
    # TODO(maruel): Add stats.
    return ok, was_running
Example #11
0
 def test_task_to_run_key_to_request_key(self):
   """Converting a request to a TaskToRun key and back yields request.key."""
   req = self.mkreq(1, _gen_request())
   to_run_key = task_to_run.request_to_task_to_run_key(req, 1, 0)
   round_tripped = task_to_run.task_to_run_key_to_request_key(to_run_key)
   self.assertEqual(req.key, round_tripped)
Example #12
0
def _handle_dead_bot(run_result_key):
    """Deals with a TaskRunResult whose bot stopped showing signs of life.

    The entities are updated inside a single transaction. Depending on what is
    read there, the task is retried, marked BOT_DIED for good, or left alone.

    Returns:
      True if the task was retried, False if the task was killed, None in every
      other case: the run result was not RUNNING, the run result was not the
      summary's current try (only the run result is marked BOT_DIED then), or
      the transaction raised datastore_utils.CommitError.
    """
    summary_key = task_pack.run_result_key_to_result_summary_key(run_result_key)
    request_key = task_pack.result_summary_key_to_request_key(summary_key)
    # Start fetching the TaskRequest, do the local work, then wait for it.
    pending_request = request_key.get_async()
    now = utils.utcnow()
    server_version = utils.get_app_version()
    packed = task_pack.pack_run_result_key(run_result_key)
    request = pending_request.get_result()
    to_run_key = task_to_run.request_to_task_to_run_key(request)

    def run():
        """Returns tuple(task_is_retried or None, bot_id)."""
        # One multi-GET up front, one multi-PUT at the end.
        run_result, summary, to_run = ndb.get_multi(
            (run_result_key, summary_key, to_run_key))
        if run_result.state != task_result.State.RUNNING:
            # It was updated already or not updating last. Likely DB index was
            # stale.
            return None, run_result.bot_id

        run_result.signal_server_version(server_version)
        run_result.modified_ts = now
        # Every remaining path records this run as lost with its bot.
        run_result.state = task_result.State.BOT_DIED
        run_result.internal_failure = True
        run_result.abandoned_ts = now

        notify = False
        if summary.try_number != run_result.try_number:
            # Not the summary's current run_result: write it alone and leave
            # the summary untouched.
            retried = None
            to_put = (run_result,)
        elif summary.try_number == 1 and now < request.expiration_ts:
            # First try and not expired: requeue. Data is not synced from
            # run_result to the summary since the task is being retried.
            to_run.queue_number = task_to_run.gen_queue_number(request)
            summary.reset_to_pending()
            summary.modified_ts = now
            retried = True
            to_put = (run_result, summary, to_run)
        else:
            # More than one try, or the task expired in the meantime: give up.
            summary.set_from_run_result(run_result, request)
            notify = True
            retried = False
            to_put = (run_result, summary)

        pending = ndb.put_multi_async(to_put)
        if notify:
            _maybe_pubsub_notify_via_tq(summary, request)
        for future in pending:
            future.check_success()

        return retried, run_result.bot_id

    try:
        task_is_retried, bot_id = datastore_utils.transaction(run)
    except datastore_utils.CommitError:
        task_is_retried, bot_id = None, None

    if task_is_retried is None:
        logging.info("Ignored %s", packed)
        return task_is_retried

    task_to_run.set_lookup_cache(to_run_key, task_is_retried)
    if task_is_retried:
        logging.info("Retried %s", packed)
    else:
        # NOTE(review): bot_id comes from run_result.bot_id and is indexed
        # with [0] here; confirm whether it is a sequence or a plain string.
        stats.add_run_entry(
            "run_bot_died",
            run_result_key,
            bot_id=bot_id[0],
            dimensions=request.properties.dimensions,
            user=request.user,
        )
    return task_is_retried
Example #13
0
def _handle_dead_bot(run_result_key):
    """Deals with a TaskRunResult whose bot stopped showing signs of life.

    The entities are updated inside a single transaction. Depending on what is
    read there, the task is retried, marked BOT_DIED for good, or left alone.

    Returns:
      True if the task was retried, False if the task was killed, None in every
      other case: the run result was not RUNNING, it was modified within
      task_result.BOT_PING_TOLERANCE, it was not the summary's current try
      (only the run result is marked BOT_DIED then), or the transaction raised
      datastore_utils.CommitError.
    """
    summary_key = task_pack.run_result_key_to_result_summary_key(run_result_key)
    request_key = task_pack.result_summary_key_to_request_key(summary_key)
    # Start fetching the TaskRequest, do the local work, then wait for it.
    pending_request = request_key.get_async()
    now = utils.utcnow()
    server_version = utils.get_app_version()
    packed = task_pack.pack_run_result_key(run_result_key)
    request = pending_request.get_result()
    to_run_key = task_to_run.request_to_task_to_run_key(request)

    def run():
        """Returns tuple(task_is_retried or None, bot_id)."""
        # One multi-GET up front, one multi-PUT at the end.
        run_result, summary, to_run = ndb.get_multi(
            (run_result_key, summary_key, to_run_key))
        if run_result.state != task_result.State.RUNNING:
            # It was updated already or not updating last. Likely DB index was
            # stale.
            return None, run_result.bot_id
        if run_result.modified_ts > now - task_result.BOT_PING_TOLERANCE:
            # Modified recently enough: the query index IS stale.
            return None, run_result.bot_id

        run_result.signal_server_version(server_version)
        run_result.modified_ts = now
        # Every remaining path records this run as lost with its bot.
        run_result.state = task_result.State.BOT_DIED
        run_result.internal_failure = True
        run_result.abandoned_ts = now

        notify = False
        if summary.try_number != run_result.try_number:
            # Not the summary's current run_result: write it alone and leave
            # the summary untouched.
            retried = None
            to_put = (run_result, )
        elif summary.try_number == 1 and now < request.expiration_ts:
            # First try and not expired: requeue. Data is not synced from
            # run_result to the summary since the task is being retried.
            to_run.queue_number = task_to_run.gen_queue_number(request)
            summary.reset_to_pending()
            summary.modified_ts = now
            retried = True
            to_put = (run_result, summary, to_run)
        else:
            # More than one try, or the task expired in the meantime: give up.
            summary.set_from_run_result(run_result, request)
            notify = True
            retried = False
            to_put = (run_result, summary)

        pending = ndb.put_multi_async(to_put)
        if notify:
            _maybe_pubsub_notify_via_tq(summary, request)
        for future in pending:
            future.check_success()

        return retried, run_result.bot_id

    try:
        task_is_retried, bot_id = datastore_utils.transaction(run)
    except datastore_utils.CommitError:
        task_is_retried, bot_id = None, None

    if task_is_retried is None:
        logging.info('Ignored %s', packed)
        return task_is_retried

    task_to_run.set_lookup_cache(to_run_key, task_is_retried)
    if task_is_retried:
        logging.info('Retried %s', packed)
    else:
        # NOTE(review): bot_id comes from run_result.bot_id and is indexed
        # with [0] here; confirm whether it is a sequence or a plain string.
        stats.add_run_entry(
            'run_bot_died',
            run_result_key,
            bot_id=bot_id[0],
            dimensions=request.properties.dimensions,
            user=request.user)
    return task_is_retried
 def test_validate_to_run_key(self):
   """A key derived from a request validates; a hand-built one raises."""
   req = task_request.make_request(_gen_request(), True)
   good_key = task_to_run.request_to_task_to_run_key(req)
   # Must not raise.
   task_to_run.validate_to_run_key(good_key)
   with self.assertRaises(ValueError):
     bad_key = ndb.Key('TaskRequest', 1, 'TaskToRun', 1)
     task_to_run.validate_to_run_key(bad_key)
 def test_task_to_run_key_to_request_key(self):
   """Converting a request to a TaskToRun key and back yields request.key."""
   req = task_request.make_request(_gen_request(), True)
   to_run_key = task_to_run.request_to_task_to_run_key(req)
   round_tripped = task_to_run.task_to_run_key_to_request_key(to_run_key)
   self.assertEqual(req.key, round_tripped)
Example #16
0
 def test_validate_to_run_key(self):
     """A key derived from a request validates; a hand-built one raises."""
     req = task_request.make_request(_gen_request_data())
     good_key = task_to_run.request_to_task_to_run_key(req)
     # Must not raise.
     task_to_run.validate_to_run_key(good_key)
     with self.assertRaises(ValueError):
         bad_key = ndb.Key("TaskRequest", 1, "TaskToRun", 1)
         task_to_run.validate_to_run_key(bad_key)
Example #17
0
 def test_task_to_run_key_to_request_key(self):
     """Converting a request to a TaskToRun key and back yields request.key."""
     req = task_request.make_request(_gen_request_data())
     to_run_key = task_to_run.request_to_task_to_run_key(req)
     round_tripped = task_to_run.task_to_run_key_to_request_key(to_run_key)
     self.assertEqual(req.key, round_tripped)